rucio 37.7.0__py3-none-any.whl → 38.0.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rucio might be problematic. Click here for more details.

Files changed (122)
  1. rucio/alembicrevision.py +1 -1
  2. rucio/cli/bin_legacy/rucio.py +51 -107
  3. rucio/cli/bin_legacy/rucio_admin.py +26 -26
  4. rucio/cli/command.py +1 -0
  5. rucio/cli/did.py +2 -2
  6. rucio/cli/opendata.py +132 -0
  7. rucio/cli/replica.py +15 -5
  8. rucio/cli/rule.py +7 -2
  9. rucio/cli/scope.py +3 -2
  10. rucio/cli/utils.py +28 -4
  11. rucio/client/baseclient.py +9 -1
  12. rucio/client/client.py +2 -0
  13. rucio/client/diracclient.py +73 -12
  14. rucio/client/opendataclient.py +249 -0
  15. rucio/client/subscriptionclient.py +30 -0
  16. rucio/client/uploadclient.py +10 -13
  17. rucio/common/constants.py +4 -1
  18. rucio/common/exception.py +55 -0
  19. rucio/common/plugins.py +45 -8
  20. rucio/common/schema/generic.py +5 -3
  21. rucio/common/schema/generic_multi_vo.py +4 -2
  22. rucio/common/types.py +8 -7
  23. rucio/common/utils.py +176 -11
  24. rucio/core/dirac.py +5 -5
  25. rucio/core/opendata.py +744 -0
  26. rucio/core/rule.py +63 -8
  27. rucio/core/transfer.py +1 -1
  28. rucio/daemons/hermes/hermes.py +26 -17
  29. rucio/db/sqla/constants.py +6 -0
  30. rucio/db/sqla/migrate_repo/versions/a62db546a1f1_opendata_initial_model.py +85 -0
  31. rucio/db/sqla/models.py +67 -0
  32. rucio/db/sqla/util.py +2 -2
  33. rucio/gateway/dirac.py +1 -1
  34. rucio/gateway/opendata.py +190 -0
  35. rucio/gateway/subscription.py +5 -3
  36. rucio/rse/protocols/protocol.py +9 -5
  37. rucio/rse/translation.py +17 -6
  38. rucio/transfertool/fts3.py +1 -0
  39. rucio/transfertool/fts3_plugins.py +6 -1
  40. rucio/vcsversion.py +4 -4
  41. rucio/web/rest/flaskapi/v1/common.py +34 -14
  42. rucio/web/rest/flaskapi/v1/config.py +1 -1
  43. rucio/web/rest/flaskapi/v1/dids.py +447 -160
  44. rucio/web/rest/flaskapi/v1/heartbeats.py +1 -1
  45. rucio/web/rest/flaskapi/v1/identities.py +1 -1
  46. rucio/web/rest/flaskapi/v1/lifetime_exceptions.py +1 -1
  47. rucio/web/rest/flaskapi/v1/locks.py +1 -1
  48. rucio/web/rest/flaskapi/v1/main.py +3 -8
  49. rucio/web/rest/flaskapi/v1/meta_conventions.py +1 -16
  50. rucio/web/rest/flaskapi/v1/nongrid_traces.py +1 -1
  51. rucio/web/rest/flaskapi/v1/opendata.py +391 -0
  52. rucio/web/rest/flaskapi/v1/opendata_public.py +146 -0
  53. rucio/web/rest/flaskapi/v1/requests.py +1 -1
  54. rucio/web/rest/flaskapi/v1/rses.py +1 -1
  55. rucio/web/rest/flaskapi/v1/rules.py +1 -1
  56. rucio/web/rest/flaskapi/v1/scopes.py +1 -1
  57. rucio/web/rest/flaskapi/v1/subscriptions.py +6 -9
  58. rucio/web/rest/flaskapi/v1/traces.py +1 -1
  59. rucio/web/rest/flaskapi/v1/vos.py +1 -1
  60. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/data/rucio/etc/alembic.ini.template +1 -1
  61. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/data/rucio/etc/alembic_offline.ini.template +1 -1
  62. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/data/rucio/etc/rucio.cfg.template +2 -2
  63. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/data/rucio/etc/rucio_multi_vo.cfg.template +3 -3
  64. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/data/rucio/requirements.server.txt +6 -3
  65. rucio-38.0.0rc1.data/data/rucio/tools/reset_database.py +87 -0
  66. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/scripts/rucio +2 -1
  67. {rucio-37.7.0.dist-info → rucio-38.0.0rc1.dist-info}/METADATA +36 -36
  68. {rucio-37.7.0.dist-info → rucio-38.0.0rc1.dist-info}/RECORD +120 -114
  69. rucio/client/fileclient.py +0 -57
  70. rucio-37.7.0.data/data/rucio/tools/reset_database.py +0 -40
  71. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/data/rucio/etc/globus-config.yml.template +0 -0
  72. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/data/rucio/etc/ldap.cfg.template +0 -0
  73. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/data/rucio/etc/mail_templates/rule_approval_request.tmpl +0 -0
  74. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/data/rucio/etc/mail_templates/rule_approved_admin.tmpl +0 -0
  75. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/data/rucio/etc/mail_templates/rule_approved_user.tmpl +0 -0
  76. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/data/rucio/etc/mail_templates/rule_denied_admin.tmpl +0 -0
  77. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/data/rucio/etc/mail_templates/rule_denied_user.tmpl +0 -0
  78. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/data/rucio/etc/mail_templates/rule_ok_notification.tmpl +0 -0
  79. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/data/rucio/etc/rse-accounts.cfg.template +0 -0
  80. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/data/rucio/etc/rucio.cfg.atlas.client.template +0 -0
  81. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/data/rucio/tools/bootstrap.py +0 -0
  82. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/data/rucio/tools/merge_rucio_configs.py +0 -0
  83. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/scripts/rucio-abacus-account +0 -0
  84. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/scripts/rucio-abacus-collection-replica +0 -0
  85. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/scripts/rucio-abacus-rse +0 -0
  86. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/scripts/rucio-admin +0 -0
  87. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/scripts/rucio-atropos +0 -0
  88. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/scripts/rucio-auditor +0 -0
  89. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/scripts/rucio-automatix +0 -0
  90. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/scripts/rucio-bb8 +0 -0
  91. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/scripts/rucio-cache-client +0 -0
  92. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/scripts/rucio-cache-consumer +0 -0
  93. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/scripts/rucio-conveyor-finisher +0 -0
  94. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/scripts/rucio-conveyor-poller +0 -0
  95. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/scripts/rucio-conveyor-preparer +0 -0
  96. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/scripts/rucio-conveyor-receiver +0 -0
  97. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/scripts/rucio-conveyor-stager +0 -0
  98. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/scripts/rucio-conveyor-submitter +0 -0
  99. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/scripts/rucio-conveyor-throttler +0 -0
  100. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/scripts/rucio-dark-reaper +0 -0
  101. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/scripts/rucio-dumper +0 -0
  102. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/scripts/rucio-follower +0 -0
  103. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/scripts/rucio-hermes +0 -0
  104. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/scripts/rucio-judge-cleaner +0 -0
  105. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/scripts/rucio-judge-evaluator +0 -0
  106. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/scripts/rucio-judge-injector +0 -0
  107. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/scripts/rucio-judge-repairer +0 -0
  108. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/scripts/rucio-kronos +0 -0
  109. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/scripts/rucio-minos +0 -0
  110. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/scripts/rucio-minos-temporary-expiration +0 -0
  111. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/scripts/rucio-necromancer +0 -0
  112. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/scripts/rucio-oauth-manager +0 -0
  113. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/scripts/rucio-reaper +0 -0
  114. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/scripts/rucio-replica-recoverer +0 -0
  115. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/scripts/rucio-rse-decommissioner +0 -0
  116. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/scripts/rucio-storage-consistency-actions +0 -0
  117. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/scripts/rucio-transmogrifier +0 -0
  118. {rucio-37.7.0.data → rucio-38.0.0rc1.data}/scripts/rucio-undertaker +0 -0
  119. {rucio-37.7.0.dist-info → rucio-38.0.0rc1.dist-info}/WHEEL +0 -0
  120. {rucio-37.7.0.dist-info → rucio-38.0.0rc1.dist-info}/licenses/AUTHORS.rst +0 -0
  121. {rucio-37.7.0.dist-info → rucio-38.0.0rc1.dist-info}/licenses/LICENSE +0 -0
  122. {rucio-37.7.0.dist-info → rucio-38.0.0rc1.dist-info}/top_level.txt +0 -0
rucio/core/opendata.py ADDED
@@ -0,0 +1,744 @@
1
+ # Copyright European Organization for Nuclear Research (CERN) since 2012
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import json
16
+ from re import match, search
17
+ from typing import TYPE_CHECKING, Any, Optional, Union, cast
18
+
19
+ from sqlalchemy import and_, delete, insert, update
20
+ from sqlalchemy.exc import DataError, IntegrityError
21
+ from sqlalchemy.sql.expression import bindparam, select
22
+
23
+ from rucio.common import exception
24
+ from rucio.common.exception import OpenDataError, OpenDataInvalidStateUpdate
25
+ from rucio.core.did import list_files
26
+ from rucio.core.monitor import MetricManager
27
+ from rucio.core.replica import list_replicas
28
+ from rucio.db.sqla import models
29
+ from rucio.db.sqla.constants import DIDType, OpenDataDIDState
30
+
31
+ if TYPE_CHECKING:
32
+ from collections.abc import Sequence
33
+
34
+ from sqlalchemy.orm import Session
35
+
36
+ from rucio.common.constants import OPENDATA_DID_STATE_LITERAL
37
+ from rucio.common.types import InternalScope
38
+
39
+ METRICS = MetricManager(module=__name__)
40
+
41
+
42
def is_valid_opendata_did_state(state: str) -> bool:
    """
    Tell whether a string names a member of the OpenDataDIDState enum.

    The lookup is case-insensitive: the input is upper-cased before being
    compared against the enum member names.

    Parameters:
        state: Candidate state name (e.g., 'draft', 'public', 'suspended').

    Returns:
        True if the upper-cased string is an enum member name, False otherwise.
    """

    return state.upper() in OpenDataDIDState.__members__
58
+
59
+
60
def validate_opendata_did_state(state: str) -> "OPENDATA_DID_STATE_LITERAL":
    """
    Normalise an Opendata DID state string to lowercase and make sure it is valid.

    Parameters:
        state: The state string to validate (e.g., 'draft', 'public', 'suspended').

    Returns:
        The lower-cased state string.

    Raises:
        OpenDataError: If the string does not name an OpenDataDIDState member.
            The message lists the valid state names in lowercase.
    """

    normalized = state.lower()
    if is_valid_opendata_did_state(normalized):
        return cast("OPENDATA_DID_STATE_LITERAL", normalized)

    valid_states = ', '.join([member.name.lower() for member in OpenDataDIDState])
    raise OpenDataError(
        f"Invalid state '{normalized}'. Valid opendata states are: {valid_states}")
78
+
79
+
80
def opendata_state_str_to_enum(state: "OPENDATA_DID_STATE_LITERAL") -> OpenDataDIDState:
    """
    Map a state string onto its OpenDataDIDState enum member.

    The string is first passed through validate_opendata_did_state, so an
    unknown state raises OpenDataError before any enum lookup happens.

    Parameters:
        state: The state string to convert (e.g., 'draft', 'public', 'suspended').

    Returns:
        The OpenDataDIDState member whose name is the upper-cased state string.
    """

    checked = validate_opendata_did_state(state)
    return OpenDataDIDState[checked.upper()]
93
+
94
+
95
def _check_opendata_did_exists(
    *,
    scope: "InternalScope",
    name: str,
    session: "Session",
) -> bool:
    """
    Report whether an Opendata DID row exists for the given scope and name.

    Parameters:
        scope: The scope of the Opendata DID.
        name: The name of the Opendata DID.
        session: SQLAlchemy session to use for the query.

    Returns:
        True if the lookup yields a row, False otherwise.
    """

    stmt = select(models.OpenDataDid).where(
        models.OpenDataDid.scope == scope,
        models.OpenDataDid.name == name,
    )
    return session.execute(stmt).scalar() is not None
113
+
114
+
115
def list_opendata_dids(
    *,
    limit: Optional[int] = None,
    offset: Optional[int] = None,
    state: Optional[OpenDataDIDState] = None,
    session: "Session",
) -> dict[str, list[dict[str, Any]]]:
    """
    List Opendata DIDs ordered by their last update time.

    Parameters:
        limit: Maximum number of DIDs to return; no limit when None.
        offset: Number of rows to skip for pagination; no offset when None.
        state: Only return DIDs in this state; no state filter when None.
        session: SQLAlchemy session to use for the query.

    Returns:
        A dictionary with three keys: 'total' (the number of DIDs contained in
        this response), 'offset' (the offset that was applied, 0 if none) and
        'dids' (one dictionary per DID with scope, name, state, created_at and
        updated_at).
    """

    columns = ("scope", "name", "state", "created_at", "updated_at")

    stmt = select(
        models.OpenDataDid.scope,
        models.OpenDataDid.name,
        models.OpenDataDid.state,
        models.OpenDataDid.created_at,
        models.OpenDataDid.updated_at,
    ).order_by(models.OpenDataDid.updated_at)

    if state is not None:
        stmt = stmt.where(models.OpenDataDid.state == state)
    if limit is not None:
        stmt = stmt.limit(limit)
    if offset is not None:
        stmt = stmt.offset(offset)

    # Each result row has the same column order as `columns`.
    entries = [dict(zip(columns, row)) for row in session.execute(stmt)]

    return {
        "total": len(entries),
        "offset": 0 if offset is None else offset,
        "dids": entries,
    }
164
+
165
+
166
def get_opendata_meta(
    *,
    scope: "InternalScope",
    name: str,
    session: "Session",
) -> dict:
    """
    Fetch the metadata stored for an Opendata DID.

    Parameters:
        scope: The scope of the Opendata DID.
        name: The name of the Opendata DID.
        session: SQLAlchemy session to use for the query.

    Returns:
        The stored 'meta' value, or an empty dictionary when no metadata row
        exists for the DID.
    """

    stmt = select(models.OpenDataMeta.meta).where(
        models.OpenDataMeta.name == name,
        models.OpenDataMeta.scope == scope,
    )
    row = session.execute(stmt).mappings().fetchone()

    return row["meta"] if row else {}
199
+
200
+
201
def get_opendata_doi(
    *,
    scope: "InternalScope",
    name: str,
    session: "Session",
) -> Optional[str]:
    """
    Fetch the DOI (Digital Object Identifier) stored for an Opendata DID.

    Parameters:
        scope: The scope of the Opendata DID.
        name: The name of the Opendata DID.
        session: SQLAlchemy session to use for the query.

    Returns:
        The stored DOI, or None when no DOI row exists for the DID.
    """

    stmt = select(models.OpenDataDOI.doi).where(
        models.OpenDataDOI.name == name,
        models.OpenDataDOI.scope == scope,
    )
    row = session.execute(stmt).mappings().fetchone()

    return row["doi"] if row else None
234
+
235
+
236
def get_opendata_did_files(
    *,
    scope: "InternalScope",
    name: str,
    session: "Session",
) -> list[dict[str, Any]]:
    """
    Retrieve the files associated with an Opendata DID, with their disk replica URIs.

    Parameters:
        scope: The scope of the Opendata DID.
        name: The name of the Opendata DID.
        session: SQLAlchemy session to use for all queries issued here.

    Returns:
        One dictionary per file with the keys 'scope', 'name', 'bytes',
        'adler32' and 'uris'. 'uris' holds the replica PFNs whose type is
        'DISK'; it is an empty list when the file has no such replica.

    Raises:
        OpenDataDataIdentifierNotFound: If the DID is not in the Opendata catalog.
    """

    exists_stmt = select(
        models.OpenDataDid.scope,
        models.OpenDataDid.name,
    ).where(
        and_(
            models.OpenDataDid.scope == scope,
            models.OpenDataDid.name == name,
        )
    )

    if not session.execute(exists_stmt).mappings().fetchone():
        raise exception.OpenDataDataIdentifierNotFound(f"OpenData DID {scope}:{name} not found.")

    # Use the caller's session here as well, so the file listing sees the same
    # transaction as the existence check above and the replica lookups below.
    files = [
        {
            "scope": file["scope"],
            "name": file["name"],
            "bytes": file["bytes"],
            "adler32": file["adler32"],
        }
        for file in list_files(scope=scope, name=name, session=session)
    ]

    for file in files:
        replicas = list_replicas(dids=[{"scope": file["scope"], "name": file["name"]}], session=session)
        uris = []
        for replica in replicas:
            # Only disk replicas are exposed; other PFN types are skipped.
            uris.extend(uri for uri, data in replica["pfns"].items() if data["type"] == "DISK")
        file["uris"] = uris

    return files
293
+
294
+
295
def get_opendata_did(
    *,
    scope: "InternalScope",
    name: str,
    state: Optional[OpenDataDIDState] = None,
    include_files: bool = True,
    include_metadata: bool = False,
    include_doi: bool = True,
    session: "Session",
) -> dict[str, Any]:
    """
    Retrieve information about an Opendata DID (Data Identifier).

    Parameters:
        scope: The scope under which the DID is registered.
        name: The name of the DID.
        state: If given, the DID is only returned when it is in this state.
        include_files: If True, add 'files' and 'files_summary'. Defaults to True.
        include_metadata: If True, add the stored metadata under 'meta'. Defaults to False.
        include_doi: If True, add the stored DOI (or None) under 'doi'. Defaults to True.
        session: SQLAlchemy session to use for the query.

    Returns:
        A dictionary with scope, name, state, created_at and updated_at, plus
        the optional keys selected above. 'files_summary' contains the file
        count, the summed byte size, the sorted list of file extensions seen
        in the replica URIs, and the number of files without any URI.

    Raises:
        OpenDataDataIdentifierNotFound: If no Opendata DID matches scope, name
            and (when given) state.
    """

    query = select(
        models.OpenDataDid.scope,
        models.OpenDataDid.name,
        models.OpenDataDid.state,
        models.OpenDataDid.created_at,
        models.OpenDataDid.updated_at,
    ).where(
        and_(
            models.OpenDataDid.scope == scope,
            models.OpenDataDid.name == name,
        )
    )

    if state is not None:
        query = query.where(models.OpenDataDid.state == state)

    row = session.execute(query).mappings().fetchone()
    if not row:
        raise exception.OpenDataDataIdentifierNotFound(f"OpenData DID {scope}:{name} not found.")

    result = dict(row)

    if include_doi:
        result["doi"] = get_opendata_doi(scope=scope, name=name, session=session)
    if include_metadata:
        result["meta"] = get_opendata_meta(scope=scope, name=name, session=session)
    if include_files:
        opendata_files = get_opendata_did_files(scope=scope, name=name, session=session)
        result["files"] = opendata_files

        extensions = set()
        replicas_missing = 0
        for file in opendata_files:
            uris = file.get("uris")
            if not uris:
                replicas_missing += 1
                continue
            for uri in uris:
                # The extension is whatever follows the last dot of the last path segment.
                filename = uri.split("/")[-1]
                if "." in filename:
                    extensions.add(filename.split(".")[-1])

        result["files_summary"] = {
            "count": len(opendata_files),
            "bytes": sum(file["bytes"] for file in opendata_files),
            # Sorted so that the response is stable between calls; plain set
            # iteration order is not.
            "extensions": sorted(extensions),
            "replicas_missing": replicas_missing,
        }

    return result
372
+
373
+
374
def add_opendata_did(
    *,
    scope: "InternalScope",
    name: str,
    session: "Session",
) -> None:
    """
    Register a single existing DID in the Opendata catalog.

    This delegates to add_opendata_dids with a one-element list and re-raises
    its errors with a message that names the affected DID.

    Parameters:
        scope: The scope under which the DID is registered.
        name: The name of the DID.
        session: SQLAlchemy session to use for the operation.

    Raises:
        DataIdentifierNotFound: If the DID does not exist.
        OpenDataDataIdentifierAlreadyExists: If the Opendata DID already exists in the catalog.
    """

    single_did = {"scope": scope, "name": name}

    try:
        add_opendata_dids([single_did], session=session)
    except exception.DataIdentifierNotFound:
        raise exception.DataIdentifierNotFound(f"OpenData DID {scope}:{name} not found.")
    except exception.OpenDataDataIdentifierAlreadyExists:
        raise exception.OpenDataDataIdentifierAlreadyExists(f"OpenData DID {scope}:{name} already exists.")
399
+
400
+
401
def add_opendata_dids(
    dids: "Sequence[dict[str, Any]]",
    *,
    session: "Session",
) -> None:
    """
    Add multiple Opendata DIDs to the catalog.

    All DIDs are inserted with a single statement. An empty sequence is a
    no-op.

    Parameters:
        dids: A sequence of dictionaries, each containing 'scope' and 'name' keys for the DIDs to be added.
        session: SQLAlchemy session to use for the operation.

    Raises:
        InputValidationError: If any DID does not have 'scope' or 'name' keys.
        OpenDataDataIdentifierAlreadyExists: If the insert fails with a
            duplicate-key / unique-constraint error.
        DataIdentifierNotFound: If the insert fails with any other integrity error.
    """

    for did in dids:
        if "scope" not in did or "name" not in did:
            raise exception.InputValidationError("DID must have 'scope' and 'name' keys.")

    if not dids:
        # Nothing to add; do not issue an INSERT without any row values.
        return

    # Backend-specific messages that signal "this row already exists".
    duplicate_key_patterns = (
        r'ORA-00001: unique constraint \([^)]+DIDS_OPENDATA_PK\) violated',
        r'UNIQUE constraint failed: dids_opendata\.scope, dids_opendata\.name',
        r'1062.*Duplicate entry.*for key',
        # Also covers messages prefixed with "UniqueViolation".
        r'duplicate key value violates unique constraint',
        r'columns?.*not unique',
    )

    rows = [{"scope": did["scope"], "name": did["name"]} for did in dids]

    try:
        # The default state is DRAFT, set in the model
        session.execute(insert(models.OpenDataDid), rows)
    except IntegrityError as error:
        msg = str(error)

        if any(search(pattern, msg) for pattern in duplicate_key_patterns):
            raise exception.OpenDataDataIdentifierAlreadyExists() from error

        # Any other integrity failure is reported as a missing DID.
        raise exception.DataIdentifierNotFound() from error
448
+
449
+
450
def delete_opendata_did(
    *,
    scope: "InternalScope",
    name: str,
    session: "Session",
) -> None:
    """
    Remove an Opendata DID from the catalog; only DRAFT entries may be removed.

    Parameters:
        scope: The scope under which the DID is registered.
        name: The name of the DID to be deleted.
        session: SQLAlchemy session to use for the operation.

    Raises:
        OpenDataDataIdentifierNotFound: If the Opendata DID does not exist.
        OpenDataInvalidState: If the Opendata DID is not in the DRAFT state.
        ValueError: If the delete statement did not remove any row.
    """

    lookup = select(
        models.OpenDataDid.scope,
        models.OpenDataDid.name,
        models.OpenDataDid.state,
    ).where(
        models.OpenDataDid.scope == scope,
        models.OpenDataDid.name == name,
    )

    entry = session.execute(lookup).mappings().fetchone()
    if not entry:
        raise exception.OpenDataDataIdentifierNotFound(f"OpenData DID '{scope}:{name}' not found.")

    # state needs to be draft to be deleted
    current_state = entry["state"]
    if current_state != OpenDataDIDState.DRAFT:
        raise exception.OpenDataInvalidState(
            f"OpenData entry '{scope}:{name}' not in a valid state for deletion. State: {current_state}, expected: {OpenDataDIDState.DRAFT}")

    removal = delete(models.OpenDataDid).where(
        models.OpenDataDid.scope == bindparam("scope"),
        models.OpenDataDid.name == bindparam("name"),
    )
    outcome = session.execute(removal, {"scope": scope, "name": name})

    if outcome.rowcount == 0:
        raise ValueError(f"Error deleting Opendata entry '{scope}:{name}'.")
501
+
502
+
503
def update_opendata_did(
    *,
    scope: "InternalScope",
    name: str,
    state: Optional[OpenDataDIDState] = None,
    meta: Optional[Union[dict, str]] = None,
    doi: Optional[str] = None,
    session: "Session",
) -> None:
    """
    Update state, metadata and/or DOI of an existing Opendata DID.

    Each provided value is applied by its dedicated updater, in the order
    state, meta, doi.

    Parameters:
        scope: The scope under which the DID is registered.
        name: The name of the DID to be updated.
        state: The new state to set for the DID.
        meta: Metadata to update for the DID. Must be a valid JSON object or string.
        doi: DOI to associate with the DID. Must be a valid DOI string (e.g., "10.1234/foo.bar").
        session: SQLAlchemy session to use for the operation.

    Raises:
        InputValidationError: If none of 'state', 'meta', or 'doi' are provided, or if the provided data is invalid.
        OpenDataDataIdentifierNotFound: If the Opendata DID does not exist.
        OpenDataInvalidStateUpdate: If the state update is not valid (e.g., trying to set DRAFT after PUBLIC).
        ValueError: If there is an error during the update process.
    """

    if all(value is None for value in (state, meta, doi)):
        raise exception.InputValidationError(
            "Either 'state', 'meta', or 'doi' must be provided to update the Opendata DID.")

    did_exists = _check_opendata_did_exists(scope=scope, name=name, session=session)
    if not did_exists:
        raise exception.OpenDataDataIdentifierNotFound(f"OpenData DID '{scope}:{name}' not found.")

    if state is not None:
        update_opendata_state(scope=scope, name=name, state=state, session=session)

    if meta is not None:
        update_opendata_meta(scope=scope, name=name, meta=meta, session=session)

    if doi is not None:
        update_opendata_doi(scope=scope, name=name, doi=doi, session=session)
544
+
545
+
546
def update_opendata_meta(
    *,
    scope: "InternalScope",
    name: str,
    meta: Union[dict, str],
    session: "Session",
) -> None:
    """
    Update the metadata associated with an Opendata DID.

    A string is parsed as JSON first. The existing metadata row is updated; if
    there is none yet, a new row is inserted.

    Parameters:
        scope: The scope under which the Opendata DID is registered.
        name: The name of the Opendata DID.
        meta: Metadata to update for the DID. Must be a dictionary or a JSON
            string that decodes to a dictionary.
        session: SQLAlchemy session to use for the operation.

    Raises:
        InputValidationError: If 'meta' is not a dictionary or a valid JSON
            string, or if the database rejects the data.
        OpenDataDataIdentifierNotFound: If the Opendata DID does not exist.
        ValueError: If the insert of a new metadata row affected no rows.
    """

    if isinstance(meta, str):
        try:
            meta = json.loads(meta)
        except ValueError as error:
            raise exception.InputValidationError(f"Invalid JSON data: {error}") from error

    if not isinstance(meta, dict):
        raise exception.InputValidationError("'meta' must be a dictionary.")

    # Checked here as well (like update_opendata_doi does) so that a direct
    # call for an unknown DID raises the documented error instead of writing
    # metadata for a DID that is not in the catalog.
    if not _check_opendata_did_exists(scope=scope, name=name, session=session):
        raise exception.OpenDataDataIdentifierNotFound(f"OpenData DID '{scope}:{name}' not found.")

    try:
        stmt = update(models.OpenDataMeta).where(
            and_(
                models.OpenDataMeta.scope == scope,
                models.OpenDataMeta.name == name
            )
        ).values(meta=meta).execution_options(synchronize_session="fetch")
        result = session.execute(stmt)

        if result.rowcount == 0:
            # If no rows were updated, insert a new row
            insert_stmt = insert(models.OpenDataMeta).values(
                scope=scope,
                name=name,
                meta=meta
            )
            result = session.execute(insert_stmt)

            if result.rowcount == 0:
                raise ValueError(f"Error inserting Opendata meta for DID '{scope}:{name}'.")

    except DataError as error:
        raise exception.InputValidationError(f"Invalid data: {error}") from error
600
+
601
+
602
def update_opendata_state(
    *,
    scope: "InternalScope",
    name: str,
    state: OpenDataDIDState,
    session: "Session",
) -> None:
    """
    Update the state of an Opendata DID.

    Allowed transitions, as enforced below:
      - DRAFT may only be set while the DID is still in DRAFT.
      - PUBLIC may be set from any state, but a DID that is not a file must
        be closed first.
      - SUSPENDED may not be set directly from DRAFT.

    Parameters:
        scope: The scope under which the Opendata DID is registered.
        name: The name of the Opendata DID.
        state: The new state to set for the Opendata DID.
        session: SQLAlchemy session to use for the operation.

    Raises:
        InputValidationError: If the provided state is not a valid OpenDataDIDState,
            or if the database rejects the data.
        OpenDataDataIdentifierNotFound: If the Opendata DID does not exist.
        OpenDataInvalidStateUpdate: If the state update is not valid (e.g., trying to set DRAFT after PUBLIC).
        ValueError: If the update statement affected no rows.
    """

    if not isinstance(state, OpenDataDIDState):
        raise exception.InputValidationError(
            f"Invalid state '{state}'. Valid opendata states are: {', '.join([s.name for s in OpenDataDIDState])}")

    current = session.execute(
        select(models.OpenDataDid.state).where(
            and_(
                models.OpenDataDid.scope == scope,
                models.OpenDataDid.name == name
            )
        )
    ).first()

    # Without this check a missing DID would surface as a misleading
    # state-transition error (for DRAFT) or as a bare ValueError.
    if current is None:
        raise exception.OpenDataDataIdentifierNotFound(f"OpenData DID '{scope}:{name}' not found.")
    state_before = current[0]

    if state == OpenDataDIDState.DRAFT:
        if state_before != OpenDataDIDState.DRAFT:
            raise OpenDataInvalidStateUpdate(
                "Cannot set state to DRAFT. Once a DID is made public, it cannot be reverted to DRAFT.")
    elif state == OpenDataDIDState.PUBLIC:
        # All states can be set to PUBLIC
        # DID needs to be closed before going public
        did_row = session.execute(
            select(
                models.DataIdentifier.did_type,
                models.DataIdentifier.is_open,
            ).where(
                and_(
                    models.DataIdentifier.scope == scope,
                    models.DataIdentifier.name == name
                )
            )
        ).first()

        if did_row is not None:
            did_type, is_open = did_row
            # Only DIDs that are not files are subject to the open/closed check.
            if did_type != DIDType.FILE and is_open:
                raise OpenDataInvalidStateUpdate(
                    "Cannot set state to PUBLIC. The DID must be closed first.")
    elif state == OpenDataDIDState.SUSPENDED:
        if state_before == OpenDataDIDState.DRAFT:
            raise OpenDataInvalidStateUpdate("Cannot set state to SUSPENDED from DRAFT. First set it to PUBLIC.")

    update_query = update(models.OpenDataDid).where(
        and_(
            models.OpenDataDid.scope == scope,
            models.OpenDataDid.name == name
        )
    ).values({"state": state})

    try:
        result = session.execute(update_query)

        if result.rowcount == 0:
            raise ValueError(f"Error updating Opendata state for DID '{scope}:{name}'.")

    except DataError as error:
        raise exception.InputValidationError(f"Invalid data: {error}") from error
687
+
688
+
689
def update_opendata_doi(
    *,
    scope: "InternalScope",
    name: str,
    doi: str,
    session: "Session",
) -> None:
    """
    Update the DOI (Digital Object Identifier) associated with an Opendata DID.

    The DOI row is inserted if the DID has none yet, otherwise it is overwritten.

    Parameters:
        scope: The scope under which the Opendata DID is registered.
        name: The name of the Opendata DID.
        doi: The new DOI to associate with the Opendata DID. It must consist of
            '10.', 4 to 9 digits, a slash and a non-empty suffix made of
            letters, digits and the characters '-._;()/:'.
        session: SQLAlchemy session to use for the operation.

    Raises:
        InputValidationError: If the provided DOI is not a string, does not
            match the expected format, or is rejected by the database.
        OpenDataDataIdentifierNotFound: If the Opendata DID does not exist.
        ValueError: If the insert/update statement affected no rows.
    """

    if not _check_opendata_did_exists(scope=scope, name=name, session=session):
        raise exception.OpenDataDataIdentifierNotFound(f"OpenData DID '{scope}:{name}' not found.")

    if not isinstance(doi, str):
        raise exception.InputValidationError("DOI must be a string.")
    # Anchor with \Z rather than $: '$' also matches just before a trailing
    # newline, which would let a DOI such as "10.1234/foo\n" through.
    if not match(r'^10\.\d{4,9}/[-._;()/:A-Za-z0-9]+\Z', doi):
        raise exception.InputValidationError("Invalid DOI format.")

    # insert on the DOI table if it does not exist, otherwise update it
    doi_before = session.execute(select(models.OpenDataDOI.doi).where(
        and_(
            models.OpenDataDOI.scope == scope,
            models.OpenDataDOI.name == name
        )
    )).scalar()
    if doi_before is None:
        update_query = insert(models.OpenDataDOI).values(scope=scope, name=name, doi=doi)
    else:
        # TODO: do not freely prevent DOI updates? To be discussed
        update_query = update(models.OpenDataDOI).where(
            and_(
                models.OpenDataDOI.scope == scope,
                models.OpenDataDOI.name == name
            )
        ).values(doi=doi)

    try:
        result = session.execute(update_query)

        if result.rowcount == 0:
            raise ValueError(f"Error updating Opendata DOI for DID '{scope}:{name}'.")

    except DataError as error:
        raise exception.InputValidationError(f"Invalid data: {error}") from error