invenio-app-rdm 14.0.0b2.dev1__py2.py3-none-any.whl → 14.0.0b2.dev2__py2.py3-none-any.whl

This diff shows the differences between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
@@ -17,6 +17,6 @@
17
17
  #
18
18
  # See PEP 0440 for details - https://www.python.org/dev/peps/pep-0440
19
19
 
20
- __version__ = "14.0.0b2.dev1"
20
+ __version__ = "14.0.0b2.dev2"
21
21
 
22
22
  __all__ = ("__version__",)
@@ -20,68 +20,91 @@ This script has been tested for the following scenarios:
20
20
  4. Records with multiple versions
21
21
  """
22
22
 
23
- import time
24
-
25
23
  from click import secho
26
24
  from invenio_access.permissions import system_identity
27
25
  from invenio_db import db
28
26
  from invenio_drafts_resources.resources.records.errors import DraftNotCreatedError
29
27
  from invenio_rdm_records.proxies import current_rdm_records_service as records_service
30
- from invenio_search.engine import dsl
28
+ from invenio_rdm_records.records.api import RDMDraft, RDMRecord
29
+ from invenio_search.api import RecordsSearchV2
31
30
 
32
31
 
33
- def run_upgrade(has, migrate_record, migrate_draft):
32
+ def run_upgrade(migrate_record, migrate_draft):
34
33
  """Run upgrade on selected records and drafts.
35
34
 
36
35
  Args:
37
- has (dsl.Q): Query filter to select records/drafts to update.
38
36
  migrate_record (callable): Function to migrate a record.
39
37
  migrate_draft (callable): Function to migrate a draft.
40
38
  """
39
+ errored_record_ids = []
40
+ errored_draft_ids = []
41
+
41
42
  # Handle published records
42
- published_records = records_service.scan(
43
- identity=system_identity,
44
- params={"allversions": True},
45
- extra_filter=has,
46
- )
47
- for result in published_records.hits:
43
+ published_records = (
44
+ RecordsSearchV2(index=records_service.record_cls.index._name)
45
+ .filter("term", deletion_status="P")
46
+ .filter(
47
+ "query_string",
48
+ query="metadata.resource_type.id:publication-thesis OR metadata.related_identifiers.resource_type.id:publication-thesis",
49
+ )
50
+ .source(["id"])
51
+ .scan()
52
+ ) # Only need to fetch the record IDs to make the query faster
53
+ # Convert the search results to a list to avoid keeping the scroll context open, as it errors out after 15 minutes
54
+ published_record_ids = [result["id"] for result in published_records]
55
+ for record_id in published_record_ids:
48
56
  try:
49
- migrate_record(result)
57
+ migrate_record(record_id)
50
58
  except Exception as error:
51
59
  secho(f"> Error {repr(error)}", fg="red")
52
- error = f"Record {result['id']} failed to update"
60
+ secho(f"Record {record_id} failed to update", fg="red")
61
+ errored_record_ids.append((record_id, error))
53
62
 
54
63
  # Handle draft records
55
- draft_records = records_service._search(
56
- identity=system_identity,
57
- action="scan",
58
- params={"allversions": True},
59
- search_preference=None,
60
- record_cls=records_service.draft_cls,
61
- search_opts=records_service.config.search_drafts,
62
- extra_filter=has,
63
- permission_action="read_draft",
64
- ).scan()
65
- for result in draft_records:
64
+ draft_records = (
65
+ RecordsSearchV2(index=records_service.draft_cls.index._name)
66
+ .filter("term", has_draft=False)
67
+ .filter(
68
+ "query_string",
69
+ query="metadata.resource_type.id:publication-thesis OR metadata.related_identifiers.resource_type.id:publication-thesis",
70
+ )
71
+ .source(["id"])
72
+ .scan()
73
+ )
74
+ # Convert the search results to a list to avoid keeping the scroll context open, as it errors out after 15 minutes
75
+ draft_record_ids = [result["id"] for result in draft_records]
76
+ for draft_id in draft_record_ids:
66
77
  try:
67
- migrate_draft(result)
78
+ migrate_draft(draft_id)
68
79
  except Exception as error:
69
80
  secho(f"> Error {repr(error)}", fg="red")
70
- error = f"Draft {result['id']} failed to update"
81
+ secho(f"Draft {draft_id} failed to update", fg="red")
82
+ errored_draft_ids.append((draft_id, error))
83
+
84
+ print(f"Errored record IDs:", *errored_record_ids, sep="\n")
85
+ print(f"Errored draft IDs:", *errored_draft_ids, sep="\n")
71
86
 
72
87
 
73
88
  def run_update_for_resource_type():
74
89
  """Run update for resource type."""
75
90
 
76
- def migrate_resource_type_in_record(hit_result):
91
+ def migrate_resource_type_in_record(record_id):
77
92
  """
78
93
  Update resource type from publication-thesis to publication-dissertation.
79
94
 
80
95
  We go through the service layer to automatically trigger the DOI update and re-indexing.
81
96
  """
82
- secho(f"Updating resource type for record {hit_result['id']}", fg="yellow")
83
- record = records_service.read(system_identity, hit_result["id"])
84
- if record.data["metadata"]["resource_type"]["id"] != "publication-thesis":
97
+ secho(f"Updating resource type for record {record_id}", fg="yellow")
98
+ record = records_service.read(system_identity, record_id)
99
+ if record.data["metadata"]["resource_type"][
100
+ "id"
101
+ ] != "publication-thesis" and not any(
102
+ related_identifier.get("resource_type", {}).get("id")
103
+ == "publication-thesis"
104
+ for related_identifier in record.data["metadata"].get(
105
+ "related_identifiers", []
106
+ )
107
+ ):
85
108
  secho(
86
109
  f"Skipping record <{record.id}> because it doesn't have resource-type 'publication-thesis'!",
87
110
  fg="yellow",
@@ -97,31 +120,78 @@ def run_update_for_resource_type():
97
120
  fg="yellow",
98
121
  )
99
122
  # Update the record directly without affecting the draft
100
- record._record["metadata"]["resource_type"][
101
- "id"
102
- ] = "publication-dissertation"
123
+ if (
124
+ record._record["metadata"]["resource_type"]["id"]
125
+ == "publication-thesis"
126
+ ):
127
+ record._record["metadata"]["resource_type"][
128
+ "id"
129
+ ] = "publication-dissertation"
130
+ for related_identifier in record._record["metadata"].get(
131
+ "related_identifiers", []
132
+ ):
133
+ if (
134
+ related_identifier.get("resource_type", {}).get("id")
135
+ == "publication-thesis"
136
+ ):
137
+ related_identifier["resource_type"][
138
+ "id"
139
+ ] = "publication-dissertation"
103
140
  # Save the record changes and reindex
141
+ secho(
142
+ f"Record <{record.id}> has been updated... committing changes.",
143
+ fg="green",
144
+ )
104
145
  record._record.commit()
146
+ # Step 2: Update the resource type in the draft
147
+ if draft._record["metadata"]["resource_type"]["id"] == "publication-thesis":
148
+ draft._record["metadata"]["resource_type"][
149
+ "id"
150
+ ] = "publication-dissertation"
151
+ for related_identifier in draft._record["metadata"].get(
152
+ "related_identifiers", []
153
+ ):
154
+ if (
155
+ related_identifier.get("resource_type", {}).get("id")
156
+ == "publication-thesis"
157
+ ):
158
+ related_identifier["resource_type"][
159
+ "id"
160
+ ] = "publication-dissertation"
161
+ # After updating the record, update the draft's fork_version_id to match the record's new version_id, to avoid conflicts when publishing
162
+ draft._record.fork_version_id = record._record.revision_id
163
+ draft._record.commit()
164
+ # Commit the changes for both the record and the draft in one transaction
105
165
  db.session.commit()
106
166
  records_service.indexer.index(record._record)
167
+ records_service.indexer.index(draft._record)
168
+ secho(f"Draft <{draft.id}> has been updated successfully.", fg="green")
107
169
  # Update DOI metadata if record has DOI
108
170
  if hasattr(record, "pids") and record.pids.get("doi", None):
109
171
  records_service.pids.register_or_update(
110
172
  system_identity, record.id, "doi", parent=False
111
173
  )
112
- # Step 2: Update the resource type in the draft
113
- secho(f"Updating resource type for draft {draft.id}", fg="yellow")
114
- draft.data["metadata"]["resource_type"]["id"] = "publication-dissertation"
115
- # After updating the record, update the draft's fork_version_id to match the record's new version_id, to avoid conflicts when publishing
116
- draft._record.fork_version_id = record._record.revision_id
117
- updated_draft = records_service.update_draft(
118
- system_identity, draft.id, draft.data
119
- )
120
- secho(f"Draft {draft.id} has been updated successfully.", fg="green")
174
+ secho(
175
+ f"DOI metadata for record {record.id} has been updated successfully.",
176
+ fg="green",
177
+ )
121
178
  except DraftNotCreatedError:
122
179
  # If the draft didn't exist, we simply edit and publish the record
123
180
  draft = records_service.edit(system_identity, record.id)
124
- draft.data["metadata"]["resource_type"]["id"] = "publication-dissertation"
181
+ if draft.data["metadata"]["resource_type"]["id"] == "publication-thesis":
182
+ draft.data["metadata"]["resource_type"][
183
+ "id"
184
+ ] = "publication-dissertation"
185
+ for related_identifier in draft.data["metadata"].get(
186
+ "related_identifiers", []
187
+ ):
188
+ if (
189
+ related_identifier.get("resource_type", {}).get("id")
190
+ == "publication-thesis"
191
+ ):
192
+ related_identifier["resource_type"][
193
+ "id"
194
+ ] = "publication-dissertation"
125
195
  updated_draft = records_service.update_draft(
126
196
  system_identity, draft.id, draft.data
127
197
  )
@@ -129,36 +199,45 @@ def run_update_for_resource_type():
129
199
 
130
200
  secho(f"Record <{record.id}> has been updated successfully.", fg="green")
131
201
 
132
- def migrate_resource_type_in_draft(hit_result):
202
+ def migrate_resource_type_in_draft(draft_id):
133
203
  """
134
204
  Update resource type from publication-thesis to publication-dissertation.
135
205
 
136
206
  We go through the service layer to automatically trigger the DOI update and re-indexing.
137
207
  """
138
- secho(f"Updating resource type for draft {hit_result['id']}", fg="yellow")
139
- draft = records_service.edit(system_identity, hit_result["id"])
140
- if draft.data["metadata"]["resource_type"]["id"] != "publication-thesis":
208
+ secho(f"Updating resource type for draft {draft_id}", fg="yellow")
209
+ draft = records_service.edit(system_identity, draft_id)
210
+ if draft.data["metadata"]["resource_type"][
211
+ "id"
212
+ ] != "publication-thesis" and not any(
213
+ related_identifier.get("resource_type", {}).get("id")
214
+ == "publication-thesis"
215
+ for related_identifier in draft.data["metadata"].get(
216
+ "related_identifiers", []
217
+ )
218
+ ):
141
219
  secho(
142
220
  f"Skipping draft <{draft.id}> because it doesn't have resource-type 'publication-thesis'!",
143
221
  fg="yellow",
144
222
  )
145
223
  return
146
224
 
147
- draft.data["metadata"]["resource_type"]["id"] = "publication-dissertation"
225
+ if draft.data["metadata"]["resource_type"]["id"] == "publication-thesis":
226
+ draft.data["metadata"]["resource_type"]["id"] = "publication-dissertation"
227
+ for related_identifier in draft.data["metadata"].get("related_identifiers", []):
228
+ if (
229
+ related_identifier.get("resource_type", {}).get("id")
230
+ == "publication-thesis"
231
+ ):
232
+ related_identifier["resource_type"]["id"] = "publication-dissertation"
148
233
  updated_draft = records_service.update_draft(
149
234
  system_identity, draft.id, draft.data
150
235
  )
151
236
  secho(f"Draft <{updated_draft.id}> has been updated successfully.", fg="green")
152
237
 
153
- # Query records/drafts with resource type publication-thesis
154
- has_resource_type = dsl.Q(
155
- "query_string", query="metadata.resource_type.id:publication-thesis"
156
- )
157
-
158
238
  secho("Resource type update has started.", fg="green")
159
239
 
160
240
  run_upgrade(
161
- has_resource_type,
162
241
  migrate_resource_type_in_record,
163
242
  migrate_resource_type_in_draft,
164
243
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: invenio-app-rdm
3
- Version: 14.0.0b2.dev1
3
+ Version: 14.0.0b2.dev2
4
4
  Summary: Invenio Research Data Management.
5
5
  Home-page: https://github.com/inveniosoftware/invenio-app-rdm
6
6
  Author: CERN
@@ -101,6 +101,10 @@ https://inveniordm.docs.cern.ch
101
101
  Changes
102
102
  =======
103
103
 
104
+ Version v14.0.0b2.dev2 (released 2025-10-24)
105
+
106
+ - upgrade_scripts: v14: optimize scan to avoid scroll context overhead
107
+
104
108
  Version v14.0.0b2.dev1 (released 2025-10-21)
105
109
 
106
110
  - deposit-form: updated related works options vocab
@@ -1,4 +1,4 @@
1
- invenio_app_rdm/__init__.py,sha256=YKw26EOYsN7GAX40s95XuSugzggJs6SM10HE0xQ7P-s,704
1
+ invenio_app_rdm/__init__.py,sha256=_vvW0mLU8A2mkHzDXnANLUM4PlYFMn68veC_cQZKYCs,704
2
2
  invenio_app_rdm/cli.py,sha256=G6QqNU2W6n6ICtTMnpeKFXIsdorncDmVXwwwsGH5F2k,2746
3
3
  invenio_app_rdm/config.py,sha256=kxAliLtzXLRysO8qxWxoSt1FhOjuDfzUi8xr40BBIYE,53632
4
4
  invenio_app_rdm/ext.py,sha256=K7syn5CU5If7yOclFeNOCZX_u5q6VB7NJEQVm41mlng,5286
@@ -475,7 +475,7 @@ invenio_app_rdm/upgrade_scripts/fix_migrated_records_from_8_0_to_9_0.py,sha256=p
475
475
  invenio_app_rdm/upgrade_scripts/migrate_10_0_to_11_0.py,sha256=TX6FCWXY4qM4z7IYzDO5qaMTheo3zAjFrmR1sXaEf4U,1333
476
476
  invenio_app_rdm/upgrade_scripts/migrate_11_0_to_12_0.py,sha256=Tp7jfT2JHrYCFzF2qIYqG7yr7k-GhX2zkw61CWJGA78,6941
477
477
  invenio_app_rdm/upgrade_scripts/migrate_12_0_to_13_0.py,sha256=pyO68jyGyKXVTcja8tpi2XgNx_FxXk7JhgDTV-wx3xM,8205
478
- invenio_app_rdm/upgrade_scripts/migrate_13_0_to_14_0.py,sha256=TGq3Sfze6Sn97d45TloD7iyNHuaLK-O8O8tXJJiJytQ,7715
478
+ invenio_app_rdm/upgrade_scripts/migrate_13_0_to_14_0.py,sha256=-JyKmXQolkeh0dUKdEekVhVGTYZ4AncyjZdy4n5DIeQ,11356
479
479
  invenio_app_rdm/upgrade_scripts/migrate_1_0_records_to_2_0.py,sha256=mRDv_Ao5zMgA6X0aogMfvhspO1CIApKtDW_ziJp5fjI,3325
480
480
  invenio_app_rdm/upgrade_scripts/migrate_2_0_to_3_0.py,sha256=jL_2I61Q9qt3fjBzYYueeT4EMQ9FlNPxYE4nzDQbLEY,2698
481
481
  invenio_app_rdm/upgrade_scripts/migrate_3_0_to_4_0.py,sha256=BNjGufwLBvLHnu0gz5b_Are-FuxYjXlCtkLgNQckV3U,4768
@@ -496,9 +496,9 @@ invenio_app_rdm/users_ui/views/__init__.py,sha256=SMdY2NJj9GICfr3Xuok7qdNYVtA2bJ
496
496
  invenio_app_rdm/users_ui/views/dashboard.py,sha256=iUn2PrODAwb8ugmMosJKAjPhUzjCiWiAWoXQr9RUFuc,1793
497
497
  invenio_app_rdm/users_ui/views/ui.py,sha256=W_eXM8dLVIrNHQB2UEh37C9BYoHauft6RyvcDNFHovA,1742
498
498
  invenio_app_rdm/utils/files.py,sha256=CruDyO2gDVadSlWEJD-WHpWHeOQ0juh-Ei9jz3D9yjc,3923
499
- invenio_app_rdm-14.0.0b2.dev1.dist-info/licenses/LICENSE,sha256=AZXFHRrZa5s4m9DV7zZr4bPGTMUvcEPCodeV_AmFI8k,1204
500
- invenio_app_rdm-14.0.0b2.dev1.dist-info/METADATA,sha256=ythPvg1U9pqyZyITAJJTZPqMDJQ3_GB5HSNqvaNvcuA,19345
501
- invenio_app_rdm-14.0.0b2.dev1.dist-info/WHEEL,sha256=JNWh1Fm1UdwIQV075glCn4MVuCRs0sotJIq-J6rbxCU,109
502
- invenio_app_rdm-14.0.0b2.dev1.dist-info/entry_points.txt,sha256=MwtT1SN5saWOgTYhNb5y0YGA9VGAi0kXN0cykIfsb4U,2405
503
- invenio_app_rdm-14.0.0b2.dev1.dist-info/top_level.txt,sha256=quZejDUw2vLfKQboNIuVLJ9fxZifdnCT_s2PNf1dfmk,16
504
- invenio_app_rdm-14.0.0b2.dev1.dist-info/RECORD,,
499
+ invenio_app_rdm-14.0.0b2.dev2.dist-info/licenses/LICENSE,sha256=AZXFHRrZa5s4m9DV7zZr4bPGTMUvcEPCodeV_AmFI8k,1204
500
+ invenio_app_rdm-14.0.0b2.dev2.dist-info/METADATA,sha256=0hlSvkdwVNB33JLqW1cdDKSQ75MdsYT0wjIsERoreJY,19463
501
+ invenio_app_rdm-14.0.0b2.dev2.dist-info/WHEEL,sha256=JNWh1Fm1UdwIQV075glCn4MVuCRs0sotJIq-J6rbxCU,109
502
+ invenio_app_rdm-14.0.0b2.dev2.dist-info/entry_points.txt,sha256=MwtT1SN5saWOgTYhNb5y0YGA9VGAi0kXN0cykIfsb4U,2405
503
+ invenio_app_rdm-14.0.0b2.dev2.dist-info/top_level.txt,sha256=quZejDUw2vLfKQboNIuVLJ9fxZifdnCT_s2PNf1dfmk,16
504
+ invenio_app_rdm-14.0.0b2.dev2.dist-info/RECORD,,