folio-migration-tools 1.2.1__py3-none-any.whl → 1.9.10__py3-none-any.whl
This diff compares publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
- folio_migration_tools/__init__.py +11 -0
- folio_migration_tools/__main__.py +169 -85
- folio_migration_tools/circulation_helper.py +96 -59
- folio_migration_tools/config_file_load.py +66 -0
- folio_migration_tools/custom_dict.py +6 -4
- folio_migration_tools/custom_exceptions.py +21 -19
- folio_migration_tools/extradata_writer.py +46 -0
- folio_migration_tools/folder_structure.py +63 -66
- folio_migration_tools/helper.py +29 -21
- folio_migration_tools/holdings_helper.py +57 -34
- folio_migration_tools/i18n_config.py +9 -0
- folio_migration_tools/library_configuration.py +173 -13
- folio_migration_tools/mapper_base.py +317 -106
- folio_migration_tools/mapping_file_transformation/courses_mapper.py +203 -0
- folio_migration_tools/mapping_file_transformation/holdings_mapper.py +83 -69
- folio_migration_tools/mapping_file_transformation/item_mapper.py +98 -94
- folio_migration_tools/mapping_file_transformation/manual_fee_fines_mapper.py +352 -0
- folio_migration_tools/mapping_file_transformation/mapping_file_mapper_base.py +702 -223
- folio_migration_tools/mapping_file_transformation/notes_mapper.py +90 -0
- folio_migration_tools/mapping_file_transformation/order_mapper.py +492 -0
- folio_migration_tools/mapping_file_transformation/organization_mapper.py +389 -0
- folio_migration_tools/mapping_file_transformation/ref_data_mapping.py +38 -27
- folio_migration_tools/mapping_file_transformation/user_mapper.py +149 -361
- folio_migration_tools/marc_rules_transformation/conditions.py +650 -246
- folio_migration_tools/marc_rules_transformation/holdings_statementsparser.py +292 -130
- folio_migration_tools/marc_rules_transformation/hrid_handler.py +244 -0
- folio_migration_tools/marc_rules_transformation/loc_language_codes.xml +20846 -0
- folio_migration_tools/marc_rules_transformation/marc_file_processor.py +300 -0
- folio_migration_tools/marc_rules_transformation/marc_reader_wrapper.py +136 -0
- folio_migration_tools/marc_rules_transformation/rules_mapper_authorities.py +241 -0
- folio_migration_tools/marc_rules_transformation/rules_mapper_base.py +681 -201
- folio_migration_tools/marc_rules_transformation/rules_mapper_bibs.py +395 -429
- folio_migration_tools/marc_rules_transformation/rules_mapper_holdings.py +531 -100
- folio_migration_tools/migration_report.py +85 -38
- folio_migration_tools/migration_tasks/__init__.py +1 -3
- folio_migration_tools/migration_tasks/authority_transformer.py +119 -0
- folio_migration_tools/migration_tasks/batch_poster.py +911 -198
- folio_migration_tools/migration_tasks/bibs_transformer.py +121 -116
- folio_migration_tools/migration_tasks/courses_migrator.py +192 -0
- folio_migration_tools/migration_tasks/holdings_csv_transformer.py +252 -247
- folio_migration_tools/migration_tasks/holdings_marc_transformer.py +321 -115
- folio_migration_tools/migration_tasks/items_transformer.py +264 -84
- folio_migration_tools/migration_tasks/loans_migrator.py +506 -195
- folio_migration_tools/migration_tasks/manual_fee_fines_transformer.py +187 -0
- folio_migration_tools/migration_tasks/migration_task_base.py +364 -74
- folio_migration_tools/migration_tasks/orders_transformer.py +373 -0
- folio_migration_tools/migration_tasks/organization_transformer.py +451 -0
- folio_migration_tools/migration_tasks/requests_migrator.py +130 -62
- folio_migration_tools/migration_tasks/reserves_migrator.py +253 -0
- folio_migration_tools/migration_tasks/user_transformer.py +180 -139
- folio_migration_tools/task_configuration.py +46 -0
- folio_migration_tools/test_infrastructure/__init__.py +0 -0
- folio_migration_tools/test_infrastructure/mocked_classes.py +406 -0
- folio_migration_tools/transaction_migration/legacy_loan.py +148 -34
- folio_migration_tools/transaction_migration/legacy_request.py +65 -25
- folio_migration_tools/transaction_migration/legacy_reserve.py +47 -0
- folio_migration_tools/transaction_migration/transaction_result.py +12 -1
- folio_migration_tools/translations/en.json +476 -0
- folio_migration_tools-1.9.10.dist-info/METADATA +169 -0
- folio_migration_tools-1.9.10.dist-info/RECORD +67 -0
- {folio_migration_tools-1.2.1.dist-info → folio_migration_tools-1.9.10.dist-info}/WHEEL +1 -2
- folio_migration_tools-1.9.10.dist-info/entry_points.txt +3 -0
- folio_migration_tools/generate_schemas.py +0 -46
- folio_migration_tools/mapping_file_transformation/mapping_file_mapping_base_impl.py +0 -44
- folio_migration_tools/mapping_file_transformation/user_mapper_base.py +0 -212
- folio_migration_tools/marc_rules_transformation/bibs_processor.py +0 -163
- folio_migration_tools/marc_rules_transformation/holdings_processor.py +0 -284
- folio_migration_tools/report_blurbs.py +0 -219
- folio_migration_tools/transaction_migration/legacy_fee_fine.py +0 -36
- folio_migration_tools-1.2.1.dist-info/METADATA +0 -134
- folio_migration_tools-1.2.1.dist-info/RECORD +0 -50
- folio_migration_tools-1.2.1.dist-info/top_level.txt +0 -1
- {folio_migration_tools-1.2.1.dist-info → folio_migration_tools-1.9.10.dist-info/licenses}/LICENSE +0 -0
--- folio_migration_tools/migration_tasks/batch_poster.py (1.2.1)
+++ folio_migration_tools/migration_tasks/batch_poster.py (1.9.10)
@@ -1,14 +1,20 @@
-
+import asyncio
+import copy
 import json
 import logging
+import re
 import sys
 import time
 import traceback
-from datetime import datetime
+from datetime import datetime, timezone
+from typing import Annotated, List, Optional
 from uuid import uuid4
-
-import
+
+import httpx
+import i18n
 from folio_uuid.folio_namespaces import FOLIONamespaces
+from pydantic import Field
+
 from folio_migration_tools.custom_exceptions import (
     TransformationProcessError,
     TransformationRecordFailedError,
@@ -17,8 +23,9 @@ from folio_migration_tools.library_configuration import (
     FileDefinition,
     LibraryConfiguration,
 )
+from folio_migration_tools.migration_report import MigrationReport
 from folio_migration_tools.migration_tasks.migration_task_base import MigrationTaskBase
-from
+from folio_migration_tools.task_configuration import AbstractTaskConfiguration
 
 
 def write_failed_batch_to_file(batch, file):
@@ -27,13 +34,169 @@ def write_failed_batch_to_file(batch, file):
 
 
 class BatchPoster(MigrationTaskBase):
-
-
-
-
-
-
-
+    """BatchPoster
+
+    Parents:
+        MigrationTaskBase (_type_): _description_
+
+    Raises:
+        ee: _description_
+        TransformationRecordFailedError: _description_
+        TransformationProcessError: _description_
+        TransformationRecordFailedError: _description_
+        TransformationRecordFailedError: _description_
+        TransformationProcessError: _description_
+
+    Returns:
+        _type_: _description_
+    """
+
+    class TaskConfiguration(AbstractTaskConfiguration):
+        name: Annotated[
+            str,
+            Field(
+                title="Task name",
+                description="The name of the task",
+            ),
+        ]
+        migration_task_type: Annotated[
+            str,
+            Field(
+                title="Migration task type",
+                description="The type of migration task",
+            ),
+        ]
+        object_type: Annotated[
+            str,
+            Field(
+                title="Object type",
+                description=(
+                    "The type of object being migrated"
+                    "Examples of possible values: "
+                    "'Extradata', 'SRS', Instances', 'Holdings', 'Items'"
+                ),
+            ),
+        ]
+        files: Annotated[
+            List[FileDefinition],
+            Field(
+                title="List of files",
+                description="List of files to be processed",
+            ),
+        ]
+        batch_size: Annotated[
+            int,
+            Field(
+                title="Batch size",
+                description="The batch size for processing files",
+            ),
+        ]
+        rerun_failed_records: Annotated[
+            bool,
+            Field(
+                title="Rerun failed records",
+                description=(
+                    "Toggles whether or not BatchPoster should try to rerun "
+                    "failed batches or just leave the failing records on disk."
+                ),
+            ),
+        ] = True
+        use_safe_inventory_endpoints: Annotated[
+            bool,
+            Field(
+                title="Use safe inventory endpoints",
+                description=(
+                    "Toggles the use of the safe/unsafe Inventory storage "
+                    "endpoints. Unsafe circumvents the Optimistic locking "
+                    "in FOLIO. Defaults to True (using the 'safe' options)"
+                ),
+            ),
+        ] = True
+        extradata_endpoints: Annotated[
+            dict,
+            Field(
+                title="Extradata endpoints",
+                description=(
+                    "A dictionary of extradata endpoints. "
+                    "The key is the object type and the value is the endpoint"
+                ),
+            ),
+        ] = {}
+        upsert: Annotated[
+            bool,
+            Field(
+                title="Upsert",
+                description=(
+                    "Toggles whether or not to use the upsert feature "
+                    "of the Inventory storage endpoints. Defaults to False"
+                ),
+            ),
+        ] = False
+        preserve_statistical_codes: Annotated[
+            bool,
+            Field(
+                title="Preserve statistical codes",
+                description=(
+                    "Toggles whether or not to preserve statistical codes "
+                    "during the upsert process. Defaults to False"
+                ),
+            ),
+        ] = False
+        preserve_administrative_notes: Annotated[
+            bool,
+            Field(
+                title="Preserve administrative notes",
+                description=(
+                    "Toggles whether or not to preserve administrative notes "
+                    "during the upsert process. Defaults to False"
+                ),
+            ),
+        ] = False
+        preserve_temporary_locations: Annotated[
+            bool,
+            Field(
+                title="Preserve temporary locations",
+                description=(
+                    "Toggles whether or not to preserve temporary locations "
+                    "on items during the upsert process. Defaults to False"
+                ),
+            ),
+        ] = False
+        preserve_temporary_loan_types: Annotated[
+            bool,
+            Field(
+                title="Preserve temporary loan types",
+                description=(
+                    "Toggles whether or not to preserve temporary loan types "
+                    "on items during the upsert process. Defaults to False"
+                ),
+            ),
+        ] = False
+        preserve_item_status: Annotated[
+            bool,
+            Field(
+                title="Preserve item status",
+                description=(
+                    "Toggles whether or not to preserve item status "
+                    "on items during the upsert process. Defaults to False"
+                ),
+            ),
+        ] = True
+        patch_existing_records: Annotated[bool, Field(
+            title="Patch existing records",
+            description=(
+                "Toggles whether or not to patch existing records "
+                "during the upsert process. Defaults to False"
+            ),
+        )] = False
+        patch_paths: Annotated[List[str], Field(
+            title="Patch paths",
+            description=(
+                "A list of fields in JSON Path notation to patch during the upsert process (leave off the $). If empty, all fields will be patched. Examples: ['statisticalCodeIds', 'administrativeNotes', 'instanceStatusId']"
+            ),
+        )] = []
+
+    task_configuration: TaskConfiguration
 
     @staticmethod
     def get_object_type() -> FOLIONamespaces:
@@ -43,98 +206,321 @@ class BatchPoster(MigrationTaskBase):
         self,
         task_config: TaskConfiguration,
         library_config: LibraryConfiguration,
+        folio_client,
         use_logging: bool = True,
     ):
-        super().__init__(library_config, task_config, use_logging)
-        self.
-        self.
+        super().__init__(library_config, task_config, folio_client, use_logging)
+        self.migration_report = MigrationReport()
+        self.performing_rerun = False
+        self.failed_ids: list = []
         self.first_batch = True
-        self.
+        self.api_info = get_api_info(
+            self.task_configuration.object_type,
+            self.task_configuration.use_safe_inventory_endpoints,
+        )
+        self.query_params = {}
+        if self.api_info["supports_upsert"]:
+            self.query_params["upsert"] = self.task_configuration.upsert
+        elif self.task_configuration.upsert and not self.api_info["supports_upsert"]:
+            logging.info(
+                "Upsert is not supported for this object type. Query parameter will not be set.")
         self.snapshot_id = str(uuid4())
-        self.failed_objects = []
-        self.batch_size = self.
+        self.failed_objects: list = []
+        self.batch_size = self.task_configuration.batch_size
         logging.info("Batch size is %s", self.batch_size)
         self.processed = 0
         self.failed_batches = 0
-        self.failed_records = 0
        self.users_created = 0
         self.users_updated = 0
-        self.users_per_group = {}
-        self.failed_fields = set()
+        self.users_per_group: dict = {}
+        self.failed_fields: set = set()
         self.num_failures = 0
         self.num_posted = 0
+        self.okapi_headers = self.folio_client.okapi_headers
+        self.http_client = None
+        self.starting_record_count_in_folio: Optional[int] = None
+        self.finished_record_count_in_folio: Optional[int] = None
 
     def do_work(self):
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        with self.folio_client.get_folio_http_client() as httpx_client:
+            self.http_client = httpx_client
+            with open(
+                self.folder_structure.failed_recs_path, "w", encoding='utf-8'
+            ) as failed_recs_file:
+                self.get_starting_record_count()
+                try:
+                    batch = []
+                    if self.task_configuration.object_type == "SRS":
+                        self.create_snapshot()
+                    for idx, file_def in enumerate(self.task_configuration.files):
+                        path = self.folder_structure.results_folder / file_def.file_name
+                        with open(path) as rows:
+                            logging.info("Running %s", path)
+                            last_row = ""
+                            for self.processed, row in enumerate(rows, start=1):
+                                last_row = row
+                                if row.strip():
+                                    try:
+                                        if self.task_configuration.object_type == "Extradata":
+                                            self.post_extra_data(
+                                                row, self.processed, failed_recs_file
+                                            )
+                                        elif not self.api_info["is_batch"]:
+                                            self.post_single_records(
+                                                row, self.processed, failed_recs_file
                                             )
-
-
-
-                                                batch, failed_recs_file, num_records
+                                        else:
+                                            batch = self.post_record_batch(
+                                                batch, failed_recs_file, row
                                             )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                self.
-
-
-
-
-
-
-
+                                    except UnicodeDecodeError as unicode_error:
+                                        self.handle_unicode_error(unicode_error, last_row)
+                                    except TransformationProcessError as tpe:
+                                        self.handle_generic_exception(
+                                            tpe,
+                                            last_row,
+                                            batch,
+                                            self.processed,
+                                            failed_recs_file,
+                                        )
+                                        batch = []
+                                        raise
+                                    except TransformationRecordFailedError as exception:
+                                        self.handle_generic_exception(
+                                            exception,
+                                            last_row,
+                                            batch,
+                                            self.processed,
+                                            failed_recs_file,
+                                        )
+                                        batch = []
+                except (FileNotFoundError, PermissionError) as ose:
+                    logging.error("Error reading file: %s", ose)
+
+                except Exception as ee:
+                    if "idx" in locals() and self.task_configuration.files[idx:]:
+                        for file_def in self.task_configuration.files[idx:]:
+                            path = self.folder_structure.results_folder / file_def.file_name
+                            try:
+                                with open(path, "r") as failed_file:
+                                    failed_file.seek(self.processed)
+                                    failed_recs_file.write(failed_file.read())
+                                    self.processed = 0
+                            except (FileNotFoundError, PermissionError) as ose:
+                                logging.error("Error reading file: %s", ose)
+                    raise ee
+                finally:
+                    if self.task_configuration.object_type != "Extradata" and any(batch):
+                        try:
+                            self.post_batch(batch, failed_recs_file, self.processed)
+                        except Exception as exception:
+                            self.handle_generic_exception(
+                                exception, last_row, batch, self.processed, failed_recs_file
+                            )
+                    logging.info("Done posting %s records. ", self.processed)
+                    if self.task_configuration.object_type == "SRS":
+                        self.commit_snapshot()
+
+    @staticmethod
+    def set_consortium_source(json_rec):
+        if json_rec['source'] == 'MARC':
+            json_rec['source'] = 'CONSORTIUM-MARC'
+        elif json_rec['source'] == 'FOLIO':
+            json_rec['source'] = 'CONSORTIUM-FOLIO'
+
+    def set_version(self, batch, query_api, object_type) -> None:
+        """
+        Synchronous wrapper for set_version_async
+        """
+        try:
+            loop = asyncio.get_running_loop()
+        except RuntimeError:
+            loop = asyncio.new_event_loop()
+            asyncio.set_event_loop(loop)
+            loop.run_until_complete(self.set_version_async(batch, query_api, object_type))
+            asyncio.set_event_loop(None)  # Reset the event loop
+        else:
+            loop.run_until_complete(self.set_version_async(batch, query_api, object_type))
+
+    async def set_version_async(self, batch, query_api, object_type) -> None:
+        """
+        Fetches the current version of the records in the batch if the record exists in FOLIO
+
+        Args:
+            batch (list): List of records to fetch versions for
+            query_api (str): The query API endpoint to use
+            object_type (str): The key in the API response that contains the records
+
+        Returns:
+            None
+        """
+        fetch_batch_size = 90
+        fetch_tasks = []
+        existing_records = {}
+        async with httpx.AsyncClient(base_url=self.folio_client.gateway_url) as client:
+            for i in range(0, len(batch), fetch_batch_size):
+                batch_slice = batch[i:i + fetch_batch_size]
+                fetch_tasks.append(
+                    self.get_with_retry(
+                        client,
+                        query_api,
+                        params={
+                            "query": (
+                                "id==("
+                                f"{' OR '.join([r['id'] for r in batch_slice if 'id' in r])})"
+                            ),
+                            "limit": fetch_batch_size
+                        },
+                    )
+                )
+
+            responses = await asyncio.gather(*fetch_tasks)
+
+            for response in responses:
+                self.collect_existing_records_for_upsert(object_type, response, existing_records)
+        for record in batch:
+            if record["id"] in existing_records:
+                self.prepare_record_for_upsert(record, existing_records[record["id"]])
+
+    def patch_record(self, new_record: dict, existing_record: dict, patch_paths: List[str]):
+        """
+        Updates new_record with values from existing_record according to patch_paths.
+
+        Args:
+            new_record (dict): The new record to be updated.
+            existing_record (dict): The existing record to patch from.
+            patch_paths (List[str]): List of fields in JSON Path notation (e.g., ['statisticalCodeIds', 'administrativeNotes', 'instanceStatusId']) to patch during the upsert process. If empty, all fields will be patched.
+        """
+        updates = {}
+        updates.update(existing_record)
+        keep_existing = {}
+        self.handle_upsert_for_administrative_notes(updates, keep_existing)
+        self.handle_upsert_for_statistical_codes(updates, keep_existing)
+        if not patch_paths:
+            keep_new = new_record
+        else:
+            keep_new = extract_paths(new_record, patch_paths)
+            if "instanceStatusId" in new_record:
+                updates["instanceStatusId"] = new_record["instanceStatusId"]
+        deep_update(updates, keep_new)
+        for key, value in keep_existing.items():
+            if isinstance(value, list) and key in keep_new:
+                updates[key] = list(dict.fromkeys(updates.get(key, []) + value))
+            elif key not in keep_new:
+                updates[key] = value
+        new_record.clear()
+        new_record.update(updates)
+
+    @staticmethod
+    def collect_existing_records_for_upsert(object_type: str, response: httpx.Response, existing_records: dict):
+        if response.status_code == 200:
+            response_json = response.json()
+            for record in response_json[object_type]:
+                existing_records[record["id"]] = record
+        else:
+            logging.error(
+                "Failed to fetch current records. HTTP %s\t%s",
+                response.status_code,
+                response.text,
+            )
+
+    def handle_upsert_for_statistical_codes(self, updates: dict, keep_existing: dict):
+        if not self.task_configuration.preserve_statistical_codes:
+            updates["statisticalCodeIds"] = []
+            keep_existing["statisticalCodeIds"] = []
+        else:
+            keep_existing["statisticalCodeIds"] = updates.pop("statisticalCodeIds", [])
+            updates["statisticalCodeIds"] = []
+
+    def handle_upsert_for_administrative_notes(self, updates: dict, keep_existing: dict):
+        if not self.task_configuration.preserve_administrative_notes:
+            updates["administrativeNotes"] = []
+            keep_existing["administrativeNotes"] = []
+        else:
+            keep_existing["administrativeNotes"] = updates.pop("administrativeNotes", [])
+            updates["administrativeNotes"] = []
+
+    def handle_upsert_for_temporary_locations(self, updates: dict, keep_existing: dict):
+        if self.task_configuration.preserve_temporary_locations:
+            keep_existing["temporaryLocationId"] = updates.pop("temporaryLocationId", None)
+
+    def handle_upsert_for_temporary_loan_types(self, updates: dict, keep_existing: dict):
+        if self.task_configuration.preserve_temporary_loan_types:
+            keep_existing["temporaryLoanTypeId"] = updates.pop("temporaryLoanTypeId", None)
+
+    def keep_existing_fields(self, updates: dict, existing_record: dict):
+        keep_existing_fields = ["hrid", "lastCheckIn"]
+        if self.task_configuration.preserve_item_status:
+            keep_existing_fields.append("status")
+        for key in keep_existing_fields:
+            if key in existing_record:
+                updates[key] = existing_record[key]
+
+    def prepare_record_for_upsert(self, new_record: dict, existing_record: dict):
+        if "source" in existing_record and "MARC" in existing_record["source"]:
+            if self.task_configuration.patch_paths:
+                logging.debug(
+                    "Record %s is a MARC record, patch_paths will be ignored",
+                    existing_record["id"],
+                )
+            self.patch_record(new_record, existing_record, ["statisticalCodeIds", "administrativeNotes", "instanceStatusId"])
+        elif self.task_configuration.patch_existing_records:
+            self.patch_record(new_record, existing_record, self.task_configuration.patch_paths)
+        else:
+            updates = {
+                "_version": existing_record["_version"],
+            }
+            self.keep_existing_fields(updates, existing_record)
+            keep_new = {k: v for k, v in new_record.items() if k in ["statisticalCodeIds", "administrativeNotes"]}
+            keep_existing = {}
+            self.handle_upsert_for_statistical_codes(existing_record, keep_existing)
+            self.handle_upsert_for_administrative_notes(existing_record, keep_existing)
+            self.handle_upsert_for_temporary_locations(existing_record, keep_existing)
+            self.handle_upsert_for_temporary_loan_types(existing_record, keep_existing)
+            for k, v in keep_existing.items():
+                if isinstance(v, list) and k in keep_new:
+                    keep_new[k] = list(dict.fromkeys(v + keep_new.get(k, [])))
+                elif k not in keep_new:
+                    keep_new[k] = v
+            updates.update(keep_new)
+            new_record.update(updates)
+
+    async def get_with_retry(self, client: httpx.AsyncClient, url: str, params=None):
+        if params is None:
+            params = {}
+        retries = 3
+        for attempt in range(retries):
+            try:
+                response = await client.get(
+                    url, params=params, headers=self.folio_client.okapi_headers)
+                response.raise_for_status()
+                return response
+            except httpx.HTTPError as e:
+                if attempt < retries - 1:
+                    logging.warning(f"Retrying due to {e}")
+                    await asyncio.sleep(2 ** attempt)
+                else:
+                    logging.error(f"Failed to connect after {retries} attempts: {e}")
+                    raise
+
+    def post_record_batch(self, batch, failed_recs_file, row):
+        json_rec = json.loads(row.split("\t")[-1])
+        if self.task_configuration.object_type == "ShadowInstances":
+            self.set_consortium_source(json_rec)
+        if self.task_configuration.object_type == "SRS":
+            json_rec["snapshotId"] = self.snapshot_id
+        if self.processed == 1:
+            logging.info(json.dumps(json_rec, indent=True))
+        batch.append(json_rec)
+        if len(batch) == int(self.batch_size):
+            self.post_batch(batch, failed_recs_file, self.processed)
+            batch = []
+        return batch
 
     def post_extra_data(self, row: str, num_records: int, failed_recs_file):
         (object_name, data) = row.split("\t")
-        endpoint = get_extradata_endpoint(object_name)
-        url = f"{self.folio_client.
+        endpoint = self.get_extradata_endpoint(self.task_configuration, object_name, data)
+        url = f"{self.folio_client.gateway_url}/{endpoint}"
         body = data
         response = self.post_objects(url, body)
         if response.status_code == 201:
@@ -142,19 +528,66 @@ class BatchPoster(MigrationTaskBase):
         elif response.status_code == 422:
             self.num_failures += 1
             error_msg = json.loads(response.text)["errors"][0]["message"]
-            logging.error(
-
-            )
-            if (
-                "id value already exists"
-                not in json.loads(response.text)["errors"][0]["message"]
-            ):
+            logging.error("Row %s\tHTTP %s\t %s", num_records, response.status_code, error_msg)
+            if "id value already exists" not in json.loads(response.text)["errors"][0]["message"]:
                 failed_recs_file.write(row)
         else:
             self.num_failures += 1
-            logging.error(
-
+            logging.error("Row %s\tHTTP %s\t%s", num_records, response.status_code, response.text)
+            failed_recs_file.write(row)
+        if num_records % 50 == 0:
+            logging.info(
+                "%s records posted successfully. %s failed",
+                self.num_posted,
+                self.num_failures,
             )
+
+    @staticmethod
+    def get_extradata_endpoint(
+        task_configuration: TaskConfiguration, object_name: str, string_object: str
+    ):
+        object_types = {
+            "precedingSucceedingTitles": "preceding-succeeding-titles",
+            "precedingTitles": "preceding-succeeding-titles",
+            "succeedingTitles": "preceding-succeeding-titles",
+            "boundwithPart": "inventory-storage/bound-with-parts",
+            "notes": "notes",
+            "course": "coursereserves/courses",
+            "courselisting": "coursereserves/courselistings",
+            "contacts": "organizations-storage/contacts",
+            "interfaces": "organizations-storage/interfaces",
+            "account": "accounts",
+            "feefineaction": "feefineactions",
+            "bankInfo": "organizations/banking-information",
+        }
+        object_types.update(task_configuration.extradata_endpoints)
+        if object_name == "instructor":
+            instructor = json.loads(string_object)
+            return f'coursereserves/courselistings/{instructor["courseListingId"]}/instructors'
+
+        if object_name == "interfaceCredential":
+            credential = json.loads(string_object)
+            return f'organizations-storage/interfaces/{credential["interfaceId"]}/credentials'
+
+        return object_types[object_name]
+
+    def post_single_records(self, row: str, num_records: int, failed_recs_file):
+        if self.api_info["is_batch"]:
+            raise TypeError("This record type supports batch processing, use post_batch method")
+        api_endpoint = self.api_info.get("api_endpoint")
+        url = f"{self.folio_client.gateway_url}{api_endpoint}"
+        response = self.post_objects(url, row)
+        if response.status_code == 201:
+            self.num_posted += 1
+        elif response.status_code == 422:
+            self.num_failures += 1
+            error_msg = json.loads(response.text)["errors"][0]["message"]
+            logging.error("Row %s\tHTTP %s\t %s", num_records, response.status_code, error_msg)
+            if "id value already exists" not in json.loads(response.text)["errors"][0]["message"]:
+                failed_recs_file.write(row)
+        else:
+            self.num_failures += 1
+            logging.error("Row %s\tHTTP %s\t%s", num_records, response.status_code, response.text)
             failed_recs_file.write(row)
         if num_records % 50 == 0:
             logging.info(
@@ -164,21 +597,23 @@ class BatchPoster(MigrationTaskBase):
             )
 
     def post_objects(self, url, body):
-
-
-
+        if self.http_client and not self.http_client.is_closed:
+            return self.http_client.post(
+                url, data=body.encode("utf-8"), headers=self.folio_client.okapi_headers
+            )
+        else:
+            return httpx.post(
+                url, headers=self.okapi_headers, data=body.encode("utf-8"), timeout=None
+            )
 
-    def handle_generic_exception(
-        self, exception, last_row, batch, num_records, failed_recs_file
-    ):
+    def handle_generic_exception(self, exception, last_row, batch, num_records, failed_recs_file):
         logging.error("%s", exception)
+        self.migration_report.add("Details", i18n.t("Generic exceptions (see log for details)"))
         # logging.error("Failed row: %s", last_row)
         self.failed_batches += 1
-        self.
+        self.num_failures += len(batch)
         write_failed_batch_to_file(batch, failed_recs_file)
-        logging.info(
-            "Resetting batch...Number of failed batches: %s", self.failed_batches
-        )
+        logging.info("Resetting batch...Number of failed batches: %s", self.failed_batches)
        batch = []
         if self.failed_batches > 50000:
             logging.error("Exceeded number of failed batches at row %s", num_records)
@@ -186,6 +621,7 @@ class BatchPoster(MigrationTaskBase):
             sys.exit(1)
 
     def handle_unicode_error(self, unicode_error, last_row):
+        self.migration_report.add("Details", i18n.t("Encoding errors"))
         logging.info("=========ERROR==============")
         logging.info(
             "%s Posting failed. Encoding error reading file",
@@ -193,15 +629,21 @@ class BatchPoster(MigrationTaskBase):
         )
         logging.info(
             "Failing row, either the one shown here or the next row in %s",
-            self.
+            self.task_configuration.file.file_name,
         )
         logging.info(last_row)
         logging.info("=========Stack trace==============")
-        traceback.logging.info_exc()
-        logging.info("=======================
+        traceback.logging.info_exc()  # type: ignore
+        logging.info("=======================")
 
     def post_batch(self, batch, failed_recs_file, num_records, recursion_depth=0):
+        if self.query_params.get("upsert", False) and self.api_info.get("query_endpoint", ""):
+            self.set_version(batch, self.api_info['query_endpoint'], self.api_info['object_name'])
         response = self.do_post(batch)
+        if response.status_code == 401:
+            logging.error("Authorization failed (%s). Fetching new auth token...", response.text)
+            self.folio_client.login()
+            response = self.do_post(batch)
         if response.status_code == 201:
             logging.info(
                 (
@@ -210,7 +652,7 @@ class BatchPoster(MigrationTaskBase):
                     "Batch Size: %s Request size: %s "
                 ),
                 num_records,
-                self.
+                self.num_failures,
                 response.elapsed.total_seconds(),
                 len(batch),
                 get_req_size(response),
@@ -219,7 +661,8 @@ class BatchPoster(MigrationTaskBase):
             json_report = json.loads(response.text)
             self.users_created += json_report.get("createdRecords", 0)
             self.users_updated += json_report.get("updatedRecords", 0)
-            self.
+            self.num_posted = self.users_updated + self.users_created
+            self.num_failures += json_report.get("failedRecords", 0)
             if json_report.get("failedRecords", 0) > 0:
                 logging.error(
                     "%s users in batch failed to load",
@@ -227,7 +670,7 @@ class BatchPoster(MigrationTaskBase):
                )
                 write_failed_batch_to_file(batch, failed_recs_file)
                 if json_report.get("failedUsers", []):
-                    logging.error("
+                    logging.error("Error message: %s", json_report.get("error", []))
                     for failed_user in json_report.get("failedUsers"):
                         logging.error(
                             "User failed. %s\t%s\t%s",
@@ -235,6 +678,7 @@ class BatchPoster(MigrationTaskBase):
                            failed_user.get("externalSystemId", ""),
                             failed_user.get("errorMessage", ""),
                         )
+                        self.migration_report.add("Details", failed_user.get("errorMessage", ""))
            logging.info(
                 (
                     "Posting successful! Total rows: %s Total failed: %s "
@@ -242,7 +686,7 @@ class BatchPoster(MigrationTaskBase):
                     "Message from server: %s"
                 ),
                 num_records,
-                self.
+                self.num_failures,
                 self.users_created,
                 self.users_updated,
                 response.elapsed.total_seconds(),
@@ -256,18 +700,16 @@ class BatchPoster(MigrationTaskBase):
                 "",
                 f"HTTP {response.status_code}\t"
                 f"Request size: {get_req_size(response)}"
-                f"{datetime.
+                f"{datetime.now(timezone.utc).isoformat()}\n",
                 json.dumps(resp, indent=4),
             )
         elif response.status_code == 400:
             # Likely a json parsing error
             logging.error(response.text)
-            raise TransformationProcessError(
-
-            )
-        elif self.task_config.object_type == "SRS" and response.status_code == 500:
+            raise TransformationProcessError("", "HTTP 400. Something is wrong. Quitting")
+        elif self.task_configuration.object_type == "SRS" and response.status_code >= 500:
             logging.info(
-                "Post failed. Size: %s Waiting
+                "Post failed. Size: %s Waiting 30s until reposting. Number of tries: %s of 5",
                 get_req_size(response),
                 recursion_depth,
             )
@@ -278,201 +720,390 @@ class BatchPoster(MigrationTaskBase):
                     "",
                     f"HTTP {response.status_code}\t"
                     f"Request size: {get_req_size(response)}"
-                    f"{datetime.
+                    f"{datetime.now(timezone.utc).isoformat()}\n",
                     response.text,
                 )
             else:
-                self.post_batch(
-
-
+                self.post_batch(batch, failed_recs_file, num_records, recursion_depth + 1)
+        elif (
+            response.status_code == 413 and "DB_ALLOW_SUPPRESS_OPTIMISTIC_LOCKING" in response.text
+        ):
+            logging.error(response.text)
+            raise TransformationProcessError("", response.text, "")
+
         else:
             try:
                 logging.info(response.text)
                 resp = json.dumps(response, indent=4)
-            except
-
+            except TypeError:
+                resp = response
+            except Exception as e:
+                logging.exception(f"something unexpected happened, {e}")
                 resp = response
             raise TransformationRecordFailedError(
                 "",
                 f"HTTP {response.status_code}\t"
                 f"Request size: {get_req_size(response)}"
-                f"{datetime.
+                f"{datetime.now(timezone.utc).isoformat()}\n",
                 resp,
             )
 
     def do_post(self, batch):
-
-
-
-
-
-        elif kind["total_records"]:
+        path = self.api_info["api_endpoint"]
+        url = self.folio_client.gateway_url + path
+        if self.api_info["object_name"] == "users":
+            payload = {self.api_info["object_name"]: list(batch), "totalRecords": len(batch)}
+        elif self.api_info["total_records"]:
             payload = {"records": list(batch), "totalRecords": len(batch)}
         else:
-            payload = {
-
-
-
+            payload = {self.api_info["object_name"]: batch}
+        if self.http_client and not self.http_client.is_closed:
+            return self.http_client.post(
+                url,
+                json=payload,
+                headers=self.folio_client.okapi_headers,
+                params=self.query_params
+            )
+        else:
+            return httpx.post(
+                url,
+                headers=self.okapi_headers,
+                json=payload,
+                params=self.query_params,
+                timeout=None)
+
+    def get_current_record_count_in_folio(self):
+        if "query_endpoint" in self.api_info:
+            url = f"{self.folio_client.gateway_url}{self.api_info['query_endpoint']}"
+            query_params = {"query": "cql.allRecords=1", "limit": 0}
+            if self.http_client and not self.http_client.is_closed:
+                res = self.http_client.get(
+                    url,
+                    headers=self.folio_client.okapi_headers,
+                    params=query_params
+                )
+            else:
+                res = httpx.get(url, headers=self.okapi_headers, params=query_params, timeout=None)
+            try:
+                res.raise_for_status()
+                return res.json()["totalRecords"]
+            except httpx.HTTPStatusError:
+                logging.error("Failed to get current record count. HTTP %s", res.status_code)
+                return 0
+            except KeyError:
+                logging.error(
+                    "Failed to get current record count. "
+                    f"No 'totalRecords' in response: {res.json()}"
+                )
+                return 0
+        else:
+            raise ValueError(
+                "No 'query_endpoint' available for %s. Cannot get current record count.",
+                self.task_configuration.object_type
+            )
+
+    def get_starting_record_count(self):
+        if "query_endpoint" in self.api_info and not self.starting_record_count_in_folio:
+            logging.info("Getting starting record count in FOLIO")
+            self.starting_record_count_in_folio = self.get_current_record_count_in_folio()
+        else:
+            logging.info(
+                "No query_endpoint available for %s. Cannot get starting record count.",
+                self.task_configuration.object_type
+            )
+
+    def get_finished_record_count(self):
+        if "query_endpoint" in self.api_info:
+            logging.info("Getting finished record count in FOLIO")
+            self.finished_record_count_in_folio = self.get_current_record_count_in_folio()
+        else:
+            logging.info(
+                "No query_endpoint available for %s. Cannot get ending record count.",
+                self.task_configuration.object_type
+            )
 
     def wrap_up(self):
         logging.info("Done. Wrapping up")
-
+        self.extradata_writer.flush()
+        if self.task_configuration.object_type == "SRS":
             self.commit_snapshot()
-        if self.
+        if self.task_configuration.object_type != "Extradata":
             logging.info(
                 (
                     "Failed records: %s failed records in %s "
                     "failed batches. Failed records saved to %s"
                 ),
-                self.
+                self.num_failures,
                 self.failed_batches,
                 self.folder_structure.failed_recs_path,
             )
-
         else:
-            logging.info(
-
+            logging.info("Done posting %s records. %s failed", self.num_posted, self.num_failures)
+        if self.starting_record_count_in_folio:
+            self.get_finished_record_count()
+            total_on_server = (
+                self.finished_record_count_in_folio - self.starting_record_count_in_folio
+            )
+            discrepancy = self.processed - self.num_failures - total_on_server
+            if discrepancy != 0:
+                logging.error(
+                    (
+                        "Discrepancy in record count. "
+                        "Starting record count: %s. Finished record count: %s. "
+                        "Records posted: %s. Discrepancy: %s"
+                    ),
+                    self.starting_record_count_in_folio,
+                    self.finished_record_count_in_folio,
+                    self.num_posted - self.num_failures,
+                    discrepancy,
+                )
+        else:
+            discrepancy = 0
+        run = "second time" if self.performing_rerun else "first time"
+        self.migration_report.set("GeneralStatistics", f"Records processed {run}", self.processed)
+        self.migration_report.set("GeneralStatistics", f"Records posted {run}", self.num_posted)
+        self.migration_report.set("GeneralStatistics", f"Failed to post {run}", self.num_failures)
+        if discrepancy:
+            self.migration_report.set(
+                "GeneralStatistics",
+                f"Discrepancy in record count {run}",
+                discrepancy,
             )
-
         self.rerun_run()
+        with open(self.folder_structure.migration_reports_file, "w+") as report_file:
+            self.migration_report.write_migration_report(
+                f"{self.task_configuration.object_type} loading report",
+                report_file,
+                self.start_datetime,
+            )
+        self.clean_out_empty_logs()
 
     def rerun_run(self):
-        if self.
-            self.failed_records > 0 or self.num_failures > 0
-        ):
+        if self.task_configuration.rerun_failed_records and (self.num_failures > 0):
             logging.info(
-                "Rerunning the failed records from the load with a batchsize of 1"
+                "Rerunning the %s failed records from the load with a batchsize of 1",
+                self.num_failures,
             )
             try:
-                self.
-                self.
-                    FileDefinition(
-                        file_name=str(self.folder_structure.failed_recs_path.name)
-                    )
+                self.task_configuration.batch_size = 1
+                self.task_configuration.files = [
+                    FileDefinition(file_name=str(self.folder_structure.failed_recs_path.name))
                 ]
-
-
+                temp_report = copy.deepcopy(self.migration_report)
+                temp_start = self.start_datetime
+                self.task_configuration.rerun_failed_records = False
+                self.__init__(
+                    self.task_configuration,
+                    self.library_configuration,
+                    self.folio_client)
+                self.performing_rerun = True
+                self.migration_report = temp_report
+                self.start_datetime = temp_start
                 self.do_work()
                 self.wrap_up()
                 logging.info("Done rerunning the posting")
             except Exception as ee:
-                logging.exception("
+                logging.exception("Occurred during rerun")
                 raise TransformationProcessError("Error during rerun") from ee
+        elif not self.task_configuration.rerun_failed_records and (self.num_failures > 0):
+            logging.info(
+                (
+                    "Task configured to not rerun failed records. "
+                    " File with failed records is located at %s"
+                ),
+                str(self.folder_structure.failed_recs_path),
+            )
 
     def create_snapshot(self):
         snapshot = {
             "jobExecutionId": self.snapshot_id,
             "status": "PARSING_IN_PROGRESS",
-            "processingStartedDate": datetime.
-                timespec="milliseconds"
-            ),
+            "processingStartedDate": datetime.now(timezone.utc).isoformat(timespec="milliseconds"),
         }
         try:
-            url = f"{self.folio_client.
-
-
-
+            url = f"{self.folio_client.gateway_url}/source-storage/snapshots"
+            if self.http_client and not self.http_client.is_closed:
+                res = self.http_client.post(
+                    url, json=snapshot, headers=self.folio_client.okapi_headers
+                )
+            else:
+                res = httpx.post(url, headers=self.okapi_headers, json=snapshot, timeout=None)
            res.raise_for_status()
            logging.info("Posted Snapshot to FOLIO: %s", json.dumps(snapshot, indent=4))
-            get_url =
-
-
+            get_url = (
+                f"{self.folio_client.gateway_url}/source-storage/snapshots/{self.snapshot_id}"
+            )
+            got = False
+            while not got:
                logging.info("Sleeping while waiting for the snapshot to get created")
                time.sleep(5)
-
+                if self.http_client and not self.http_client.is_closed:
+                    res = self.http_client.get(get_url, headers=self.folio_client.okapi_headers)
+                else:
+                    res = httpx.get(get_url, headers=self.okapi_headers, timeout=None)
                if res.status_code == 200:
-
+                    got = True
                else:
                    logging.info(res.status_code)
-        except
-            logging.exception("
+        except httpx.HTTPStatusError as exc:
+            logging.exception("HTTP error occurred while posting the snapshot: %s", exc)
+            sys.exit(1)
+        except Exception as exc:
+            logging.exception("Could not post the snapshot: %s", exc)
            sys.exit(1)

    def commit_snapshot(self):
        snapshot = {"jobExecutionId": self.snapshot_id, "status": "COMMITTED"}
        try:
-            url = f"{self.folio_client.
-
-
-
+            url = f"{self.folio_client.gateway_url}/source-storage/snapshots/{self.snapshot_id}"
+            if self.http_client and not self.http_client.is_closed:
+                res = self.http_client.put(
+                    url, json=snapshot, headers=self.folio_client.okapi_headers
+                )
+            else:
+                res = httpx.put(url, headers=self.okapi_headers, json=snapshot, timeout=None)
            res.raise_for_status()
-            logging.info(
-
-            )
-
+            logging.info("Posted Committed snapshot to FOLIO: %s", json.dumps(snapshot, indent=4))
+        except httpx.HTTPStatusError as exc:
+            logging.exception("HTTP error occurred while posting the snapshot: %s", exc)
+            sys.exit(1)
+        except Exception as exc:
            logging.exception(
-                "Could not commit snapshot with id %s. Post
+                "Could not commit snapshot with id %s. Post this to /source-storage/snapshots/%s:",
                self.snapshot_id,
                self.snapshot_id,
+                exc,
            )
            logging.info("%s", json.dumps(snapshot, indent=4))
            sys.exit(1)


-def
+def get_api_info(object_type: str, use_safe: bool = True):
    choices = {
        "Extradata": {
            "object_name": "",
            "api_endpoint": "",
            "total_records": False,
            "addSnapshotId": False,
+            "supports_upsert": False,
        },
        "Items": {
            "object_name": "items",
-            "api_endpoint":
+            "api_endpoint": (
+                "/item-storage/batch/synchronous"
+                if use_safe
+                else "/item-storage/batch/synchronous-unsafe"
+            ),
+            "query_endpoint": "/item-storage/items",
+            "is_batch": True,
            "total_records": False,
            "addSnapshotId": False,
+            "supports_upsert": True,
        },
        "Holdings": {
            "object_name": "holdingsRecords",
-            "api_endpoint":
+            "api_endpoint": (
+                "/holdings-storage/batch/synchronous"
+                if use_safe
+                else "/holdings-storage/batch/synchronous-unsafe"
+            ),
+            "query_endpoint": "/holdings-storage/holdings",
+            "is_batch": True,
            "total_records": False,
            "addSnapshotId": False,
+            "supports_upsert": True,
        },
        "Instances": {
            "object_name": "instances",
-            "api_endpoint":
+            "api_endpoint": (
+                "/instance-storage/batch/synchronous"
+                if use_safe
+                else "/instance-storage/batch/synchronous-unsafe"
+            ),
+            "query_endpoint": "/instance-storage/instances",
+            "is_batch": True,
+            "total_records": False,
+            "addSnapshotId": False,
+            "supports_upsert": True,
+        },
+        "ShadowInstances": {
+            "object_name": "instances",
+            "api_endpoint": (
+                "/instance-storage/batch/synchronous"
+                if use_safe
+                else "/instance-storage/batch/synchronous-unsafe"
+            ),
+            "is_batch": True,
+            "total_records": False,
+            "addSnapshotId": False,
+            "supports_upsert": True,
+        },
+        "Authorities": {
+            "object_name": "",
+            "api_endpoint": "/authority-storage/authorities",
+            "is_batch": False,
            "total_records": False,
            "addSnapshotId": False,
+            "supports_upsert": False,
        },
        "SRS": {
            "object_name": "records",
            "api_endpoint": "/source-storage/batch/records",
+            "is_batch": True,
            "total_records": True,
            "addSnapshotId": True,
+            "supports_upsert": False,
        },
        "Users": {
            "object_name": "users",
            "api_endpoint": "/user-import",
+            "is_batch": True,
            "total_records": True,
            "addSnapshotId": False,
+            "supports_upsert": False,
+        },
+        "Organizations": {
+            "object_name": "",
+            "api_endpoint": "/organizations/organizations",
+            "is_batch": False,
+            "total_records": False,
+            "addSnapshotId": False,
+            "supports_upsert": False,
+        },
+        "Orders": {
+            "object_name": "",
+            "api_endpoint": "/orders/composite-orders",
+            "is_batch": False,
+            "total_records": False,
+            "addSnapshotId": False,
+            "supports_upsert": False,
        },
    }

    try:
        return choices[object_type]
    except KeyError:
-        key_string = ",".join(choices.keys())
-        logging.error(
+        key_string = ", ".join(choices.keys())
+        logging.error(
+            f"Wrong type. Only one of {key_string} are allowed, "
+            f"received {object_type=} instead"
+        )
        logging.error("Halting")
        sys.exit(1)


def chunks(records, number_of_chunks):
-    """Yield successive n-sized chunks from lst.
-    for i in range(0, len(records), number_of_chunks):
-        yield records[i : i + number_of_chunks]
+    """Yield successive n-sized chunks from lst.

+    Args:
+        records (_type_): _description_
+        number_of_chunks (_type_): _description_

-
-
-
-
-
-        "boundwithPart": "inventory-storage/bound-with-parts",
-        "notes": "notes",
-    }
-    return object_types[object_name]
+    Yields:
+        _type_: _description_
+    """
+    for i in range(0, len(records), number_of_chunks):
+        yield records[i: i + number_of_chunks]


def get_human_readable(size, precision=2):
@@ -484,9 +1115,91 @@ def get_human_readable(size, precision=2):
    return "%.*f%s" % (precision, size, suffixes[suffix_index])


-def get_req_size(response):
+def get_req_size(response: httpx.Response):
    size = response.request.method
-    size += response.request.url
+    size += str(response.request.url)
    size += "\r\n".join(f"{k}{v}" for k, v in response.request.headers.items())
-    size += response.request.
+    size += response.request.content.decode("utf-8") or ""
    return get_human_readable(len(size.encode("utf-8")))
+
+def parse_path(path):
+    """
+    Parses a path like 'foo.bar[0].baz' into ['foo', 'bar', 0, 'baz']
+    """
+    tokens = []
+    # Split by dot, then extract indices
+    for part in path.split('.'):
+        # Find all [index] parts
+        matches = re.findall(r'([^\[\]]+)|\[(\d+)\]', part)
+        for name, idx in matches:
+            if name:
+                tokens.append(name)
+            if idx:
+                tokens.append(int(idx))
+    return tokens
+
+def get_by_path(data, path):
+    keys = parse_path(path)
+    for key in keys:
+        data = data[key]
+    return data
+
+def set_by_path(data, path, value):
+    keys = parse_path(path)
+    for i, key in enumerate(keys[:-1]):
+        next_key = keys[i + 1]
+        if isinstance(key, int):
+            while len(data) <= key:
+                data.append({} if not isinstance(next_key, int) else [])
+            data = data[key]
+        else:
+            if key not in data or not isinstance(data[key], (dict, list)):
+                data[key] = {} if not isinstance(next_key, int) else []
+            data = data[key]
+    last_key = keys[-1]
+    if isinstance(last_key, int):
+        while len(data) <= last_key:
+            data.append(None)
+        data[last_key] = value
+    else:
+        data[last_key] = value
+
+def extract_paths(data, paths):
+    result = {}
+    for path in paths:
+        try:
+            value = get_by_path(data, path)
+            set_by_path(result, path, value)
+        except KeyError:
+            continue
+    return result
+
+def deep_update(target, patch):
+    """
+    Recursively update target dict/list with values from patch dict/list.
+    For lists, only non-None values in patch are merged into target.
+    """
+    if isinstance(patch, dict):
+        for k, v in patch.items():
+            if (
+                k in target
+                and isinstance(target[k], (dict, list))
+                and isinstance(v, (dict, list))
+            ):
+                deep_update(target[k], v)
+            else:
+                target[k] = v
+    elif isinstance(patch, list):
+        for i, v in enumerate(patch):
+            if v is None:
+                continue  # Skip None values, leave target unchanged
+            if i < len(target):
+                if isinstance(target[i], (dict, list)) and isinstance(v, (dict, list)):
+                    deep_update(target[i], v)
+                else:
+                    target[i] = v
+            else:
+                # Only append if not None
+                target.append(v)
+    else:
+        return patch