folio-migration-tools 1.9.10__py3-none-any.whl → 1.10.0__py3-none-any.whl
This diff shows the content of publicly released package versions as they appear in their public registries. It is provided for informational purposes only and reflects the changes between the two versions listed above.
- folio_migration_tools/__init__.py +3 -4
- folio_migration_tools/__main__.py +53 -31
- folio_migration_tools/circulation_helper.py +118 -108
- folio_migration_tools/custom_dict.py +2 -2
- folio_migration_tools/custom_exceptions.py +4 -5
- folio_migration_tools/folder_structure.py +17 -7
- folio_migration_tools/helper.py +8 -7
- folio_migration_tools/holdings_helper.py +4 -3
- folio_migration_tools/i18n_cache.py +79 -0
- folio_migration_tools/library_configuration.py +77 -37
- folio_migration_tools/mapper_base.py +45 -31
- folio_migration_tools/mapping_file_transformation/courses_mapper.py +1 -1
- folio_migration_tools/mapping_file_transformation/holdings_mapper.py +7 -3
- folio_migration_tools/mapping_file_transformation/item_mapper.py +13 -26
- folio_migration_tools/mapping_file_transformation/manual_fee_fines_mapper.py +1 -2
- folio_migration_tools/mapping_file_transformation/mapping_file_mapper_base.py +13 -11
- folio_migration_tools/mapping_file_transformation/order_mapper.py +6 -5
- folio_migration_tools/mapping_file_transformation/organization_mapper.py +3 -3
- folio_migration_tools/mapping_file_transformation/user_mapper.py +47 -28
- folio_migration_tools/marc_rules_transformation/conditions.py +82 -97
- folio_migration_tools/marc_rules_transformation/holdings_statementsparser.py +13 -5
- folio_migration_tools/marc_rules_transformation/hrid_handler.py +3 -2
- folio_migration_tools/marc_rules_transformation/marc_file_processor.py +26 -24
- folio_migration_tools/marc_rules_transformation/rules_mapper_base.py +56 -51
- folio_migration_tools/marc_rules_transformation/rules_mapper_bibs.py +28 -17
- folio_migration_tools/marc_rules_transformation/rules_mapper_holdings.py +68 -37
- folio_migration_tools/migration_report.py +18 -7
- folio_migration_tools/migration_tasks/batch_poster.py +285 -354
- folio_migration_tools/migration_tasks/bibs_transformer.py +14 -9
- folio_migration_tools/migration_tasks/courses_migrator.py +2 -3
- folio_migration_tools/migration_tasks/holdings_csv_transformer.py +23 -24
- folio_migration_tools/migration_tasks/holdings_marc_transformer.py +14 -24
- folio_migration_tools/migration_tasks/items_transformer.py +23 -34
- folio_migration_tools/migration_tasks/loans_migrator.py +67 -144
- folio_migration_tools/migration_tasks/manual_fee_fines_transformer.py +3 -3
- folio_migration_tools/migration_tasks/migration_task_base.py +47 -60
- folio_migration_tools/migration_tasks/orders_transformer.py +25 -42
- folio_migration_tools/migration_tasks/organization_transformer.py +9 -18
- folio_migration_tools/migration_tasks/requests_migrator.py +21 -24
- folio_migration_tools/migration_tasks/reserves_migrator.py +6 -5
- folio_migration_tools/migration_tasks/user_transformer.py +25 -20
- folio_migration_tools/task_configuration.py +6 -7
- folio_migration_tools/transaction_migration/legacy_loan.py +15 -27
- folio_migration_tools/transaction_migration/legacy_request.py +1 -1
- folio_migration_tools/translations/en.json +0 -7
- {folio_migration_tools-1.9.10.dist-info → folio_migration_tools-1.10.0.dist-info}/METADATA +19 -28
- folio_migration_tools-1.10.0.dist-info/RECORD +63 -0
- folio_migration_tools-1.10.0.dist-info/WHEEL +4 -0
- folio_migration_tools-1.10.0.dist-info/entry_points.txt +3 -0
- folio_migration_tools/marc_rules_transformation/rules_mapper_authorities.py +0 -241
- folio_migration_tools/migration_tasks/authority_transformer.py +0 -119
- folio_migration_tools/test_infrastructure/__init__.py +0 -0
- folio_migration_tools/test_infrastructure/mocked_classes.py +0 -406
- folio_migration_tools-1.9.10.dist-info/RECORD +0 -67
- folio_migration_tools-1.9.10.dist-info/WHEEL +0 -4
- folio_migration_tools-1.9.10.dist-info/entry_points.txt +0 -3
- folio_migration_tools-1.9.10.dist-info/licenses/LICENSE +0 -21
The diff below covers folio_migration_tools/migration_tasks/batch_poster.py (+285 -354), the largest change in this release. Removed lines that the source rendering truncates appear as the surviving fragment or are noted in brackets.

```diff
@@ -4,14 +4,15 @@ import json
 import logging
 import re
 import sys
-import time
 import traceback
 from datetime import datetime, timezone
-from typing import Annotated, List, Optional
+from typing import TYPE_CHECKING, Annotated, List, Optional
 from uuid import uuid4

-import
-
+import folioclient
+
+if TYPE_CHECKING:
+    from httpx import Response
 from folio_uuid.folio_namespaces import FOLIONamespaces
 from pydantic import Field

```
```diff
@@ -19,6 +20,7 @@ from folio_migration_tools.custom_exceptions import (
     TransformationProcessError,
     TransformationRecordFailedError,
 )
+from folio_migration_tools.i18n_cache import i18n_t
 from folio_migration_tools.library_configuration import (
     FileDefinition,
     LibraryConfiguration,
```
```diff
@@ -73,7 +75,7 @@ class BatchPoster(MigrationTaskBase):
             description=(
                 "The type of object being migrated"
                 "Examples of possible values: "
-                "'Extradata', '
+                "'Extradata', 'Instances', 'Holdings', 'Items'"
             ),
         ),
     ]
```
```diff
@@ -182,19 +184,27 @@ class BatchPoster(MigrationTaskBase):
             ),
         ),
     ] = True
-    patch_existing_records: Annotated[
-
-
-        "
-
+    patch_existing_records: Annotated[
+        bool,
+        Field(
+            title="Patch existing records",
+            description=(
+                "Toggles whether or not to patch existing records "
+                "during the upsert process. Defaults to False"
+            ),
         ),
-
-    patch_paths: Annotated[
-
-
-        "
+    ] = False
+    patch_paths: Annotated[
+        List[str],
+        Field(
+            title="Patch paths",
+            description=(
+                "A list of fields in JSON Path notation to patch during the upsert process "
+                "(leave off the $). If empty, all fields will be patched. Examples: "
+                "['statisticalCodeIds', 'administrativeNotes', 'instanceStatusId']"
+            ),
         ),
-
+    ] = []

     task_configuration: TaskConfiguration

```
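For orientation, a minimal sketch of how these new options could be set on a BatchPoster task. Only the field names (`object_type`, `batch_size`, `files`, `upsert`, `patch_existing_records`, `patch_paths`) come from the code above; the surrounding values and overall configuration shape are illustrative assumptions.

```python
# Hypothetical BatchPoster task configuration fragment (field names from the diff;
# values and overall shape are assumptions for illustration only).
task_configuration = {
    "object_type": "Instances",
    "batch_size": 250,
    "files": [{"file_name": "folio_instances.json"}],
    "upsert": True,
    # New in 1.10.0:
    "patch_existing_records": True,  # defaults to False
    "patch_paths": ["statisticalCodeIds", "administrativeNotes", "instanceStatusId"],
}
```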
```diff
@@ -223,7 +233,8 @@ class BatchPoster(MigrationTaskBase):
             self.query_params["upsert"] = self.task_configuration.upsert
         elif self.task_configuration.upsert and not self.api_info["supports_upsert"]:
             logging.info(
-                "Upsert is not supported for this object type. Query parameter will not be set."
+                "Upsert is not supported for this object type. Query parameter will not be set."
+            )
         self.snapshot_id = str(uuid4())
         self.failed_objects: list = []
         self.batch_size = self.task_configuration.batch_size
```
```diff
@@ -236,97 +247,87 @@ class BatchPoster(MigrationTaskBase):
         self.failed_fields: set = set()
         self.num_failures = 0
         self.num_posted = 0
-        self.okapi_headers = self.folio_client.okapi_headers
-        self.http_client = None
         self.starting_record_count_in_folio: Optional[int] = None
         self.finished_record_count_in_folio: Optional[int] = None

-    def do_work(self):
-        with
-        self.
- [18 removed lines (old 247-264) not rendered in the source diff]
-                            self.post_extra_data(
-                                row, self.processed, failed_recs_file
-                            )
-                        elif not self.api_info["is_batch"]:
-                            self.post_single_records(
-                                row, self.processed, failed_recs_file
-                            )
-                        else:
-                            batch = self.post_record_batch(
-                                batch, failed_recs_file, row
-                            )
-                    except UnicodeDecodeError as unicode_error:
-                        self.handle_unicode_error(unicode_error, last_row)
-                    except TransformationProcessError as tpe:
-                        self.handle_generic_exception(
-                            tpe,
-                            last_row,
-                            batch,
-                            self.processed,
-                            failed_recs_file,
+    def do_work(self):  # noqa: C901
+        with open(
+            self.folder_structure.failed_recs_path, "w", encoding="utf-8"
+        ) as failed_recs_file:
+            self.get_starting_record_count()
+            try:
+                batch = []
+                for idx, file_def in enumerate(self.task_configuration.files):  # noqa: B007
+                    path = self.folder_structure.results_folder / file_def.file_name
+                    with open(path) as rows:
+                        logging.info("Running %s", path)
+                        last_row = ""
+                        for self.processed, row in enumerate(rows, start=1):
+                            last_row = row
+                            if row.strip():
+                                try:
+                                    if self.task_configuration.object_type == "Extradata":
+                                        self.post_extra_data(row, self.processed, failed_recs_file)
+                                    elif not self.api_info["is_batch"]:
+                                        self.post_single_records(
+                                            row, self.processed, failed_recs_file
                                         )
-
-
-
-                        self.handle_generic_exception(
-                            exception,
-                            last_row,
-                            batch,
-                            self.processed,
-                            failed_recs_file,
+                                    else:
+                                        batch = self.post_record_batch(
+                                            batch, failed_recs_file, row
                                         )
- [18 removed lines (old 296-313) not rendered in the source diff]
+                                except UnicodeDecodeError as unicode_error:
+                                    self.handle_unicode_error(unicode_error, last_row)
+                                except TransformationProcessError as tpe:
+                                    self.handle_generic_exception(
+                                        tpe,
+                                        last_row,
+                                        batch,
+                                        self.processed,
+                                        failed_recs_file,
+                                    )
+                                    batch = []
+                                    raise
+                                except TransformationRecordFailedError as exception:
+                                    self.handle_generic_exception(
+                                        exception,
+                                        last_row,
+                                        batch,
+                                        self.processed,
+                                        failed_recs_file,
+                                    )
+                                    batch = []
+            except (FileNotFoundError, PermissionError) as ose:
+                logging.error("Error reading file: %s", ose)
+
+            except Exception as ee:
+                if "idx" in locals() and self.task_configuration.files[idx:]:
+                    for file_def in self.task_configuration.files[idx:]:
+                        path = self.folder_structure.results_folder / file_def.file_name
                     try:
- [8 removed lines (old 315-322) not rendered in the source diff]
+                            with open(path, "r") as failed_file:
+                                failed_file.seek(self.processed)
+                                failed_recs_file.write(failed_file.read())
+                                self.processed = 0
+                        except (FileNotFoundError, PermissionError) as ose:
+                            logging.error("Error reading file: %s", ose)
+                raise ee
+            finally:
+                if self.task_configuration.object_type != "Extradata" and any(batch):
+                    try:
+                        self.post_batch(batch, failed_recs_file, self.processed)
+                    except Exception as exception:
+                        self.handle_generic_exception(
+                            exception, last_row, batch, self.processed, failed_recs_file
+                        )
+        logging.info("Done posting %s records. ", self.processed)

     @staticmethod
     def set_consortium_source(json_rec):
-        if json_rec[
-            json_rec[
-        elif json_rec[
-            json_rec[
+        if json_rec["source"] == "MARC":
+            json_rec["source"] = "CONSORTIUM-MARC"
+        elif json_rec["source"] == "FOLIO":
+            json_rec["source"] = "CONSORTIUM-FOLIO"

     def set_version(self, batch, query_api, object_type) -> None:
         """
```
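The reworked `do_work` streams each transformation result file and dispatches every non-empty row to one of three posting paths (Extradata, single record, or batch). A small sketch of the two row shapes it expects, derived from `post_record_batch` and `post_extra_data` shown further down; the sample values are invented:

```python
import json

# Batch/single-record path: the JSON record is the last tab-separated field on the line.
batch_row = '12345\t{"id": "7a6c2b9e", "title": "Example"}'
record = json.loads(batch_row.split("\t")[-1])

# Extradata path: "<object name>\t<JSON payload>", resolved via get_extradata_endpoint().
extradata_row = 'instructor\t{"courseListingId": "abc", "name": "Doe, J."}'
object_name, data = extradata_row.split("\t")
```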
```diff
@@ -357,27 +358,26 @@ class BatchPoster(MigrationTaskBase):
         fetch_batch_size = 90
         fetch_tasks = []
         existing_records = {}
-
- [7 removed lines (old 361-367) not rendered in the source diff]
-                        "
-
-
-
-                    "limit": fetch_batch_size
-                },
-            )
+
+        for i in range(0, len(batch), fetch_batch_size):
+            batch_slice = batch[i : i + fetch_batch_size]
+            fetch_tasks.append(
+                self.get_with_retry(
+                    query_api,
+                    params={
+                        "query": (
+                            f"id==({' OR '.join([r['id'] for r in batch_slice if 'id' in r])})"
+                        ),
+                        "limit": fetch_batch_size,
+                    },
                 )
+            )
+
+        responses = await asyncio.gather(*fetch_tasks)

-
+        for response in responses:
+            self.collect_existing_records_for_upsert(object_type, response, existing_records)

-        for response in responses:
-            self.collect_existing_records_for_upsert(object_type, response, existing_records)
         for record in batch:
             if record["id"] in existing_records:
                 self.prepare_record_for_upsert(record, existing_records[record["id"]])
```
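The slicing above keeps each CQL lookup to at most `fetch_batch_size` (90) ids per request. A standalone sketch of the same query construction:

```python
def build_id_queries(batch, fetch_batch_size=90):
    """Sketch of the slicing used in set_version above: one CQL id query per slice."""
    queries = []
    for i in range(0, len(batch), fetch_batch_size):
        batch_slice = batch[i : i + fetch_batch_size]
        ids = [r["id"] for r in batch_slice if "id" in r]
        queries.append(f"id==({' OR '.join(ids)})")
    return queries


# build_id_queries([{"id": "a"}, {"id": "b"}]) -> ['id==(a OR b)']
```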
```diff
@@ -390,7 +390,7 @@ class BatchPoster(MigrationTaskBase):
             new_record (dict): The new record to be updated.
             existing_record (dict): The existing record to patch from.
             patch_paths (List[str]): List of fields in JSON Path notation (e.g., ['statisticalCodeIds', 'administrativeNotes', 'instanceStatusId']) to patch during the upsert process. If empty, all fields will be patched.
-        """
+        """  # noqa: E501
         updates = {}
         updates.update(existing_record)
         keep_existing = {}
```
```diff
@@ -412,17 +412,19 @@ class BatchPoster(MigrationTaskBase):
         new_record.update(updates)

     @staticmethod
-    def collect_existing_records_for_upsert(
- [10 removed lines (old 416-425) not rendered in the source diff]
+    def collect_existing_records_for_upsert(
+        object_type: str, response_json: dict, existing_records: dict
+    ):
+        """
+        Collects existing records from API response into existing_records dict.
+
+        Args:
+            object_type: The key in response containing the records array
+            response_json: Parsed JSON response from API
+            existing_records: Dict to populate with {record_id: record_data}
+        """
+        for record in response_json.get(object_type, []):
+            existing_records[record["id"]] = record

     def handle_upsert_for_statistical_codes(self, updates: dict, keep_existing: dict):
         if not self.task_configuration.preserve_statistical_codes:
```
```diff
@@ -458,12 +460,23 @@ class BatchPoster(MigrationTaskBase):

     def prepare_record_for_upsert(self, new_record: dict, existing_record: dict):
         if "source" in existing_record and "MARC" in existing_record["source"]:
-
+            patch_paths = [
+                x
+                for x in self.task_configuration.patch_paths
+                if ("suppress" in x.lower() or x.lower() == "deleted")
+            ]
+            if patch_paths:
+                logging.debug(
+                    "Record %s is a MARC record, only suppression related fields will be patched",
+                    existing_record["id"],
+                )
+            else:
                 logging.debug(
                     "Record %s is a MARC record, patch_paths will be ignored",
                     existing_record["id"],
                 )
-
+                patch_paths.extend(["statisticalCodeIds", "administrativeNotes", "instanceStatusId"])
+            self.patch_record(new_record, existing_record, patch_paths)
         elif self.task_configuration.patch_existing_records:
             self.patch_record(new_record, existing_record, self.task_configuration.patch_paths)
         else:
```
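In effect, MARC-sourced records are no longer patched with the full `patch_paths` list: only suppression-related paths (plus `deleted`) survive the filter, and when none are configured the code appears to fall back to the three instance-level fields appended in the `else` branch. A small sketch mirroring that filter:

```python
# Mirrors the comprehension in prepare_record_for_upsert above (not the packaged function itself).
configured = ["staffSuppress", "discoverySuppress", "statisticalCodeIds", "deleted"]
marc_patch_paths = [
    x for x in configured if "suppress" in x.lower() or x.lower() == "deleted"
]
# -> ['staffSuppress', 'discoverySuppress', 'deleted']
# With no suppression-related paths configured, the patched fields instead become:
fallback = ["statisticalCodeIds", "administrativeNotes", "instanceStatusId"]
```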
```diff
@@ -471,7 +484,11 @@ class BatchPoster(MigrationTaskBase):
                 "_version": existing_record["_version"],
             }
             self.keep_existing_fields(updates, existing_record)
-            keep_new = {
+            keep_new = {
+                k: v
+                for k, v in new_record.items()
+                if k in ["statisticalCodeIds", "administrativeNotes"]
+            }
             keep_existing = {}
             self.handle_upsert_for_statistical_codes(existing_record, keep_existing)
             self.handle_upsert_for_administrative_notes(existing_record, keep_existing)
```
```diff
@@ -485,30 +502,66 @@ class BatchPoster(MigrationTaskBase):
             updates.update(keep_new)
             new_record.update(updates)

-    async def get_with_retry(self,
+    async def get_with_retry(self, url: str, params=None):
+        """
+        Wrapper around folio_get_async with selective retry logic.
+
+        Retries on:
+        - Connection errors (FolioConnectionError): Always retry
+        - Server errors (5xx): Transient failures
+        - Rate limiting (429): Too many requests
+
+        Does NOT retry on:
+        - Client errors (4xx except 429): Bad request, won't succeed on retry
+        """
         if params is None:
             params = {}
         retries = 3
+
         for attempt in range(retries):
             try:
- [4 removed lines (old 494-497) not rendered in the source diff]
-            except httpx.HTTPError as e:
+                return await self.folio_client.folio_get_async(url, query_params=params)
+
+            except folioclient.FolioConnectionError as e:
+                # Network/connection errors - always retry
                 if attempt < retries - 1:
-
-
+                    wait_time = 2**attempt
+                    logging.warning(
+                        f"Connection error, retrying in {wait_time}s "
+                        f"(attempt {attempt + 1}/{retries}): {e}"
+                    )
+                    await asyncio.sleep(wait_time)
                 else:
-                    logging.error(f"
+                    logging.error(f"Connection failed after {retries} attempts: {e}")
+                    raise
+
+            except folioclient.FolioHTTPError as e:
+                # HTTP errors - selective retry based on status code
+                status_code = e.response.status_code
+                should_retry = status_code >= 500 or status_code == 429
+
+                if should_retry and attempt < retries - 1:
+                    # Longer wait for rate limiting
+                    wait_time = 5 if status_code == 429 else 2**attempt
+                    logging.warning(
+                        f"HTTP {status_code} error, retrying in {wait_time}s "
+                        f"(attempt {attempt + 1}/{retries}): {e}"
+                    )
+                    await asyncio.sleep(wait_time)
+                else:
+                    # Either not retryable or out of attempts
+                    if should_retry:
+                        logging.error(
+                            f"HTTP {status_code} error persisted after {retries} attempts: {e}"
+                        )
+                    else:
+                        logging.error(f"HTTP {status_code} error (not retryable): {e}")
                     raise

     def post_record_batch(self, batch, failed_recs_file, row):
         json_rec = json.loads(row.split("\t")[-1])
         if self.task_configuration.object_type == "ShadowInstances":
             self.set_consortium_source(json_rec)
-        if self.task_configuration.object_type == "SRS":
-            json_rec["snapshotId"] = self.snapshot_id
         if self.processed == 1:
             logging.info(json.dumps(json_rec, indent=True))
         batch.append(json_rec)
```
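The new `get_with_retry` replaces the old blanket `httpx.HTTPError` handling with a selective policy: exponential backoff for connection errors and 5xx responses, a fixed five-second wait for 429, and an immediate re-raise for other 4xx errors. A reduced, library-agnostic sketch of the same backoff pattern (the FolioClient exception types above are replaced with plain `ConnectionError` here):

```python
import asyncio
import logging


async def retry_get(fetch, retries=3):
    """Retry a coroutine with exponential backoff; sketch of the policy above."""
    for attempt in range(retries):
        try:
            return await fetch()
        except ConnectionError as exc:  # stands in for folioclient.FolioConnectionError
            if attempt == retries - 1:
                raise
            wait_time = 2 ** attempt  # 1s, 2s, 4s ...
            logging.warning("Connection error, retrying in %ss: %s", wait_time, exc)
            await asyncio.sleep(wait_time)
```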
```diff
@@ -519,22 +572,29 @@ class BatchPoster(MigrationTaskBase):

     def post_extra_data(self, row: str, num_records: int, failed_recs_file):
         (object_name, data) = row.split("\t")
-
-        url = f"{self.folio_client.gateway_url}/{endpoint}"
+        url = self.get_extradata_endpoint(self.task_configuration, object_name, data)
         body = data
-
-
+        try:
+            _ = self.folio_client.folio_post(url, payload=body)
             self.num_posted += 1
- [5 removed lines (old 528-532) not rendered in the source diff]
+        except folioclient.FolioHTTPError as fhe:
+            if fhe.response.status_code == 422:
+                self.num_failures += 1
+                error_msg = json.loads(fhe.response.text)["errors"][0]["message"]
+                logging.error(
+                    "Row %s\tHTTP %s\t %s", num_records, fhe.response.status_code, error_msg
+                )
+                if (
+                    "id value already exists"
+                    not in json.loads(fhe.response.text)["errors"][0]["message"]
+                ):
+                    failed_recs_file.write(row)
+            else:
+                self.num_failures += 1
+                logging.error(
+                    "Row %s\tHTTP %s\t%s", num_records, fhe.response.status_code, fhe.response.text
+                )
                 failed_recs_file.write(row)
-        else:
-            self.num_failures += 1
-            logging.error("Row %s\tHTTP %s\t%s", num_records, response.status_code, response.text)
-            failed_recs_file.write(row)
         if num_records % 50 == 0:
             logging.info(
                 "%s records posted successfully. %s failed",
```
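The 422 branch only writes a row to the failed-records file when the error is something other than a duplicate id, presumably so that reruns do not re-queue records that are already in FOLIO. The decision in isolation (the response body is invented):

```python
import json

# Sample 422 body and the duplicate-id check used above.
response_text = '{"errors": [{"message": "id value already exists in table instance"}]}'
error_msg = json.loads(response_text)["errors"][0]["message"]
write_to_failed_file = "id value already exists" not in error_msg  # False: duplicate, skip
```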
```diff
@@ -563,52 +623,52 @@ class BatchPoster(MigrationTaskBase):
         object_types.update(task_configuration.extradata_endpoints)
         if object_name == "instructor":
             instructor = json.loads(string_object)
-            return f
+            return f"coursereserves/courselistings/{instructor['courseListingId']}/instructors"

         if object_name == "interfaceCredential":
             credential = json.loads(string_object)
-            return f
+            return f"organizations-storage/interfaces/{credential['interfaceId']}/credentials"

         return object_types[object_name]

     def post_single_records(self, row: str, num_records: int, failed_recs_file):
         if self.api_info["is_batch"]:
             raise TypeError("This record type supports batch processing, use post_batch method")
-
-
-
-        if response.status_code == 201:
+        url = self.api_info.get("api_endpoint")
+        try:
+            _ = self.folio_client.folio_post(url, payload=row)
             self.num_posted += 1
- [5 removed lines (old 582-586) not rendered in the source diff]
+        except folioclient.FolioHTTPError as fhe:
+            if fhe.response.status_code == 422:
+                self.num_failures += 1
+                error_msg = json.loads(fhe.response.text)["errors"][0]["message"]
+                logging.error(
+                    "Row %s\tHTTP %s\t %s", num_records, fhe.response.status_code, error_msg
+                )
+                if (
+                    "id value already exists"
+                    not in json.loads(fhe.response.text)["errors"][0]["message"]
+                ):
+                    failed_recs_file.write(row)
+            else:
+                self.num_failures += 1
+                logging.error(
+                    "Row %s\tHTTP %s\t%s",
+                    num_records,
+                    fhe.response.status_code,
+                    fhe.response.text,
+                )
                 failed_recs_file.write(row)
- [6 removed lines (old 588-593) not rendered in the source diff]
-                "%s records posted successfully. %s failed",
-                self.num_posted,
-                self.num_failures,
-            )
-
-    def post_objects(self, url, body):
-        if self.http_client and not self.http_client.is_closed:
-            return self.http_client.post(
-                url, data=body.encode("utf-8"), headers=self.folio_client.okapi_headers
-            )
-        else:
-            return httpx.post(
-                url, headers=self.okapi_headers, data=body.encode("utf-8"), timeout=None
-            )
+        if num_records % 50 == 0:
+            logging.info(
+                "%s records posted successfully. %s failed",
+                self.num_posted,
+                self.num_failures,
+            )

     def handle_generic_exception(self, exception, last_row, batch, num_records, failed_recs_file):
         logging.error("%s", exception)
-        self.migration_report.add("Details",
+        self.migration_report.add("Details", i18n_t("Generic exceptions (see log for details)"))
         # logging.error("Failed row: %s", last_row)
         self.failed_batches += 1
         self.num_failures += len(batch)
```
```diff
@@ -621,7 +681,7 @@ class BatchPoster(MigrationTaskBase):
             sys.exit(1)

     def handle_unicode_error(self, unicode_error, last_row):
-        self.migration_report.add("Details",
+        self.migration_report.add("Details", i18n_t("Encoding errors"))
         logging.info("=========ERROR==============")
         logging.info(
             "%s Posting failed. Encoding error reading file",
```
```diff
@@ -636,9 +696,9 @@ class BatchPoster(MigrationTaskBase):
         traceback.logging.info_exc()  # type: ignore
         logging.info("=======================")

-    def post_batch(self, batch, failed_recs_file, num_records
+    def post_batch(self, batch, failed_recs_file, num_records):
         if self.query_params.get("upsert", False) and self.api_info.get("query_endpoint", ""):
-            self.set_version(batch, self.api_info[
+            self.set_version(batch, self.api_info["query_endpoint"], self.api_info["object_name"])
         response = self.do_post(batch)
         if response.status_code == 401:
             logging.error("Authorization failed (%s). Fetching new auth token...", response.text)
```
```diff
@@ -707,24 +767,6 @@ class BatchPoster(MigrationTaskBase):
             # Likely a json parsing error
             logging.error(response.text)
             raise TransformationProcessError("", "HTTP 400. Something is wrong. Quitting")
-        elif self.task_configuration.object_type == "SRS" and response.status_code >= 500:
-            logging.info(
-                "Post failed. Size: %s Waiting 30s until reposting. Number of tries: %s of 5",
-                get_req_size(response),
-                recursion_depth,
-            )
-            logging.info(response.text)
-            time.sleep(30)
-            if recursion_depth > 4:
-                raise TransformationRecordFailedError(
-                    "",
-                    f"HTTP {response.status_code}\t"
-                    f"Request size: {get_req_size(response)}"
-                    f"{datetime.now(timezone.utc).isoformat()}\n",
-                    response.text,
-                )
-            else:
-                self.post_batch(batch, failed_recs_file, num_records, recursion_depth + 1)
         elif (
             response.status_code == 413 and "DB_ALLOW_SUPPRESS_OPTIMISTIC_LOCKING" in response.text
         ):
```
```diff
@@ -749,57 +791,42 @@ class BatchPoster(MigrationTaskBase):
         )

     def do_post(self, batch):
- [9 removed lines (old 752-760) not rendered in the source diff]
-            return self.http_client.post(
-                url,
-                json=payload,
-                headers=self.folio_client.okapi_headers,
-                params=self.query_params
-            )
-        else:
-            return httpx.post(
+        with self.folio_client.get_folio_http_client() as http_client:
+            url = self.api_info["api_endpoint"]
+            if self.api_info["object_name"] == "users":
+                payload = {self.api_info["object_name"]: list(batch), "totalRecords": len(batch)}
+            elif self.api_info["total_records"]:
+                payload = {"records": list(batch), "totalRecords": len(batch)}
+            else:
+                payload = {self.api_info["object_name"]: batch}
+            return http_client.post(
                 url,
-                headers=self.okapi_headers,
                 json=payload,
                 params=self.query_params,
-
+            )

     def get_current_record_count_in_folio(self):
         if "query_endpoint" in self.api_info:
-            url =
+            url = self.api_info["query_endpoint"]
             query_params = {"query": "cql.allRecords=1", "limit": 0}
-            if self.http_client and not self.http_client.is_closed:
-                res = self.http_client.get(
-                    url,
-                    headers=self.folio_client.okapi_headers,
-                    params=query_params
-                )
-            else:
-                res = httpx.get(url, headers=self.okapi_headers, params=query_params, timeout=None)
             try:
-                res.
-                return res
-            except
-                logging.error(
+                res = self.folio_client.folio_get(url, query_params=query_params)
+                return res["totalRecords"]
+            except folioclient.FolioHTTPError as fhe:
+                logging.error(
+                    "Failed to get current record count. HTTP %s", fhe.response.status_code
+                )
                 return 0
             except KeyError:
                 logging.error(
                     "Failed to get current record count. "
-                    f"No 'totalRecords' in response: {
+                    f"No 'totalRecords' in response: {json.dumps(res, indent=2)}"
                 )
                 return 0
         else:
             raise ValueError(
                 "No 'query_endpoint' available for %s. Cannot get current record count.",
-                self.task_configuration.object_type
+                self.task_configuration.object_type,
             )

     def get_starting_record_count(self):
```
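`do_post` now builds one of three request bodies depending on the API metadata, instead of routing through the removed `post_objects` helper. The selection logic in isolation:

```python
def build_payload(api_info: dict, batch: list) -> dict:
    """Sketch of the payload selection in do_post above."""
    if api_info["object_name"] == "users":
        return {"users": list(batch), "totalRecords": len(batch)}
    if api_info["total_records"]:
        return {"records": list(batch), "totalRecords": len(batch)}
    return {api_info["object_name"]: batch}
```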
```diff
@@ -809,7 +836,7 @@ class BatchPoster(MigrationTaskBase):
         else:
             logging.info(
                 "No query_endpoint available for %s. Cannot get starting record count.",
-                self.task_configuration.object_type
+                self.task_configuration.object_type,
             )

     def get_finished_record_count(self):
```
```diff
@@ -819,14 +846,12 @@ class BatchPoster(MigrationTaskBase):
         else:
             logging.info(
                 "No query_endpoint available for %s. Cannot get ending record count.",
-                self.task_configuration.object_type
+                self.task_configuration.object_type,
             )

     def wrap_up(self):
         logging.info("Done. Wrapping up")
         self.extradata_writer.flush()
-        if self.task_configuration.object_type == "SRS":
-            self.commit_snapshot()
         if self.task_configuration.object_type != "Extradata":
             logging.info(
                 (
```
```diff
@@ -842,7 +867,7 @@ class BatchPoster(MigrationTaskBase):
         if self.starting_record_count_in_folio:
             self.get_finished_record_count()
             total_on_server = (
-
+                self.finished_record_count_in_folio - self.starting_record_count_in_folio
             )
             discrepancy = self.processed - self.num_failures - total_on_server
             if discrepancy != 0:
```
```diff
@@ -893,9 +918,8 @@ class BatchPoster(MigrationTaskBase):
             temp_start = self.start_datetime
             self.task_configuration.rerun_failed_records = False
             self.__init__(
-                self.task_configuration,
-
-                self.folio_client)
+                self.task_configuration, self.library_configuration, self.folio_client
+            )
             self.performing_rerun = True
             self.migration_report = temp_report
             self.start_datetime = temp_start
```
```diff
@@ -914,69 +938,6 @@ class BatchPoster(MigrationTaskBase):
             str(self.folder_structure.failed_recs_path),
         )

-    def create_snapshot(self):
-        snapshot = {
-            "jobExecutionId": self.snapshot_id,
-            "status": "PARSING_IN_PROGRESS",
-            "processingStartedDate": datetime.now(timezone.utc).isoformat(timespec="milliseconds"),
-        }
-        try:
-            url = f"{self.folio_client.gateway_url}/source-storage/snapshots"
-            if self.http_client and not self.http_client.is_closed:
-                res = self.http_client.post(
-                    url, json=snapshot, headers=self.folio_client.okapi_headers
-                )
-            else:
-                res = httpx.post(url, headers=self.okapi_headers, json=snapshot, timeout=None)
-            res.raise_for_status()
-            logging.info("Posted Snapshot to FOLIO: %s", json.dumps(snapshot, indent=4))
-            get_url = (
-                f"{self.folio_client.gateway_url}/source-storage/snapshots/{self.snapshot_id}"
-            )
-            got = False
-            while not got:
-                logging.info("Sleeping while waiting for the snapshot to get created")
-                time.sleep(5)
-                if self.http_client and not self.http_client.is_closed:
-                    res = self.http_client.get(get_url, headers=self.folio_client.okapi_headers)
-                else:
-                    res = httpx.get(get_url, headers=self.okapi_headers, timeout=None)
-                if res.status_code == 200:
-                    got = True
-                else:
-                    logging.info(res.status_code)
-        except httpx.HTTPStatusError as exc:
-            logging.exception("HTTP error occurred while posting the snapshot: %s", exc)
-            sys.exit(1)
-        except Exception as exc:
-            logging.exception("Could not post the snapshot: %s", exc)
-            sys.exit(1)
-
-    def commit_snapshot(self):
-        snapshot = {"jobExecutionId": self.snapshot_id, "status": "COMMITTED"}
-        try:
-            url = f"{self.folio_client.gateway_url}/source-storage/snapshots/{self.snapshot_id}"
-            if self.http_client and not self.http_client.is_closed:
-                res = self.http_client.put(
-                    url, json=snapshot, headers=self.folio_client.okapi_headers
-                )
-            else:
-                res = httpx.put(url, headers=self.okapi_headers, json=snapshot, timeout=None)
-            res.raise_for_status()
-            logging.info("Posted Committed snapshot to FOLIO: %s", json.dumps(snapshot, indent=4))
-        except httpx.HTTPStatusError as exc:
-            logging.exception("HTTP error occurred while posting the snapshot: %s", exc)
-            sys.exit(1)
-        except Exception as exc:
-            logging.exception(
-                "Could not commit snapshot with id %s. Post this to /source-storage/snapshots/%s:",
-                self.snapshot_id,
-                self.snapshot_id,
-                exc,
-            )
-            logging.info("%s", json.dumps(snapshot, indent=4))
-            sys.exit(1)
-

 def get_api_info(object_type: str, use_safe: bool = True):
     choices = {
```
```diff
@@ -1038,22 +999,6 @@ def get_api_info(object_type: str, use_safe: bool = True):
             "addSnapshotId": False,
             "supports_upsert": True,
         },
-        "Authorities": {
-            "object_name": "",
-            "api_endpoint": "/authority-storage/authorities",
-            "is_batch": False,
-            "total_records": False,
-            "addSnapshotId": False,
-            "supports_upsert": False,
-        },
-        "SRS": {
-            "object_name": "records",
-            "api_endpoint": "/source-storage/batch/records",
-            "is_batch": True,
-            "total_records": True,
-            "addSnapshotId": True,
-            "supports_upsert": False,
-        },
         "Users": {
             "object_name": "users",
             "api_endpoint": "/user-import",
```
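With the `Authorities` and `SRS` entries gone, the remaining `get_api_info()` entries still carry the same set of keys. A sketch of one entry's shape; only the key names (and the `Users` name and endpoint visible in the context lines) come from the diff, the remaining values are illustrative:

```python
# Shape shared by get_api_info() entries (keys from the diff; values below are illustrative).
example_api_info = {
    "object_name": "users",
    "api_endpoint": "/user-import",
    "is_batch": True,
    "total_records": False,
    "addSnapshotId": False,
    "supports_upsert": True,
}
```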
```diff
@@ -1085,27 +1030,12 @@ def get_api_info(object_type: str, use_safe: bool = True):
     except KeyError:
         key_string = ", ".join(choices.keys())
         logging.error(
-            f"Wrong type. Only one of {key_string} are allowed, "
-            f"received {object_type=} instead"
+            f"Wrong type. Only one of {key_string} are allowed, received {object_type=} instead"
         )
         logging.error("Halting")
         sys.exit(1)


-def chunks(records, number_of_chunks):
-    """Yield successive n-sized chunks from lst.
-
-    Args:
-        records (_type_): _description_
-        number_of_chunks (_type_): _description_
-
-    Yields:
-        _type_: _description_
-    """
-    for i in range(0, len(records), number_of_chunks):
-        yield records[i: i + number_of_chunks]
-
-
 def get_human_readable(size, precision=2):
     suffixes = ["B", "KB", "MB", "GB", "TB"]
     suffix_index = 0
```
```diff
@@ -1115,22 +1045,23 @@ def get_human_readable(size, precision=2):
     return "%.*f%s" % (precision, size, suffixes[suffix_index])


-def get_req_size(response:
+def get_req_size(response: "Response"):
     size = response.request.method
     size += str(response.request.url)
     size += "\r\n".join(f"{k}{v}" for k, v in response.request.headers.items())
     size += response.request.content.decode("utf-8") or ""
     return get_human_readable(len(size.encode("utf-8")))

+
 def parse_path(path):
     """
     Parses a path like 'foo.bar[0].baz' into ['foo', 'bar', 0, 'baz']
     """
     tokens = []
     # Split by dot, then extract indices
-    for part in path.split(
+    for part in path.split("."):
         # Find all [index] parts
-        matches = re.findall(r
+        matches = re.findall(r"([^\[\]]+)|\[(\d+)\]", part)
         for name, idx in matches:
             if name:
                 tokens.append(name)
```
```diff
@@ -1138,12 +1069,14 @@ def parse_path(path):
                 tokens.append(int(idx))
     return tokens

+
 def get_by_path(data, path):
     keys = parse_path(path)
     for key in keys:
         data = data[key]
     return data

+
 def set_by_path(data, path, value):
     keys = parse_path(path)
     for i, key in enumerate(keys[:-1]):
```
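For reference, the path helper shown above in action. The function body is copied from the diff so the example is self-contained:

```python
import re


def parse_path(path):
    """Copy of the helper above: 'foo.bar[0].baz' -> ['foo', 'bar', 0, 'baz']."""
    tokens = []
    # Split by dot, then extract indices
    for part in path.split("."):
        # Find all [index] parts
        matches = re.findall(r"([^\[\]]+)|\[(\d+)\]", part)
        for name, idx in matches:
            if name:
                tokens.append(name)
            else:
                tokens.append(int(idx))
    return tokens


print(parse_path("holdings[0].permanentLocationId"))
# ['holdings', 0, 'permanentLocationId']
```

`get_by_path` and `set_by_path` walk these tokens to read or write the addressed value inside a record, which is how `patch_paths` entries are applied.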
```diff
@@ -1164,6 +1097,7 @@ def set_by_path(data, path, value):
     else:
         data[last_key] = value

+
 def extract_paths(data, paths):
     result = {}
     for path in paths:
```
|
@@ -1174,6 +1108,7 @@ def extract_paths(data, paths):
|
|
|
1174
1108
|
continue
|
|
1175
1109
|
return result
|
|
1176
1110
|
|
|
1111
|
+
|
|
1177
1112
|
def deep_update(target, patch):
|
|
1178
1113
|
"""
|
|
1179
1114
|
Recursively update target dict/list with values from patch dict/list.
|
|
@@ -1181,11 +1116,7 @@ def deep_update(target, patch):
|
|
|
1181
1116
|
"""
|
|
1182
1117
|
if isinstance(patch, dict):
|
|
1183
1118
|
for k, v in patch.items():
|
|
1184
|
-
if (
|
|
1185
|
-
k in target
|
|
1186
|
-
and isinstance(target[k], (dict, list))
|
|
1187
|
-
and isinstance(v, (dict, list))
|
|
1188
|
-
):
|
|
1119
|
+
if k in target and isinstance(target[k], (dict, list)) and isinstance(v, (dict, list)):
|
|
1189
1120
|
deep_update(target[k], v)
|
|
1190
1121
|
else:
|
|
1191
1122
|
target[k] = v
|
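A dict-only illustration of what `deep_update` does during patching (the packaged version above also handles lists, which is not reproduced here):

```python
def deep_update_dicts(target: dict, patch: dict) -> None:
    """Dict branch of deep_update above, for illustration."""
    for k, v in patch.items():
        if k in target and isinstance(target[k], dict) and isinstance(v, dict):
            deep_update_dicts(target[k], v)
        else:
            target[k] = v


record = {"administrativeNotes": ["old"], "status": {"name": "Available"}}
deep_update_dicts(record, {"status": {"name": "Checked out"}, "discoverySuppress": True})
# record -> {"administrativeNotes": ["old"],
#            "status": {"name": "Checked out"},
#            "discoverySuppress": True}
```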