folio-migration-tools 1.9.10__py3-none-any.whl → 1.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. folio_migration_tools/__init__.py +3 -4
  2. folio_migration_tools/__main__.py +53 -31
  3. folio_migration_tools/circulation_helper.py +118 -108
  4. folio_migration_tools/custom_dict.py +2 -2
  5. folio_migration_tools/custom_exceptions.py +4 -5
  6. folio_migration_tools/folder_structure.py +17 -7
  7. folio_migration_tools/helper.py +8 -7
  8. folio_migration_tools/holdings_helper.py +4 -3
  9. folio_migration_tools/i18n_cache.py +79 -0
  10. folio_migration_tools/library_configuration.py +77 -37
  11. folio_migration_tools/mapper_base.py +45 -31
  12. folio_migration_tools/mapping_file_transformation/courses_mapper.py +1 -1
  13. folio_migration_tools/mapping_file_transformation/holdings_mapper.py +7 -3
  14. folio_migration_tools/mapping_file_transformation/item_mapper.py +13 -26
  15. folio_migration_tools/mapping_file_transformation/manual_fee_fines_mapper.py +1 -2
  16. folio_migration_tools/mapping_file_transformation/mapping_file_mapper_base.py +13 -11
  17. folio_migration_tools/mapping_file_transformation/order_mapper.py +6 -5
  18. folio_migration_tools/mapping_file_transformation/organization_mapper.py +3 -3
  19. folio_migration_tools/mapping_file_transformation/user_mapper.py +47 -28
  20. folio_migration_tools/marc_rules_transformation/conditions.py +82 -97
  21. folio_migration_tools/marc_rules_transformation/holdings_statementsparser.py +13 -5
  22. folio_migration_tools/marc_rules_transformation/hrid_handler.py +3 -2
  23. folio_migration_tools/marc_rules_transformation/marc_file_processor.py +26 -24
  24. folio_migration_tools/marc_rules_transformation/rules_mapper_base.py +56 -51
  25. folio_migration_tools/marc_rules_transformation/rules_mapper_bibs.py +28 -17
  26. folio_migration_tools/marc_rules_transformation/rules_mapper_holdings.py +68 -37
  27. folio_migration_tools/migration_report.py +18 -7
  28. folio_migration_tools/migration_tasks/batch_poster.py +285 -354
  29. folio_migration_tools/migration_tasks/bibs_transformer.py +14 -9
  30. folio_migration_tools/migration_tasks/courses_migrator.py +2 -3
  31. folio_migration_tools/migration_tasks/holdings_csv_transformer.py +23 -24
  32. folio_migration_tools/migration_tasks/holdings_marc_transformer.py +14 -24
  33. folio_migration_tools/migration_tasks/items_transformer.py +23 -34
  34. folio_migration_tools/migration_tasks/loans_migrator.py +67 -144
  35. folio_migration_tools/migration_tasks/manual_fee_fines_transformer.py +3 -3
  36. folio_migration_tools/migration_tasks/migration_task_base.py +47 -60
  37. folio_migration_tools/migration_tasks/orders_transformer.py +25 -42
  38. folio_migration_tools/migration_tasks/organization_transformer.py +9 -18
  39. folio_migration_tools/migration_tasks/requests_migrator.py +21 -24
  40. folio_migration_tools/migration_tasks/reserves_migrator.py +6 -5
  41. folio_migration_tools/migration_tasks/user_transformer.py +25 -20
  42. folio_migration_tools/task_configuration.py +6 -7
  43. folio_migration_tools/transaction_migration/legacy_loan.py +15 -27
  44. folio_migration_tools/transaction_migration/legacy_request.py +1 -1
  45. folio_migration_tools/translations/en.json +0 -7
  46. {folio_migration_tools-1.9.10.dist-info → folio_migration_tools-1.10.0.dist-info}/METADATA +19 -28
  47. folio_migration_tools-1.10.0.dist-info/RECORD +63 -0
  48. folio_migration_tools-1.10.0.dist-info/WHEEL +4 -0
  49. folio_migration_tools-1.10.0.dist-info/entry_points.txt +3 -0
  50. folio_migration_tools/marc_rules_transformation/rules_mapper_authorities.py +0 -241
  51. folio_migration_tools/migration_tasks/authority_transformer.py +0 -119
  52. folio_migration_tools/test_infrastructure/__init__.py +0 -0
  53. folio_migration_tools/test_infrastructure/mocked_classes.py +0 -406
  54. folio_migration_tools-1.9.10.dist-info/RECORD +0 -67
  55. folio_migration_tools-1.9.10.dist-info/WHEEL +0 -4
  56. folio_migration_tools-1.9.10.dist-info/entry_points.txt +0 -3
  57. folio_migration_tools-1.9.10.dist-info/licenses/LICENSE +0 -21
@@ -4,14 +4,15 @@ import json
  import logging
  import re
  import sys
- import time
  import traceback
  from datetime import datetime, timezone
- from typing import Annotated, List, Optional
+ from typing import TYPE_CHECKING, Annotated, List, Optional
  from uuid import uuid4

- import httpx
- import i18n
+ import folioclient
+
+ if TYPE_CHECKING:
+ from httpx import Response
  from folio_uuid.folio_namespaces import FOLIONamespaces
  from pydantic import Field

@@ -19,6 +20,7 @@ from folio_migration_tools.custom_exceptions import (
  TransformationProcessError,
  TransformationRecordFailedError,
  )
+ from folio_migration_tools.i18n_cache import i18n_t
  from folio_migration_tools.library_configuration import (
  FileDefinition,
  LibraryConfiguration,
@@ -73,7 +75,7 @@ class BatchPoster(MigrationTaskBase):
  description=(
  "The type of object being migrated"
  "Examples of possible values: "
- "'Extradata', 'SRS', Instances', 'Holdings', 'Items'"
+ "'Extradata', 'Instances', 'Holdings', 'Items'"
  ),
  ),
  ]
@@ -182,19 +184,27 @@ class BatchPoster(MigrationTaskBase):
  ),
  ),
  ] = True
- patch_existing_records: Annotated[bool, Field(
- title="Patch existing records",
- description=(
- "Toggles whether or not to patch existing records "
- "during the upsert process. Defaults to False"
+ patch_existing_records: Annotated[
+ bool,
+ Field(
+ title="Patch existing records",
+ description=(
+ "Toggles whether or not to patch existing records "
+ "during the upsert process. Defaults to False"
+ ),
  ),
- )] = False
- patch_paths: Annotated[List[str], Field(
- title="Patch paths",
- description=(
- "A list of fields in JSON Path notation to patch during the upsert process (leave off the $). If empty, all fields will be patched. Examples: ['statisticalCodeIds', 'administrativeNotes', 'instanceStatusId']"
+ ] = False
+ patch_paths: Annotated[
+ List[str],
+ Field(
+ title="Patch paths",
+ description=(
+ "A list of fields in JSON Path notation to patch during the upsert process "
+ "(leave off the $). If empty, all fields will be patched. Examples: "
+ "['statisticalCodeIds', 'administrativeNotes', 'instanceStatusId']"
+ ),
  ),
- )] = []
+ ] = []

  task_configuration: TaskConfiguration

@@ -223,7 +233,8 @@ class BatchPoster(MigrationTaskBase):
  self.query_params["upsert"] = self.task_configuration.upsert
  elif self.task_configuration.upsert and not self.api_info["supports_upsert"]:
  logging.info(
- "Upsert is not supported for this object type. Query parameter will not be set.")
+ "Upsert is not supported for this object type. Query parameter will not be set."
+ )
  self.snapshot_id = str(uuid4())
  self.failed_objects: list = []
  self.batch_size = self.task_configuration.batch_size
@@ -236,97 +247,87 @@ class BatchPoster(MigrationTaskBase):
  self.failed_fields: set = set()
  self.num_failures = 0
  self.num_posted = 0
- self.okapi_headers = self.folio_client.okapi_headers
- self.http_client = None
  self.starting_record_count_in_folio: Optional[int] = None
  self.finished_record_count_in_folio: Optional[int] = None

- def do_work(self):
- with self.folio_client.get_folio_http_client() as httpx_client:
- self.http_client = httpx_client
- with open(
- self.folder_structure.failed_recs_path, "w", encoding='utf-8'
- ) as failed_recs_file:
- self.get_starting_record_count()
- try:
- batch = []
- if self.task_configuration.object_type == "SRS":
- self.create_snapshot()
- for idx, file_def in enumerate(self.task_configuration.files):
- path = self.folder_structure.results_folder / file_def.file_name
- with open(path) as rows:
- logging.info("Running %s", path)
- last_row = ""
- for self.processed, row in enumerate(rows, start=1):
- last_row = row
- if row.strip():
- try:
- if self.task_configuration.object_type == "Extradata":
- self.post_extra_data(
- row, self.processed, failed_recs_file
- )
- elif not self.api_info["is_batch"]:
- self.post_single_records(
- row, self.processed, failed_recs_file
- )
- else:
- batch = self.post_record_batch(
- batch, failed_recs_file, row
- )
- except UnicodeDecodeError as unicode_error:
- self.handle_unicode_error(unicode_error, last_row)
- except TransformationProcessError as tpe:
- self.handle_generic_exception(
- tpe,
- last_row,
- batch,
- self.processed,
- failed_recs_file,
+ def do_work(self): # noqa: C901
+ with open(
+ self.folder_structure.failed_recs_path, "w", encoding="utf-8"
+ ) as failed_recs_file:
+ self.get_starting_record_count()
+ try:
+ batch = []
+ for idx, file_def in enumerate(self.task_configuration.files): # noqa: B007
+ path = self.folder_structure.results_folder / file_def.file_name
+ with open(path) as rows:
+ logging.info("Running %s", path)
+ last_row = ""
+ for self.processed, row in enumerate(rows, start=1):
+ last_row = row
+ if row.strip():
+ try:
+ if self.task_configuration.object_type == "Extradata":
+ self.post_extra_data(row, self.processed, failed_recs_file)
+ elif not self.api_info["is_batch"]:
+ self.post_single_records(
+ row, self.processed, failed_recs_file
  )
- batch = []
- raise
- except TransformationRecordFailedError as exception:
- self.handle_generic_exception(
- exception,
- last_row,
- batch,
- self.processed,
- failed_recs_file,
+ else:
+ batch = self.post_record_batch(
+ batch, failed_recs_file, row
  )
- batch = []
- except (FileNotFoundError, PermissionError) as ose:
- logging.error("Error reading file: %s", ose)
-
- except Exception as ee:
- if "idx" in locals() and self.task_configuration.files[idx:]:
- for file_def in self.task_configuration.files[idx:]:
- path = self.folder_structure.results_folder / file_def.file_name
- try:
- with open(path, "r") as failed_file:
- failed_file.seek(self.processed)
- failed_recs_file.write(failed_file.read())
- self.processed = 0
- except (FileNotFoundError, PermissionError) as ose:
- logging.error("Error reading file: %s", ose)
- raise ee
- finally:
- if self.task_configuration.object_type != "Extradata" and any(batch):
+ except UnicodeDecodeError as unicode_error:
+ self.handle_unicode_error(unicode_error, last_row)
+ except TransformationProcessError as tpe:
+ self.handle_generic_exception(
+ tpe,
+ last_row,
+ batch,
+ self.processed,
+ failed_recs_file,
+ )
+ batch = []
+ raise
+ except TransformationRecordFailedError as exception:
+ self.handle_generic_exception(
+ exception,
+ last_row,
+ batch,
+ self.processed,
+ failed_recs_file,
+ )
+ batch = []
+ except (FileNotFoundError, PermissionError) as ose:
+ logging.error("Error reading file: %s", ose)
+
+ except Exception as ee:
+ if "idx" in locals() and self.task_configuration.files[idx:]:
+ for file_def in self.task_configuration.files[idx:]:
+ path = self.folder_structure.results_folder / file_def.file_name
  try:
- self.post_batch(batch, failed_recs_file, self.processed)
- except Exception as exception:
- self.handle_generic_exception(
- exception, last_row, batch, self.processed, failed_recs_file
- )
- logging.info("Done posting %s records. ", self.processed)
- if self.task_configuration.object_type == "SRS":
- self.commit_snapshot()
+ with open(path, "r") as failed_file:
+ failed_file.seek(self.processed)
+ failed_recs_file.write(failed_file.read())
+ self.processed = 0
+ except (FileNotFoundError, PermissionError) as ose:
+ logging.error("Error reading file: %s", ose)
+ raise ee
+ finally:
+ if self.task_configuration.object_type != "Extradata" and any(batch):
+ try:
+ self.post_batch(batch, failed_recs_file, self.processed)
+ except Exception as exception:
+ self.handle_generic_exception(
+ exception, last_row, batch, self.processed, failed_recs_file
+ )
+ logging.info("Done posting %s records. ", self.processed)

  @staticmethod
  def set_consortium_source(json_rec):
- if json_rec['source'] == 'MARC':
- json_rec['source'] = 'CONSORTIUM-MARC'
- elif json_rec['source'] == 'FOLIO':
- json_rec['source'] = 'CONSORTIUM-FOLIO'
+ if json_rec["source"] == "MARC":
+ json_rec["source"] = "CONSORTIUM-MARC"
+ elif json_rec["source"] == "FOLIO":
+ json_rec["source"] = "CONSORTIUM-FOLIO"

  def set_version(self, batch, query_api, object_type) -> None:
  """
@@ -357,27 +358,26 @@ class BatchPoster(MigrationTaskBase):
  fetch_batch_size = 90
  fetch_tasks = []
  existing_records = {}
- async with httpx.AsyncClient(base_url=self.folio_client.gateway_url) as client:
- for i in range(0, len(batch), fetch_batch_size):
- batch_slice = batch[i:i + fetch_batch_size]
- fetch_tasks.append(
- self.get_with_retry(
- client,
- query_api,
- params={
- "query": (
- "id==("
- f"{' OR '.join([r['id'] for r in batch_slice if 'id' in r])})"
- ),
- "limit": fetch_batch_size
- },
- )
+
+ for i in range(0, len(batch), fetch_batch_size):
+ batch_slice = batch[i : i + fetch_batch_size]
+ fetch_tasks.append(
+ self.get_with_retry(
+ query_api,
+ params={
+ "query": (
+ f"id==({' OR '.join([r['id'] for r in batch_slice if 'id' in r])})"
+ ),
+ "limit": fetch_batch_size,
+ },
  )
+ )
+
+ responses = await asyncio.gather(*fetch_tasks)

- responses = await asyncio.gather(*fetch_tasks)
+ for response in responses:
+ self.collect_existing_records_for_upsert(object_type, response, existing_records)

- for response in responses:
- self.collect_existing_records_for_upsert(object_type, response, existing_records)
  for record in batch:
  if record["id"] in existing_records:
  self.prepare_record_for_upsert(record, existing_records[record["id"]])
@@ -390,7 +390,7 @@ class BatchPoster(MigrationTaskBase):
  new_record (dict): The new record to be updated.
  existing_record (dict): The existing record to patch from.
  patch_paths (List[str]): List of fields in JSON Path notation (e.g., ['statisticalCodeIds', 'administrativeNotes', 'instanceStatusId']) to patch during the upsert process. If empty, all fields will be patched.
- """
+ """ # noqa: E501
  updates = {}
  updates.update(existing_record)
  keep_existing = {}
@@ -412,17 +412,19 @@ class BatchPoster(MigrationTaskBase):
  new_record.update(updates)

  @staticmethod
- def collect_existing_records_for_upsert(object_type: str, response: httpx.Response, existing_records: dict):
- if response.status_code == 200:
- response_json = response.json()
- for record in response_json[object_type]:
- existing_records[record["id"]] = record
- else:
- logging.error(
- "Failed to fetch current records. HTTP %s\t%s",
- response.status_code,
- response.text,
- )
+ def collect_existing_records_for_upsert(
+ object_type: str, response_json: dict, existing_records: dict
+ ):
+ """
+ Collects existing records from API response into existing_records dict.
+
+ Args:
+ object_type: The key in response containing the records array
+ response_json: Parsed JSON response from API
+ existing_records: Dict to populate with {record_id: record_data}
+ """
+ for record in response_json.get(object_type, []):
+ existing_records[record["id"]] = record

  def handle_upsert_for_statistical_codes(self, updates: dict, keep_existing: dict):
  if not self.task_configuration.preserve_statistical_codes:
@@ -458,12 +460,23 @@ class BatchPoster(MigrationTaskBase):

  def prepare_record_for_upsert(self, new_record: dict, existing_record: dict):
  if "source" in existing_record and "MARC" in existing_record["source"]:
- if self.task_configuration.patch_paths:
+ patch_paths = [
+ x
+ for x in self.task_configuration.patch_paths
+ if ("suppress" in x.lower() or x.lower() == "deleted")
+ ]
+ if patch_paths:
+ logging.debug(
+ "Record %s is a MARC record, only suppression related fields will be patched",
+ existing_record["id"],
+ )
+ else:
  logging.debug(
  "Record %s is a MARC record, patch_paths will be ignored",
  existing_record["id"],
  )
- self.patch_record(new_record, existing_record, ["statisticalCodeIds", "administrativeNotes", "instanceStatusId"])
+ patch_paths.extend(["statisticalCodeIds", "administrativeNotes", "instanceStatusId"])
+ self.patch_record(new_record, existing_record, patch_paths)
  elif self.task_configuration.patch_existing_records:
  self.patch_record(new_record, existing_record, self.task_configuration.patch_paths)
  else:
@@ -471,7 +484,11 @@ class BatchPoster(MigrationTaskBase):
  "_version": existing_record["_version"],
  }
  self.keep_existing_fields(updates, existing_record)
- keep_new = {k: v for k, v in new_record.items() if k in ["statisticalCodeIds", "administrativeNotes"]}
+ keep_new = {
+ k: v
+ for k, v in new_record.items()
+ if k in ["statisticalCodeIds", "administrativeNotes"]
+ }
  keep_existing = {}
  self.handle_upsert_for_statistical_codes(existing_record, keep_existing)
  self.handle_upsert_for_administrative_notes(existing_record, keep_existing)
@@ -485,30 +502,66 @@ class BatchPoster(MigrationTaskBase):
  updates.update(keep_new)
  new_record.update(updates)

- async def get_with_retry(self, client: httpx.AsyncClient, url: str, params=None):
+ async def get_with_retry(self, url: str, params=None):
+ """
+ Wrapper around folio_get_async with selective retry logic.
+
+ Retries on:
+ - Connection errors (FolioConnectionError): Always retry
+ - Server errors (5xx): Transient failures
+ - Rate limiting (429): Too many requests
+
+ Does NOT retry on:
+ - Client errors (4xx except 429): Bad request, won't succeed on retry
+ """
  if params is None:
  params = {}
  retries = 3
+
  for attempt in range(retries):
  try:
- response = await client.get(
- url, params=params, headers=self.folio_client.okapi_headers)
- response.raise_for_status()
- return response
- except httpx.HTTPError as e:
+ return await self.folio_client.folio_get_async(url, query_params=params)
+
+ except folioclient.FolioConnectionError as e:
+ # Network/connection errors - always retry
  if attempt < retries - 1:
- logging.warning(f"Retrying due to {e}")
- await asyncio.sleep(2 ** attempt)
+ wait_time = 2**attempt
+ logging.warning(
+ f"Connection error, retrying in {wait_time}s "
+ f"(attempt {attempt + 1}/{retries}): {e}"
+ )
+ await asyncio.sleep(wait_time)
  else:
- logging.error(f"Failed to connect after {retries} attempts: {e}")
+ logging.error(f"Connection failed after {retries} attempts: {e}")
+ raise
+
+ except folioclient.FolioHTTPError as e:
+ # HTTP errors - selective retry based on status code
+ status_code = e.response.status_code
+ should_retry = status_code >= 500 or status_code == 429
+
+ if should_retry and attempt < retries - 1:
+ # Longer wait for rate limiting
+ wait_time = 5 if status_code == 429 else 2**attempt
+ logging.warning(
+ f"HTTP {status_code} error, retrying in {wait_time}s "
+ f"(attempt {attempt + 1}/{retries}): {e}"
+ )
+ await asyncio.sleep(wait_time)
+ else:
+ # Either not retryable or out of attempts
+ if should_retry:
+ logging.error(
+ f"HTTP {status_code} error persisted after {retries} attempts: {e}"
+ )
+ else:
+ logging.error(f"HTTP {status_code} error (not retryable): {e}")
  raise

  def post_record_batch(self, batch, failed_recs_file, row):
  json_rec = json.loads(row.split("\t")[-1])
  if self.task_configuration.object_type == "ShadowInstances":
  self.set_consortium_source(json_rec)
- if self.task_configuration.object_type == "SRS":
- json_rec["snapshotId"] = self.snapshot_id
  if self.processed == 1:
  logging.info(json.dumps(json_rec, indent=True))
  batch.append(json_rec)
@@ -519,22 +572,29 @@ class BatchPoster(MigrationTaskBase):

  def post_extra_data(self, row: str, num_records: int, failed_recs_file):
  (object_name, data) = row.split("\t")
- endpoint = self.get_extradata_endpoint(self.task_configuration, object_name, data)
- url = f"{self.folio_client.gateway_url}/{endpoint}"
+ url = self.get_extradata_endpoint(self.task_configuration, object_name, data)
  body = data
- response = self.post_objects(url, body)
- if response.status_code == 201:
+ try:
+ _ = self.folio_client.folio_post(url, payload=body)
  self.num_posted += 1
- elif response.status_code == 422:
- self.num_failures += 1
- error_msg = json.loads(response.text)["errors"][0]["message"]
- logging.error("Row %s\tHTTP %s\t %s", num_records, response.status_code, error_msg)
- if "id value already exists" not in json.loads(response.text)["errors"][0]["message"]:
+ except folioclient.FolioHTTPError as fhe:
+ if fhe.response.status_code == 422:
+ self.num_failures += 1
+ error_msg = json.loads(fhe.response.text)["errors"][0]["message"]
+ logging.error(
+ "Row %s\tHTTP %s\t %s", num_records, fhe.response.status_code, error_msg
+ )
+ if (
+ "id value already exists"
+ not in json.loads(fhe.response.text)["errors"][0]["message"]
+ ):
+ failed_recs_file.write(row)
+ else:
+ self.num_failures += 1
+ logging.error(
+ "Row %s\tHTTP %s\t%s", num_records, fhe.response.status_code, fhe.response.text
+ )
  failed_recs_file.write(row)
- else:
- self.num_failures += 1
- logging.error("Row %s\tHTTP %s\t%s", num_records, response.status_code, response.text)
- failed_recs_file.write(row)
  if num_records % 50 == 0:
  logging.info(
  "%s records posted successfully. %s failed",
@@ -563,52 +623,52 @@ class BatchPoster(MigrationTaskBase):
  object_types.update(task_configuration.extradata_endpoints)
  if object_name == "instructor":
  instructor = json.loads(string_object)
- return f'coursereserves/courselistings/{instructor["courseListingId"]}/instructors'
+ return f"coursereserves/courselistings/{instructor['courseListingId']}/instructors"

  if object_name == "interfaceCredential":
  credential = json.loads(string_object)
- return f'organizations-storage/interfaces/{credential["interfaceId"]}/credentials'
+ return f"organizations-storage/interfaces/{credential['interfaceId']}/credentials"

  return object_types[object_name]

  def post_single_records(self, row: str, num_records: int, failed_recs_file):
  if self.api_info["is_batch"]:
  raise TypeError("This record type supports batch processing, use post_batch method")
- api_endpoint = self.api_info.get("api_endpoint")
- url = f"{self.folio_client.gateway_url}{api_endpoint}"
- response = self.post_objects(url, row)
- if response.status_code == 201:
+ url = self.api_info.get("api_endpoint")
+ try:
+ _ = self.folio_client.folio_post(url, payload=row)
  self.num_posted += 1
- elif response.status_code == 422:
- self.num_failures += 1
- error_msg = json.loads(response.text)["errors"][0]["message"]
- logging.error("Row %s\tHTTP %s\t %s", num_records, response.status_code, error_msg)
- if "id value already exists" not in json.loads(response.text)["errors"][0]["message"]:
+ except folioclient.FolioHTTPError as fhe:
+ if fhe.response.status_code == 422:
+ self.num_failures += 1
+ error_msg = json.loads(fhe.response.text)["errors"][0]["message"]
+ logging.error(
+ "Row %s\tHTTP %s\t %s", num_records, fhe.response.status_code, error_msg
+ )
+ if (
+ "id value already exists"
+ not in json.loads(fhe.response.text)["errors"][0]["message"]
+ ):
+ failed_recs_file.write(row)
+ else:
+ self.num_failures += 1
+ logging.error(
+ "Row %s\tHTTP %s\t%s",
+ num_records,
+ fhe.response.status_code,
+ fhe.response.text,
+ )
  failed_recs_file.write(row)
- else:
- self.num_failures += 1
- logging.error("Row %s\tHTTP %s\t%s", num_records, response.status_code, response.text)
- failed_recs_file.write(row)
- if num_records % 50 == 0:
- logging.info(
- "%s records posted successfully. %s failed",
- self.num_posted,
- self.num_failures,
- )
-
- def post_objects(self, url, body):
- if self.http_client and not self.http_client.is_closed:
- return self.http_client.post(
- url, data=body.encode("utf-8"), headers=self.folio_client.okapi_headers
- )
- else:
- return httpx.post(
- url, headers=self.okapi_headers, data=body.encode("utf-8"), timeout=None
- )
+ if num_records % 50 == 0:
+ logging.info(
+ "%s records posted successfully. %s failed",
+ self.num_posted,
+ self.num_failures,
+ )

  def handle_generic_exception(self, exception, last_row, batch, num_records, failed_recs_file):
  logging.error("%s", exception)
- self.migration_report.add("Details", i18n.t("Generic exceptions (see log for details)"))
+ self.migration_report.add("Details", i18n_t("Generic exceptions (see log for details)"))
  # logging.error("Failed row: %s", last_row)
  self.failed_batches += 1
  self.num_failures += len(batch)
@@ -621,7 +681,7 @@ class BatchPoster(MigrationTaskBase):
  sys.exit(1)

  def handle_unicode_error(self, unicode_error, last_row):
- self.migration_report.add("Details", i18n.t("Encoding errors"))
+ self.migration_report.add("Details", i18n_t("Encoding errors"))
  logging.info("=========ERROR==============")
  logging.info(
  "%s Posting failed. Encoding error reading file",
@@ -636,9 +696,9 @@ class BatchPoster(MigrationTaskBase):
  traceback.logging.info_exc() # type: ignore
  logging.info("=======================")

- def post_batch(self, batch, failed_recs_file, num_records, recursion_depth=0):
+ def post_batch(self, batch, failed_recs_file, num_records):
  if self.query_params.get("upsert", False) and self.api_info.get("query_endpoint", ""):
- self.set_version(batch, self.api_info['query_endpoint'], self.api_info['object_name'])
+ self.set_version(batch, self.api_info["query_endpoint"], self.api_info["object_name"])
  response = self.do_post(batch)
  if response.status_code == 401:
  logging.error("Authorization failed (%s). Fetching new auth token...", response.text)
@@ -707,24 +767,6 @@ class BatchPoster(MigrationTaskBase):
  # Likely a json parsing error
  logging.error(response.text)
  raise TransformationProcessError("", "HTTP 400. Something is wrong. Quitting")
- elif self.task_configuration.object_type == "SRS" and response.status_code >= 500:
- logging.info(
- "Post failed. Size: %s Waiting 30s until reposting. Number of tries: %s of 5",
- get_req_size(response),
- recursion_depth,
- )
- logging.info(response.text)
- time.sleep(30)
- if recursion_depth > 4:
- raise TransformationRecordFailedError(
- "",
- f"HTTP {response.status_code}\t"
- f"Request size: {get_req_size(response)}"
- f"{datetime.now(timezone.utc).isoformat()}\n",
- response.text,
- )
- else:
- self.post_batch(batch, failed_recs_file, num_records, recursion_depth + 1)
  elif (
  response.status_code == 413 and "DB_ALLOW_SUPPRESS_OPTIMISTIC_LOCKING" in response.text
  ):
@@ -749,57 +791,42 @@ class BatchPoster(MigrationTaskBase):
  )

  def do_post(self, batch):
- path = self.api_info["api_endpoint"]
- url = self.folio_client.gateway_url + path
- if self.api_info["object_name"] == "users":
- payload = {self.api_info["object_name"]: list(batch), "totalRecords": len(batch)}
- elif self.api_info["total_records"]:
- payload = {"records": list(batch), "totalRecords": len(batch)}
- else:
- payload = {self.api_info["object_name"]: batch}
- if self.http_client and not self.http_client.is_closed:
- return self.http_client.post(
- url,
- json=payload,
- headers=self.folio_client.okapi_headers,
- params=self.query_params
- )
- else:
- return httpx.post(
+ with self.folio_client.get_folio_http_client() as http_client:
+ url = self.api_info["api_endpoint"]
+ if self.api_info["object_name"] == "users":
+ payload = {self.api_info["object_name"]: list(batch), "totalRecords": len(batch)}
+ elif self.api_info["total_records"]:
+ payload = {"records": list(batch), "totalRecords": len(batch)}
+ else:
+ payload = {self.api_info["object_name"]: batch}
+ return http_client.post(
  url,
- headers=self.okapi_headers,
  json=payload,
  params=self.query_params,
- timeout=None)
+ )

  def get_current_record_count_in_folio(self):
  if "query_endpoint" in self.api_info:
- url = f"{self.folio_client.gateway_url}{self.api_info['query_endpoint']}"
+ url = self.api_info["query_endpoint"]
  query_params = {"query": "cql.allRecords=1", "limit": 0}
- if self.http_client and not self.http_client.is_closed:
- res = self.http_client.get(
- url,
- headers=self.folio_client.okapi_headers,
- params=query_params
- )
- else:
- res = httpx.get(url, headers=self.okapi_headers, params=query_params, timeout=None)
  try:
- res.raise_for_status()
- return res.json()["totalRecords"]
- except httpx.HTTPStatusError:
- logging.error("Failed to get current record count. HTTP %s", res.status_code)
+ res = self.folio_client.folio_get(url, query_params=query_params)
+ return res["totalRecords"]
+ except folioclient.FolioHTTPError as fhe:
+ logging.error(
+ "Failed to get current record count. HTTP %s", fhe.response.status_code
+ )
  return 0
  except KeyError:
  logging.error(
  "Failed to get current record count. "
- f"No 'totalRecords' in response: {res.json()}"
+ f"No 'totalRecords' in response: {json.dumps(res, indent=2)}"
  )
  return 0
  else:
  raise ValueError(
  "No 'query_endpoint' available for %s. Cannot get current record count.",
- self.task_configuration.object_type
+ self.task_configuration.object_type,
  )

  def get_starting_record_count(self):
@@ -809,7 +836,7 @@ class BatchPoster(MigrationTaskBase):
  else:
  logging.info(
  "No query_endpoint available for %s. Cannot get starting record count.",
- self.task_configuration.object_type
+ self.task_configuration.object_type,
  )

  def get_finished_record_count(self):
@@ -819,14 +846,12 @@ class BatchPoster(MigrationTaskBase):
  else:
  logging.info(
  "No query_endpoint available for %s. Cannot get ending record count.",
- self.task_configuration.object_type
+ self.task_configuration.object_type,
  )

  def wrap_up(self):
  logging.info("Done. Wrapping up")
  self.extradata_writer.flush()
- if self.task_configuration.object_type == "SRS":
- self.commit_snapshot()
  if self.task_configuration.object_type != "Extradata":
  logging.info(
  (
@@ -842,7 +867,7 @@ class BatchPoster(MigrationTaskBase):
  if self.starting_record_count_in_folio:
  self.get_finished_record_count()
  total_on_server = (
- self.finished_record_count_in_folio - self.starting_record_count_in_folio
+ self.finished_record_count_in_folio - self.starting_record_count_in_folio
  )
  discrepancy = self.processed - self.num_failures - total_on_server
  if discrepancy != 0:
@@ -893,9 +918,8 @@ class BatchPoster(MigrationTaskBase):
  temp_start = self.start_datetime
  self.task_configuration.rerun_failed_records = False
  self.__init__(
- self.task_configuration,
- self.library_configuration,
- self.folio_client)
+ self.task_configuration, self.library_configuration, self.folio_client
+ )
  self.performing_rerun = True
  self.migration_report = temp_report
  self.start_datetime = temp_start
@@ -914,69 +938,6 @@ class BatchPoster(MigrationTaskBase):
  str(self.folder_structure.failed_recs_path),
  )

- def create_snapshot(self):
- snapshot = {
- "jobExecutionId": self.snapshot_id,
- "status": "PARSING_IN_PROGRESS",
- "processingStartedDate": datetime.now(timezone.utc).isoformat(timespec="milliseconds"),
- }
- try:
- url = f"{self.folio_client.gateway_url}/source-storage/snapshots"
- if self.http_client and not self.http_client.is_closed:
- res = self.http_client.post(
- url, json=snapshot, headers=self.folio_client.okapi_headers
- )
- else:
- res = httpx.post(url, headers=self.okapi_headers, json=snapshot, timeout=None)
- res.raise_for_status()
- logging.info("Posted Snapshot to FOLIO: %s", json.dumps(snapshot, indent=4))
- get_url = (
- f"{self.folio_client.gateway_url}/source-storage/snapshots/{self.snapshot_id}"
- )
- got = False
- while not got:
- logging.info("Sleeping while waiting for the snapshot to get created")
- time.sleep(5)
- if self.http_client and not self.http_client.is_closed:
- res = self.http_client.get(get_url, headers=self.folio_client.okapi_headers)
- else:
- res = httpx.get(get_url, headers=self.okapi_headers, timeout=None)
- if res.status_code == 200:
- got = True
- else:
- logging.info(res.status_code)
- except httpx.HTTPStatusError as exc:
- logging.exception("HTTP error occurred while posting the snapshot: %s", exc)
- sys.exit(1)
- except Exception as exc:
- logging.exception("Could not post the snapshot: %s", exc)
- sys.exit(1)
-
- def commit_snapshot(self):
- snapshot = {"jobExecutionId": self.snapshot_id, "status": "COMMITTED"}
- try:
- url = f"{self.folio_client.gateway_url}/source-storage/snapshots/{self.snapshot_id}"
- if self.http_client and not self.http_client.is_closed:
- res = self.http_client.put(
- url, json=snapshot, headers=self.folio_client.okapi_headers
- )
- else:
- res = httpx.put(url, headers=self.okapi_headers, json=snapshot, timeout=None)
- res.raise_for_status()
- logging.info("Posted Committed snapshot to FOLIO: %s", json.dumps(snapshot, indent=4))
- except httpx.HTTPStatusError as exc:
- logging.exception("HTTP error occurred while posting the snapshot: %s", exc)
- sys.exit(1)
- except Exception as exc:
- logging.exception(
- "Could not commit snapshot with id %s. Post this to /source-storage/snapshots/%s:",
- self.snapshot_id,
- self.snapshot_id,
- exc,
- )
- logging.info("%s", json.dumps(snapshot, indent=4))
- sys.exit(1)
-

  def get_api_info(object_type: str, use_safe: bool = True):
  choices = {
@@ -1038,22 +999,6 @@ def get_api_info(object_type: str, use_safe: bool = True):
  "addSnapshotId": False,
  "supports_upsert": True,
  },
- "Authorities": {
- "object_name": "",
- "api_endpoint": "/authority-storage/authorities",
- "is_batch": False,
- "total_records": False,
- "addSnapshotId": False,
- "supports_upsert": False,
- },
- "SRS": {
- "object_name": "records",
- "api_endpoint": "/source-storage/batch/records",
- "is_batch": True,
- "total_records": True,
- "addSnapshotId": True,
- "supports_upsert": False,
- },
  "Users": {
  "object_name": "users",
  "api_endpoint": "/user-import",
@@ -1085,27 +1030,12 @@ def get_api_info(object_type: str, use_safe: bool = True):
  except KeyError:
  key_string = ", ".join(choices.keys())
  logging.error(
- f"Wrong type. Only one of {key_string} are allowed, "
- f"received {object_type=} instead"
+ f"Wrong type. Only one of {key_string} are allowed, received {object_type=} instead"
  )
  logging.error("Halting")
  sys.exit(1)


- def chunks(records, number_of_chunks):
- """Yield successive n-sized chunks from lst.
-
- Args:
- records (_type_): _description_
- number_of_chunks (_type_): _description_
-
- Yields:
- _type_: _description_
- """
- for i in range(0, len(records), number_of_chunks):
- yield records[i: i + number_of_chunks]
-
-
  def get_human_readable(size, precision=2):
  suffixes = ["B", "KB", "MB", "GB", "TB"]
  suffix_index = 0
@@ -1115,22 +1045,23 @@ def get_human_readable(size, precision=2):
  return "%.*f%s" % (precision, size, suffixes[suffix_index])


- def get_req_size(response: httpx.Response):
+ def get_req_size(response: "Response"):
  size = response.request.method
  size += str(response.request.url)
  size += "\r\n".join(f"{k}{v}" for k, v in response.request.headers.items())
  size += response.request.content.decode("utf-8") or ""
  return get_human_readable(len(size.encode("utf-8")))

+
  def parse_path(path):
  """
  Parses a path like 'foo.bar[0].baz' into ['foo', 'bar', 0, 'baz']
  """
  tokens = []
  # Split by dot, then extract indices
- for part in path.split('.'):
+ for part in path.split("."):
  # Find all [index] parts
- matches = re.findall(r'([^\[\]]+)|\[(\d+)\]', part)
+ matches = re.findall(r"([^\[\]]+)|\[(\d+)\]", part)
  for name, idx in matches:
  if name:
  tokens.append(name)
@@ -1138,12 +1069,14 @@ def parse_path(path):
  tokens.append(int(idx))
  return tokens

+
  def get_by_path(data, path):
  keys = parse_path(path)
  for key in keys:
  data = data[key]
  return data

+
  def set_by_path(data, path, value):
  keys = parse_path(path)
  for i, key in enumerate(keys[:-1]):
@@ -1164,6 +1097,7 @@ def set_by_path(data, path, value):
  else:
  data[last_key] = value

+
  def extract_paths(data, paths):
  result = {}
  for path in paths:
@@ -1174,6 +1108,7 @@ def extract_paths(data, paths):
  continue
  return result

+
  def deep_update(target, patch):
  """
  Recursively update target dict/list with values from patch dict/list.
@@ -1181,11 +1116,7 @@ def deep_update(target, patch):
  """
  if isinstance(patch, dict):
  for k, v in patch.items():
- if (
- k in target
- and isinstance(target[k], (dict, list))
- and isinstance(v, (dict, list))
- ):
+ if k in target and isinstance(target[k], (dict, list)) and isinstance(v, (dict, list)):
  deep_update(target[k], v)
  else:
  target[k] = v