folio-migration-tools 1.10.0b3-py3-none-any.whl → 1.10.0b4-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
folio_migration_tools/__main__.py
@@ -4,6 +4,7 @@ import logging
 import sys
 from os import environ
 from pathlib import Path
+from warnings import warn
 
 import httpx
 import humps
@@ -126,6 +127,14 @@ def main():
     i18n.set("locale", args.report_language)
     config_file, library_config = prep_library_config(args)
     try:
+        if args.task_name == "AuthorityTransformer":
+            warn(
+                "The AuthorityTransformer has been removed."
+                " Please update your configuration accordingly."
+                " Use Data Import to load authority records.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
         migration_task_config = next(
             t for t in config_file["migration_tasks"] if t["name"] == args.task_name
         )
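
Note on the new guard in main(): the deprecation notice goes through Python's standard warnings machinery, so whether users actually see it depends on their warning filters. A minimal sketch of the behavior, with run_task standing in for the package's main() (illustrative name, not part of the wheel):

```python
import warnings

def run_task(task_name: str) -> None:
    if task_name == "AuthorityTransformer":
        # stacklevel=2 attributes the warning to run_task's caller, so the
        # message points at the configuration site rather than this line.
        warnings.warn(
            "The AuthorityTransformer has been removed.",
            DeprecationWarning,
            stacklevel=2,
        )

# DeprecationWarning is often hidden by default; surface it explicitly:
warnings.simplefilter("always", DeprecationWarning)
run_task("AuthorityTransformer")
```

The warning does not abort the run by itself; the subsequent migration_task_config lookup will still fail if no task of that name exists in the configuration.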
folio_migration_tools/folder_structure.py
@@ -110,9 +110,6 @@ class FolderStructure:
         self.instance_id_map_path = (
             self.results_folder / f"{str(FOLIONamespaces.instances.name).lower()}_id_map.json"
         )
-        self.auth_id_map_path = (
-            self.results_folder / f"{str(FOLIONamespaces.authorities.name).lower()}_id_map.json"
-        )
 
         self.holdings_id_map_path = (
             self.results_folder / f"{str(FOLIONamespaces.holdings.name).lower()}_id_map.json"
folio_migration_tools/marc_rules_transformation/conditions.py
@@ -47,8 +47,6 @@ class Conditions:
         if object_type == "bibs":
             self.setup_reference_data_for_all()
             self.setup_reference_data_for_bibs()
-        elif object_type == "auth":
-            self.setup_reference_data_for_auth()
         else:
             self.setup_reference_data_for_all()
             self.setup_reference_data_for_items_and_holdings(default_call_number_type_name)
@@ -150,15 +148,6 @@ class Conditions:
         if not self.folio.class_types:
             raise TransformationProcessError("", "No class_types in FOLIO")
 
-    def setup_reference_data_for_auth(self):
-        self.authority_note_types = list(
-            self.folio.folio_get_all(
-                "/authority-note-types", "authorityNoteTypes", self.folio.cql_all, 1000
-            )
-        )
-        logging.info(f"{len(self.authority_note_types)} \tAuthority note types")
-        logging.info(f"{len(self.folio.identifier_types)} \tidentifier types")  # type: ignore
-
     def get_condition(
         self, name, legacy_id, value, parameter=None, marc_field: field.Field | None = None
     ):
@@ -430,24 +419,6 @@ class Conditions:
                 parameter.get("name", ""),
             ) from ee
 
-    def condition_set_authority_note_type_id(
-        self, legacy_id, _, parameter, marc_field: field.Field
-    ):
-        try:
-            t = self.get_ref_data_tuple_by_name(
-                self.authority_note_types, "authority_note_types", parameter["name"]
-            )
-            self.mapper.migration_report.add("MappedNoteTypes", t[1])
-            return t[0]
-        except Exception as ee:
-            logging.error(ee)
-            raise TransformationProcessError(
-                legacy_id,
-                f"Authority note type mapping error.\tParameter: {parameter.get('name', '')}\t"
-                f"MARC Field: {marc_field}. Is mapping rules and ref data aligned?",
-                parameter.get("name", ""),
-            ) from ee
-
     def condition_set_classification_type_id(
         self, legacy_id, value, parameter, marc_field: field.Field
     ):
folio_migration_tools/marc_rules_transformation/rules_mapper_base.py
@@ -962,7 +962,6 @@ class RulesMapperBase(MapperBase):
         srs_types = {
             FOLIONamespaces.holdings: FOLIONamespaces.srs_records_holdingsrecord,
             FOLIONamespaces.instances: FOLIONamespaces.srs_records_bib,
-            FOLIONamespaces.authorities: FOLIONamespaces.srs_records_auth,
             FOLIONamespaces.edifact: FOLIONamespaces.srs_records_edifact,
         }
 
@@ -1020,7 +1019,6 @@ class RulesMapperBase(MapperBase):
         record_types = {
             FOLIONamespaces.holdings: "MARC_HOLDING",
             FOLIONamespaces.instances: "MARC_BIB",
-            FOLIONamespaces.authorities: "MARC_AUTHORITY",
             FOLIONamespaces.edifact: "EDIFACT",
         }
 
@@ -1033,10 +1031,6 @@ class RulesMapperBase(MapperBase):
                 "holdingsId": folio_object["id"],
                 "holdingsHrid": folio_object.get("hrid", ""),
             },
-            FOLIONamespaces.authorities: {
-                "authorityId": folio_object["id"],
-                "authorityHrid": marc_record["001"].data,
-            },
             FOLIONamespaces.edifact: {},
         }
 
folio_migration_tools/migration_tasks/batch_poster.py
@@ -7,11 +7,14 @@ import sys
 import time
 import traceback
 from datetime import datetime, timezone
-from typing import Annotated, List, Optional
+from typing import TYPE_CHECKING, Annotated, List, Optional
 from uuid import uuid4
 
-import httpx
+import folioclient
 import i18n
+
+if TYPE_CHECKING:
+    from httpx import Response
 from folio_uuid.folio_namespaces import FOLIONamespaces
 from pydantic import Field
 
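
Note: httpx is dropped from this module's runtime imports; the Response name now exists only for type checkers. A minimal sketch of the TYPE_CHECKING pattern used here (the describe function is illustrative, not part of the wheel):

```python
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Resolved by mypy/pyright only; httpx is never imported at runtime.
    from httpx import Response

def describe(response: "Response") -> str:
    # The quoted ("forward reference") annotation keeps this module
    # importable even when httpx is not installed.
    return f"{response.request.method} {response.request.url}"
```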
@@ -73,7 +76,7 @@ class BatchPoster(MigrationTaskBase):
             description=(
                 "The type of object being migrated"
                 "Examples of possible values: "
-                "'Extradata', 'SRS', Instances', 'Holdings', 'Items'"
+                "'Extradata', 'Instances', 'Holdings', 'Items'"
             ),
         ),
     ]
@@ -245,90 +248,80 @@ class BatchPoster(MigrationTaskBase):
         self.failed_fields: set = set()
         self.num_failures = 0
         self.num_posted = 0
-        self.okapi_headers = self.folio_client.okapi_headers
-        self.http_client = None
         self.starting_record_count_in_folio: Optional[int] = None
         self.finished_record_count_in_folio: Optional[int] = None
 
     def do_work(self):  # noqa: C901
-        with self.folio_client.get_folio_http_client() as httpx_client:
-            self.http_client = httpx_client
-            with open(
-                self.folder_structure.failed_recs_path, "w", encoding="utf-8"
-            ) as failed_recs_file:
-                self.get_starting_record_count()
-                try:
-                    batch = []
-                    if self.task_configuration.object_type == "SRS":
-                        self.create_snapshot()
-                    for idx, file_def in enumerate(self.task_configuration.files):  # noqa: B007
-                        path = self.folder_structure.results_folder / file_def.file_name
-                        with open(path) as rows:
-                            logging.info("Running %s", path)
-                            last_row = ""
-                            for self.processed, row in enumerate(rows, start=1):
-                                last_row = row
-                                if row.strip():
-                                    try:
-                                        if self.task_configuration.object_type == "Extradata":
-                                            self.post_extra_data(
-                                                row, self.processed, failed_recs_file
-                                            )
-                                        elif not self.api_info["is_batch"]:
-                                            self.post_single_records(
-                                                row, self.processed, failed_recs_file
-                                            )
-                                        else:
-                                            batch = self.post_record_batch(
-                                                batch, failed_recs_file, row
-                                            )
-                                    except UnicodeDecodeError as unicode_error:
-                                        self.handle_unicode_error(unicode_error, last_row)
-                                    except TransformationProcessError as tpe:
-                                        self.handle_generic_exception(
-                                            tpe,
-                                            last_row,
-                                            batch,
-                                            self.processed,
-                                            failed_recs_file,
+        with open(
+            self.folder_structure.failed_recs_path, "w", encoding="utf-8"
+        ) as failed_recs_file:
+            self.get_starting_record_count()
+            try:
+                batch = []
+                for idx, file_def in enumerate(self.task_configuration.files):  # noqa: B007
+                    path = self.folder_structure.results_folder / file_def.file_name
+                    with open(path) as rows:
+                        logging.info("Running %s", path)
+                        last_row = ""
+                        for self.processed, row in enumerate(rows, start=1):
+                            last_row = row
+                            if row.strip():
+                                try:
+                                    if self.task_configuration.object_type == "Extradata":
+                                        self.post_extra_data(row, self.processed, failed_recs_file)
+                                    elif not self.api_info["is_batch"]:
+                                        self.post_single_records(
+                                            row, self.processed, failed_recs_file
                                         )
-                                        batch = []
-                                        raise
-                                    except TransformationRecordFailedError as exception:
-                                        self.handle_generic_exception(
-                                            exception,
-                                            last_row,
-                                            batch,
-                                            self.processed,
-                                            failed_recs_file,
+                                    else:
+                                        batch = self.post_record_batch(
+                                            batch, failed_recs_file, row
                                         )
-                                        batch = []
-                except (FileNotFoundError, PermissionError) as ose:
-                    logging.error("Error reading file: %s", ose)
-
-                except Exception as ee:
-                    if "idx" in locals() and self.task_configuration.files[idx:]:
-                        for file_def in self.task_configuration.files[idx:]:
-                            path = self.folder_structure.results_folder / file_def.file_name
-                            try:
-                                with open(path, "r") as failed_file:
-                                    failed_file.seek(self.processed)
-                                    failed_recs_file.write(failed_file.read())
-                                self.processed = 0
-                            except (FileNotFoundError, PermissionError) as ose:
-                                logging.error("Error reading file: %s", ose)
-                    raise ee
-                finally:
-                    if self.task_configuration.object_type != "Extradata" and any(batch):
+                                except UnicodeDecodeError as unicode_error:
+                                    self.handle_unicode_error(unicode_error, last_row)
+                                except TransformationProcessError as tpe:
+                                    self.handle_generic_exception(
+                                        tpe,
+                                        last_row,
+                                        batch,
+                                        self.processed,
+                                        failed_recs_file,
+                                    )
+                                    batch = []
+                                    raise
+                                except TransformationRecordFailedError as exception:
+                                    self.handle_generic_exception(
+                                        exception,
+                                        last_row,
+                                        batch,
+                                        self.processed,
+                                        failed_recs_file,
+                                    )
+                                    batch = []
+            except (FileNotFoundError, PermissionError) as ose:
+                logging.error("Error reading file: %s", ose)
+
+            except Exception as ee:
+                if "idx" in locals() and self.task_configuration.files[idx:]:
+                    for file_def in self.task_configuration.files[idx:]:
+                        path = self.folder_structure.results_folder / file_def.file_name
                         try:
-                            self.post_batch(batch, failed_recs_file, self.processed)
-                        except Exception as exception:
-                            self.handle_generic_exception(
-                                exception, last_row, batch, self.processed, failed_recs_file
-                            )
-                        logging.info("Done posting %s records. ", self.processed)
-                        if self.task_configuration.object_type == "SRS":
-                            self.commit_snapshot()
+                            with open(path, "r") as failed_file:
+                                failed_file.seek(self.processed)
+                                failed_recs_file.write(failed_file.read())
+                            self.processed = 0
+                        except (FileNotFoundError, PermissionError) as ose:
+                            logging.error("Error reading file: %s", ose)
+                raise ee
+            finally:
+                if self.task_configuration.object_type != "Extradata" and any(batch):
+                    try:
+                        self.post_batch(batch, failed_recs_file, self.processed)
+                    except Exception as exception:
+                        self.handle_generic_exception(
+                            exception, last_row, batch, self.processed, failed_recs_file
+                        )
+                    logging.info("Done posting %s records. ", self.processed)
 
     @staticmethod
     def set_consortium_source(json_rec):
@@ -366,26 +359,26 @@ class BatchPoster(MigrationTaskBase):
         fetch_batch_size = 90
         fetch_tasks = []
         existing_records = {}
-        async with httpx.AsyncClient(base_url=self.folio_client.gateway_url) as client:
-            for i in range(0, len(batch), fetch_batch_size):
-                batch_slice = batch[i : i + fetch_batch_size]
-                fetch_tasks.append(
-                    self.get_with_retry(
-                        client,
-                        query_api,
-                        params={
-                            "query": (
-                                f"id==({' OR '.join([r['id'] for r in batch_slice if 'id' in r])})"
-                            ),
-                            "limit": fetch_batch_size,
-                        },
-                    )
+
+        for i in range(0, len(batch), fetch_batch_size):
+            batch_slice = batch[i : i + fetch_batch_size]
+            fetch_tasks.append(
+                self.get_with_retry(
+                    query_api,
+                    params={
+                        "query": (
+                            f"id==({' OR '.join([r['id'] for r in batch_slice if 'id' in r])})"
+                        ),
+                        "limit": fetch_batch_size,
+                    },
                 )
+            )
+
+        responses = await asyncio.gather(*fetch_tasks)
 
-            responses = await asyncio.gather(*fetch_tasks)
+        for response in responses:
+            self.collect_existing_records_for_upsert(object_type, response, existing_records)
 
-        for response in responses:
-            self.collect_existing_records_for_upsert(object_type, response, existing_records)
         for record in batch:
             if record["id"] in existing_records:
                 self.prepare_record_for_upsert(record, existing_records[record["id"]])
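
Note: set_version no longer manages its own httpx.AsyncClient; it queues one coroutine per 90-record slice and awaits them together. A minimal sketch of the slice-and-gather pattern, assuming a generic async fetch(params) coroutine in place of get_with_retry:

```python
import asyncio

async def fetch_all(ids: list[str], fetch, batch_size: int = 90) -> list:
    # One query per slice keeps the CQL id==(...) clause a manageable size.
    tasks = [
        fetch({
            "query": f"id==({' OR '.join(ids[i:i + batch_size])})",
            "limit": batch_size,
        })
        for i in range(0, len(ids), batch_size)
    ]
    # All slices run concurrently; results come back in task order.
    return await asyncio.gather(*tasks)
```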
@@ -421,18 +414,18 @@ class BatchPoster(MigrationTaskBase):
 
     @staticmethod
     def collect_existing_records_for_upsert(
-        object_type: str, response: httpx.Response, existing_records: dict
+        object_type: str, response_json: dict, existing_records: dict
     ):
-        if response.status_code == 200:
-            response_json = response.json()
-            for record in response_json[object_type]:
-                existing_records[record["id"]] = record
-        else:
-            logging.error(
-                "Failed to fetch current records. HTTP %s\t%s",
-                response.status_code,
-                response.text,
-            )
+        """
+        Collects existing records from API response into existing_records dict.
+
+        Args:
+            object_type: The key in response containing the records array
+            response_json: Parsed JSON response from API
+            existing_records: Dict to populate with {record_id: record_data}
+        """
+        for record in response_json.get(object_type, []):
+            existing_records[record["id"]] = record
 
     def handle_upsert_for_statistical_codes(self, updates: dict, keep_existing: dict):
         if not self.task_configuration.preserve_statistical_codes:
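
Note: because folio_get_async returns the parsed JSON body, the helper now receives a plain dict and no longer inspects status codes; .get(object_type, []) quietly yields nothing when the key is absent. An illustrative call with made-up data:

```python
existing_records: dict = {}
response_json = {"instances": [{"id": "abc-123", "hrid": "in00000001"}]}

BatchPoster.collect_existing_records_for_upsert(
    "instances", response_json, existing_records
)
assert existing_records == {"abc-123": {"id": "abc-123", "hrid": "in00000001"}}
```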
@@ -510,31 +503,66 @@ class BatchPoster(MigrationTaskBase):
         updates.update(keep_new)
         new_record.update(updates)
 
-    async def get_with_retry(self, client: httpx.AsyncClient, url: str, params=None):
+    async def get_with_retry(self, url: str, params=None):
+        """
+        Wrapper around folio_get_async with selective retry logic.
+
+        Retries on:
+        - Connection errors (FolioConnectionError): Always retry
+        - Server errors (5xx): Transient failures
+        - Rate limiting (429): Too many requests
+
+        Does NOT retry on:
+        - Client errors (4xx except 429): Bad request, won't succeed on retry
+        """
         if params is None:
             params = {}
         retries = 3
+
         for attempt in range(retries):
             try:
-                response = await client.get(
-                    url, params=params, headers=self.folio_client.okapi_headers
-                )
-                response.raise_for_status()
-                return response
-            except httpx.HTTPError as e:
+                return await self.folio_client.folio_get_async(url, query_params=params)
+
+            except folioclient.FolioConnectionError as e:
+                # Network/connection errors - always retry
                 if attempt < retries - 1:
-                    logging.warning(f"Retrying due to {e}")
-                    await asyncio.sleep(2**attempt)
+                    wait_time = 2**attempt
+                    logging.warning(
+                        f"Connection error, retrying in {wait_time}s "
+                        f"(attempt {attempt + 1}/{retries}): {e}"
+                    )
+                    await asyncio.sleep(wait_time)
                 else:
-                    logging.error(f"Failed to connect after {retries} attempts: {e}")
+                    logging.error(f"Connection failed after {retries} attempts: {e}")
+                    raise
+
+            except folioclient.FolioHTTPError as e:
+                # HTTP errors - selective retry based on status code
+                status_code = e.response.status_code
+                should_retry = status_code >= 500 or status_code == 429
+
+                if should_retry and attempt < retries - 1:
+                    # Longer wait for rate limiting
+                    wait_time = 5 if status_code == 429 else 2**attempt
+                    logging.warning(
+                        f"HTTP {status_code} error, retrying in {wait_time}s "
+                        f"(attempt {attempt + 1}/{retries}): {e}"
+                    )
+                    await asyncio.sleep(wait_time)
+                else:
+                    # Either not retryable or out of attempts
+                    if should_retry:
+                        logging.error(
+                            f"HTTP {status_code} error persisted after {retries} attempts: {e}"
+                        )
+                    else:
+                        logging.error(f"HTTP {status_code} error (not retryable): {e}")
                     raise
 
     def post_record_batch(self, batch, failed_recs_file, row):
         json_rec = json.loads(row.split("\t")[-1])
         if self.task_configuration.object_type == "ShadowInstances":
             self.set_consortium_source(json_rec)
-        if self.task_configuration.object_type == "SRS":
-            json_rec["snapshotId"] = self.snapshot_id
         if self.processed == 1:
             logging.info(json.dumps(json_rec, indent=True))
         batch.append(json_rec)
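
Note: the rewritten get_with_retry distinguishes three outcomes. A compact sketch of the implied wait policy (None meaning "do not retry"); with retries = 3 there are at most two sleeps before the final attempt, after which the exception propagates:

```python
def wait_seconds(status_code: int | None, attempt: int) -> float | None:
    """Sketch of the retry policy above; None means do not retry."""
    if status_code is None:         # connection error: always retry
        return float(2 ** attempt)  # 1s after attempt 0, 2s after attempt 1
    if status_code == 429:          # rate limited: fixed, longer wait
        return 5.0
    if status_code >= 500:          # transient server error
        return float(2 ** attempt)
    return None                     # other 4xx: fail fast
```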
@@ -545,22 +573,29 @@ class BatchPoster(MigrationTaskBase):
 
     def post_extra_data(self, row: str, num_records: int, failed_recs_file):
         (object_name, data) = row.split("\t")
-        endpoint = self.get_extradata_endpoint(self.task_configuration, object_name, data)
-        url = f"{self.folio_client.gateway_url}/{endpoint}"
+        url = self.get_extradata_endpoint(self.task_configuration, object_name, data)
         body = data
-        response = self.post_objects(url, body)
-        if response.status_code == 201:
+        try:
+            _ = self.folio_client.folio_post(url, payload=body)
             self.num_posted += 1
-        elif response.status_code == 422:
-            self.num_failures += 1
-            error_msg = json.loads(response.text)["errors"][0]["message"]
-            logging.error("Row %s\tHTTP %s\t %s", num_records, response.status_code, error_msg)
-            if "id value already exists" not in json.loads(response.text)["errors"][0]["message"]:
+        except folioclient.FolioHTTPError as fhe:
+            if fhe.response.status_code == 422:
+                self.num_failures += 1
+                error_msg = json.loads(fhe.response.text)["errors"][0]["message"]
+                logging.error(
+                    "Row %s\tHTTP %s\t %s", num_records, fhe.response.status_code, error_msg
+                )
+                if (
+                    "id value already exists"
+                    not in json.loads(fhe.response.text)["errors"][0]["message"]
+                ):
+                    failed_recs_file.write(row)
+            else:
+                self.num_failures += 1
+                logging.error(
+                    "Row %s\tHTTP %s\t%s", num_records, fhe.response.status_code, fhe.response.text
+                )
             failed_recs_file.write(row)
-        else:
-            self.num_failures += 1
-            logging.error("Row %s\tHTTP %s\t%s", num_records, response.status_code, response.text)
-            failed_recs_file.write(row)
         if num_records % 50 == 0:
             logging.info(
                 "%s records posted successfully. %s failed",
@@ -600,37 +635,37 @@ class BatchPoster(MigrationTaskBase):
     def post_single_records(self, row: str, num_records: int, failed_recs_file):
         if self.api_info["is_batch"]:
             raise TypeError("This record type supports batch processing, use post_batch method")
-        api_endpoint = self.api_info.get("api_endpoint")
-        url = f"{self.folio_client.gateway_url}{api_endpoint}"
-        response = self.post_objects(url, row)
-        if response.status_code == 201:
+        url = self.api_info.get("api_endpoint")
+        try:
+            _ = self.folio_client.folio_post(url, payload=row)
             self.num_posted += 1
-        elif response.status_code == 422:
-            self.num_failures += 1
-            error_msg = json.loads(response.text)["errors"][0]["message"]
-            logging.error("Row %s\tHTTP %s\t %s", num_records, response.status_code, error_msg)
-            if "id value already exists" not in json.loads(response.text)["errors"][0]["message"]:
+        except folioclient.FolioHTTPError as fhe:
+            if fhe.response.status_code == 422:
+                self.num_failures += 1
+                error_msg = json.loads(fhe.response.text)["errors"][0]["message"]
+                logging.error(
+                    "Row %s\tHTTP %s\t %s", num_records, fhe.response.status_code, error_msg
+                )
+                if (
+                    "id value already exists"
+                    not in json.loads(fhe.response.text)["errors"][0]["message"]
+                ):
+                    failed_recs_file.write(row)
+            else:
+                self.num_failures += 1
+                logging.error(
+                    "Row %s\tHTTP %s\t%s",
+                    num_records,
+                    fhe.response.status_code,
+                    fhe.response.text,
+                )
             failed_recs_file.write(row)
-        else:
-            self.num_failures += 1
-            logging.error("Row %s\tHTTP %s\t%s", num_records, response.status_code, response.text)
-            failed_recs_file.write(row)
-        if num_records % 50 == 0:
-            logging.info(
-                "%s records posted successfully. %s failed",
-                self.num_posted,
-                self.num_failures,
-            )
-
-    def post_objects(self, url, body):
-        if self.http_client and not self.http_client.is_closed:
-            return self.http_client.post(
-                url, data=body.encode("utf-8"), headers=self.folio_client.okapi_headers
-            )
-        else:
-            return httpx.post(
-                url, headers=self.okapi_headers, data=body.encode("utf-8"), timeout=None
-            )
+        if num_records % 50 == 0:
+            logging.info(
+                "%s records posted successfully. %s failed",
+                self.num_posted,
+                self.num_failures,
+            )
 
     def handle_generic_exception(self, exception, last_row, batch, num_records, failed_recs_file):
         logging.error("%s", exception)
@@ -662,7 +697,7 @@ class BatchPoster(MigrationTaskBase):
         traceback.logging.info_exc()  # type: ignore
         logging.info("=======================")
 
-    def post_batch(self, batch, failed_recs_file, num_records, recursion_depth=0):
+    def post_batch(self, batch, failed_recs_file, num_records):
         if self.query_params.get("upsert", False) and self.api_info.get("query_endpoint", ""):
             self.set_version(batch, self.api_info["query_endpoint"], self.api_info["object_name"])
         response = self.do_post(batch)
@@ -733,24 +768,6 @@ class BatchPoster(MigrationTaskBase):
             # Likely a json parsing error
             logging.error(response.text)
             raise TransformationProcessError("", "HTTP 400. Something is wrong. Quitting")
-        elif self.task_configuration.object_type == "SRS" and response.status_code >= 500:
-            logging.info(
-                "Post failed. Size: %s Waiting 30s until reposting. Number of tries: %s of 5",
-                get_req_size(response),
-                recursion_depth,
-            )
-            logging.info(response.text)
-            time.sleep(30)
-            if recursion_depth > 4:
-                raise TransformationRecordFailedError(
-                    "",
-                    f"HTTP {response.status_code}\t"
-                    f"Request size: {get_req_size(response)}"
-                    f"{datetime.now(timezone.utc).isoformat()}\n",
-                    response.text,
-                )
-            else:
-                self.post_batch(batch, failed_recs_file, num_records, recursion_depth + 1)
         elif (
             response.status_code == 413 and "DB_ALLOW_SUPPRESS_OPTIMISTIC_LOCKING" in response.text
         ):
@@ -775,50 +792,35 @@ class BatchPoster(MigrationTaskBase):
             )
 
     def do_post(self, batch):
-        path = self.api_info["api_endpoint"]
-        url = self.folio_client.gateway_url + path
+        url = self.api_info["api_endpoint"]
         if self.api_info["object_name"] == "users":
             payload = {self.api_info["object_name"]: list(batch), "totalRecords": len(batch)}
         elif self.api_info["total_records"]:
             payload = {"records": list(batch), "totalRecords": len(batch)}
         else:
             payload = {self.api_info["object_name"]: batch}
-        if self.http_client and not self.http_client.is_closed:
-            return self.http_client.post(
-                url,
-                json=payload,
-                headers=self.folio_client.okapi_headers,
-                params=self.query_params,
-            )
-        else:
-            return httpx.post(
-                url,
-                headers=self.okapi_headers,
-                json=payload,
-                params=self.query_params,
-                timeout=None,
-            )
+        return self.folio_client.folio_post(
+            url,
+            payload,
+            query_params=self.query_params,
+        )
 
     def get_current_record_count_in_folio(self):
         if "query_endpoint" in self.api_info:
-            url = f"{self.folio_client.gateway_url}{self.api_info['query_endpoint']}"
+            url = self.api_info["query_endpoint"]
             query_params = {"query": "cql.allRecords=1", "limit": 0}
-            if self.http_client and not self.http_client.is_closed:
-                res = self.http_client.get(
-                    url, headers=self.folio_client.okapi_headers, params=query_params
-                )
-            else:
-                res = httpx.get(url, headers=self.okapi_headers, params=query_params, timeout=None)
             try:
-                res.raise_for_status()
-                return res.json()["totalRecords"]
-            except httpx.HTTPStatusError:
-                logging.error("Failed to get current record count. HTTP %s", res.status_code)
+                res = self.folio_client.folio_get(url, query_params=query_params)
+                return res["totalRecords"]
+            except folioclient.FolioHTTPError as fhe:
+                logging.error(
+                    "Failed to get current record count. HTTP %s", fhe.response.status_code
+                )
                 return 0
             except KeyError:
                 logging.error(
                     "Failed to get current record count. "
-                    f"No 'totalRecords' in response: {res.json()}"
+                    f"No 'totalRecords' in response: {json.dumps(res, indent=2)}"
                 )
                 return 0
         else:
@@ -850,8 +852,6 @@ class BatchPoster(MigrationTaskBase):
     def wrap_up(self):
         logging.info("Done. Wrapping up")
         self.extradata_writer.flush()
-        if self.task_configuration.object_type == "SRS":
-            self.commit_snapshot()
        if self.task_configuration.object_type != "Extradata":
            logging.info(
                (
@@ -938,69 +938,6 @@ class BatchPoster(MigrationTaskBase):
             str(self.folder_structure.failed_recs_path),
         )
 
-    def create_snapshot(self):
-        snapshot = {
-            "jobExecutionId": self.snapshot_id,
-            "status": "PARSING_IN_PROGRESS",
-            "processingStartedDate": datetime.now(timezone.utc).isoformat(timespec="milliseconds"),
-        }
-        try:
-            url = f"{self.folio_client.gateway_url}/source-storage/snapshots"
-            if self.http_client and not self.http_client.is_closed:
-                res = self.http_client.post(
-                    url, json=snapshot, headers=self.folio_client.okapi_headers
-                )
-            else:
-                res = httpx.post(url, headers=self.okapi_headers, json=snapshot, timeout=None)
-            res.raise_for_status()
-            logging.info("Posted Snapshot to FOLIO: %s", json.dumps(snapshot, indent=4))
-            get_url = (
-                f"{self.folio_client.gateway_url}/source-storage/snapshots/{self.snapshot_id}"
-            )
-            got = False
-            while not got:
-                logging.info("Sleeping while waiting for the snapshot to get created")
-                time.sleep(5)
-                if self.http_client and not self.http_client.is_closed:
-                    res = self.http_client.get(get_url, headers=self.folio_client.okapi_headers)
-                else:
-                    res = httpx.get(get_url, headers=self.okapi_headers, timeout=None)
-                if res.status_code == 200:
-                    got = True
-                else:
-                    logging.info(res.status_code)
-        except httpx.HTTPStatusError as exc:
-            logging.exception("HTTP error occurred while posting the snapshot: %s", exc)
-            sys.exit(1)
-        except Exception as exc:
-            logging.exception("Could not post the snapshot: %s", exc)
-            sys.exit(1)
-
-    def commit_snapshot(self):
-        snapshot = {"jobExecutionId": self.snapshot_id, "status": "COMMITTED"}
-        try:
-            url = f"{self.folio_client.gateway_url}/source-storage/snapshots/{self.snapshot_id}"
-            if self.http_client and not self.http_client.is_closed:
-                res = self.http_client.put(
-                    url, json=snapshot, headers=self.folio_client.okapi_headers
-                )
-            else:
-                res = httpx.put(url, headers=self.okapi_headers, json=snapshot, timeout=None)
-            res.raise_for_status()
-            logging.info("Posted Committed snapshot to FOLIO: %s", json.dumps(snapshot, indent=4))
-        except httpx.HTTPStatusError as exc:
-            logging.exception("HTTP error occurred while posting the snapshot: %s", exc)
-            sys.exit(1)
-        except Exception as exc:
-            logging.exception(
-                "Could not commit snapshot with id %s. Post this to /source-storage/snapshots/%s:",
-                self.snapshot_id,
-                self.snapshot_id,
-                exc,
-            )
-            logging.info("%s", json.dumps(snapshot, indent=4))
-            sys.exit(1)
-
 
 def get_api_info(object_type: str, use_safe: bool = True):
     choices = {
@@ -1062,22 +999,6 @@ def get_api_info(object_type: str, use_safe: bool = True):
             "addSnapshotId": False,
             "supports_upsert": True,
         },
-        "Authorities": {
-            "object_name": "",
-            "api_endpoint": "/authority-storage/authorities",
-            "is_batch": False,
-            "total_records": False,
-            "addSnapshotId": False,
-            "supports_upsert": False,
-        },
-        "SRS": {
-            "object_name": "records",
-            "api_endpoint": "/source-storage/batch/records",
-            "is_batch": True,
-            "total_records": True,
-            "addSnapshotId": True,
-            "supports_upsert": False,
-        },
         "Users": {
             "object_name": "users",
             "api_endpoint": "/user-import",
@@ -1124,7 +1045,7 @@ def get_human_readable(size, precision=2):
     return "%.*f%s" % (precision, size, suffixes[suffix_index])
 
 
-def get_req_size(response: httpx.Response):
+def get_req_size(response: "Response"):
     size = response.request.method
     size += str(response.request.url)
     size += "\r\n".join(f"{k}{v}" for k, v in response.request.headers.items())
folio_migration_tools/translations/en.json
@@ -24,7 +24,6 @@
   "Aged to lost and checked out": "Aged to lost and checked out",
   "Already set to %{value}. %{leader_key} was %{leader}": "Already set to %{value}. %{leader_key} was %{leader}",
   "An Unmapped": "An Unmapped",
-  "Authority records transformation report": "Authority records transformation report",
   "BW Items found tied to previously created BW Holdings": "BW Items found tied to previously created BW Holdings",
   "Bib identifier not in instances_id_map, no instance linked": "Bib identifier not in instances_id_map, no instance linked",
   "Bib ids referenced in bound-with items": "Bib ids referenced in bound-with items",
@@ -264,12 +263,6 @@
   "blurbs.AcquisitionMethodMapping.title": "POL Acquisition Method Mapping",
   "blurbs.AddedValueFromParameter.description": "",
   "blurbs.AddedValueFromParameter.title": "Added value from parameter since value is empty",
-  "blurbs.AuthorityEncodingLevel.description": "Library action: **All values that are not n or o will be set to n. If this is not what you want, you need to correct these values in your system. **<br/>An overview of the Encoding levels (Leader position 17) present in your source data. Allowed values according to the MARC standard are n or o",
-  "blurbs.AuthorityEncodingLevel.title": "Encoding level (leader pos 17)",
-  "blurbs.AuthoritySourceFileMapping.description": "Mappings based on FOLIO authority `naturalId` alpha prefix",
-  "blurbs.AuthoritySourceFileMapping.title": "Authority Source File Mapping Results",
-  "blurbs.AuthoritySources.description": "",
-  "blurbs.AuthoritySources.title": "Authorization sources and related information",
   "blurbs.BoundWithMappings.description": "",
   "blurbs.BoundWithMappings.title": "Bound-with mapping",
   "blurbs.CallNumberTypeMapping.description": "Call number types in MFHDs are mapped from 852, Indicator 1 according to a certain scheme. (LOC documentation)[https://www.loc.gov/marc/holdings/hd852.html]",
folio_migration_tools-1.10.0b4.dist-info/METADATA
@@ -1,12 +1,12 @@
 Metadata-Version: 2.4
 Name: folio-migration-tools
-Version: 1.10.0b3
+Version: 1.10.0b4
 Summary: A tool allowing you to migrate data from legacy ILS:s (Library systems) into FOLIO LSP
 Keywords: FOLIO,ILS,LSP,Library Systems,MARC21,Library data
 Author: Theodor Tolstoy, Lisa Sjögren, Brooks Travis, Jeremy Nelson, Clinton Bradford
 Author-email: Theodor Tolstoy <github.teddes@tolstoy.se>, Brooks Travis <brooks.travis@gmail.com>
 License-Expression: MIT
-Requires-Dist: folioclient>=1.0.1
+Requires-Dist: folioclient>=1.0.4
 Requires-Dist: pyhumps>=3.7.3,<4.0.0
 Requires-Dist: defusedxml>=0.7.1,<1.0.0
 Requires-Dist: python-dateutil>=2.8.2,<3.0.0
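
Note: the folioclient floor moves from 1.0.1 to 1.0.4, in line with the APIs the new batch_poster code calls (folio_get_async, folio_post with query_params, and the FolioHTTPError/FolioConnectionError exception types). A quick pre-flight check one could add to a deployment script, using the third-party packaging helper:

```python
from importlib.metadata import version
from packaging.version import Version

assert Version(version("folioclient")) >= Version("1.0.4"), (
    "folio-migration-tools 1.10.0b4 expects folioclient >= 1.0.4"
)
```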
folio_migration_tools-1.10.0b4.dist-info/RECORD
@@ -1,12 +1,12 @@
 folio_migration_tools/__init__.py,sha256=lnYgqA47l0iA-iORkVH3dgevk7gyGxVwg3MnLltA-U8,223
-folio_migration_tools/__main__.py,sha256=MlF8Fj_E4EcW6rU0posfj3ZyPzSWzSm6zXz2jlN7OGw,8835
+folio_migration_tools/__main__.py,sha256=KJdmLkKwAygTKuIKfvDL3M0JdVgsCbf2_LTL1FP6GxU,9233
 folio_migration_tools/circulation_helper.py,sha256=r1zpOKy47VFRHyXHvwUEjPfQ4jyJpjMAYc1IktJ94WU,14661
 folio_migration_tools/colors.py,sha256=GP0wdI_GZ2WD5SjrbPN-S3u8vvN_u6rGQIBBcWv_0ZM,227
 folio_migration_tools/config_file_load.py,sha256=zHHa6NDkN6EJiQE4DgjrFQPVKsd70POsfbGkB8308jg,2822
 folio_migration_tools/custom_dict.py,sha256=rRd9_RQqI85171p7wTfpMM0Mladh-LChbgMSmLvN7N0,680
 folio_migration_tools/custom_exceptions.py,sha256=BLP1gMPbTHSN-rqxzTawT4sRLiyAU3blBdkUBwiiPRk,2642
 folio_migration_tools/extradata_writer.py,sha256=fuchNcMc6BYb9IyfAcvXg7X4J2TfX6YiROfT2hr0JMw,1678
-folio_migration_tools/folder_structure.py,sha256=yqeeB1uADw9J6TCHUxKOv7wrTglfAeX9KpwNgOHjTg0,6917
+folio_migration_tools/folder_structure.py,sha256=ExrXNEWvCB5QMH17kQSyTDQ04thq--t8_p3F_iuyf0k,6776
 folio_migration_tools/helper.py,sha256=Jb-9PrMkgOUGYScRf8jMmGGTcPIohm3eFHenGSi3cUA,2979
 folio_migration_tools/holdings_helper.py,sha256=yJpz6aJrKRBiJ1MtT5bs2vXAc88uJuGh2_KDuCySOKc,7559
 folio_migration_tools/i18n_config.py,sha256=3AH_2b9zTsxE4XTe4isM_zYtPJSlK0ix6eBmV7kAYUM,228
@@ -24,20 +24,18 @@ folio_migration_tools/mapping_file_transformation/organization_mapper.py,sha256=
 folio_migration_tools/mapping_file_transformation/ref_data_mapping.py,sha256=rROcBiL5TE7bWsJ95A6shurPZ1e4In6PTwR5BN9amzU,8991
 folio_migration_tools/mapping_file_transformation/user_mapper.py,sha256=13cvFr7Vp6uxZNpAmLxGvPVLC1_En2NVvLtuP75HAzU,8846
 folio_migration_tools/marc_rules_transformation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-folio_migration_tools/marc_rules_transformation/conditions.py,sha256=zzl18PsqUaXsT5mcolPC89ar4RpQTs_beKAgVg4bttI,48190
+folio_migration_tools/marc_rules_transformation/conditions.py,sha256=-5U6nBGcBO49C9MMyxOL2wMhHGxUawkIM9e-MwNaM_4,46938
 folio_migration_tools/marc_rules_transformation/holdings_statementsparser.py,sha256=-mOGtoPa3qmEqGWtyBTN-fQ743ZmT8caDLc9ES9J74Y,13667
 folio_migration_tools/marc_rules_transformation/hrid_handler.py,sha256=WudBOzCwcJAuhEm4urLhAk5OQWGfbKz9_4Ou8fmjm1E,10022
 folio_migration_tools/marc_rules_transformation/loc_language_codes.xml,sha256=ztn2_yKws6qySL4oSsZh7sOjxq5bCC1PhAnXJdtgmJ0,382912
 folio_migration_tools/marc_rules_transformation/marc_file_processor.py,sha256=o03d_G-4MR4e5VPfu7ljxAVDl79o2ONpQIqQ-V2RCdA,12523
 folio_migration_tools/marc_rules_transformation/marc_reader_wrapper.py,sha256=9ATjYMRAjy0QcXtmNZaHVhHLJ5hE1WUgOcF6KMJjbgo,5309
-folio_migration_tools/marc_rules_transformation/rules_mapper_authorities.py,sha256=fhe1vTt9F5u5PJliklSGP3TPZHRO3KV8GHeb-pdLdnA,9622
-folio_migration_tools/marc_rules_transformation/rules_mapper_base.py,sha256=ijOs9r0Mcx0XyNhDAq7fw1aFJ-JnAEhHx98-t262uRo,46158
+folio_migration_tools/marc_rules_transformation/rules_mapper_base.py,sha256=KxyZjizbLwwAY2PfMSyh6u_mVTpfhyvdaii_PlpLscw,45857
 folio_migration_tools/marc_rules_transformation/rules_mapper_bibs.py,sha256=F8tKn59zHUV3Gqa9NY-JvTbWgfDjNTcPvQONk8gzwGs,30428
 folio_migration_tools/marc_rules_transformation/rules_mapper_holdings.py,sha256=YILyEfO-LkQPk-4OjiuY68X5xDA0LlI7UUp7_mvzLUE,29184
 folio_migration_tools/migration_report.py,sha256=B8e4tMfT0xCJ3BxkSg7ZZJYmg0VLQVXmmVnWwmojZD4,4260
 folio_migration_tools/migration_tasks/__init__.py,sha256=ZkbY_yGyB84Ke8OMlYUzyyBj4cxxNrhMTwQlu_GbdDs,211
-folio_migration_tools/migration_tasks/authority_transformer.py,sha256=tB9XBJn5BPJ1Xa79R9blDz31jN4UvLB1VFbyFjFjfM4,4228
-folio_migration_tools/migration_tasks/batch_poster.py,sha256=x3DQPrI1QnRtg9Bdf-e3ztv4llWPt5JpeCIyE7mMNWU,50634
+folio_migration_tools/migration_tasks/batch_poster.py,sha256=dIsqqjizuJN0BOj98uK4EsAQgdCcQwMwvK2CUtifd7w,46120
 folio_migration_tools/migration_tasks/bibs_transformer.py,sha256=zPxh2tjyqx88fuH1FuKLwhT6lhZ5fVTQAqE08IggYgM,6351
 folio_migration_tools/migration_tasks/courses_migrator.py,sha256=sKIeyUlc7o189lw88XbGILVkwnR9krqO0PgS-vLCCm8,7039
 folio_migration_tools/migration_tasks/holdings_csv_transformer.py,sha256=JzOufqjSR2V-gUvOq0pdQFsXjpxk1ldGJBQWIWGfCps,21915
@@ -57,8 +55,8 @@ folio_migration_tools/transaction_migration/legacy_loan.py,sha256=A5qvThfP3g62Yn
 folio_migration_tools/transaction_migration/legacy_request.py,sha256=Kv7jpBIuZ_qyay8BdaeCPJID67l43Cl6x-ws9Lt49NI,6121
 folio_migration_tools/transaction_migration/legacy_reserve.py,sha256=qzw0okg4axAE_ezXopP9gFsQ_e60o0zh7zqRzFBSWHY,1806
 folio_migration_tools/transaction_migration/transaction_result.py,sha256=cTdCN0BnlI9_ZJB2Z3Fdkl9gpymIi-9mGZsRFlQcmDk,656
-folio_migration_tools/translations/en.json,sha256=4Ac66PR5Y78ll4_grhBm2IdTMsoZUv0q3IJvX8SQiJI,41778
-folio_migration_tools-1.10.0b3.dist-info/WHEEL,sha256=eh7sammvW2TypMMMGKgsM83HyA_3qQ5Lgg3ynoecH3M,79
-folio_migration_tools-1.10.0b3.dist-info/entry_points.txt,sha256=mJRRiCNP9j7_NpVXamHEiW8pDEjWQs1vEqD89G354cM,79
-folio_migration_tools-1.10.0b3.dist-info/METADATA,sha256=SgteVRYKSUGrgKVAHV8y4706ylBe7LiFsp4tXtyBxOg,7162
-folio_migration_tools-1.10.0b3.dist-info/RECORD,,
+folio_migration_tools/translations/en.json,sha256=pS7dhHmj4XBqTcFNIcqFgRMY557fQan1RomdNg6PtdA,40941
+folio_migration_tools-1.10.0b4.dist-info/WHEEL,sha256=eh7sammvW2TypMMMGKgsM83HyA_3qQ5Lgg3ynoecH3M,79
+folio_migration_tools-1.10.0b4.dist-info/entry_points.txt,sha256=mJRRiCNP9j7_NpVXamHEiW8pDEjWQs1vEqD89G354cM,79
+folio_migration_tools-1.10.0b4.dist-info/METADATA,sha256=JlJpqIeLAOLOFk0kqT7_ZcS5pO-0o-NgaXlBPt2Kh7A,7162
+folio_migration_tools-1.10.0b4.dist-info/RECORD,,
folio_migration_tools/marc_rules_transformation/rules_mapper_authorities.py (file removed)
@@ -1,242 +0,0 @@
-"""The default mapper, responsible for parsing MARC21 records acording to the
-FOLIO community specifications"""
-
-import logging
-import re
-import time
-import uuid
-from typing import List
-
-import i18n
-import pymarc
-from folio_uuid.folio_namespaces import FOLIONamespaces
-from folio_uuid.folio_uuid import FolioUUID
-from folioclient import FolioClient
-from pymarc import Leader, Record
-
-from folio_migration_tools.custom_exceptions import TransformationProcessError
-from folio_migration_tools.helper import Helper
-from folio_migration_tools.library_configuration import (
-    FileDefinition,
-    IlsFlavour,
-    LibraryConfiguration,
-)
-from folio_migration_tools.marc_rules_transformation.conditions import Conditions
-from folio_migration_tools.marc_rules_transformation.hrid_handler import HRIDHandler
-from folio_migration_tools.marc_rules_transformation.rules_mapper_base import (
-    RulesMapperBase,
-)
-
-
-class AuthorityMapper(RulesMapperBase):
-    non_repatable_fields = [
-        "100",
-        "110",
-        "111",
-        "130",
-        "147",
-        "148",
-        "150",
-        "151",
-        "155",
-        "162",
-        "180",
-        "181",
-        "182",
-        "185",
-        "378",
-        "384",
-    ]
-    """_summary_
-
-    Args:
-        RulesMapperBase (_type_): _description_
-    """
-
-    def __init__(
-        self,
-        folio_client,
-        library_configuration: LibraryConfiguration,
-        task_configuration,
-    ):
-        super().__init__(
-            folio_client,
-            library_configuration,
-            task_configuration,
-            None,
-            self.get_authority_json_schema(folio_client, library_configuration),
-            Conditions(folio_client, self, "auth", library_configuration.folio_release),
-        )
-        self.srs_recs: list = []
-        logging.info("Fetching mapping rules from the tenant")
-        rules_endpoint = "/mapping-rules/marc-authority"
-        self.mappings = self.folio_client.folio_get_single_object(rules_endpoint)
-        self.source_file_mapping: dict = {}
-        self.setup_source_file_mapping()
-        self.start = time.time()
-
-    def get_legacy_ids(self, marc_record: Record, idx: int) -> List[str]:
-        ils_flavour: IlsFlavour = self.task_configuration.ils_flavour
-        if ils_flavour in {IlsFlavour.sierra, IlsFlavour.millennium}:
-            raise TransformationProcessError("", f"ILS {ils_flavour} not configured")
-        elif ils_flavour == IlsFlavour.tag907y:
-            return RulesMapperBase.get_bib_id_from_907y(marc_record, idx)
-        elif ils_flavour == IlsFlavour.tagf990a:
-            return RulesMapperBase.get_bib_id_from_990a(marc_record, idx)
-        elif ils_flavour == IlsFlavour.aleph:
-            raise TransformationProcessError("", f"ILS {ils_flavour} not configured")
-        elif ils_flavour in {IlsFlavour.voyager, "voyager", IlsFlavour.tag001}:
-            return RulesMapperBase.get_bib_id_from_001(marc_record, idx)
-        elif ils_flavour == IlsFlavour.koha:
-            raise TransformationProcessError("", f"ILS {ils_flavour} not configured")
-        elif ils_flavour == IlsFlavour.none:
-            return [str(uuid.uuid4())]
-        else:
-            raise TransformationProcessError("", f"ILS {ils_flavour} not configured")
-
-    def parse_record(
-        self, marc_record: pymarc.Record, file_def: FileDefinition, legacy_ids: List[str]
-    ) -> list[dict]:
-        """Parses an auth recod into a FOLIO Authority object
-        This is the main function
-
-        Args:
-            legacy_ids (_type_): _description_
-            marc_record (Record): _description_
-            file_def (FileDefinition): _description_
-
-        Returns:
-            dict: _description_
-        """
-        self.print_progress()
-        ignored_subsequent_fields: set = set()
-        bad_tags = set(self.task_configuration.tags_to_delete)  # "907"
-        folio_authority = self.perform_initial_preparation(marc_record, legacy_ids)
-        for marc_field in marc_record:
-            self.report_marc_stats(marc_field, bad_tags, legacy_ids, ignored_subsequent_fields)
-            if marc_field.tag not in ignored_subsequent_fields:
-                self.process_marc_field(
-                    folio_authority,
-                    marc_field,
-                    ignored_subsequent_fields,
-                    legacy_ids,
-                )
-
-        self.perform_additional_parsing(folio_authority)
-        clean_folio_authority = self.validate_required_properties(
-            "-".join(legacy_ids), folio_authority, self.schema, FOLIONamespaces.instances
-        )
-        self.dedupe_rec(clean_folio_authority)
-        marc_record.remove_fields(*list(bad_tags))
-        self.report_folio_mapping(clean_folio_authority, self.schema)
-        return [clean_folio_authority]
-
-    def perform_initial_preparation(self, marc_record: pymarc.Record, legacy_ids):
-        folio_authority = {}
-        folio_authority["id"] = str(
-            FolioUUID(
-                self.base_string_for_folio_uuid,
-                FOLIONamespaces.authorities,
-                str(legacy_ids[-1]),
-            )
-        )
-        HRIDHandler.handle_035_generation(
-            marc_record, legacy_ids, self.migration_report, False, False
-        )
-        self.map_source_file_and_natural_id(marc_record, folio_authority)
-        self.handle_leader_17(marc_record, legacy_ids)
-        return folio_authority
-
-    def map_source_file_and_natural_id(self, marc_record, folio_authority):
-        """Implement source file and natural ID mappings according to MODDICORE-283"""
-        match_prefix_patt = re.compile("^[A-Za-z]+")
-        natural_id = None
-        source_file_id = None
-        has_010 = marc_record.get("010")
-        if has_010 and (has_010a := has_010.get_subfields("a")):
-            for a_subfield in has_010a:
-                natural_id_prefix = match_prefix_patt.match(a_subfield)
-                if natural_id_prefix and (
-                    source_file := self.source_file_mapping.get(natural_id_prefix.group(0), None)
-                ):
-                    natural_id = "".join(a_subfield.split())
-                    source_file_id = source_file["id"]
-                    self.migration_report.add_general_statistics(
-                        i18n.t("naturalId mapped from %{fro}", fro="010$a")
-                    )
-                    self.migration_report.add(
-                        "AuthoritySourceFileMapping",
-                        f"{source_file['name']} -- {natural_id_prefix.group(0)} -- 010$a",
-                        number=1,
-                    )
-                    break
-        if not source_file_id:
-            natural_id = "".join(marc_record["001"].data.split())
-            self.migration_report.add_general_statistics(
-                i18n.t("naturalId mapped from %{fro}", fro="001")
-            )
-            natural_id_prefix = match_prefix_patt.match(natural_id)
-            if natural_id_prefix:
-                if source_file := self.source_file_mapping.get(natural_id_prefix.group(0), None):
-                    source_file_id = source_file["id"]
-                    self.migration_report.add(
-                        "AuthoritySourceFileMapping",
-                        f"{source_file['name']} -- {natural_id_prefix.group(0)} -- 001",
-                        number=1,
-                    )
-        folio_authority["naturalId"] = natural_id
-        if source_file_id:
-            folio_authority["sourceFileId"] = source_file_id
-
-    def setup_source_file_mapping(self):
-        if self.folio_client.authority_source_files:
-            logging.info(
-                f"{len(self.folio_client.authority_source_files)} \tAuthority source files"
-            )
-            for source_file in self.folio_client.authority_source_files:
-                for sf_code in source_file.get("codes", []):
-                    self.source_file_mapping[sf_code] = source_file
-
-    def handle_leader_17(self, marc_record, legacy_ids):
-        leader_17 = marc_record.leader[17] or "Empty"
-        self.migration_report.add(
-            "AuthorityEncodingLevel", i18n.t("Original value") + f": {leader_17}"
-        )
-        if leader_17 not in ["n", "o"]:
-            Helper.log_data_issue(
-                legacy_ids,
-                f"LDR pos. 17 is '{leader_17}'. Is this correct? Value has been changed to 'n'.",
-                marc_record.leader,
-            )
-            marc_record.leader = Leader(f"{marc_record.leader[:17]}n{marc_record.leader[18:]}")
-            self.migration_report.add(
-                "AuthorityEncodingLevel", i18n.t("Changed %{a} to %{b}", a=leader_17, b="n")
-            )
-
-    def perform_additional_parsing(
-        self,
-        folio_authority: dict,
-    ) -> None:
-        """Do stuff not easily captured by the mapping rules
-
-        Args:
-            folio_authority (dict): _description_
-        """
-        folio_authority["source"] = "MARC"
-
-    def get_authority_json_schema(self, folio_client: FolioClient, library_configuration):
-        """Fetches the JSON Schema for autorities"""
-        if library_configuration.folio_release.name.lower()[0] < "p":
-            schema = folio_client.get_from_github(
-                "folio-org", "mod-inventory-storage", "/ramls/authorities/authority.json"
-            )
-        else:
-            schema = folio_client.get_from_github(
-                "folio-org",
-                "mod-entities-links",
-                "/src/main/resources/swagger.api/schemas/authority-storage/authorityDto.yaml",
-            )
-        return schema
-
-    def wrap_up(self):
-        logging.info("Mapper wrapping up")
folio_migration_tools/migration_tasks/authority_transformer.py (file removed)
@@ -1,118 +0,0 @@
-import logging
-from typing import Annotated
-from typing import List
-import i18n
-
-from folio_uuid.folio_namespaces import FOLIONamespaces
-from pydantic import Field
-
-from folio_migration_tools.helper import Helper
-from folio_migration_tools.library_configuration import FileDefinition
-from folio_migration_tools.library_configuration import IlsFlavour
-from folio_migration_tools.library_configuration import LibraryConfiguration
-from folio_migration_tools.marc_rules_transformation.marc_file_processor import (
-    MarcFileProcessor,
-)
-from folio_migration_tools.marc_rules_transformation.rules_mapper_authorities import (
-    AuthorityMapper,
-)
-from folio_migration_tools.migration_tasks.migration_task_base import MigrationTaskBase
-from folio_migration_tools.task_configuration import AbstractTaskConfiguration
-
-
-class AuthorityTransformer(MigrationTaskBase):
-    class TaskConfiguration(AbstractTaskConfiguration):
-        name: Annotated[
-            str,
-            Field(
-                description=(
-                    "Name of this migration task. The name is being used to call the specific "
-                    "task, and to distinguish tasks of similar types"
-                )
-            ),
-        ]
-        migration_task_type: Annotated[
-            str,
-            Field(
-                title="Migration task type",
-                description=("The type of migration task you want to perform"),
-            ),
-        ]
-        files: Annotated[
-            List[FileDefinition],
-            Field(
-                title="Source files", description=("List of MARC21 files with authority records")
-            ),
-        ]
-        ils_flavour: Annotated[
-            IlsFlavour,
-            Field(
-                title="ILS flavour", description="The type of ILS you are migrating records from."
-            ),
-        ]
-        tags_to_delete: Annotated[
-            List[str],
-            Field(
-                title="Tags to delete from MARC record",
-                description=(
-                    "Tags in the incoming MARC authority that the process should remove "
-                    "before adding them into FOLIO. These tags will be used in the "
-                    "transformation before getting removed."
-                ),
-            ),
-        ] = []
-        create_source_records: Annotated[
-            bool,
-            Field(
-                title="Create source records",
-                description=(
-                    "Controls wheter or not to retain the MARC records in Source Record Storage."
-                ),
-            ),
-        ] = True
-
-    @staticmethod
-    def get_object_type() -> FOLIONamespaces:
-        return FOLIONamespaces.authorities
-
-    def __init__(
-        self,
-        task_config: TaskConfiguration,
-        library_config: LibraryConfiguration,
-        use_logging: bool = True,
-    ):
-        super().__init__(library_config, task_config, use_logging)
-        self.processor: MarcFileProcessor
-        self.check_source_files(
-            self.folder_structure.legacy_records_folder, self.task_configuration.files
-        )
-        self.mapper: AuthorityMapper = AuthorityMapper(
-            self.folio_client, library_config, task_config
-        )
-        self.auth_ids: set = set()
-        logging.info("Init done")
-
-    def do_work(self):
-        self.do_work_marc_transformer()
-
-    def wrap_up(self):
-        logging.info("Done. Transformer Wrapping up...")
-        self.extradata_writer.flush()
-        self.processor.wrap_up()
-        with open(self.folder_structure.migration_reports_file, "w+") as report_file:
-            self.mapper.migration_report.write_migration_report(
-                i18n.t("Authority records transformation report"),
-                report_file,
-                self.start_datetime,
-            )
-            Helper.print_mapping_report(
-                report_file,
-                self.mapper.parsed_records,
-                self.mapper.mapped_folio_fields,
-                self.mapper.mapped_legacy_fields,
-            )
-        logging.info(
-            "Done. Transformation report written to %s",
-            self.folder_structure.migration_reports_file.name,
-        )
-        self.clean_out_empty_logs()