folio-migration-tools 1.10.0b3__py3-none-any.whl → 1.10.0b4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- folio_migration_tools/__main__.py +9 -0
- folio_migration_tools/folder_structure.py +0 -3
- folio_migration_tools/marc_rules_transformation/conditions.py +0 -29
- folio_migration_tools/marc_rules_transformation/rules_mapper_base.py +0 -6
- folio_migration_tools/migration_tasks/batch_poster.py +213 -292
- folio_migration_tools/translations/en.json +0 -7
- {folio_migration_tools-1.10.0b3.dist-info → folio_migration_tools-1.10.0b4.dist-info}/METADATA +2 -2
- {folio_migration_tools-1.10.0b3.dist-info → folio_migration_tools-1.10.0b4.dist-info}/RECORD +10 -12
- folio_migration_tools/marc_rules_transformation/rules_mapper_authorities.py +0 -242
- folio_migration_tools/migration_tasks/authority_transformer.py +0 -118
- {folio_migration_tools-1.10.0b3.dist-info → folio_migration_tools-1.10.0b4.dist-info}/WHEEL +0 -0
- {folio_migration_tools-1.10.0b3.dist-info → folio_migration_tools-1.10.0b4.dist-info}/entry_points.txt +0 -0
folio_migration_tools/__main__.py
@@ -4,6 +4,7 @@ import logging
 import sys
 from os import environ
 from pathlib import Path
+from warnings import warn
 
 import httpx
 import humps
@@ -126,6 +127,14 @@ def main():
     i18n.set("locale", args.report_language)
     config_file, library_config = prep_library_config(args)
     try:
+        if args.task_name == "AuthorityTransformer":
+            warn(
+                "The AuthorityTransformer has been removed."
+                " Please update your configuration accordingly."
+                " Use Data Import to load authority records.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
         migration_task_config = next(
             t for t in config_file["migration_tasks"] if t["name"] == args.task_name
         )
folio_migration_tools/folder_structure.py
@@ -110,9 +110,6 @@ class FolderStructure:
         self.instance_id_map_path = (
             self.results_folder / f"{str(FOLIONamespaces.instances.name).lower()}_id_map.json"
         )
-        self.auth_id_map_path = (
-            self.results_folder / f"{str(FOLIONamespaces.authorities.name).lower()}_id_map.json"
-        )
 
         self.holdings_id_map_path = (
             self.results_folder / f"{str(FOLIONamespaces.holdings.name).lower()}_id_map.json"
folio_migration_tools/marc_rules_transformation/conditions.py
@@ -47,8 +47,6 @@ class Conditions:
         if object_type == "bibs":
             self.setup_reference_data_for_all()
             self.setup_reference_data_for_bibs()
-        elif object_type == "auth":
-            self.setup_reference_data_for_auth()
         else:
             self.setup_reference_data_for_all()
             self.setup_reference_data_for_items_and_holdings(default_call_number_type_name)
@@ -150,15 +148,6 @@ class Conditions:
         if not self.folio.class_types:
             raise TransformationProcessError("", "No class_types in FOLIO")
 
-    def setup_reference_data_for_auth(self):
-        self.authority_note_types = list(
-            self.folio.folio_get_all(
-                "/authority-note-types", "authorityNoteTypes", self.folio.cql_all, 1000
-            )
-        )
-        logging.info(f"{len(self.authority_note_types)} \tAuthority note types")
-        logging.info(f"{len(self.folio.identifier_types)} \tidentifier types")  # type: ignore
-
     def get_condition(
         self, name, legacy_id, value, parameter=None, marc_field: field.Field | None = None
     ):
@@ -430,24 +419,6 @@ class Conditions:
                 parameter.get("name", ""),
             ) from ee
 
-    def condition_set_authority_note_type_id(
-        self, legacy_id, _, parameter, marc_field: field.Field
-    ):
-        try:
-            t = self.get_ref_data_tuple_by_name(
-                self.authority_note_types, "authority_note_types", parameter["name"]
-            )
-            self.mapper.migration_report.add("MappedNoteTypes", t[1])
-            return t[0]
-        except Exception as ee:
-            logging.error(ee)
-            raise TransformationProcessError(
-                legacy_id,
-                f"Authority note type mapping error.\tParameter: {parameter.get('name', '')}\t"
-                f"MARC Field: {marc_field}. Is mapping rules and ref data aligned?",
-                parameter.get("name", ""),
-            ) from ee
-
     def condition_set_classification_type_id(
         self, legacy_id, value, parameter, marc_field: field.Field
     ):
folio_migration_tools/marc_rules_transformation/rules_mapper_base.py
@@ -962,7 +962,6 @@ class RulesMapperBase(MapperBase):
         srs_types = {
             FOLIONamespaces.holdings: FOLIONamespaces.srs_records_holdingsrecord,
             FOLIONamespaces.instances: FOLIONamespaces.srs_records_bib,
-            FOLIONamespaces.authorities: FOLIONamespaces.srs_records_auth,
             FOLIONamespaces.edifact: FOLIONamespaces.srs_records_edifact,
         }
 
@@ -1020,7 +1019,6 @@ class RulesMapperBase(MapperBase):
         record_types = {
             FOLIONamespaces.holdings: "MARC_HOLDING",
             FOLIONamespaces.instances: "MARC_BIB",
-            FOLIONamespaces.authorities: "MARC_AUTHORITY",
            FOLIONamespaces.edifact: "EDIFACT",
         }
 
@@ -1033,10 +1031,6 @@ class RulesMapperBase(MapperBase):
                 "holdingsId": folio_object["id"],
                 "holdingsHrid": folio_object.get("hrid", ""),
             },
-            FOLIONamespaces.authorities: {
-                "authorityId": folio_object["id"],
-                "authorityHrid": marc_record["001"].data,
-            },
             FOLIONamespaces.edifact: {},
         }
 
folio_migration_tools/migration_tasks/batch_poster.py
@@ -7,11 +7,14 @@ import sys
 import time
 import traceback
 from datetime import datetime, timezone
-from typing import Annotated, List, Optional
+from typing import TYPE_CHECKING, Annotated, List, Optional
 from uuid import uuid4
 
-import
+import folioclient
 import i18n
+
+if TYPE_CHECKING:
+    from httpx import Response
 from folio_uuid.folio_namespaces import FOLIONamespaces
 from pydantic import Field
 
@@ -73,7 +76,7 @@ class BatchPoster(MigrationTaskBase):
                 description=(
                     "The type of object being migrated"
                     "Examples of possible values: "
-                    "'Extradata', '
+                    "'Extradata', 'Instances', 'Holdings', 'Items'"
                 ),
             ),
         ]
@@ -245,90 +248,80 @@ class BatchPoster(MigrationTaskBase):
         self.failed_fields: set = set()
         self.num_failures = 0
         self.num_posted = 0
-        self.okapi_headers = self.folio_client.okapi_headers
-        self.http_client = None
         self.starting_record_count_in_folio: Optional[int] = None
         self.finished_record_count_in_folio: Optional[int] = None
 
     def do_work(self):  # noqa: C901
-with
-self.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-self.post_extra_data(
-row, self.processed, failed_recs_file
-)
-elif not self.api_info["is_batch"]:
-self.post_single_records(
-row, self.processed, failed_recs_file
-)
-else:
-batch = self.post_record_batch(
-batch, failed_recs_file, row
-)
-except UnicodeDecodeError as unicode_error:
-self.handle_unicode_error(unicode_error, last_row)
-except TransformationProcessError as tpe:
-self.handle_generic_exception(
-tpe,
-last_row,
-batch,
-self.processed,
-failed_recs_file,
+        with open(
+            self.folder_structure.failed_recs_path, "w", encoding="utf-8"
+        ) as failed_recs_file:
+            self.get_starting_record_count()
+            try:
+                batch = []
+                for idx, file_def in enumerate(self.task_configuration.files):  # noqa: B007
+                    path = self.folder_structure.results_folder / file_def.file_name
+                    with open(path) as rows:
+                        logging.info("Running %s", path)
+                        last_row = ""
+                        for self.processed, row in enumerate(rows, start=1):
+                            last_row = row
+                            if row.strip():
+                                try:
+                                    if self.task_configuration.object_type == "Extradata":
+                                        self.post_extra_data(row, self.processed, failed_recs_file)
+                                    elif not self.api_info["is_batch"]:
+                                        self.post_single_records(
+                                            row, self.processed, failed_recs_file
                                         )
-
-
-
-self.handle_generic_exception(
-exception,
-last_row,
-batch,
-self.processed,
-failed_recs_file,
+                                    else:
+                                        batch = self.post_record_batch(
+                                            batch, failed_recs_file, row
                                         )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                                except UnicodeDecodeError as unicode_error:
+                                    self.handle_unicode_error(unicode_error, last_row)
+                                except TransformationProcessError as tpe:
+                                    self.handle_generic_exception(
+                                        tpe,
+                                        last_row,
+                                        batch,
+                                        self.processed,
+                                        failed_recs_file,
+                                    )
+                                    batch = []
+                                    raise
+                                except TransformationRecordFailedError as exception:
+                                    self.handle_generic_exception(
+                                        exception,
+                                        last_row,
+                                        batch,
+                                        self.processed,
+                                        failed_recs_file,
+                                    )
+                                    batch = []
+            except (FileNotFoundError, PermissionError) as ose:
+                logging.error("Error reading file: %s", ose)
+
+            except Exception as ee:
+                if "idx" in locals() and self.task_configuration.files[idx:]:
+                    for file_def in self.task_configuration.files[idx:]:
+                        path = self.folder_structure.results_folder / file_def.file_name
                         try:
-
-
-
-
-
-
-
-
+                            with open(path, "r") as failed_file:
+                                failed_file.seek(self.processed)
+                                failed_recs_file.write(failed_file.read())
+                                self.processed = 0
+                        except (FileNotFoundError, PermissionError) as ose:
+                            logging.error("Error reading file: %s", ose)
+                raise ee
+            finally:
+                if self.task_configuration.object_type != "Extradata" and any(batch):
+                    try:
+                        self.post_batch(batch, failed_recs_file, self.processed)
+                    except Exception as exception:
+                        self.handle_generic_exception(
+                            exception, last_row, batch, self.processed, failed_recs_file
+                        )
+                logging.info("Done posting %s records. ", self.processed)
 
     @staticmethod
     def set_consortium_source(json_rec):
@@ -366,26 +359,26 @@ class BatchPoster(MigrationTaskBase):
         fetch_batch_size = 90
         fetch_tasks = []
         existing_records = {}
-
-
-
-
-
-
-
-
-"
-
-
-
-},
-)
+
+        for i in range(0, len(batch), fetch_batch_size):
+            batch_slice = batch[i : i + fetch_batch_size]
+            fetch_tasks.append(
+                self.get_with_retry(
+                    query_api,
+                    params={
+                        "query": (
+                            f"id==({' OR '.join([r['id'] for r in batch_slice if 'id' in r])})"
+                        ),
+                        "limit": fetch_batch_size,
+                    },
                 )
+            )
+
+        responses = await asyncio.gather(*fetch_tasks)
 
-
+        for response in responses:
+            self.collect_existing_records_for_upsert(object_type, response, existing_records)
 
-for response in responses:
-self.collect_existing_records_for_upsert(object_type, response, existing_records)
         for record in batch:
             if record["id"] in existing_records:
                 self.prepare_record_for_upsert(record, existing_records[record["id"]])
@@ -421,18 +414,18 @@ class BatchPoster(MigrationTaskBase):
 
     @staticmethod
     def collect_existing_records_for_upsert(
-        object_type: str,
+        object_type: str, response_json: dict, existing_records: dict
     ):
-
-
-
-
-
-
-
-
-
-
+        """
+        Collects existing records from API response into existing_records dict.
+
+        Args:
+            object_type: The key in response containing the records array
+            response_json: Parsed JSON response from API
+            existing_records: Dict to populate with {record_id: record_data}
+        """
+        for record in response_json.get(object_type, []):
+            existing_records[record["id"]] = record
 
     def handle_upsert_for_statistical_codes(self, updates: dict, keep_existing: dict):
         if not self.task_configuration.preserve_statistical_codes:
@@ -510,31 +503,66 @@ class BatchPoster(MigrationTaskBase):
         updates.update(keep_new)
         new_record.update(updates)
 
-    async def get_with_retry(self,
+    async def get_with_retry(self, url: str, params=None):
+        """
+        Wrapper around folio_get_async with selective retry logic.
+
+        Retries on:
+        - Connection errors (FolioConnectionError): Always retry
+        - Server errors (5xx): Transient failures
+        - Rate limiting (429): Too many requests
+
+        Does NOT retry on:
+        - Client errors (4xx except 429): Bad request, won't succeed on retry
+        """
         if params is None:
             params = {}
         retries = 3
+
         for attempt in range(retries):
             try:
-
-
-
-
-return response
-except httpx.HTTPError as e:
+                return await self.folio_client.folio_get_async(url, query_params=params)
+
+            except folioclient.FolioConnectionError as e:
+                # Network/connection errors - always retry
                 if attempt < retries - 1:
-
-
+                    wait_time = 2**attempt
+                    logging.warning(
+                        f"Connection error, retrying in {wait_time}s "
+                        f"(attempt {attempt + 1}/{retries}): {e}"
+                    )
+                    await asyncio.sleep(wait_time)
                 else:
-logging.error(f"
+                    logging.error(f"Connection failed after {retries} attempts: {e}")
+                    raise
+
+            except folioclient.FolioHTTPError as e:
+                # HTTP errors - selective retry based on status code
+                status_code = e.response.status_code
+                should_retry = status_code >= 500 or status_code == 429
+
+                if should_retry and attempt < retries - 1:
+                    # Longer wait for rate limiting
+                    wait_time = 5 if status_code == 429 else 2**attempt
+                    logging.warning(
+                        f"HTTP {status_code} error, retrying in {wait_time}s "
+                        f"(attempt {attempt + 1}/{retries}): {e}"
+                    )
+                    await asyncio.sleep(wait_time)
+                else:
+                    # Either not retryable or out of attempts
+                    if should_retry:
+                        logging.error(
+                            f"HTTP {status_code} error persisted after {retries} attempts: {e}"
+                        )
+                    else:
+                        logging.error(f"HTTP {status_code} error (not retryable): {e}")
                     raise
 
     def post_record_batch(self, batch, failed_recs_file, row):
         json_rec = json.loads(row.split("\t")[-1])
         if self.task_configuration.object_type == "ShadowInstances":
             self.set_consortium_source(json_rec)
-        if self.task_configuration.object_type == "SRS":
-            json_rec["snapshotId"] = self.snapshot_id
         if self.processed == 1:
             logging.info(json.dumps(json_rec, indent=True))
         batch.append(json_rec)
@@ -545,22 +573,29 @@ class BatchPoster(MigrationTaskBase):
 
     def post_extra_data(self, row: str, num_records: int, failed_recs_file):
         (object_name, data) = row.split("\t")
-
-        url = f"{self.folio_client.gateway_url}/{endpoint}"
+        url = self.get_extradata_endpoint(self.task_configuration, object_name, data)
         body = data
-
-
+        try:
+            _ = self.folio_client.folio_post(url, payload=body)
             self.num_posted += 1
-
-
-
-
-
+        except folioclient.FolioHTTPError as fhe:
+            if fhe.response.status_code == 422:
+                self.num_failures += 1
+                error_msg = json.loads(fhe.response.text)["errors"][0]["message"]
+                logging.error(
+                    "Row %s\tHTTP %s\t %s", num_records, fhe.response.status_code, error_msg
+                )
+                if (
+                    "id value already exists"
+                    not in json.loads(fhe.response.text)["errors"][0]["message"]
+                ):
+                    failed_recs_file.write(row)
+            else:
+                self.num_failures += 1
+                logging.error(
+                    "Row %s\tHTTP %s\t%s", num_records, fhe.response.status_code, fhe.response.text
+                )
                 failed_recs_file.write(row)
-        else:
-            self.num_failures += 1
-            logging.error("Row %s\tHTTP %s\t%s", num_records, response.status_code, response.text)
-            failed_recs_file.write(row)
         if num_records % 50 == 0:
             logging.info(
                 "%s records posted successfully. %s failed",
@@ -600,37 +635,37 @@ class BatchPoster(MigrationTaskBase):
     def post_single_records(self, row: str, num_records: int, failed_recs_file):
         if self.api_info["is_batch"]:
            raise TypeError("This record type supports batch processing, use post_batch method")
-
-
-
-        if response.status_code == 201:
+        url = self.api_info.get("api_endpoint")
+        try:
+            _ = self.folio_client.folio_post(url, payload=row)
             self.num_posted += 1
-
-
-
-
-
+        except folioclient.FolioHTTPError as fhe:
+            if fhe.response.status_code == 422:
+                self.num_failures += 1
+                error_msg = json.loads(fhe.response.text)["errors"][0]["message"]
+                logging.error(
+                    "Row %s\tHTTP %s\t %s", num_records, fhe.response.status_code, error_msg
+                )
+                if (
+                    "id value already exists"
+                    not in json.loads(fhe.response.text)["errors"][0]["message"]
+                ):
+                    failed_recs_file.write(row)
+            else:
+                self.num_failures += 1
+                logging.error(
+                    "Row %s\tHTTP %s\t%s",
+                    num_records,
+                    fhe.response.status_code,
+                    fhe.response.text,
+                )
                 failed_recs_file.write(row)
-
-
-
-
-
-
-                "%s records posted successfully. %s failed",
-                self.num_posted,
-                self.num_failures,
-            )
-
-    def post_objects(self, url, body):
-        if self.http_client and not self.http_client.is_closed:
-            return self.http_client.post(
-                url, data=body.encode("utf-8"), headers=self.folio_client.okapi_headers
-            )
-        else:
-            return httpx.post(
-                url, headers=self.okapi_headers, data=body.encode("utf-8"), timeout=None
-            )
+        if num_records % 50 == 0:
+            logging.info(
+                "%s records posted successfully. %s failed",
+                self.num_posted,
+                self.num_failures,
+            )
 
     def handle_generic_exception(self, exception, last_row, batch, num_records, failed_recs_file):
         logging.error("%s", exception)
@@ -662,7 +697,7 @@ class BatchPoster(MigrationTaskBase):
         traceback.logging.info_exc()  # type: ignore
         logging.info("=======================")
 
-    def post_batch(self, batch, failed_recs_file, num_records
+    def post_batch(self, batch, failed_recs_file, num_records):
         if self.query_params.get("upsert", False) and self.api_info.get("query_endpoint", ""):
             self.set_version(batch, self.api_info["query_endpoint"], self.api_info["object_name"])
         response = self.do_post(batch)
@@ -733,24 +768,6 @@ class BatchPoster(MigrationTaskBase):
             # Likely a json parsing error
             logging.error(response.text)
             raise TransformationProcessError("", "HTTP 400. Something is wrong. Quitting")
-        elif self.task_configuration.object_type == "SRS" and response.status_code >= 500:
-            logging.info(
-                "Post failed. Size: %s Waiting 30s until reposting. Number of tries: %s of 5",
-                get_req_size(response),
-                recursion_depth,
-            )
-            logging.info(response.text)
-            time.sleep(30)
-            if recursion_depth > 4:
-                raise TransformationRecordFailedError(
-                    "",
-                    f"HTTP {response.status_code}\t"
-                    f"Request size: {get_req_size(response)}"
-                    f"{datetime.now(timezone.utc).isoformat()}\n",
-                    response.text,
-                )
-            else:
-                self.post_batch(batch, failed_recs_file, num_records, recursion_depth + 1)
         elif (
             response.status_code == 413 and "DB_ALLOW_SUPPRESS_OPTIMISTIC_LOCKING" in response.text
         ):
@@ -775,50 +792,35 @@ class BatchPoster(MigrationTaskBase):
         )
 
     def do_post(self, batch):
-
-        url = self.folio_client.gateway_url + path
+        url = self.api_info["api_endpoint"]
         if self.api_info["object_name"] == "users":
             payload = {self.api_info["object_name"]: list(batch), "totalRecords": len(batch)}
         elif self.api_info["total_records"]:
             payload = {"records": list(batch), "totalRecords": len(batch)}
         else:
             payload = {self.api_info["object_name"]: batch}
-
-
-
-
-
-                params=self.query_params,
-            )
-        else:
-            return httpx.post(
-                url,
-                headers=self.okapi_headers,
-                json=payload,
-                params=self.query_params,
-                timeout=None,
-            )
+        return self.folio_client.folio_post(
+            url,
+            payload,
+            query_params=self.query_params,
+        )
 
     def get_current_record_count_in_folio(self):
         if "query_endpoint" in self.api_info:
-            url =
+            url = self.api_info["query_endpoint"]
             query_params = {"query": "cql.allRecords=1", "limit": 0}
-            if self.http_client and not self.http_client.is_closed:
-                res = self.http_client.get(
-                    url, headers=self.folio_client.okapi_headers, params=query_params
-                )
-            else:
-                res = httpx.get(url, headers=self.okapi_headers, params=query_params, timeout=None)
            try:
-                res.
-                return res
-            except
-                logging.error(
+                res = self.folio_client.folio_get(url, query_params=query_params)
+                return res["totalRecords"]
+            except folioclient.FolioHTTPError as fhe:
+                logging.error(
+                    "Failed to get current record count. HTTP %s", fhe.response.status_code
+                )
                 return 0
             except KeyError:
                 logging.error(
                     "Failed to get current record count. "
-                    f"No 'totalRecords' in response: {
+                    f"No 'totalRecords' in response: {json.dumps(res, indent=2)}"
                 )
                 return 0
         else:
@@ -850,8 +852,6 @@ class BatchPoster(MigrationTaskBase):
     def wrap_up(self):
         logging.info("Done. Wrapping up")
         self.extradata_writer.flush()
-        if self.task_configuration.object_type == "SRS":
-            self.commit_snapshot()
         if self.task_configuration.object_type != "Extradata":
             logging.info(
                 (
@@ -938,69 +938,6 @@ class BatchPoster(MigrationTaskBase):
             str(self.folder_structure.failed_recs_path),
         )
 
-    def create_snapshot(self):
-        snapshot = {
-            "jobExecutionId": self.snapshot_id,
-            "status": "PARSING_IN_PROGRESS",
-            "processingStartedDate": datetime.now(timezone.utc).isoformat(timespec="milliseconds"),
-        }
-        try:
-            url = f"{self.folio_client.gateway_url}/source-storage/snapshots"
-            if self.http_client and not self.http_client.is_closed:
-                res = self.http_client.post(
-                    url, json=snapshot, headers=self.folio_client.okapi_headers
-                )
-            else:
-                res = httpx.post(url, headers=self.okapi_headers, json=snapshot, timeout=None)
-            res.raise_for_status()
-            logging.info("Posted Snapshot to FOLIO: %s", json.dumps(snapshot, indent=4))
-            get_url = (
-                f"{self.folio_client.gateway_url}/source-storage/snapshots/{self.snapshot_id}"
-            )
-            got = False
-            while not got:
-                logging.info("Sleeping while waiting for the snapshot to get created")
-                time.sleep(5)
-                if self.http_client and not self.http_client.is_closed:
-                    res = self.http_client.get(get_url, headers=self.folio_client.okapi_headers)
-                else:
-                    res = httpx.get(get_url, headers=self.okapi_headers, timeout=None)
-                if res.status_code == 200:
-                    got = True
-                else:
-                    logging.info(res.status_code)
-        except httpx.HTTPStatusError as exc:
-            logging.exception("HTTP error occurred while posting the snapshot: %s", exc)
-            sys.exit(1)
-        except Exception as exc:
-            logging.exception("Could not post the snapshot: %s", exc)
-            sys.exit(1)
-
-    def commit_snapshot(self):
-        snapshot = {"jobExecutionId": self.snapshot_id, "status": "COMMITTED"}
-        try:
-            url = f"{self.folio_client.gateway_url}/source-storage/snapshots/{self.snapshot_id}"
-            if self.http_client and not self.http_client.is_closed:
-                res = self.http_client.put(
-                    url, json=snapshot, headers=self.folio_client.okapi_headers
-                )
-            else:
-                res = httpx.put(url, headers=self.okapi_headers, json=snapshot, timeout=None)
-            res.raise_for_status()
-            logging.info("Posted Committed snapshot to FOLIO: %s", json.dumps(snapshot, indent=4))
-        except httpx.HTTPStatusError as exc:
-            logging.exception("HTTP error occurred while posting the snapshot: %s", exc)
-            sys.exit(1)
-        except Exception as exc:
-            logging.exception(
-                "Could not commit snapshot with id %s. Post this to /source-storage/snapshots/%s:",
-                self.snapshot_id,
-                self.snapshot_id,
-                exc,
-            )
-            logging.info("%s", json.dumps(snapshot, indent=4))
-            sys.exit(1)
-
 
 def get_api_info(object_type: str, use_safe: bool = True):
     choices = {
@@ -1062,22 +999,6 @@ def get_api_info(object_type: str, use_safe: bool = True):
            "addSnapshotId": False,
            "supports_upsert": True,
        },
-        "Authorities": {
-            "object_name": "",
-            "api_endpoint": "/authority-storage/authorities",
-            "is_batch": False,
-            "total_records": False,
-            "addSnapshotId": False,
-            "supports_upsert": False,
-        },
-        "SRS": {
-            "object_name": "records",
-            "api_endpoint": "/source-storage/batch/records",
-            "is_batch": True,
-            "total_records": True,
-            "addSnapshotId": True,
-            "supports_upsert": False,
-        },
        "Users": {
            "object_name": "users",
            "api_endpoint": "/user-import",
@@ -1124,7 +1045,7 @@ def get_human_readable(size, precision=2):
     return "%.*f%s" % (precision, size, suffixes[suffix_index])
 
 
-def get_req_size(response:
+def get_req_size(response: "Response"):
     size = response.request.method
     size += str(response.request.url)
     size += "\r\n".join(f"{k}{v}" for k, v in response.request.headers.items())
folio_migration_tools/translations/en.json
@@ -24,7 +24,6 @@
     "Aged to lost and checked out": "Aged to lost and checked out",
     "Already set to %{value}. %{leader_key} was %{leader}": "Already set to %{value}. %{leader_key} was %{leader}",
     "An Unmapped": "An Unmapped",
-    "Authority records transformation report": "Authority records transformation report",
     "BW Items found tied to previously created BW Holdings": "BW Items found tied to previously created BW Holdings",
     "Bib identifier not in instances_id_map, no instance linked": "Bib identifier not in instances_id_map, no instance linked",
     "Bib ids referenced in bound-with items": "Bib ids referenced in bound-with items",
@@ -264,12 +263,6 @@
     "blurbs.AcquisitionMethodMapping.title": "POL Acquisition Method Mapping",
     "blurbs.AddedValueFromParameter.description": "",
     "blurbs.AddedValueFromParameter.title": "Added value from parameter since value is empty",
-    "blurbs.AuthorityEncodingLevel.description": "Library action: **All values that are not n or o will be set to n. If this is not what you want, you need to correct these values in your system. **<br/>An overview of the Encoding levels (Leader position 17) present in your source data. Allowed values according to the MARC standard are n or o",
-    "blurbs.AuthorityEncodingLevel.title": "Encoding level (leader pos 17)",
-    "blurbs.AuthoritySourceFileMapping.description": "Mappings based on FOLIO authority `naturalId` alpha prefix",
-    "blurbs.AuthoritySourceFileMapping.title": "Authority Source File Mapping Results",
-    "blurbs.AuthoritySources.description": "",
-    "blurbs.AuthoritySources.title": "Authorization sources and related information",
     "blurbs.BoundWithMappings.description": "",
     "blurbs.BoundWithMappings.title": "Bound-with mapping",
     "blurbs.CallNumberTypeMapping.description": "Call number types in MFHDs are mapped from 852, Indicator 1 according to a certain scheme. (LOC documentation)[https://www.loc.gov/marc/holdings/hd852.html]",
{folio_migration_tools-1.10.0b3.dist-info → folio_migration_tools-1.10.0b4.dist-info}/METADATA
RENAMED
@@ -1,12 +1,12 @@
 Metadata-Version: 2.4
 Name: folio-migration-tools
-Version: 1.10.
+Version: 1.10.0b4
 Summary: A tool allowing you to migrate data from legacy ILS:s (Library systems) into FOLIO LSP
 Keywords: FOLIO,ILS,LSP,Library Systems,MARC21,Library data
 Author: Theodor Tolstoy, Lisa Sjögren, Brooks Travis, Jeremy Nelson, Clinton Bradford
 Author-email: Theodor Tolstoy <github.teddes@tolstoy.se>, Brooks Travis <brooks.travis@gmail.com>
 License-Expression: MIT
-Requires-Dist: folioclient>=1.0.
+Requires-Dist: folioclient>=1.0.4
 Requires-Dist: pyhumps>=3.7.3,<4.0.0
 Requires-Dist: defusedxml>=0.7.1,<1.0.0
 Requires-Dist: python-dateutil>=2.8.2,<3.0.0
{folio_migration_tools-1.10.0b3.dist-info → folio_migration_tools-1.10.0b4.dist-info}/RECORD
RENAMED
@@ -1,12 +1,12 @@
 folio_migration_tools/__init__.py,sha256=lnYgqA47l0iA-iORkVH3dgevk7gyGxVwg3MnLltA-U8,223
-folio_migration_tools/__main__.py,sha256=
+folio_migration_tools/__main__.py,sha256=KJdmLkKwAygTKuIKfvDL3M0JdVgsCbf2_LTL1FP6GxU,9233
 folio_migration_tools/circulation_helper.py,sha256=r1zpOKy47VFRHyXHvwUEjPfQ4jyJpjMAYc1IktJ94WU,14661
 folio_migration_tools/colors.py,sha256=GP0wdI_GZ2WD5SjrbPN-S3u8vvN_u6rGQIBBcWv_0ZM,227
 folio_migration_tools/config_file_load.py,sha256=zHHa6NDkN6EJiQE4DgjrFQPVKsd70POsfbGkB8308jg,2822
 folio_migration_tools/custom_dict.py,sha256=rRd9_RQqI85171p7wTfpMM0Mladh-LChbgMSmLvN7N0,680
 folio_migration_tools/custom_exceptions.py,sha256=BLP1gMPbTHSN-rqxzTawT4sRLiyAU3blBdkUBwiiPRk,2642
 folio_migration_tools/extradata_writer.py,sha256=fuchNcMc6BYb9IyfAcvXg7X4J2TfX6YiROfT2hr0JMw,1678
-folio_migration_tools/folder_structure.py,sha256=
+folio_migration_tools/folder_structure.py,sha256=ExrXNEWvCB5QMH17kQSyTDQ04thq--t8_p3F_iuyf0k,6776
 folio_migration_tools/helper.py,sha256=Jb-9PrMkgOUGYScRf8jMmGGTcPIohm3eFHenGSi3cUA,2979
 folio_migration_tools/holdings_helper.py,sha256=yJpz6aJrKRBiJ1MtT5bs2vXAc88uJuGh2_KDuCySOKc,7559
 folio_migration_tools/i18n_config.py,sha256=3AH_2b9zTsxE4XTe4isM_zYtPJSlK0ix6eBmV7kAYUM,228
@@ -24,20 +24,18 @@ folio_migration_tools/mapping_file_transformation/organization_mapper.py,sha256=
 folio_migration_tools/mapping_file_transformation/ref_data_mapping.py,sha256=rROcBiL5TE7bWsJ95A6shurPZ1e4In6PTwR5BN9amzU,8991
 folio_migration_tools/mapping_file_transformation/user_mapper.py,sha256=13cvFr7Vp6uxZNpAmLxGvPVLC1_En2NVvLtuP75HAzU,8846
 folio_migration_tools/marc_rules_transformation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-folio_migration_tools/marc_rules_transformation/conditions.py,sha256
+folio_migration_tools/marc_rules_transformation/conditions.py,sha256=-5U6nBGcBO49C9MMyxOL2wMhHGxUawkIM9e-MwNaM_4,46938
 folio_migration_tools/marc_rules_transformation/holdings_statementsparser.py,sha256=-mOGtoPa3qmEqGWtyBTN-fQ743ZmT8caDLc9ES9J74Y,13667
 folio_migration_tools/marc_rules_transformation/hrid_handler.py,sha256=WudBOzCwcJAuhEm4urLhAk5OQWGfbKz9_4Ou8fmjm1E,10022
 folio_migration_tools/marc_rules_transformation/loc_language_codes.xml,sha256=ztn2_yKws6qySL4oSsZh7sOjxq5bCC1PhAnXJdtgmJ0,382912
 folio_migration_tools/marc_rules_transformation/marc_file_processor.py,sha256=o03d_G-4MR4e5VPfu7ljxAVDl79o2ONpQIqQ-V2RCdA,12523
 folio_migration_tools/marc_rules_transformation/marc_reader_wrapper.py,sha256=9ATjYMRAjy0QcXtmNZaHVhHLJ5hE1WUgOcF6KMJjbgo,5309
-folio_migration_tools/marc_rules_transformation/
-folio_migration_tools/marc_rules_transformation/rules_mapper_base.py,sha256=ijOs9r0Mcx0XyNhDAq7fw1aFJ-JnAEhHx98-t262uRo,46158
+folio_migration_tools/marc_rules_transformation/rules_mapper_base.py,sha256=KxyZjizbLwwAY2PfMSyh6u_mVTpfhyvdaii_PlpLscw,45857
 folio_migration_tools/marc_rules_transformation/rules_mapper_bibs.py,sha256=F8tKn59zHUV3Gqa9NY-JvTbWgfDjNTcPvQONk8gzwGs,30428
 folio_migration_tools/marc_rules_transformation/rules_mapper_holdings.py,sha256=YILyEfO-LkQPk-4OjiuY68X5xDA0LlI7UUp7_mvzLUE,29184
 folio_migration_tools/migration_report.py,sha256=B8e4tMfT0xCJ3BxkSg7ZZJYmg0VLQVXmmVnWwmojZD4,4260
 folio_migration_tools/migration_tasks/__init__.py,sha256=ZkbY_yGyB84Ke8OMlYUzyyBj4cxxNrhMTwQlu_GbdDs,211
-folio_migration_tools/migration_tasks/
-folio_migration_tools/migration_tasks/batch_poster.py,sha256=x3DQPrI1QnRtg9Bdf-e3ztv4llWPt5JpeCIyE7mMNWU,50634
+folio_migration_tools/migration_tasks/batch_poster.py,sha256=dIsqqjizuJN0BOj98uK4EsAQgdCcQwMwvK2CUtifd7w,46120
 folio_migration_tools/migration_tasks/bibs_transformer.py,sha256=zPxh2tjyqx88fuH1FuKLwhT6lhZ5fVTQAqE08IggYgM,6351
 folio_migration_tools/migration_tasks/courses_migrator.py,sha256=sKIeyUlc7o189lw88XbGILVkwnR9krqO0PgS-vLCCm8,7039
 folio_migration_tools/migration_tasks/holdings_csv_transformer.py,sha256=JzOufqjSR2V-gUvOq0pdQFsXjpxk1ldGJBQWIWGfCps,21915
@@ -57,8 +55,8 @@ folio_migration_tools/transaction_migration/legacy_loan.py,sha256=A5qvThfP3g62Yn
 folio_migration_tools/transaction_migration/legacy_request.py,sha256=Kv7jpBIuZ_qyay8BdaeCPJID67l43Cl6x-ws9Lt49NI,6121
 folio_migration_tools/transaction_migration/legacy_reserve.py,sha256=qzw0okg4axAE_ezXopP9gFsQ_e60o0zh7zqRzFBSWHY,1806
 folio_migration_tools/transaction_migration/transaction_result.py,sha256=cTdCN0BnlI9_ZJB2Z3Fdkl9gpymIi-9mGZsRFlQcmDk,656
-folio_migration_tools/translations/en.json,sha256=
-folio_migration_tools-1.10.
-folio_migration_tools-1.10.
-folio_migration_tools-1.10.
-folio_migration_tools-1.10.
+folio_migration_tools/translations/en.json,sha256=pS7dhHmj4XBqTcFNIcqFgRMY557fQan1RomdNg6PtdA,40941
+folio_migration_tools-1.10.0b4.dist-info/WHEEL,sha256=eh7sammvW2TypMMMGKgsM83HyA_3qQ5Lgg3ynoecH3M,79
+folio_migration_tools-1.10.0b4.dist-info/entry_points.txt,sha256=mJRRiCNP9j7_NpVXamHEiW8pDEjWQs1vEqD89G354cM,79
+folio_migration_tools-1.10.0b4.dist-info/METADATA,sha256=JlJpqIeLAOLOFk0kqT7_ZcS5pO-0o-NgaXlBPt2Kh7A,7162
+folio_migration_tools-1.10.0b4.dist-info/RECORD,,
folio_migration_tools/marc_rules_transformation/rules_mapper_authorities.py
DELETED
@@ -1,242 +0,0 @@
-"""The default mapper, responsible for parsing MARC21 records acording to the
-FOLIO community specifications"""
-
-import logging
-import re
-import time
-import uuid
-from typing import List
-
-import i18n
-import pymarc
-from folio_uuid.folio_namespaces import FOLIONamespaces
-from folio_uuid.folio_uuid import FolioUUID
-from folioclient import FolioClient
-from pymarc import Leader, Record
-
-from folio_migration_tools.custom_exceptions import TransformationProcessError
-from folio_migration_tools.helper import Helper
-from folio_migration_tools.library_configuration import (
-    FileDefinition,
-    IlsFlavour,
-    LibraryConfiguration,
-)
-from folio_migration_tools.marc_rules_transformation.conditions import Conditions
-from folio_migration_tools.marc_rules_transformation.hrid_handler import HRIDHandler
-from folio_migration_tools.marc_rules_transformation.rules_mapper_base import (
-    RulesMapperBase,
-)
-
-
-class AuthorityMapper(RulesMapperBase):
-    non_repatable_fields = [
-        "100",
-        "110",
-        "111",
-        "130",
-        "147",
-        "148",
-        "150",
-        "151",
-        "155",
-        "162",
-        "180",
-        "181",
-        "182",
-        "185",
-        "378",
-        "384",
-    ]
-    """_summary_
-
-    Args:
-        RulesMapperBase (_type_): _description_
-    """
-
-    def __init__(
-        self,
-        folio_client,
-        library_configuration: LibraryConfiguration,
-        task_configuration,
-    ):
-        super().__init__(
-            folio_client,
-            library_configuration,
-            task_configuration,
-            None,
-            self.get_authority_json_schema(folio_client, library_configuration),
-            Conditions(folio_client, self, "auth", library_configuration.folio_release),
-        )
-        self.srs_recs: list = []
-        logging.info("Fetching mapping rules from the tenant")
-        rules_endpoint = "/mapping-rules/marc-authority"
-        self.mappings = self.folio_client.folio_get_single_object(rules_endpoint)
-        self.source_file_mapping: dict = {}
-        self.setup_source_file_mapping()
-        self.start = time.time()
-
-    def get_legacy_ids(self, marc_record: Record, idx: int) -> List[str]:
-        ils_flavour: IlsFlavour = self.task_configuration.ils_flavour
-        if ils_flavour in {IlsFlavour.sierra, IlsFlavour.millennium}:
-            raise TransformationProcessError("", f"ILS {ils_flavour} not configured")
-        elif ils_flavour == IlsFlavour.tag907y:
-            return RulesMapperBase.get_bib_id_from_907y(marc_record, idx)
-        elif ils_flavour == IlsFlavour.tagf990a:
-            return RulesMapperBase.get_bib_id_from_990a(marc_record, idx)
-        elif ils_flavour == IlsFlavour.aleph:
-            raise TransformationProcessError("", f"ILS {ils_flavour} not configured")
-        elif ils_flavour in {IlsFlavour.voyager, "voyager", IlsFlavour.tag001}:
-            return RulesMapperBase.get_bib_id_from_001(marc_record, idx)
-        elif ils_flavour == IlsFlavour.koha:
-            raise TransformationProcessError("", f"ILS {ils_flavour} not configured")
-        elif ils_flavour == IlsFlavour.none:
-            return [str(uuid.uuid4())]
-        else:
-            raise TransformationProcessError("", f"ILS {ils_flavour} not configured")
-
-    def parse_record(
-        self, marc_record: pymarc.Record, file_def: FileDefinition, legacy_ids: List[str]
-    ) -> list[dict]:
-        """Parses an auth recod into a FOLIO Authority object
-        This is the main function
-
-        Args:
-            legacy_ids (_type_): _description_
-            marc_record (Record): _description_
-            file_def (FileDefinition): _description_
-
-        Returns:
-            dict: _description_
-        """
-        self.print_progress()
-        ignored_subsequent_fields: set = set()
-        bad_tags = set(self.task_configuration.tags_to_delete)  # "907"
-        folio_authority = self.perform_initial_preparation(marc_record, legacy_ids)
-        for marc_field in marc_record:
-            self.report_marc_stats(marc_field, bad_tags, legacy_ids, ignored_subsequent_fields)
-            if marc_field.tag not in ignored_subsequent_fields:
-                self.process_marc_field(
-                    folio_authority,
-                    marc_field,
-                    ignored_subsequent_fields,
-                    legacy_ids,
-                )
-
-        self.perform_additional_parsing(folio_authority)
-        clean_folio_authority = self.validate_required_properties(
-            "-".join(legacy_ids), folio_authority, self.schema, FOLIONamespaces.instances
-        )
-        self.dedupe_rec(clean_folio_authority)
-        marc_record.remove_fields(*list(bad_tags))
-        self.report_folio_mapping(clean_folio_authority, self.schema)
-        return [clean_folio_authority]
-
-    def perform_initial_preparation(self, marc_record: pymarc.Record, legacy_ids):
-        folio_authority = {}
-        folio_authority["id"] = str(
-            FolioUUID(
-                self.base_string_for_folio_uuid,
-                FOLIONamespaces.authorities,
-                str(legacy_ids[-1]),
-            )
-        )
-        HRIDHandler.handle_035_generation(
-            marc_record, legacy_ids, self.migration_report, False, False
-        )
-        self.map_source_file_and_natural_id(marc_record, folio_authority)
-        self.handle_leader_17(marc_record, legacy_ids)
-        return folio_authority
-
-    def map_source_file_and_natural_id(self, marc_record, folio_authority):
-        """Implement source file and natural ID mappings according to MODDICORE-283"""
-        match_prefix_patt = re.compile("^[A-Za-z]+")
-        natural_id = None
-        source_file_id = None
-        has_010 = marc_record.get("010")
-        if has_010 and (has_010a := has_010.get_subfields("a")):
-            for a_subfield in has_010a:
-                natural_id_prefix = match_prefix_patt.match(a_subfield)
-                if natural_id_prefix and (
-                    source_file := self.source_file_mapping.get(natural_id_prefix.group(0), None)
-                ):
-                    natural_id = "".join(a_subfield.split())
-                    source_file_id = source_file["id"]
-                    self.migration_report.add_general_statistics(
-                        i18n.t("naturalId mapped from %{fro}", fro="010$a")
-                    )
-                    self.migration_report.add(
-                        "AuthoritySourceFileMapping",
-                        f"{source_file['name']} -- {natural_id_prefix.group(0)} -- 010$a",
-                        number=1,
-                    )
-                    break
-        if not source_file_id:
-            natural_id = "".join(marc_record["001"].data.split())
-            self.migration_report.add_general_statistics(
-                i18n.t("naturalId mapped from %{fro}", fro="001")
-            )
-            natural_id_prefix = match_prefix_patt.match(natural_id)
-            if natural_id_prefix:
-                if source_file := self.source_file_mapping.get(natural_id_prefix.group(0), None):
-                    source_file_id = source_file["id"]
-                    self.migration_report.add(
-                        "AuthoritySourceFileMapping",
-                        f"{source_file['name']} -- {natural_id_prefix.group(0)} -- 001",
-                        number=1,
-                    )
-        folio_authority["naturalId"] = natural_id
-        if source_file_id:
-            folio_authority["sourceFileId"] = source_file_id
-
-    def setup_source_file_mapping(self):
-        if self.folio_client.authority_source_files:
-            logging.info(
-                f"{len(self.folio_client.authority_source_files)} \tAuthority source files"
-            )
-            for source_file in self.folio_client.authority_source_files:
-                for sf_code in source_file.get("codes", []):
-                    self.source_file_mapping[sf_code] = source_file
-
-    def handle_leader_17(self, marc_record, legacy_ids):
-        leader_17 = marc_record.leader[17] or "Empty"
-        self.migration_report.add(
-            "AuthorityEncodingLevel", i18n.t("Original value") + f": {leader_17}"
-        )
-        if leader_17 not in ["n", "o"]:
-            Helper.log_data_issue(
-                legacy_ids,
-                f"LDR pos. 17 is '{leader_17}'. Is this correct? Value has been changed to 'n'.",
-                marc_record.leader,
-            )
-            marc_record.leader = Leader(f"{marc_record.leader[:17]}n{marc_record.leader[18:]}")
-            self.migration_report.add(
-                "AuthorityEncodingLevel", i18n.t("Changed %{a} to %{b}", a=leader_17, b="n")
-            )
-
-    def perform_additional_parsing(
-        self,
-        folio_authority: dict,
-    ) -> None:
-        """Do stuff not easily captured by the mapping rules
-
-        Args:
-            folio_authority (dict): _description_
-        """
-        folio_authority["source"] = "MARC"
-
-    def get_authority_json_schema(self, folio_client: FolioClient, library_configuration):
-        """Fetches the JSON Schema for autorities"""
-        if library_configuration.folio_release.name.lower()[0] < "p":
-            schema = folio_client.get_from_github(
-                "folio-org", "mod-inventory-storage", "/ramls/authorities/authority.json"
-            )
-        else:
-            schema = folio_client.get_from_github(
-                "folio-org",
-                "mod-entities-links",
-                "/src/main/resources/swagger.api/schemas/authority-storage/authorityDto.yaml",
-            )
-        return schema
-
-    def wrap_up(self):
-        logging.info("Mapper wrapping up")
folio_migration_tools/migration_tasks/authority_transformer.py
DELETED
@@ -1,118 +0,0 @@
-import logging
-from typing import Annotated
-from typing import List
-import i18n
-
-from folio_uuid.folio_namespaces import FOLIONamespaces
-from pydantic import Field
-
-from folio_migration_tools.helper import Helper
-from folio_migration_tools.library_configuration import FileDefinition
-from folio_migration_tools.library_configuration import IlsFlavour
-from folio_migration_tools.library_configuration import LibraryConfiguration
-from folio_migration_tools.marc_rules_transformation.marc_file_processor import (
-    MarcFileProcessor,
-)
-from folio_migration_tools.marc_rules_transformation.rules_mapper_authorities import (
-    AuthorityMapper,
-)
-from folio_migration_tools.migration_tasks.migration_task_base import MigrationTaskBase
-from folio_migration_tools.task_configuration import AbstractTaskConfiguration
-
-
-class AuthorityTransformer(MigrationTaskBase):
-    class TaskConfiguration(AbstractTaskConfiguration):
-        name: Annotated[
-            str,
-            Field(
-                description=(
-                    "Name of this migration task. The name is being used to call the specific "
-                    "task, and to distinguish tasks of similar types"
-                )
-            ),
-        ]
-        migration_task_type: Annotated[
-            str,
-            Field(
-                title="Migration task type",
-                description=("The type of migration task you want to perform"),
-            ),
-        ]
-        files: Annotated[
-            List[FileDefinition],
-            Field(
-                title="Source files", description=("List of MARC21 files with authority records")
-            ),
-        ]
-        ils_flavour: Annotated[
-            IlsFlavour,
-            Field(
-                title="ILS flavour", description="The type of ILS you are migrating records from."
-            ),
-        ]
-        tags_to_delete: Annotated[
-            List[str],
-            Field(
-                title="Tags to delete from MARC record",
-                description=(
-                    "Tags in the incoming MARC authority that the process should remove "
-                    "before adding them into FOLIO. These tags will be used in the "
-                    "transformation before getting removed."
-                ),
-            ),
-        ] = []
-        create_source_records: Annotated[
-            bool,
-            Field(
-                title="Create source records",
-                description=(
-                    "Controls wheter or not to retain the MARC records in Source Record Storage."
-                ),
-            ),
-        ] = True
-
-    @staticmethod
-    def get_object_type() -> FOLIONamespaces:
-        return FOLIONamespaces.authorities
-
-    def __init__(
-        self,
-        task_config: TaskConfiguration,
-        library_config: LibraryConfiguration,
-        use_logging: bool = True,
-    ):
-        super().__init__(library_config, task_config, use_logging)
-        self.processor: MarcFileProcessor
-        self.check_source_files(
-            self.folder_structure.legacy_records_folder, self.task_configuration.files
-        )
-        self.mapper: AuthorityMapper = AuthorityMapper(
-            self.folio_client, library_config, task_config
-        )
-        self.auth_ids: set = set()
-        logging.info("Init done")
-
-    def do_work(self):
-        self.do_work_marc_transformer()
-
-    def wrap_up(self):
-        logging.info("Done. Transformer Wrapping up...")
-        self.extradata_writer.flush()
-        self.processor.wrap_up()
-        with open(self.folder_structure.migration_reports_file, "w+") as report_file:
-            self.mapper.migration_report.write_migration_report(
-                i18n.t("Authority records transformation report"),
-                report_file,
-                self.start_datetime,
-            )
-            Helper.print_mapping_report(
-                report_file,
-                self.mapper.parsed_records,
-                self.mapper.mapped_folio_fields,
-                self.mapper.mapped_legacy_fields,
-            )
-        logging.info(
-            "Done. Transformation report written to %s",
-            self.folder_structure.migration_reports_file.name,
-        )
-        self.clean_out_empty_logs()
{folio_migration_tools-1.10.0b3.dist-info → folio_migration_tools-1.10.0b4.dist-info}/WHEEL
RENAMED
File without changes
{folio_migration_tools-1.10.0b3.dist-info → folio_migration_tools-1.10.0b4.dist-info}/entry_points.txt
RENAMED
File without changes