folio-migration-tools 1.2.1__py3-none-any.whl → 1.9.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- folio_migration_tools/__init__.py +11 -0
- folio_migration_tools/__main__.py +169 -85
- folio_migration_tools/circulation_helper.py +96 -59
- folio_migration_tools/config_file_load.py +66 -0
- folio_migration_tools/custom_dict.py +6 -4
- folio_migration_tools/custom_exceptions.py +21 -19
- folio_migration_tools/extradata_writer.py +46 -0
- folio_migration_tools/folder_structure.py +63 -66
- folio_migration_tools/helper.py +29 -21
- folio_migration_tools/holdings_helper.py +57 -34
- folio_migration_tools/i18n_config.py +9 -0
- folio_migration_tools/library_configuration.py +173 -13
- folio_migration_tools/mapper_base.py +317 -106
- folio_migration_tools/mapping_file_transformation/courses_mapper.py +203 -0
- folio_migration_tools/mapping_file_transformation/holdings_mapper.py +83 -69
- folio_migration_tools/mapping_file_transformation/item_mapper.py +98 -94
- folio_migration_tools/mapping_file_transformation/manual_fee_fines_mapper.py +352 -0
- folio_migration_tools/mapping_file_transformation/mapping_file_mapper_base.py +702 -223
- folio_migration_tools/mapping_file_transformation/notes_mapper.py +90 -0
- folio_migration_tools/mapping_file_transformation/order_mapper.py +492 -0
- folio_migration_tools/mapping_file_transformation/organization_mapper.py +389 -0
- folio_migration_tools/mapping_file_transformation/ref_data_mapping.py +38 -27
- folio_migration_tools/mapping_file_transformation/user_mapper.py +149 -361
- folio_migration_tools/marc_rules_transformation/conditions.py +650 -246
- folio_migration_tools/marc_rules_transformation/holdings_statementsparser.py +292 -130
- folio_migration_tools/marc_rules_transformation/hrid_handler.py +244 -0
- folio_migration_tools/marc_rules_transformation/loc_language_codes.xml +20846 -0
- folio_migration_tools/marc_rules_transformation/marc_file_processor.py +300 -0
- folio_migration_tools/marc_rules_transformation/marc_reader_wrapper.py +136 -0
- folio_migration_tools/marc_rules_transformation/rules_mapper_authorities.py +241 -0
- folio_migration_tools/marc_rules_transformation/rules_mapper_base.py +681 -201
- folio_migration_tools/marc_rules_transformation/rules_mapper_bibs.py +395 -429
- folio_migration_tools/marc_rules_transformation/rules_mapper_holdings.py +531 -100
- folio_migration_tools/migration_report.py +85 -38
- folio_migration_tools/migration_tasks/__init__.py +1 -3
- folio_migration_tools/migration_tasks/authority_transformer.py +119 -0
- folio_migration_tools/migration_tasks/batch_poster.py +911 -198
- folio_migration_tools/migration_tasks/bibs_transformer.py +121 -116
- folio_migration_tools/migration_tasks/courses_migrator.py +192 -0
- folio_migration_tools/migration_tasks/holdings_csv_transformer.py +252 -247
- folio_migration_tools/migration_tasks/holdings_marc_transformer.py +321 -115
- folio_migration_tools/migration_tasks/items_transformer.py +264 -84
- folio_migration_tools/migration_tasks/loans_migrator.py +506 -195
- folio_migration_tools/migration_tasks/manual_fee_fines_transformer.py +187 -0
- folio_migration_tools/migration_tasks/migration_task_base.py +364 -74
- folio_migration_tools/migration_tasks/orders_transformer.py +373 -0
- folio_migration_tools/migration_tasks/organization_transformer.py +451 -0
- folio_migration_tools/migration_tasks/requests_migrator.py +130 -62
- folio_migration_tools/migration_tasks/reserves_migrator.py +253 -0
- folio_migration_tools/migration_tasks/user_transformer.py +180 -139
- folio_migration_tools/task_configuration.py +46 -0
- folio_migration_tools/test_infrastructure/__init__.py +0 -0
- folio_migration_tools/test_infrastructure/mocked_classes.py +406 -0
- folio_migration_tools/transaction_migration/legacy_loan.py +148 -34
- folio_migration_tools/transaction_migration/legacy_request.py +65 -25
- folio_migration_tools/transaction_migration/legacy_reserve.py +47 -0
- folio_migration_tools/transaction_migration/transaction_result.py +12 -1
- folio_migration_tools/translations/en.json +476 -0
- folio_migration_tools-1.9.10.dist-info/METADATA +169 -0
- folio_migration_tools-1.9.10.dist-info/RECORD +67 -0
- {folio_migration_tools-1.2.1.dist-info → folio_migration_tools-1.9.10.dist-info}/WHEEL +1 -2
- folio_migration_tools-1.9.10.dist-info/entry_points.txt +3 -0
- folio_migration_tools/generate_schemas.py +0 -46
- folio_migration_tools/mapping_file_transformation/mapping_file_mapping_base_impl.py +0 -44
- folio_migration_tools/mapping_file_transformation/user_mapper_base.py +0 -212
- folio_migration_tools/marc_rules_transformation/bibs_processor.py +0 -163
- folio_migration_tools/marc_rules_transformation/holdings_processor.py +0 -284
- folio_migration_tools/report_blurbs.py +0 -219
- folio_migration_tools/transaction_migration/legacy_fee_fine.py +0 -36
- folio_migration_tools-1.2.1.dist-info/METADATA +0 -134
- folio_migration_tools-1.2.1.dist-info/RECORD +0 -50
- folio_migration_tools-1.2.1.dist-info/top_level.txt +0 -1
- {folio_migration_tools-1.2.1.dist-info → folio_migration_tools-1.9.10.dist-info/licenses}/LICENSE +0 -0
|
@@ -0,0 +1,300 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
import sys
|
|
4
|
+
import time
|
|
5
|
+
import traceback
|
|
6
|
+
from typing import BinaryIO, Dict, List, Set, TextIO
|
|
7
|
+
|
|
8
|
+
import i18n
|
|
9
|
+
from folio_uuid.folio_namespaces import FOLIONamespaces
|
|
10
|
+
from pymarc import Field, Record, Subfield
|
|
11
|
+
|
|
12
|
+
from folio_migration_tools.custom_exceptions import (
|
|
13
|
+
TransformationProcessError,
|
|
14
|
+
TransformationRecordFailedError,
|
|
15
|
+
)
|
|
16
|
+
from folio_migration_tools.folder_structure import FolderStructure
|
|
17
|
+
from folio_migration_tools.helper import Helper
|
|
18
|
+
from folio_migration_tools.library_configuration import FileDefinition, HridHandling
|
|
19
|
+
from folio_migration_tools.marc_rules_transformation.rules_mapper_base import (
|
|
20
|
+
RulesMapperBase,
|
|
21
|
+
)
|
|
22
|
+
from folio_migration_tools.migration_report import MigrationReport
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class MarcFileProcessor:
    """Drives the transformation of MARC records into FOLIO objects.

    Owns the output file handles (created objects, optional SRS records,
    optional data-import MARC) and the per-run counters used for reporting.
    """

    def __init__(
        self, mapper: RulesMapperBase, folder_structure: FolderStructure, created_objects_file: TextIO
    ):
        self.object_type: FOLIONamespaces = folder_structure.object_type
        self.folder_structure: FolderStructure = folder_structure
        self.mapper: RulesMapperBase = mapper
        self.created_objects_file: TextIO = created_objects_file
        # Only open the SRS output file when both the mapper and at least one
        # file definition ask for source records to be created.
        if mapper.create_source_records and any(
            x.create_source_records for x in mapper.task_configuration.files
        ):
            self.srs_records_file: TextIO = open(self.folder_structure.srs_records_path, "w+")
        # Optional binary MARC output used for FOLIO Data Import.
        if getattr(mapper.task_configuration, "data_import_marc", False):
            self.data_import_marc_file: BinaryIO = open(self.folder_structure.data_import_marc_path, "wb+")
        self.unique_001s: Set[str] = set()
        self.failed_records_count: int = 0
        self.records_count: int = 0
        self.start: float = time.time()
        self.legacy_ids: Set[str] = set()
        # For holdings SRS creation, save_srs_record looks up
        # parent_hrids[folio_rec["instanceId"]] to build the 004 field, so
        # preload the map here. Assumes parent_id_map values are tuples where
        # [1] keys the instance id and [2] is the HRID — TODO confirm.
        if (
            self.object_type == FOLIONamespaces.holdings
            and self.mapper.create_source_records
        ):
            logging.info("Loading Parent HRID map for SRS creation")
            self.parent_hrids = {entity[1]: entity[2] for entity in mapper.parent_id_map.values()}
|
|
50
|
+
|
|
51
|
+
def process_record(self, idx: int, marc_record: Record, file_def: FileDefinition):
|
|
52
|
+
"""processes a marc holdings record and saves it
|
|
53
|
+
|
|
54
|
+
Args:
|
|
55
|
+
idx (int): Index in file being parsed
|
|
56
|
+
marc_record (Record): _description_
|
|
57
|
+
file_def (FileDefinition): _description_
|
|
58
|
+
|
|
59
|
+
Raises:
|
|
60
|
+
TransformationProcessError: _description_
|
|
61
|
+
TransformationRecordFailedError: _description_
|
|
62
|
+
"""
|
|
63
|
+
success = True
|
|
64
|
+
folio_recs = []
|
|
65
|
+
self.records_count += 1
|
|
66
|
+
try:
|
|
67
|
+
# Transform the MARC21 to a FOLIO record
|
|
68
|
+
try:
|
|
69
|
+
legacy_ids = self.mapper.get_legacy_ids(marc_record, idx)
|
|
70
|
+
except ValueError as e:
|
|
71
|
+
raise TransformationRecordFailedError(
|
|
72
|
+
f"{idx} in {file_def.file_name}", str(e), idx
|
|
73
|
+
) from e
|
|
74
|
+
if not legacy_ids:
|
|
75
|
+
raise TransformationRecordFailedError(
|
|
76
|
+
f"Index in file: {idx}", "No legacy id found", idx
|
|
77
|
+
)
|
|
78
|
+
folio_recs = self.mapper.parse_record(marc_record, file_def, legacy_ids)
|
|
79
|
+
for idx, folio_rec in enumerate(folio_recs):
|
|
80
|
+
if idx == 0:
|
|
81
|
+
filtered_legacy_ids = self.get_valid_folio_record_ids(
|
|
82
|
+
legacy_ids, self.legacy_ids, self.mapper.migration_report
|
|
83
|
+
)
|
|
84
|
+
self.add_legacy_ids_to_map(folio_rec, filtered_legacy_ids)
|
|
85
|
+
|
|
86
|
+
if (
|
|
87
|
+
file_def.create_source_records
|
|
88
|
+
and self.mapper.create_source_records
|
|
89
|
+
):
|
|
90
|
+
self.save_srs_record(
|
|
91
|
+
marc_record,
|
|
92
|
+
file_def,
|
|
93
|
+
folio_rec,
|
|
94
|
+
legacy_ids,
|
|
95
|
+
self.object_type,
|
|
96
|
+
)
|
|
97
|
+
if getattr(self.mapper.task_configuration, "data_import_marc", False):
|
|
98
|
+
self.save_marc_record(
|
|
99
|
+
marc_record,
|
|
100
|
+
folio_rec,
|
|
101
|
+
self.object_type
|
|
102
|
+
)
|
|
103
|
+
Helper.write_to_file(self.created_objects_file, folio_rec)
|
|
104
|
+
self.mapper.migration_report.add_general_statistics(
|
|
105
|
+
i18n.t("Inventory records written to disk")
|
|
106
|
+
)
|
|
107
|
+
self.exit_on_too_many_exceptions()
|
|
108
|
+
|
|
109
|
+
except TransformationRecordFailedError as error:
|
|
110
|
+
success = False
|
|
111
|
+
raise TransformationRecordFailedError(
|
|
112
|
+
f"{error.index_or_id} in {file_def.file_name}", error.message, error.data_value
|
|
113
|
+
) from error
|
|
114
|
+
except TransformationProcessError as tpe:
|
|
115
|
+
raise TransformationProcessError(
|
|
116
|
+
f"{tpe.index_or_id} in {file_def.file_name}", tpe.message, tpe.data_value
|
|
117
|
+
) from tpe
|
|
118
|
+
except Exception as inst:
|
|
119
|
+
success = False
|
|
120
|
+
traceback.print_exc()
|
|
121
|
+
logging.error(type(inst))
|
|
122
|
+
logging.error(inst.args)
|
|
123
|
+
logging.error(inst)
|
|
124
|
+
logging.error(marc_record)
|
|
125
|
+
logging.error(folio_recs)
|
|
126
|
+
raise TransformationProcessError("", inst.args, "") from inst
|
|
127
|
+
finally:
|
|
128
|
+
if not success:
|
|
129
|
+
self.failed_records_count += 1
|
|
130
|
+
remove_from_id_map = getattr(self.mapper, "remove_from_id_map", None)
|
|
131
|
+
for folio_rec in folio_recs:
|
|
132
|
+
if (
|
|
133
|
+
callable(remove_from_id_map)
|
|
134
|
+
and "folio_rec" in locals()
|
|
135
|
+
and folio_rec.get("formerIds", "")
|
|
136
|
+
):
|
|
137
|
+
self.mapper.remove_from_id_map(folio_rec.get("formerIds", []))
|
|
138
|
+
|
|
139
|
+
def save_marc_record(
|
|
140
|
+
self,
|
|
141
|
+
marc_record: Record,
|
|
142
|
+
folio_rec: Dict,
|
|
143
|
+
object_type: FOLIONamespaces
|
|
144
|
+
):
|
|
145
|
+
self.mapper.save_data_import_marc_record(
|
|
146
|
+
self.data_import_marc_file,
|
|
147
|
+
object_type,
|
|
148
|
+
marc_record,
|
|
149
|
+
folio_rec,
|
|
150
|
+
)
|
|
151
|
+
|
|
152
|
+
    def save_srs_record(
        self,
        marc_record: Record,
        file_def: FileDefinition,
        folio_rec: Dict,
        legacy_ids: List[str],
        object_type: FOLIONamespaces,
    ):
        """Prepare the MARC record for SRS and hand it to the mapper for serialization.

        For holdings, the 008 is trimmed to 32 positions, the mapped FOLIO
        location code is written into 852$b, and the 004 is replaced with the
        value from parent_hrids keyed by the record's instanceId.
        """
        if object_type in [FOLIONamespaces.holdings]:
            # MFHD 008 must be at most 32 positions; strip and report any excess.
            if "008" in marc_record and len(marc_record["008"].data) > 32:
                remain, rest = (
                    marc_record["008"].data[:32],
                    marc_record["008"].data[32:],
                )
                marc_record["008"].data = remain
                self.mapper.migration_report.add(
                    "MarcValidation",
                    i18n.t("008 length invalid. '%{rest}' was stripped out", rest=rest),
                )
            self.add_mapped_location_code_to_record(marc_record, folio_rec)
            # Replace any existing 004 with the parent value loaded in __init__.
            new_004 = Field(tag="004", data=self.parent_hrids[folio_rec["instanceId"]])
            marc_record.remove_fields("004")
            marc_record.add_ordered_field(new_004)
        for former_id in legacy_ids:
            # With default HRID handling, keep each legacy id discoverable in an 035$a.
            if self.mapper.task_configuration.hrid_handling == HridHandling.default:
                new_035 = Field(
                    tag="035",
                    indicators=[" ", " "],
                    subfields=[Subfield(code="a", value=former_id)],
                )
                marc_record.add_ordered_field(new_035)
        self.mapper.save_source_record(
            self.srs_records_file,
            self.object_type,
            self.mapper.folio_client,
            marc_record,
            folio_rec,
            legacy_ids,
            file_def.discovery_suppressed,
        )
        self.mapper.migration_report.add_general_statistics(i18n.t("SRS records written to disk"))
|
|
193
|
+
|
|
194
|
+
    def add_mapped_location_code_to_record(self, marc_record: Record, folio_rec: Dict):
        """Write the FOLIO location code of the record's permanent location into 852$b.

        The first existing 852$b (the legacy code) is discarded; any further
        $b values are preserved by moving them to 852$x.

        Raises:
            TransformationRecordFailedError: If the record has no 852 field.
        """
        # NOTE(review): location_code is None when permanentLocationId has no
        # match among the tenant's locations — confirm downstream handling.
        location_code = next(
            (
                location["code"]
                for location in self.mapper.folio_client.locations
                if location["id"] == folio_rec["permanentLocationId"]
            ),
            None,
        )
        if "852" not in marc_record:
            raise TransformationRecordFailedError(
                "", "No 852 in record when storing new location code", ""
            )
        first_852 = marc_record.get_fields("852")[0]
        # Drop the original (legacy) $b; it is replaced below.
        first_852.delete_subfield("b")
        # Move any additional $b values to $x so no data is lost.
        while old_b := first_852.delete_subfield("b"):
            first_852.add_subfield("x", old_b, 0)
            self.mapper.migration_report.add(
                "LocationMapping", i18n.t("Additional 852$b was moved to 852$x")
            )
        first_852.add_subfield("b", location_code, 0)
        self.mapper.migration_report.add(
            "LocationMapping", i18n.t("Set 852 to FOLIO location code")
        )
|
|
218
|
+
|
|
219
|
+
def exit_on_too_many_exceptions(self):
|
|
220
|
+
if (
|
|
221
|
+
self.failed_records_count / (self.records_count + 1)
|
|
222
|
+
> (self.mapper.library_configuration.failed_percentage_threshold / 100)
|
|
223
|
+
and self.failed_records_count
|
|
224
|
+
> self.mapper.library_configuration.failed_records_threshold
|
|
225
|
+
):
|
|
226
|
+
logging.critical("More than 20 percent of the records have failed. Halting")
|
|
227
|
+
sys.exit(1)
|
|
228
|
+
|
|
229
|
+
@staticmethod
|
|
230
|
+
def get_valid_folio_record_ids(
|
|
231
|
+
legacy_ids: List[str], folio_record_identifiers: Set[str], migration_report: MigrationReport
|
|
232
|
+
) -> List[str]:
|
|
233
|
+
new_ids: Set[str] = set()
|
|
234
|
+
for legacy_id in legacy_ids:
|
|
235
|
+
if legacy_id not in folio_record_identifiers:
|
|
236
|
+
new_ids.add(legacy_id)
|
|
237
|
+
else:
|
|
238
|
+
migration_report.add_general_statistics(
|
|
239
|
+
i18n.t("Duplicate MARC record identifiers ")
|
|
240
|
+
)
|
|
241
|
+
if not any(new_ids):
|
|
242
|
+
s = i18n.t("Failed records. No unique record identifiers in legacy record")
|
|
243
|
+
migration_report.add_general_statistics(s)
|
|
244
|
+
raise TransformationRecordFailedError(
|
|
245
|
+
"-".join(legacy_ids),
|
|
246
|
+
"Duplicate recod identifier(s). See logs. Record Failed",
|
|
247
|
+
"-".join(legacy_ids),
|
|
248
|
+
)
|
|
249
|
+
return list(new_ids)
|
|
250
|
+
|
|
251
|
+
def wrap_up(self):
|
|
252
|
+
"""Finalizes the mapping by writing things out."""
|
|
253
|
+
logging.info(
|
|
254
|
+
"Saving map of %s old and new IDs to %s",
|
|
255
|
+
len(self.mapper.id_map),
|
|
256
|
+
self.folder_structure.id_map_path,
|
|
257
|
+
)
|
|
258
|
+
self.mapper.save_id_map_file(self.folder_structure.id_map_path, self.mapper.id_map)
|
|
259
|
+
logging.info("%s records processed", self.records_count)
|
|
260
|
+
with open(self.folder_structure.migration_reports_file, "w+") as report_file:
|
|
261
|
+
self.mapper.migration_report.write_migration_report(
|
|
262
|
+
i18n.t("MFHD records transformation report"),
|
|
263
|
+
report_file,
|
|
264
|
+
self.mapper.start_datetime,
|
|
265
|
+
)
|
|
266
|
+
Helper.print_mapping_report(
|
|
267
|
+
report_file,
|
|
268
|
+
self.mapper.parsed_records,
|
|
269
|
+
self.mapper.mapped_folio_fields,
|
|
270
|
+
self.mapper.mapped_legacy_fields,
|
|
271
|
+
)
|
|
272
|
+
if hasattr(self, "srs_records_file"):
|
|
273
|
+
self.srs_records_file.seek(0)
|
|
274
|
+
if not self.srs_records_file.seek(0):
|
|
275
|
+
os.remove(self.srs_records_file.name)
|
|
276
|
+
self.srs_records_file.close()
|
|
277
|
+
if hasattr(self, "data_import_marc_file"):
|
|
278
|
+
self.data_import_marc_file.seek(0)
|
|
279
|
+
if not self.data_import_marc_file.read(1):
|
|
280
|
+
os.remove(self.data_import_marc_file.name)
|
|
281
|
+
self.data_import_marc_file.close()
|
|
282
|
+
self.mapper.wrap_up()
|
|
283
|
+
|
|
284
|
+
logging.info("Transformation report written to %s", report_file.name)
|
|
285
|
+
logging.info("Processor is done.")
|
|
286
|
+
|
|
287
|
+
def add_legacy_ids_to_map(self, folio_rec: Dict, filtered_legacy_ids: List[str]):
|
|
288
|
+
for legacy_id in filtered_legacy_ids:
|
|
289
|
+
self.legacy_ids.add(legacy_id)
|
|
290
|
+
if legacy_id not in self.mapper.id_map:
|
|
291
|
+
self.mapper.id_map[legacy_id] = self.mapper.get_id_map_tuple(
|
|
292
|
+
legacy_id, folio_rec, self.object_type
|
|
293
|
+
)
|
|
294
|
+
|
|
295
|
+
else:
|
|
296
|
+
raise TransformationRecordFailedError(
|
|
297
|
+
legacy_id,
|
|
298
|
+
"Legacy ID already added to Legacy Id map.",
|
|
299
|
+
",".join(filtered_legacy_ids),
|
|
300
|
+
)
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import sys
|
|
3
|
+
from io import IOBase
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
import i18n
|
|
7
|
+
from pymarc import Leader, MARCReader, Record
|
|
8
|
+
|
|
9
|
+
from folio_migration_tools.custom_exceptions import (
|
|
10
|
+
TransformationProcessError,
|
|
11
|
+
TransformationRecordFailedError,
|
|
12
|
+
)
|
|
13
|
+
from folio_migration_tools.folder_structure import FolderStructure
|
|
14
|
+
from folio_migration_tools.library_configuration import FileDefinition
|
|
15
|
+
from folio_migration_tools.marc_rules_transformation.marc_file_processor import (
|
|
16
|
+
MarcFileProcessor,
|
|
17
|
+
)
|
|
18
|
+
from folio_migration_tools.migration_report import MigrationReport
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class MARCReaderWrapper:
    """Static helpers for reading MARC files and feeding records to a processor."""

    @staticmethod
    def process_single_file(
        file_def: FileDefinition,
        processor,
        failed_records_path: Path,
        folder_structure: FolderStructure,
    ):
        """Open one legacy MARC file and run every record through the processor."""
        source_path = folder_structure.legacy_records_folder / file_def.file_name
        try:
            with open(failed_records_path, "ab") as failed_marc_records_file, open(
                source_path, "rb"
            ) as marc_file:
                reader = MARCReader(marc_file, to_unicode=True, permissive=True)
                reader.hide_utf8_warnings = True
                reader.force_utf8 = False
                logging.info("Running %s", file_def.file_name)
                MARCReaderWrapper.read_records(
                    reader, file_def, failed_marc_records_file, processor
                )
        except TransformationProcessError as tpe:
            # Process errors are unrecoverable: log and halt the whole run.
            logging.critical(tpe)
            sys.exit(1)
        except Exception:
            logging.exception("Failure in Main: %s", file_def.file_name, stack_info=True)
|
|
47
|
+
|
|
48
|
+
@staticmethod
|
|
49
|
+
def read_records(
|
|
50
|
+
reader,
|
|
51
|
+
source_file: FileDefinition,
|
|
52
|
+
failed_records_file: IOBase,
|
|
53
|
+
processor: MarcFileProcessor,
|
|
54
|
+
):
|
|
55
|
+
for idx, record in enumerate(reader):
|
|
56
|
+
processor.mapper.migration_report.add_general_statistics(
|
|
57
|
+
i18n.t("Records in file before parsing")
|
|
58
|
+
)
|
|
59
|
+
try:
|
|
60
|
+
# None = Something bad happened
|
|
61
|
+
if record is None:
|
|
62
|
+
report_failed_parsing(
|
|
63
|
+
reader,
|
|
64
|
+
source_file,
|
|
65
|
+
failed_records_file,
|
|
66
|
+
idx,
|
|
67
|
+
processor.mapper.migration_report,
|
|
68
|
+
)
|
|
69
|
+
# The normal case
|
|
70
|
+
else:
|
|
71
|
+
MARCReaderWrapper.set_leader(record, processor.mapper.migration_report)
|
|
72
|
+
processor.mapper.migration_report.add_general_statistics(
|
|
73
|
+
i18n.t("Records successfully decoded from MARC21"),
|
|
74
|
+
)
|
|
75
|
+
processor.process_record(idx, record, source_file)
|
|
76
|
+
except TransformationRecordFailedError as error:
|
|
77
|
+
error.log_it()
|
|
78
|
+
processor.mapper.migration_report.add_general_statistics(
|
|
79
|
+
i18n.t("Records that failed transformation. Check log for details"),
|
|
80
|
+
)
|
|
81
|
+
except ValueError as error:
|
|
82
|
+
logging.error(error)
|
|
83
|
+
logging.info("Done reading %s records from file", idx + 1)
|
|
84
|
+
|
|
85
|
+
    @staticmethod
    def set_leader(marc_record: Record, migration_report: MigrationReport):
        """Normalize MARC leader positions in place, reporting every change.

        Forces LDR/09 (character coding scheme) to "a", the trailing
        positions 20-23 to "4500", LDR/10 (indicator count) to "2", and
        LDR/11 (subfield code count) to "2".
        """
        if marc_record.leader[9] != "a":
            migration_report.add(
                "LeaderManipulation",
                i18n.t(
                    "Set leader 09 (Character coding scheme) from %{field} to a",
                    field=marc_record.leader[9],
                ),
            )
            marc_record.leader = Leader(f"{marc_record.leader[:9]}a{marc_record.leader[10:]}")

        if not str(marc_record.leader).endswith("4500"):
            migration_report.add(
                "LeaderManipulation",
                i18n.t("Set leader 20-23 from %{field} to 4500", field=marc_record.leader[-4:]),
            )
            marc_record.leader = Leader(f"{marc_record.leader[:-4]}4500")

        if marc_record.leader[10] != "2":
            migration_report.add(
                "LeaderManipulation",
                i18n.t(
                    "Set leader 10 (Indicator count) from %{field} to 2",
                    field=marc_record.leader[10],
                ),
            )
            marc_record.leader = Leader(f"{marc_record.leader[:10]}2{marc_record.leader[11:]}")

        if marc_record.leader[11] != "2":
            migration_report.add(
                "LeaderManipulation",
                # NOTE(review): this message's placeholder is named %{record},
                # unlike the %{field} used above — verify the translation key.
                i18n.t(
                    "Set leader 11 (Subfield code count) from %{record} to 2",
                    record=marc_record.leader[11],
                ),
            )
            marc_record.leader = Leader(f"{marc_record.leader[:11]}2{marc_record.leader[12:]}")
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def report_failed_parsing(
    reader, source_file, failed_bibs_file, idx, migration_report: MigrationReport
):
    """Record a parsing failure: count it, dump the raw chunk, and raise."""
    migration_report.add_general_statistics(
        i18n.t("Records with encoding errors - parsing failed"),
    )
    # Preserve the raw bytes so the record can be inspected or re-run later.
    failed_bibs_file.write(reader.current_chunk)
    failure_location = f"Index in {source_file.file_name}:{idx}"
    parse_error = f"MARC parsing error: {reader.current_exception}"
    raise TransformationRecordFailedError(
        failure_location,
        parse_error,
        "Failed records stored in results/failed_bib_records.mrc",
    )
|
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
"""The default mapper, responsible for parsing MARC21 records according to the
FOLIO community specifications"""
|
|
3
|
+
import logging
|
|
4
|
+
import re
|
|
5
|
+
import time
|
|
6
|
+
import uuid
|
|
7
|
+
from typing import List
|
|
8
|
+
|
|
9
|
+
import i18n
|
|
10
|
+
import pymarc
|
|
11
|
+
from folio_uuid.folio_namespaces import FOLIONamespaces
|
|
12
|
+
from folio_uuid.folio_uuid import FolioUUID
|
|
13
|
+
from folioclient import FolioClient
|
|
14
|
+
from pymarc import Leader, Record
|
|
15
|
+
|
|
16
|
+
from folio_migration_tools.custom_exceptions import TransformationProcessError
|
|
17
|
+
from folio_migration_tools.helper import Helper
|
|
18
|
+
from folio_migration_tools.library_configuration import (
|
|
19
|
+
FileDefinition,
|
|
20
|
+
IlsFlavour,
|
|
21
|
+
LibraryConfiguration,
|
|
22
|
+
)
|
|
23
|
+
from folio_migration_tools.marc_rules_transformation.conditions import Conditions
|
|
24
|
+
from folio_migration_tools.marc_rules_transformation.hrid_handler import HRIDHandler
|
|
25
|
+
from folio_migration_tools.marc_rules_transformation.rules_mapper_base import (
|
|
26
|
+
RulesMapperBase,
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class AuthorityMapper(RulesMapperBase):
    """Maps MARC21 authority records to FOLIO Authority objects using the
    mapping rules fetched from the tenant.
    """

    # MARC authority fields that may occur at most once per record.
    # NOTE(review): attribute name carries a typo ("repatable") but renaming
    # it would break external callers — keep until a coordinated rename.
    non_repatable_fields = [
        "100",
        "110",
        "111",
        "130",
        "147",
        "148",
        "150",
        "151",
        "155",
        "162",
        "180",
        "181",
        "182",
        "185",
        "378",
        "384",
    ]

    def __init__(
        self,
        folio_client,
        library_configuration: LibraryConfiguration,
        task_configuration,
    ):
        super().__init__(
            folio_client,
            library_configuration,
            task_configuration,
            None,
            self.get_authority_json_schema(folio_client, library_configuration),
            Conditions(folio_client, self, "auth", library_configuration.folio_release),
        )
        self.srs_recs: list = []
        logging.info("Fetching mapping rules from the tenant")
        rules_endpoint = "/mapping-rules/marc-authority"
        self.mappings = self.folio_client.folio_get_single_object(rules_endpoint)
        # Maps authority source-file codes to their source file definitions;
        # used by map_source_file_and_natural_id.
        self.source_file_mapping: dict = {}
        self.setup_source_file_mapping()
        self.start = time.time()
|
|
76
|
+
|
|
77
|
+
def get_legacy_ids(self, marc_record: Record, idx: int) -> List[str]:
|
|
78
|
+
ils_flavour: IlsFlavour = self.task_configuration.ils_flavour
|
|
79
|
+
if ils_flavour in {IlsFlavour.sierra, IlsFlavour.millennium}:
|
|
80
|
+
raise TransformationProcessError("", f"ILS {ils_flavour} not configured")
|
|
81
|
+
elif ils_flavour == IlsFlavour.tag907y:
|
|
82
|
+
return RulesMapperBase.get_bib_id_from_907y(marc_record, idx)
|
|
83
|
+
elif ils_flavour == IlsFlavour.tagf990a:
|
|
84
|
+
return RulesMapperBase.get_bib_id_from_990a(marc_record, idx)
|
|
85
|
+
elif ils_flavour == IlsFlavour.aleph:
|
|
86
|
+
raise TransformationProcessError("", f"ILS {ils_flavour} not configured")
|
|
87
|
+
elif ils_flavour in {IlsFlavour.voyager, "voyager", IlsFlavour.tag001}:
|
|
88
|
+
return RulesMapperBase.get_bib_id_from_001(marc_record, idx)
|
|
89
|
+
elif ils_flavour == IlsFlavour.koha:
|
|
90
|
+
raise TransformationProcessError("", f"ILS {ils_flavour} not configured")
|
|
91
|
+
elif ils_flavour == IlsFlavour.none:
|
|
92
|
+
return [str(uuid.uuid4())]
|
|
93
|
+
else:
|
|
94
|
+
raise TransformationProcessError("", f"ILS {ils_flavour} not configured")
|
|
95
|
+
|
|
96
|
+
    def parse_record(
        self, marc_record: pymarc.Record, file_def: FileDefinition, legacy_ids: List[str]
    ) -> list[dict]:
        """Parse an authority record into a FOLIO Authority object.

        This is the main entry point for a single record.

        Args:
            marc_record (pymarc.Record): The MARC authority record
            file_def (FileDefinition): Definition of the source file
            legacy_ids (List[str]): Legacy identifiers for the record

        Returns:
            list[dict]: Single-element list with the cleaned FOLIO authority
        """
        self.print_progress()
        ignored_subsequent_fields: set = set()
        bad_tags = set(self.task_configuration.tags_to_delete)  # "907"
        folio_authority = self.perform_initial_preparation(marc_record, legacy_ids)
        for marc_field in marc_record:
            self.report_marc_stats(marc_field, bad_tags, legacy_ids, ignored_subsequent_fields)
            if marc_field.tag not in ignored_subsequent_fields:
                self.process_marc_field(
                    folio_authority,
                    marc_field,
                    ignored_subsequent_fields,
                    legacy_ids,
                )

        self.perform_additional_parsing(folio_authority)
        # NOTE(review): validation passes FOLIONamespaces.instances rather
        # than .authorities — confirm this is intentional.
        clean_folio_authority = self.validate_required_properties(
            "-".join(legacy_ids), folio_authority, self.schema, FOLIONamespaces.instances
        )
        self.dedupe_rec(clean_folio_authority)
        marc_record.remove_fields(*list(bad_tags))
        self.report_folio_mapping(clean_folio_authority, self.schema)
        return [clean_folio_authority]
|
|
132
|
+
|
|
133
|
+
    def perform_initial_preparation(self, marc_record: pymarc.Record, legacy_ids):
        """Create the skeleton FOLIO authority: deterministic id, 035 handling,
        source file / naturalId mapping, and LDR/17 normalization.
        """
        folio_authority = {}
        # Deterministic UUID derived from the last legacy id, so repeated
        # runs produce the same FOLIO id for the same record.
        folio_authority["id"] = str(
            FolioUUID(
                self.base_string_for_folio_uuid,
                FOLIONamespaces.authorities,
                str(legacy_ids[-1]),
            )
        )
        HRIDHandler.handle_035_generation(
            marc_record, legacy_ids, self.migration_report, False, False
        )
        self.map_source_file_and_natural_id(marc_record, folio_authority)
        self.handle_leader_17(marc_record, legacy_ids)
        return folio_authority
|
|
148
|
+
|
|
149
|
+
    def map_source_file_and_natural_id(self, marc_record, folio_authority):
        """Implement source file and natural ID mappings according to MODDICORE-283.

        Tries 010$a first: when a subfield's alphabetic prefix matches a known
        authority source file code, the whitespace-stripped subfield becomes
        the naturalId. Otherwise falls back to the 001 and matches its prefix
        the same way. sourceFileId is set only when a prefix match was found.
        """
        match_prefix_patt = re.compile("^[A-Za-z]+")
        natural_id = None
        source_file_id = None
        has_010 = marc_record.get("010")
        if has_010 and (has_010a := has_010.get_subfields("a")):
            for a_subfield in has_010a:
                natural_id_prefix = match_prefix_patt.match(a_subfield)
                if natural_id_prefix and (
                    source_file := self.source_file_mapping.get(natural_id_prefix.group(0), None)
                ):
                    # Strip all whitespace from the subfield to form the naturalId.
                    natural_id = "".join(a_subfield.split())
                    source_file_id = source_file["id"]
                    self.migration_report.add_general_statistics(
                        i18n.t("naturalId mapped from %{fro}", fro="010$a")
                    )
                    self.migration_report.add(
                        "AuthoritySourceFileMapping",
                        f"{source_file['name']} -- {natural_id_prefix.group(0)} -- 010$a",
                        number=1,
                    )
                    # First matching 010$a wins.
                    break
        if not source_file_id:
            # Fall back to the 001 control number.
            # NOTE(review): raises KeyError if the record has no 001 — confirm
            # upstream guarantees its presence.
            natural_id = "".join(marc_record["001"].data.split())
            self.migration_report.add_general_statistics(
                i18n.t("naturalId mapped from %{fro}", fro="001")
            )
            natural_id_prefix = match_prefix_patt.match(natural_id)
            if natural_id_prefix:
                if source_file := self.source_file_mapping.get(natural_id_prefix.group(0), None):
                    source_file_id = source_file["id"]
                    self.migration_report.add(
                        "AuthoritySourceFileMapping",
                        f"{source_file['name']} -- {natural_id_prefix.group(0)} -- 001",
                        number=1,
                    )
        folio_authority["naturalId"] = natural_id
        if source_file_id:
            folio_authority["sourceFileId"] = source_file_id
|
|
189
|
+
|
|
190
|
+
def setup_source_file_mapping(self):
|
|
191
|
+
if self.folio_client.authority_source_files:
|
|
192
|
+
logging.info(
|
|
193
|
+
f"{len(self.folio_client.authority_source_files)} \tAuthority source files"
|
|
194
|
+
)
|
|
195
|
+
for source_file in self.folio_client.authority_source_files:
|
|
196
|
+
for sf_code in source_file.get("codes", []):
|
|
197
|
+
self.source_file_mapping[sf_code] = source_file
|
|
198
|
+
|
|
199
|
+
def handle_leader_17(self, marc_record, legacy_ids):
|
|
200
|
+
leader_17 = marc_record.leader[17] or "Empty"
|
|
201
|
+
self.migration_report.add(
|
|
202
|
+
"AuthorityEncodingLevel", i18n.t("Original value") + f": {leader_17}"
|
|
203
|
+
)
|
|
204
|
+
if leader_17 not in ["n", "o"]:
|
|
205
|
+
Helper.log_data_issue(
|
|
206
|
+
legacy_ids,
|
|
207
|
+
f"LDR pos. 17 is '{leader_17}'. Is this correct? Value has been changed to 'n'.",
|
|
208
|
+
marc_record.leader,
|
|
209
|
+
)
|
|
210
|
+
marc_record.leader = Leader(f"{marc_record.leader[:17]}n{marc_record.leader[18:]}")
|
|
211
|
+
self.migration_report.add(
|
|
212
|
+
"AuthorityEncodingLevel", i18n.t("Changed %{a} to %{b}", a=leader_17, b="n")
|
|
213
|
+
)
|
|
214
|
+
|
|
215
|
+
    def perform_additional_parsing(
        self,
        folio_authority: dict,
    ) -> None:
        """Do stuff not easily captured by the mapping rules.

        Currently only marks the record's source as MARC.

        Args:
            folio_authority (dict): The FOLIO authority under construction
        """
        folio_authority["source"] = "MARC"
|
|
225
|
+
|
|
226
|
+
    def get_authority_json_schema(self, folio_client: FolioClient, library_configuration):
        """Fetches the JSON Schema for authorities from GitHub.

        The schema moved between FOLIO releases: releases whose name sorts
        before "p" use the mod-inventory-storage RAML; later ones use the
        mod-entities-links OpenAPI schema. (Presumably the cut-over is the
        "P" release — TODO confirm.)
        """
        if library_configuration.folio_release.name.lower()[0] < "p":
            schema = folio_client.get_from_github(
                "folio-org", "mod-inventory-storage", "/ramls/authorities/authority.json"
            )
        else:
            schema = folio_client.get_from_github(
                "folio-org",
                "mod-entities-links",
                "/src/main/resources/swagger.api/schemas/authority-storage/authorityDto.yaml",
            )
        return schema
|
|
239
|
+
|
|
240
|
+
    def wrap_up(self):
        """Finalize the mapping run. Currently only logs completion."""
        logging.info("Mapper wrapping up")
|