folio-migration-tools 1.2.1__py3-none-any.whl → 1.9.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. folio_migration_tools/__init__.py +11 -0
  2. folio_migration_tools/__main__.py +169 -85
  3. folio_migration_tools/circulation_helper.py +96 -59
  4. folio_migration_tools/config_file_load.py +66 -0
  5. folio_migration_tools/custom_dict.py +6 -4
  6. folio_migration_tools/custom_exceptions.py +21 -19
  7. folio_migration_tools/extradata_writer.py +46 -0
  8. folio_migration_tools/folder_structure.py +63 -66
  9. folio_migration_tools/helper.py +29 -21
  10. folio_migration_tools/holdings_helper.py +57 -34
  11. folio_migration_tools/i18n_config.py +9 -0
  12. folio_migration_tools/library_configuration.py +173 -13
  13. folio_migration_tools/mapper_base.py +317 -106
  14. folio_migration_tools/mapping_file_transformation/courses_mapper.py +203 -0
  15. folio_migration_tools/mapping_file_transformation/holdings_mapper.py +83 -69
  16. folio_migration_tools/mapping_file_transformation/item_mapper.py +98 -94
  17. folio_migration_tools/mapping_file_transformation/manual_fee_fines_mapper.py +352 -0
  18. folio_migration_tools/mapping_file_transformation/mapping_file_mapper_base.py +702 -223
  19. folio_migration_tools/mapping_file_transformation/notes_mapper.py +90 -0
  20. folio_migration_tools/mapping_file_transformation/order_mapper.py +492 -0
  21. folio_migration_tools/mapping_file_transformation/organization_mapper.py +389 -0
  22. folio_migration_tools/mapping_file_transformation/ref_data_mapping.py +38 -27
  23. folio_migration_tools/mapping_file_transformation/user_mapper.py +149 -361
  24. folio_migration_tools/marc_rules_transformation/conditions.py +650 -246
  25. folio_migration_tools/marc_rules_transformation/holdings_statementsparser.py +292 -130
  26. folio_migration_tools/marc_rules_transformation/hrid_handler.py +244 -0
  27. folio_migration_tools/marc_rules_transformation/loc_language_codes.xml +20846 -0
  28. folio_migration_tools/marc_rules_transformation/marc_file_processor.py +300 -0
  29. folio_migration_tools/marc_rules_transformation/marc_reader_wrapper.py +136 -0
  30. folio_migration_tools/marc_rules_transformation/rules_mapper_authorities.py +241 -0
  31. folio_migration_tools/marc_rules_transformation/rules_mapper_base.py +681 -201
  32. folio_migration_tools/marc_rules_transformation/rules_mapper_bibs.py +395 -429
  33. folio_migration_tools/marc_rules_transformation/rules_mapper_holdings.py +531 -100
  34. folio_migration_tools/migration_report.py +85 -38
  35. folio_migration_tools/migration_tasks/__init__.py +1 -3
  36. folio_migration_tools/migration_tasks/authority_transformer.py +119 -0
  37. folio_migration_tools/migration_tasks/batch_poster.py +911 -198
  38. folio_migration_tools/migration_tasks/bibs_transformer.py +121 -116
  39. folio_migration_tools/migration_tasks/courses_migrator.py +192 -0
  40. folio_migration_tools/migration_tasks/holdings_csv_transformer.py +252 -247
  41. folio_migration_tools/migration_tasks/holdings_marc_transformer.py +321 -115
  42. folio_migration_tools/migration_tasks/items_transformer.py +264 -84
  43. folio_migration_tools/migration_tasks/loans_migrator.py +506 -195
  44. folio_migration_tools/migration_tasks/manual_fee_fines_transformer.py +187 -0
  45. folio_migration_tools/migration_tasks/migration_task_base.py +364 -74
  46. folio_migration_tools/migration_tasks/orders_transformer.py +373 -0
  47. folio_migration_tools/migration_tasks/organization_transformer.py +451 -0
  48. folio_migration_tools/migration_tasks/requests_migrator.py +130 -62
  49. folio_migration_tools/migration_tasks/reserves_migrator.py +253 -0
  50. folio_migration_tools/migration_tasks/user_transformer.py +180 -139
  51. folio_migration_tools/task_configuration.py +46 -0
  52. folio_migration_tools/test_infrastructure/__init__.py +0 -0
  53. folio_migration_tools/test_infrastructure/mocked_classes.py +406 -0
  54. folio_migration_tools/transaction_migration/legacy_loan.py +148 -34
  55. folio_migration_tools/transaction_migration/legacy_request.py +65 -25
  56. folio_migration_tools/transaction_migration/legacy_reserve.py +47 -0
  57. folio_migration_tools/transaction_migration/transaction_result.py +12 -1
  58. folio_migration_tools/translations/en.json +476 -0
  59. folio_migration_tools-1.9.10.dist-info/METADATA +169 -0
  60. folio_migration_tools-1.9.10.dist-info/RECORD +67 -0
  61. {folio_migration_tools-1.2.1.dist-info → folio_migration_tools-1.9.10.dist-info}/WHEEL +1 -2
  62. folio_migration_tools-1.9.10.dist-info/entry_points.txt +3 -0
  63. folio_migration_tools/generate_schemas.py +0 -46
  64. folio_migration_tools/mapping_file_transformation/mapping_file_mapping_base_impl.py +0 -44
  65. folio_migration_tools/mapping_file_transformation/user_mapper_base.py +0 -212
  66. folio_migration_tools/marc_rules_transformation/bibs_processor.py +0 -163
  67. folio_migration_tools/marc_rules_transformation/holdings_processor.py +0 -284
  68. folio_migration_tools/report_blurbs.py +0 -219
  69. folio_migration_tools/transaction_migration/legacy_fee_fine.py +0 -36
  70. folio_migration_tools-1.2.1.dist-info/METADATA +0 -134
  71. folio_migration_tools-1.2.1.dist-info/RECORD +0 -50
  72. folio_migration_tools-1.2.1.dist-info/top_level.txt +0 -1
  73. {folio_migration_tools-1.2.1.dist-info → folio_migration_tools-1.9.10.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,300 @@
1
+ import logging
2
+ import os
3
+ import sys
4
+ import time
5
+ import traceback
6
+ from typing import BinaryIO, Dict, List, Set, TextIO
7
+
8
+ import i18n
9
+ from folio_uuid.folio_namespaces import FOLIONamespaces
10
+ from pymarc import Field, Record, Subfield
11
+
12
+ from folio_migration_tools.custom_exceptions import (
13
+ TransformationProcessError,
14
+ TransformationRecordFailedError,
15
+ )
16
+ from folio_migration_tools.folder_structure import FolderStructure
17
+ from folio_migration_tools.helper import Helper
18
+ from folio_migration_tools.library_configuration import FileDefinition, HridHandling
19
+ from folio_migration_tools.marc_rules_transformation.rules_mapper_base import (
20
+ RulesMapperBase,
21
+ )
22
+ from folio_migration_tools.migration_report import MigrationReport
23
+
24
+
25
class MarcFileProcessor:
    """Runs MARC records through a rules mapper and writes the results to disk.

    For each record passed to ``process_record`` the processor asks the mapper for
    the legacy ids and the FOLIO record(s), registers the legacy ids in the mapper's
    id map, optionally writes SRS and data import MARC output, and appends every
    FOLIO record to ``created_objects_file``. ``wrap_up`` saves the id map, writes
    the migration report and closes the optional output files.
    """

    def __init__(
        self, mapper: RulesMapperBase, folder_structure: FolderStructure, created_objects_file: TextIO
    ):
        """Set up counters and open the optional output files.

        Args:
            mapper (RulesMapperBase): Mapper doing the actual MARC-to-FOLIO mapping
            folder_structure (FolderStructure): Supplies the object type and all file paths
            created_objects_file (TextIO): Open file that created FOLIO records are written to
        """
        self.object_type: FOLIONamespaces = folder_structure.object_type
        self.folder_structure: FolderStructure = folder_structure
        self.mapper: RulesMapperBase = mapper
        self.created_objects_file: TextIO = created_objects_file
        # The two optional output files only exist as attributes when they are
        # enabled; wrap_up() relies on hasattr() to find out.
        if mapper.create_source_records and any(
            x.create_source_records for x in mapper.task_configuration.files
        ):
            self.srs_records_file: TextIO = open(self.folder_structure.srs_records_path, "w+")
        if getattr(mapper.task_configuration, "data_import_marc", False):
            self.data_import_marc_file: BinaryIO = open(
                self.folder_structure.data_import_marc_path, "wb+"
            )
        self.unique_001s: Set[str] = set()
        self.failed_records_count: int = 0
        self.records_count: int = 0
        self.start: float = time.time()
        self.legacy_ids: Set[str] = set()
        if (
            self.object_type == FOLIONamespaces.holdings
            and self.mapper.create_source_records
        ):
            logging.info("Loading Parent HRID map for SRS creation")
            # Built from elements 1 and 2 of every parent id map entry; looked up by
            # the holdings record's instanceId in save_srs_record().
            self.parent_hrids = {entity[1]: entity[2] for entity in mapper.parent_id_map.values()}

    def process_record(self, idx: int, marc_record: Record, file_def: FileDefinition):
        """Transforms one MARC record and saves the resulting FOLIO record(s)

        Args:
            idx (int): Index in file being parsed
            marc_record (Record): The MARC record to transform
            file_def (FileDefinition): Definition of the file the record was read from

        Raises:
            TransformationProcessError: Re-raised with the file name added. Also raised
                for any unexpected exception.
            TransformationRecordFailedError: Re-raised with the file name added when
                this individual record could not be transformed.
        """
        success = True
        folio_recs = []
        self.records_count += 1
        try:
            # Transform the MARC21 to a FOLIO record
            try:
                legacy_ids = self.mapper.get_legacy_ids(marc_record, idx)
            except ValueError as e:
                raise TransformationRecordFailedError(
                    f"{idx} in {file_def.file_name}", str(e), idx
                ) from e
            if not legacy_ids:
                raise TransformationRecordFailedError(
                    f"Index in file: {idx}", "No legacy id found", idx
                )
            folio_recs = self.mapper.parse_record(marc_record, file_def, legacy_ids)
            # A separate loop variable keeps the ``idx`` parameter (position in the
            # source file) intact.
            for rec_position, folio_rec in enumerate(folio_recs):
                if rec_position == 0:
                    # Id registration and MARC output are done for the first
                    # returned record only.
                    filtered_legacy_ids = self.get_valid_folio_record_ids(
                        legacy_ids, self.legacy_ids, self.mapper.migration_report
                    )
                    self.add_legacy_ids_to_map(folio_rec, filtered_legacy_ids)

                    if (
                        file_def.create_source_records
                        and self.mapper.create_source_records
                    ):
                        self.save_srs_record(
                            marc_record,
                            file_def,
                            folio_rec,
                            legacy_ids,
                            self.object_type,
                        )
                    if getattr(self.mapper.task_configuration, "data_import_marc", False):
                        self.save_marc_record(
                            marc_record,
                            folio_rec,
                            self.object_type
                        )
                Helper.write_to_file(self.created_objects_file, folio_rec)
                self.mapper.migration_report.add_general_statistics(
                    i18n.t("Inventory records written to disk")
                )
                self.exit_on_too_many_exceptions()

        except TransformationRecordFailedError as error:
            success = False
            raise TransformationRecordFailedError(
                f"{error.index_or_id} in {file_def.file_name}", error.message, error.data_value
            ) from error
        except TransformationProcessError as tpe:
            raise TransformationProcessError(
                f"{tpe.index_or_id} in {file_def.file_name}", tpe.message, tpe.data_value
            ) from tpe
        except Exception as inst:
            success = False
            traceback.print_exc()
            logging.error(type(inst))
            logging.error(inst.args)
            logging.error(inst)
            logging.error(marc_record)
            logging.error(folio_recs)
            raise TransformationProcessError("", inst.args, "") from inst
        finally:
            if not success:
                self.failed_records_count += 1
                # Not every mapper has remove_from_id_map; only clean up when it does.
                remove_from_id_map = getattr(self.mapper, "remove_from_id_map", None)
                if callable(remove_from_id_map):
                    for folio_rec in folio_recs:
                        if folio_rec.get("formerIds", ""):
                            remove_from_id_map(folio_rec.get("formerIds", []))

    def save_marc_record(
        self,
        marc_record: Record,
        folio_rec: Dict,
        object_type: FOLIONamespaces
    ):
        """Hands the MARC record to the mapper for writing to the data import MARC file

        Args:
            marc_record (Record): The MARC record to save
            folio_rec (Dict): The FOLIO record created from the MARC record
            object_type (FOLIONamespaces): Type of object being migrated
        """
        self.mapper.save_data_import_marc_record(
            self.data_import_marc_file,
            object_type,
            marc_record,
            folio_rec,
        )

    def save_srs_record(
        self,
        marc_record: Record,
        file_def: FileDefinition,
        folio_rec: Dict,
        legacy_ids: List[str],
        object_type: FOLIONamespaces,
    ):
        """Adjusts the MARC record and lets the mapper write it as an SRS record

        Args:
            marc_record (Record): The MARC record; modified in place
            file_def (FileDefinition): Supplies the discovery suppression setting
            folio_rec (Dict): The FOLIO record created from the MARC record
            legacy_ids (List[str]): Legacy identifiers of the record
            object_type (FOLIONamespaces): Type of object being migrated
        """
        if object_type in [FOLIONamespaces.holdings]:
            if "008" in marc_record and len(marc_record["008"].data) > 32:
                # Keep the first 32 characters and report what was cut off.
                remain, rest = (
                    marc_record["008"].data[:32],
                    marc_record["008"].data[32:],
                )
                marc_record["008"].data = remain
                self.mapper.migration_report.add(
                    "MarcValidation",
                    i18n.t("008 length invalid. '%{rest}' was stripped out", rest=rest),
                )
            self.add_mapped_location_code_to_record(marc_record, folio_rec)
            # Replace any existing 004 with the value stored for the parent instance.
            new_004 = Field(tag="004", data=self.parent_hrids[folio_rec["instanceId"]])
            marc_record.remove_fields("004")
            marc_record.add_ordered_field(new_004)
        for former_id in legacy_ids:
            if self.mapper.task_configuration.hrid_handling == HridHandling.default:
                new_035 = Field(
                    tag="035",
                    indicators=[" ", " "],
                    subfields=[Subfield(code="a", value=former_id)],
                )
                marc_record.add_ordered_field(new_035)
        self.mapper.save_source_record(
            self.srs_records_file,
            self.object_type,
            self.mapper.folio_client,
            marc_record,
            folio_rec,
            legacy_ids,
            file_def.discovery_suppressed,
        )
        self.mapper.migration_report.add_general_statistics(i18n.t("SRS records written to disk"))

    def add_mapped_location_code_to_record(self, marc_record: Record, folio_rec: Dict):
        """Puts the code of the mapped FOLIO location into the first 852 $b

        Any further $b in that field is moved to $x.

        Args:
            marc_record (Record): The MARC record; modified in place
            folio_rec (Dict): FOLIO record holding the permanentLocationId

        Raises:
            TransformationRecordFailedError: When the record has no 852 field
        """
        # NOTE(review): location_code stays None when no location matches the
        # permanentLocationId, and is still written to $b below - confirm intended.
        location_code = next(
            (
                location["code"]
                for location in self.mapper.folio_client.locations
                if location["id"] == folio_rec["permanentLocationId"]
            ),
            None,
        )
        if "852" not in marc_record:
            raise TransformationRecordFailedError(
                "", "No 852 in record when storing new location code", ""
            )
        first_852 = marc_record.get_fields("852")[0]
        # The first $b is dropped, all remaining ones are kept as $x.
        first_852.delete_subfield("b")
        while old_b := first_852.delete_subfield("b"):
            first_852.add_subfield("x", old_b, 0)
            self.mapper.migration_report.add(
                "LocationMapping", i18n.t("Additional 852$b was moved to 852$x")
            )
        first_852.add_subfield("b", location_code, 0)
        self.mapper.migration_report.add(
            "LocationMapping", i18n.t("Set 852 to FOLIO location code")
        )

    def exit_on_too_many_exceptions(self):
        """Halts the process when both configured failure thresholds are exceeded

        The share of failed records must exceed ``failed_percentage_threshold`` and
        the number of failed records must exceed ``failed_records_threshold``.
        """
        library_configuration = self.mapper.library_configuration
        failed_share = self.failed_records_count / (self.records_count + 1)
        if (
            failed_share > (library_configuration.failed_percentage_threshold / 100)
            and self.failed_records_count > library_configuration.failed_records_threshold
        ):
            # Report the configured threshold, not a fixed number.
            logging.critical(
                "More than %s percent of the records have failed. Halting",
                library_configuration.failed_percentage_threshold,
            )
            sys.exit(1)

    @staticmethod
    def get_valid_folio_record_ids(
        legacy_ids: List[str], folio_record_identifiers: Set[str], migration_report: MigrationReport
    ) -> List[str]:
        """Returns the legacy ids that have not been seen before

        Args:
            legacy_ids (List[str]): Legacy identifiers of the current record
            folio_record_identifiers (Set[str]): Identifiers already in use
            migration_report (MigrationReport): Report that duplicates are counted in

        Raises:
            TransformationRecordFailedError: When no new, non-empty identifier is left

        Returns:
            List[str]: The new identifiers, in no particular order
        """
        new_ids: Set[str] = set()
        for legacy_id in legacy_ids:
            if legacy_id not in folio_record_identifiers:
                new_ids.add(legacy_id)
            else:
                migration_report.add_general_statistics(
                    i18n.t("Duplicate MARC record identifiers ")
                )
        # any() rather than a plain emptiness check: a set holding only empty
        # strings is treated like an empty set.
        if not any(new_ids):
            s = i18n.t("Failed records. No unique record identifiers in legacy record")
            migration_report.add_general_statistics(s)
            raise TransformationRecordFailedError(
                "-".join(legacy_ids),
                "Duplicate recod identifier(s). See logs. Record Failed",
                "-".join(legacy_ids),
            )
        return list(new_ids)

    @staticmethod
    def _close_and_remove_if_empty(output_file) -> None:
        """Closes an open read/write output file and deletes it if nothing was written

        Args:
            output_file: Open, seekable and readable file object with a ``name``
        """
        output_file.seek(0)
        # seek() returns the new position (0), so emptiness has to be decided by
        # actually trying to read something.
        is_empty = not output_file.read(1)
        # Close before removing; removing an open file fails on some platforms.
        output_file.close()
        if is_empty:
            os.remove(output_file.name)

    def wrap_up(self):
        """Finalizes the mapping by writing things out."""
        logging.info(
            "Saving map of %s old and new IDs to %s",
            len(self.mapper.id_map),
            self.folder_structure.id_map_path,
        )
        self.mapper.save_id_map_file(self.folder_structure.id_map_path, self.mapper.id_map)
        logging.info("%s records processed", self.records_count)
        with open(self.folder_structure.migration_reports_file, "w+") as report_file:
            self.mapper.migration_report.write_migration_report(
                i18n.t("MFHD records transformation report"),
                report_file,
                self.mapper.start_datetime,
            )
            Helper.print_mapping_report(
                report_file,
                self.mapper.parsed_records,
                self.mapper.mapped_folio_fields,
                self.mapper.mapped_legacy_fields,
            )
        # The optional outputs are kept only when something was written to them.
        if hasattr(self, "srs_records_file"):
            self._close_and_remove_if_empty(self.srs_records_file)
        if hasattr(self, "data_import_marc_file"):
            self._close_and_remove_if_empty(self.data_import_marc_file)
        self.mapper.wrap_up()

        logging.info("Transformation report written to %s", report_file.name)
        logging.info("Processor is done.")

    def add_legacy_ids_to_map(self, folio_rec: Dict, filtered_legacy_ids: List[str]):
        """Registers the legacy ids of a record in the mapper's id map

        Args:
            folio_rec (Dict): The FOLIO record the ids belong to
            filtered_legacy_ids (List[str]): Legacy ids not used by earlier records

        Raises:
            TransformationRecordFailedError: When an id is already in the id map
        """
        for legacy_id in filtered_legacy_ids:
            self.legacy_ids.add(legacy_id)
            if legacy_id not in self.mapper.id_map:
                self.mapper.id_map[legacy_id] = self.mapper.get_id_map_tuple(
                    legacy_id, folio_rec, self.object_type
                )

            else:
                raise TransformationRecordFailedError(
                    legacy_id,
                    "Legacy ID already added to Legacy Id map.",
                    ",".join(filtered_legacy_ids),
                )
@@ -0,0 +1,136 @@
1
+ import logging
2
+ import sys
3
+ from io import IOBase
4
+ from pathlib import Path
5
+
6
+ import i18n
7
+ from pymarc import Leader, MARCReader, Record
8
+
9
+ from folio_migration_tools.custom_exceptions import (
10
+ TransformationProcessError,
11
+ TransformationRecordFailedError,
12
+ )
13
+ from folio_migration_tools.folder_structure import FolderStructure
14
+ from folio_migration_tools.library_configuration import FileDefinition
15
+ from folio_migration_tools.marc_rules_transformation.marc_file_processor import (
16
+ MarcFileProcessor,
17
+ )
18
+ from folio_migration_tools.migration_report import MigrationReport
19
+
20
+
21
class MARCReaderWrapper:
    """Static helpers for reading a binary MARC file and feeding it to a processor."""

    @staticmethod
    def process_single_file(
        file_def: FileDefinition,
        processor,
        failed_records_path: Path,
        folder_structure: FolderStructure,
    ):
        """Reads one MARC file and passes every record to the processor

        Args:
            file_def (FileDefinition): Definition of the file to read
            processor: Object whose process_record method handles each record
            failed_records_path (Path): File that unparsable records are appended to
            folder_structure (FolderStructure): Supplies the legacy records folder
        """
        try:
            with open(failed_records_path, "ab") as failed_marc_records_file, open(
                folder_structure.legacy_records_folder / file_def.file_name,
                "rb",
            ) as marc_file:
                reader = MARCReader(marc_file, to_unicode=True, permissive=True)
                reader.hide_utf8_warnings = True
                reader.force_utf8 = False
                logging.info("Running %s", file_def.file_name)
                MARCReaderWrapper.read_records(
                    reader, file_def, failed_marc_records_file, processor
                )
        except TransformationProcessError as tpe:
            # Process errors halt the whole run.
            logging.critical(tpe)
            sys.exit(1)
        except Exception:
            # Anything else is logged, and the caller carries on.
            logging.exception("Failure in Main: %s", file_def.file_name, stack_info=True)

    @staticmethod
    def read_records(
        reader,
        source_file: FileDefinition,
        failed_records_file: IOBase,
        processor: MarcFileProcessor,
    ):
        """Iterates the reader and processes each record, logging per-record failures

        Args:
            reader: Iterable yielding MARC records, or None for unparsable ones
            source_file (FileDefinition): Definition of the file being read
            failed_records_file (IOBase): File that unparsable records are written to
            processor (MarcFileProcessor): Processor handling each decoded record
        """
        # Start at -1 so the summary below reports 0 (instead of failing on an
        # unbound name) when the reader yields nothing.
        idx = -1
        for idx, record in enumerate(reader):
            processor.mapper.migration_report.add_general_statistics(
                i18n.t("Records in file before parsing")
            )
            try:
                # None = Something bad happened
                if record is None:
                    report_failed_parsing(
                        reader,
                        source_file,
                        failed_records_file,
                        idx,
                        processor.mapper.migration_report,
                    )
                # The normal case
                else:
                    MARCReaderWrapper.set_leader(record, processor.mapper.migration_report)
                    processor.mapper.migration_report.add_general_statistics(
                        i18n.t("Records successfully decoded from MARC21"),
                    )
                    processor.process_record(idx, record, source_file)
            except TransformationRecordFailedError as error:
                error.log_it()
                processor.mapper.migration_report.add_general_statistics(
                    i18n.t("Records that failed transformation. Check log for details"),
                )
            except ValueError as error:
                logging.error(error)
        logging.info("Done reading %s records from file", idx + 1)

    @staticmethod
    def set_leader(marc_record: Record, migration_report: MigrationReport):
        """Normalizes leader positions 09, 10, 11 and 20-23, reporting every change

        Args:
            marc_record (Record): The MARC record; its leader is replaced when needed
            migration_report (MigrationReport): Report that each change is added to
        """
        if marc_record.leader[9] != "a":
            migration_report.add(
                "LeaderManipulation",
                i18n.t(
                    "Set leader 09 (Character coding scheme) from %{field} to a",
                    field=marc_record.leader[9],
                ),
            )
            marc_record.leader = Leader(f"{marc_record.leader[:9]}a{marc_record.leader[10:]}")

        if not str(marc_record.leader).endswith("4500"):
            migration_report.add(
                "LeaderManipulation",
                i18n.t("Set leader 20-23 from %{field} to 4500", field=marc_record.leader[-4:]),
            )
            marc_record.leader = Leader(f"{marc_record.leader[:-4]}4500")

        if marc_record.leader[10] != "2":
            migration_report.add(
                "LeaderManipulation",
                i18n.t(
                    "Set leader 10 (Indicator count) from %{field} to 2",
                    field=marc_record.leader[10],
                ),
            )
            marc_record.leader = Leader(f"{marc_record.leader[:10]}2{marc_record.leader[11:]}")

        if marc_record.leader[11] != "2":
            migration_report.add(
                "LeaderManipulation",
                i18n.t(
                    "Set leader 11 (Subfield code count) from %{record} to 2",
                    record=marc_record.leader[11],
                ),
            )
            marc_record.leader = Leader(f"{marc_record.leader[:11]}2{marc_record.leader[12:]}")
123
+
124
+
125
def report_failed_parsing(
    reader, source_file, failed_bibs_file, idx, migration_report: MigrationReport
):
    """Records a MARC parsing failure and raises it as a failed record

    The failure is counted in the migration report and the reader's current chunk
    is written to the failed records file before the error is raised.

    Args:
        reader: The reader; its current_chunk and current_exception are used
        source_file: Definition of the file being read; its file_name is used
        failed_bibs_file: Open file the chunk is written to
        idx: Index of the record in the file
        migration_report (MigrationReport): Report the failure is counted in

    Raises:
        TransformationRecordFailedError: Always
    """
    statistics_label = i18n.t("Records with encoding errors - parsing failed")
    migration_report.add_general_statistics(statistics_label)

    failed_bibs_file.write(reader.current_chunk)

    # The messages are built only after the chunk has been stored.
    position = f"Index in {source_file.file_name}:{idx}"
    reason = f"MARC parsing error: {reader.current_exception}"
    raise TransformationRecordFailedError(
        position,
        reason,
        "Failed records stored in results/failed_bib_records.mrc",
    )
@@ -0,0 +1,241 @@
1
+ """The default mapper, responsible for parsing MARC21 records acording to the
2
+ FOLIO community specifications"""
3
+ import logging
4
+ import re
5
+ import time
6
+ import uuid
7
+ from typing import List
8
+
9
+ import i18n
10
+ import pymarc
11
+ from folio_uuid.folio_namespaces import FOLIONamespaces
12
+ from folio_uuid.folio_uuid import FolioUUID
13
+ from folioclient import FolioClient
14
+ from pymarc import Leader, Record
15
+
16
+ from folio_migration_tools.custom_exceptions import TransformationProcessError
17
+ from folio_migration_tools.helper import Helper
18
+ from folio_migration_tools.library_configuration import (
19
+ FileDefinition,
20
+ IlsFlavour,
21
+ LibraryConfiguration,
22
+ )
23
+ from folio_migration_tools.marc_rules_transformation.conditions import Conditions
24
+ from folio_migration_tools.marc_rules_transformation.hrid_handler import HRIDHandler
25
+ from folio_migration_tools.marc_rules_transformation.rules_mapper_base import (
26
+ RulesMapperBase,
27
+ )
28
+
29
+
30
class AuthorityMapper(RulesMapperBase):
    """Maps MARC authority records to FOLIO authority objects.

    The mapping rules are fetched from the tenant (/mapping-rules/marc-authority)
    and the JSON schema is fetched from GitHub when the mapper is created.
    """

    # The misspelled attribute name is kept as-is; renaming it would change the
    # class's public interface.
    non_repatable_fields = [
        "100",
        "110",
        "111",
        "130",
        "147",
        "148",
        "150",
        "151",
        "155",
        "162",
        "180",
        "181",
        "182",
        "185",
        "378",
        "384",
    ]

    # Leading run of ASCII letters, used as the prefix when looking up the
    # authority source file. Compiled once instead of once per record.
    _NATURAL_ID_PREFIX_PATTERN = re.compile("^[A-Za-z]+")

    def __init__(
        self,
        folio_client,
        library_configuration: LibraryConfiguration,
        task_configuration,
    ):
        """Fetches schema, mapping rules and authority source files

        Args:
            folio_client: Client used for all calls to the tenant and to GitHub
            library_configuration (LibraryConfiguration): Library level configuration
            task_configuration: Configuration of the current task
        """
        super().__init__(
            folio_client,
            library_configuration,
            task_configuration,
            None,
            self.get_authority_json_schema(folio_client, library_configuration),
            Conditions(folio_client, self, "auth", library_configuration.folio_release),
        )
        self.srs_recs: list = []
        logging.info("Fetching mapping rules from the tenant")
        rules_endpoint = "/mapping-rules/marc-authority"
        self.mappings = self.folio_client.folio_get_single_object(rules_endpoint)
        self.source_file_mapping: dict = {}
        self.setup_source_file_mapping()
        self.start = time.time()

    def get_legacy_ids(self, marc_record: Record, idx: int) -> List[str]:
        """Returns the legacy ids of the record, depending on the configured ILS flavour

        Args:
            marc_record (Record): The MARC record to get the ids from
            idx (int): Index of the record in the file

        Raises:
            TransformationProcessError: When the ILS flavour is not handled here

        Returns:
            List[str]: The legacy ids; a single random UUID for IlsFlavour.none
        """
        ils_flavour: IlsFlavour = self.task_configuration.ils_flavour
        if ils_flavour in {IlsFlavour.sierra, IlsFlavour.millennium}:
            raise TransformationProcessError("", f"ILS {ils_flavour} not configured")
        elif ils_flavour == IlsFlavour.tag907y:
            return RulesMapperBase.get_bib_id_from_907y(marc_record, idx)
        elif ils_flavour == IlsFlavour.tagf990a:
            return RulesMapperBase.get_bib_id_from_990a(marc_record, idx)
        elif ils_flavour == IlsFlavour.aleph:
            raise TransformationProcessError("", f"ILS {ils_flavour} not configured")
        elif ils_flavour in {IlsFlavour.voyager, "voyager", IlsFlavour.tag001}:
            return RulesMapperBase.get_bib_id_from_001(marc_record, idx)
        elif ils_flavour == IlsFlavour.koha:
            raise TransformationProcessError("", f"ILS {ils_flavour} not configured")
        elif ils_flavour == IlsFlavour.none:
            return [str(uuid.uuid4())]
        else:
            raise TransformationProcessError("", f"ILS {ils_flavour} not configured")

    def parse_record(
        self, marc_record: pymarc.Record, file_def: FileDefinition, legacy_ids: List[str]
    ) -> list[dict]:
        """Parses an authority record into a FOLIO Authority object
        This is the main function

        Args:
            marc_record (Record): The MARC record to parse; the tags configured in
                tags_to_delete are removed from it
            file_def (FileDefinition): Definition of the file the record came from
            legacy_ids (List[str]): Legacy identifiers of the record

        Returns:
            list[dict]: A list holding the one cleaned FOLIO authority object
        """
        self.print_progress()
        ignored_subsequent_fields: set = set()
        bad_tags = set(self.task_configuration.tags_to_delete)  # "907"
        folio_authority = self.perform_initial_preparation(marc_record, legacy_ids)
        for marc_field in marc_record:
            self.report_marc_stats(marc_field, bad_tags, legacy_ids, ignored_subsequent_fields)
            if marc_field.tag not in ignored_subsequent_fields:
                self.process_marc_field(
                    folio_authority,
                    marc_field,
                    ignored_subsequent_fields,
                    legacy_ids,
                )

        self.perform_additional_parsing(folio_authority)
        # NOTE(review): FOLIONamespaces.instances is passed for an authority
        # record - confirm that this is intended.
        clean_folio_authority = self.validate_required_properties(
            "-".join(legacy_ids), folio_authority, self.schema, FOLIONamespaces.instances
        )
        self.dedupe_rec(clean_folio_authority)
        marc_record.remove_fields(*list(bad_tags))
        self.report_folio_mapping(clean_folio_authority, self.schema)
        return [clean_folio_authority]

    def perform_initial_preparation(self, marc_record: pymarc.Record, legacy_ids):
        """Creates the authority object with its id, naturalId and source file

        Args:
            marc_record (Record): The MARC record; its 035s and leader may be changed
            legacy_ids: Legacy identifiers; the last one is the basis for the id

        Returns:
            dict: The new FOLIO authority object
        """
        folio_authority = {}
        folio_authority["id"] = str(
            FolioUUID(
                self.base_string_for_folio_uuid,
                FOLIONamespaces.authorities,
                str(legacy_ids[-1]),
            )
        )
        HRIDHandler.handle_035_generation(
            marc_record, legacy_ids, self.migration_report, False, False
        )
        self.map_source_file_and_natural_id(marc_record, folio_authority)
        self.handle_leader_17(marc_record, legacy_ids)
        return folio_authority

    def map_source_file_and_natural_id(self, marc_record, folio_authority):
        """Implement source file and natural ID mappings according to MODDICORE-283"""
        match_prefix_patt = self._NATURAL_ID_PREFIX_PATTERN
        natural_id = None
        source_file_id = None
        has_010 = marc_record.get("010")
        if has_010 and (has_010a := has_010.get_subfields("a")):
            # Use the first 010$a whose letter prefix is a known source file code.
            for a_subfield in has_010a:
                natural_id_prefix = match_prefix_patt.match(a_subfield)
                if natural_id_prefix and (
                    source_file := self.source_file_mapping.get(natural_id_prefix.group(0), None)
                ):
                    natural_id = "".join(a_subfield.split())
                    source_file_id = source_file["id"]
                    self.migration_report.add_general_statistics(
                        i18n.t("naturalId mapped from %{fro}", fro="010$a")
                    )
                    self.migration_report.add(
                        "AuthoritySourceFileMapping",
                        f"{source_file['name']} -- {natural_id_prefix.group(0)} -- 010$a",
                        number=1,
                    )
                    break
        if not source_file_id:
            # Fall back to the 001 with all whitespace removed.
            # NOTE(review): assumes the record has a 001 - confirm for ILS flavours
            # that take the legacy id from another field.
            natural_id = "".join(marc_record["001"].data.split())
            self.migration_report.add_general_statistics(
                i18n.t("naturalId mapped from %{fro}", fro="001")
            )
            natural_id_prefix = match_prefix_patt.match(natural_id)
            if natural_id_prefix:
                if source_file := self.source_file_mapping.get(natural_id_prefix.group(0), None):
                    source_file_id = source_file["id"]
                    self.migration_report.add(
                        "AuthoritySourceFileMapping",
                        f"{source_file['name']} -- {natural_id_prefix.group(0)} -- 001",
                        number=1,
                    )
        folio_authority["naturalId"] = natural_id
        if source_file_id:
            folio_authority["sourceFileId"] = source_file_id

    def setup_source_file_mapping(self):
        """Builds the lookup from source file code to authority source file"""
        authority_source_files = self.folio_client.authority_source_files
        if authority_source_files:
            logging.info("%s \tAuthority source files", len(authority_source_files))
            for source_file in authority_source_files:
                for sf_code in source_file.get("codes", []):
                    self.source_file_mapping[sf_code] = source_file

    def handle_leader_17(self, marc_record, legacy_ids):
        """Reports leader position 17 and sets it to 'n' unless it is 'n' or 'o'

        Args:
            marc_record: The MARC record; its leader is replaced when changed
            legacy_ids: Legacy identifiers, used when logging the data issue
        """
        leader_17 = marc_record.leader[17] or "Empty"
        self.migration_report.add(
            "AuthorityEncodingLevel", i18n.t("Original value") + f": {leader_17}"
        )
        if leader_17 not in ["n", "o"]:
            Helper.log_data_issue(
                legacy_ids,
                f"LDR pos. 17 is '{leader_17}'. Is this correct? Value has been changed to 'n'.",
                marc_record.leader,
            )
            marc_record.leader = Leader(f"{marc_record.leader[:17]}n{marc_record.leader[18:]}")
            self.migration_report.add(
                "AuthorityEncodingLevel", i18n.t("Changed %{a} to %{b}", a=leader_17, b="n")
            )

    def perform_additional_parsing(
        self,
        folio_authority: dict,
    ) -> None:
        """Do stuff not easily captured by the mapping rules

        Args:
            folio_authority (dict): The authority object; its source is set to "MARC"
        """
        folio_authority["source"] = "MARC"

    def get_authority_json_schema(self, folio_client: FolioClient, library_configuration):
        """Fetches the JSON Schema for authorities

        Args:
            folio_client (FolioClient): Client used to fetch the schema from GitHub
            library_configuration: Supplies the FOLIO release

        Returns:
            The schema as returned by the client
        """
        # Release names starting with a letter before "p" get the schema from
        # mod-inventory-storage, all others from mod-entities-links.
        if library_configuration.folio_release.name.lower()[0] < "p":
            schema = folio_client.get_from_github(
                "folio-org", "mod-inventory-storage", "/ramls/authorities/authority.json"
            )
        else:
            schema = folio_client.get_from_github(
                "folio-org",
                "mod-entities-links",
                "/src/main/resources/swagger.api/schemas/authority-storage/authorityDto.yaml",
            )
        return schema

    def wrap_up(self):
        """Logs that the mapper is wrapping up."""
        logging.info("Mapper wrapping up")