folio-migration-tools 1.2.1__py3-none-any.whl → 1.9.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- folio_migration_tools/__init__.py +11 -0
- folio_migration_tools/__main__.py +169 -85
- folio_migration_tools/circulation_helper.py +96 -59
- folio_migration_tools/config_file_load.py +66 -0
- folio_migration_tools/custom_dict.py +6 -4
- folio_migration_tools/custom_exceptions.py +21 -19
- folio_migration_tools/extradata_writer.py +46 -0
- folio_migration_tools/folder_structure.py +63 -66
- folio_migration_tools/helper.py +29 -21
- folio_migration_tools/holdings_helper.py +57 -34
- folio_migration_tools/i18n_config.py +9 -0
- folio_migration_tools/library_configuration.py +173 -13
- folio_migration_tools/mapper_base.py +317 -106
- folio_migration_tools/mapping_file_transformation/courses_mapper.py +203 -0
- folio_migration_tools/mapping_file_transformation/holdings_mapper.py +83 -69
- folio_migration_tools/mapping_file_transformation/item_mapper.py +98 -94
- folio_migration_tools/mapping_file_transformation/manual_fee_fines_mapper.py +352 -0
- folio_migration_tools/mapping_file_transformation/mapping_file_mapper_base.py +702 -223
- folio_migration_tools/mapping_file_transformation/notes_mapper.py +90 -0
- folio_migration_tools/mapping_file_transformation/order_mapper.py +492 -0
- folio_migration_tools/mapping_file_transformation/organization_mapper.py +389 -0
- folio_migration_tools/mapping_file_transformation/ref_data_mapping.py +38 -27
- folio_migration_tools/mapping_file_transformation/user_mapper.py +149 -361
- folio_migration_tools/marc_rules_transformation/conditions.py +650 -246
- folio_migration_tools/marc_rules_transformation/holdings_statementsparser.py +292 -130
- folio_migration_tools/marc_rules_transformation/hrid_handler.py +244 -0
- folio_migration_tools/marc_rules_transformation/loc_language_codes.xml +20846 -0
- folio_migration_tools/marc_rules_transformation/marc_file_processor.py +300 -0
- folio_migration_tools/marc_rules_transformation/marc_reader_wrapper.py +136 -0
- folio_migration_tools/marc_rules_transformation/rules_mapper_authorities.py +241 -0
- folio_migration_tools/marc_rules_transformation/rules_mapper_base.py +681 -201
- folio_migration_tools/marc_rules_transformation/rules_mapper_bibs.py +395 -429
- folio_migration_tools/marc_rules_transformation/rules_mapper_holdings.py +531 -100
- folio_migration_tools/migration_report.py +85 -38
- folio_migration_tools/migration_tasks/__init__.py +1 -3
- folio_migration_tools/migration_tasks/authority_transformer.py +119 -0
- folio_migration_tools/migration_tasks/batch_poster.py +911 -198
- folio_migration_tools/migration_tasks/bibs_transformer.py +121 -116
- folio_migration_tools/migration_tasks/courses_migrator.py +192 -0
- folio_migration_tools/migration_tasks/holdings_csv_transformer.py +252 -247
- folio_migration_tools/migration_tasks/holdings_marc_transformer.py +321 -115
- folio_migration_tools/migration_tasks/items_transformer.py +264 -84
- folio_migration_tools/migration_tasks/loans_migrator.py +506 -195
- folio_migration_tools/migration_tasks/manual_fee_fines_transformer.py +187 -0
- folio_migration_tools/migration_tasks/migration_task_base.py +364 -74
- folio_migration_tools/migration_tasks/orders_transformer.py +373 -0
- folio_migration_tools/migration_tasks/organization_transformer.py +451 -0
- folio_migration_tools/migration_tasks/requests_migrator.py +130 -62
- folio_migration_tools/migration_tasks/reserves_migrator.py +253 -0
- folio_migration_tools/migration_tasks/user_transformer.py +180 -139
- folio_migration_tools/task_configuration.py +46 -0
- folio_migration_tools/test_infrastructure/__init__.py +0 -0
- folio_migration_tools/test_infrastructure/mocked_classes.py +406 -0
- folio_migration_tools/transaction_migration/legacy_loan.py +148 -34
- folio_migration_tools/transaction_migration/legacy_request.py +65 -25
- folio_migration_tools/transaction_migration/legacy_reserve.py +47 -0
- folio_migration_tools/transaction_migration/transaction_result.py +12 -1
- folio_migration_tools/translations/en.json +476 -0
- folio_migration_tools-1.9.10.dist-info/METADATA +169 -0
- folio_migration_tools-1.9.10.dist-info/RECORD +67 -0
- {folio_migration_tools-1.2.1.dist-info → folio_migration_tools-1.9.10.dist-info}/WHEEL +1 -2
- folio_migration_tools-1.9.10.dist-info/entry_points.txt +3 -0
- folio_migration_tools/generate_schemas.py +0 -46
- folio_migration_tools/mapping_file_transformation/mapping_file_mapping_base_impl.py +0 -44
- folio_migration_tools/mapping_file_transformation/user_mapper_base.py +0 -212
- folio_migration_tools/marc_rules_transformation/bibs_processor.py +0 -163
- folio_migration_tools/marc_rules_transformation/holdings_processor.py +0 -284
- folio_migration_tools/report_blurbs.py +0 -219
- folio_migration_tools/transaction_migration/legacy_fee_fine.py +0 -36
- folio_migration_tools-1.2.1.dist-info/METADATA +0 -134
- folio_migration_tools-1.2.1.dist-info/RECORD +0 -50
- folio_migration_tools-1.2.1.dist-info/top_level.txt +0 -1
- {folio_migration_tools-1.2.1.dist-info → folio_migration_tools-1.9.10.dist-info/licenses}/LICENSE +0 -0
--- a/folio_migration_tools/marc_rules_transformation/rules_mapper_base.py
+++ b/folio_migration_tools/marc_rules_transformation/rules_mapper_base.py
@@ -2,22 +2,31 @@ import datetime
 import json
 import logging
 import time
+import urllib.parse
 import uuid
+from abc import abstractmethod
 from textwrap import wrap
+from typing import Dict, List, Tuple
 
+import i18n
 import pymarc
+from dateutil.parser import parse
 from folio_uuid.folio_uuid import FOLIONamespaces, FolioUUID
 from folioclient import FolioClient
+from pymarc import Field, Optional, Record, Subfield
+
 from folio_migration_tools.custom_exceptions import (
     TransformationFieldMappingError,
     TransformationProcessError,
     TransformationRecordFailedError,
 )
 from folio_migration_tools.helper import Helper
-from folio_migration_tools.library_configuration import
+from folio_migration_tools.library_configuration import (
+    FileDefinition,
+    LibraryConfiguration,
+)
 from folio_migration_tools.mapper_base import MapperBase
-from folio_migration_tools.
-from pymarc import Field, Record, Leader
+from folio_migration_tools.marc_rules_transformation.hrid_handler import HRIDHandler
 
 
 class RulesMapperBase(MapperBase):
@@ -25,23 +34,37 @@ class RulesMapperBase(MapperBase):
         self,
         folio_client: FolioClient,
         library_configuration: LibraryConfiguration,
+        task_configuration,
+        statistical_codes_map: Optional[Dict],
+        schema: dict,
         conditions=None,
+        parent_id_map: dict[str, tuple] = None,
     ):
-        super().__init__(library_configuration, folio_client)
+        super().__init__(library_configuration, task_configuration, folio_client, parent_id_map)
         self.parsed_records = 0
+        self.id_map: dict[str, tuple] = {}
         self.start = time.time()
         self.last_batch_time = time.time()
         self.folio_client: FolioClient = folio_client
-        self.
-        self.instance_json_schema = self.get_instance_schema()
-        self.schema = {}
+        self.schema: dict = schema
         self.conditions = conditions
         self.item_json_schema = ""
-        self.mappings = {}
+        self.mappings: dict = {}
         self.schema_properties = None
+        self.create_source_records = all(
+            [self.task_configuration.create_source_records, (not getattr(self.task_configuration, "data_import_marc", False))]
+        )
+        if hasattr(self.task_configuration, "hrid_handling"):
+            self.hrid_handler = HRIDHandler(
+                folio_client,
+                self.task_configuration.hrid_handling,
+                self.migration_report,
+                self.task_configuration.deactivate035_from001,
+            )
+
+        self.setup_statistical_codes_map(statistical_codes_map)
         logging.info("Current user id is %s", self.folio_client.current_user)
 
-    # TODO: Rebuild and move
     def print_progress(self):
         self.parsed_records += 1
         num_recs = 5000
@@ -56,11 +79,17 @@ class RulesMapperBase(MapperBase):
             )
             self.last_batch_time = time.time()
 
+    @abstractmethod
+    def get_legacy_ids(self, marc_record: Record, idx: int):
+        raise NotImplementedError()
+
     @staticmethod
-    def dedupe_rec(rec):
+    def dedupe_rec(rec, props_to_not_dedupe=None):
+        if props_to_not_dedupe is None:
+            props_to_not_dedupe = []
         # remove duplicates
         for key, value in rec.items():
-            if isinstance(value, list):
+            if key not in props_to_not_dedupe and isinstance(value, list):
                 res = []
                 for v in value:
                     if v not in res:
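
The dedupe_rec change above adds an opt-out list: list-valued properties named in props_to_not_dedupe keep their duplicates. A minimal standalone sketch of that behavior (the record content is invented for illustration):

    def dedupe_rec(rec, props_to_not_dedupe=None):
        # Drop duplicate entries from every list-valued property,
        # except properties explicitly excluded from deduplication.
        if props_to_not_dedupe is None:
            props_to_not_dedupe = []
        for key, value in rec.items():
            if key not in props_to_not_dedupe and isinstance(value, list):
                res = []
                for v in value:
                    if v not in res:
                        res.append(v)
                rec[key] = res

    record = {"languages": ["eng", "eng", "ger"], "notes": ["dup", "dup"]}
    dedupe_rec(record, props_to_not_dedupe=["notes"])
    assert record == {"languages": ["eng", "ger"], "notes": ["dup", "dup"]}
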
@@ -71,27 +100,25 @@ class RulesMapperBase(MapperBase):
         self, marc_field: pymarc.Field, mappings, folio_record, legacy_ids
     ):
         for mapping in mappings:
-
-
-                    mapping, marc_field, folio_record, legacy_ids
-
-
-
-
-
-
-
-
+            try:
+                if "entity" not in mapping:
+                    self.handle_normal_mapping(mapping, marc_field, folio_record, legacy_ids)
+                else:
+                    self.handle_entity_mapping(
+                        marc_field,
+                        mapping,
+                        folio_record,
+                        legacy_ids,
+                    )
+            except TransformationFieldMappingError as tre:
+                tre.log_it()
 
-    def handle_normal_mapping(
-        self, mapping, marc_field: pymarc.Field, folio_record, legacy_ids
-    ):
+    def handle_normal_mapping(self, mapping, marc_field: pymarc.Field, folio_record, legacy_ids):
         target = mapping["target"]
         if mapping.get("ignoreSubsequentSubfields", False):
             marc_field = self.remove_repeated_subfields(marc_field)
         if has_conditions(mapping):
             values = self.apply_rules(marc_field, mapping, legacy_ids)
-            # TODO: add condition to customize this hardcoded thing
             if marc_field.tag == "655":
                 values[0] = f"Genre: {values[0]}"
             self.add_value_to_target(folio_record, target, values)
@@ -108,7 +135,10 @@ class RulesMapperBase(MapperBase):
         # Adding stuff without rules/Conditions.
         # Might need more complex mapping for arrays etc
         if any(mapping["subfield"]):
-
+            values = self.handle_sub_field_delimiters(
+                ",".join(legacy_ids), mapping, marc_field
+            )
+            value = " ".join(values)
         else:
             value = marc_field.format_field() if marc_field else ""
         self.add_value_to_target(folio_record, target, [value])
@@ -118,7 +148,8 @@ class RulesMapperBase(MapperBase):
         try:
             f005 = marc_record["005"].data[:14]
             parsed_date = datetime.datetime.strptime(f005, "%Y%m%d%H%M%S").isoformat()
-
+            if "metadata" in folio_object:
+                folio_object["metadata"]["updatedDate"] = parsed_date
         except Exception as exception:
             if "005" in marc_record:
                 Helper.log_data_issue(
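
The 005 timestamp follows the standard MARC layout YYYYMMDDHHMMSS (the hunk truncates to 14 characters to drop any fractional seconds), and the parsed value is now only written when the record already carries a metadata block. A quick, self-contained illustration of the parse (the 005 value is made up):

    import datetime

    f005 = "20230712123456.0"[:14]  # hypothetical 005 control field data
    parsed_date = datetime.datetime.strptime(f005, "%Y%m%d%H%M%S").isoformat()
    assert parsed_date == "2023-07-12T12:34:56"
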
@@ -127,6 +158,12 @@ class RulesMapperBase(MapperBase):
                     marc_record["005"].data,
                 )
 
+    @abstractmethod
+    def parse_record(
+        self, marc_record: Record, file_def: FileDefinition, legacy_ids: List[str]
+    ) -> list[dict]:
+        raise NotImplementedError()
+
     @staticmethod
     def use_008_for_dates(marc_record: Record, folio_object: dict, legacy_ids):
         try:
@@ -143,6 +180,56 @@ class RulesMapperBase(MapperBase):
                     marc_record["008"].data,
                 )
 
+    def handle_sub_field_delimiters(
+        self,
+        legacy_id: str,
+        mapping,
+        marc_field: pymarc.Field,
+        condition_types: List[str] = None,
+        parameter: dict = None,
+    ):
+        values: List[str] = []
+        if mapping.get("subfield") and (custom_delimiters := mapping.get("subFieldDelimiter")):
+            delimiter_map = {sub_f: " " for sub_f in mapping.get("subfield")}
+            for custom_delimiter in custom_delimiters:
+                delimiter_map.update(
+                    {sub_f: custom_delimiter["value"] for sub_f in custom_delimiter["subfields"]}
+                )
+            custom_delimited_strings: List[Tuple[str, List[str]]] = []
+            subfields = mapping.get("subfield")
+            for custom_delimiter in custom_delimiters:
+                subfields_for_delimiter = [
+                    sub_f
+                    for sub_f in subfields
+                    if custom_delimiter["subfields"]
+                    and delimiter_map[sub_f] == custom_delimiter["value"]
+                ]
+                subfield_collection: Tuple[str, List[str]] = (custom_delimiter["value"], [])
+                subfield_collection[1].extend(marc_field.get_subfields(*subfields_for_delimiter))
+                custom_delimited_strings.append(subfield_collection)
+            for custom_delimited_string in custom_delimited_strings:
+                if mapping.get("applyRulesOnConcatenatedData", ""):
+                    values.extend(custom_delimited_string[1])
+                else:
+                    values.extend(
+                        dict.fromkeys(
+                            [
+                                self.apply_rule(
+                                    legacy_id,
+                                    x,
+                                    condition_types or [],
+                                    marc_field,
+                                    parameter or {},
+                                )
+                                for x in custom_delimited_string[1]
+                            ]
+                        )
+                    )
+                values = [custom_delimited_string[0].join(values)]
+        elif mapping.get("subfield", []):
+            values.extend(marc_field.get_subfields(*mapping["subfield"]))
+        return values
+
     def get_value_from_condition(
         self,
         legacy_id,
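
handle_sub_field_delimiters lets a mapping rule carry a subFieldDelimiter list that assigns a join string per subfield code; codes without an explicit entry fall back to a single space. A sketch of just the delimiter-map construction, using an invented mapping fragment:

    # Hypothetical mapping rule: join $a/$b values with "; ", leave $c on the default.
    mapping = {
        "subfield": ["a", "b", "c"],
        "subFieldDelimiter": [{"value": "; ", "subfields": ["a", "b"]}],
    }
    delimiter_map = {sub_f: " " for sub_f in mapping["subfield"]}
    for custom_delimiter in mapping["subFieldDelimiter"]:
        delimiter_map.update({sf: custom_delimiter["value"] for sf in custom_delimiter["subfields"]})
    assert delimiter_map == {"a": "; ", "b": "; ", "c": " "}
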
@@ -152,32 +239,136 @@ class RulesMapperBase(MapperBase):
         stripped_conds = mapping["rules"][0]["conditions"][0]["type"].split(",")
         condition_types = list(map(str.strip, stripped_conds))
         parameter = mapping["rules"][0]["conditions"][0].get("parameter", {})
-
-
-
-
+        values: List[str] = []
+        if mapping.get("subfield"):
+            values.extend(
+                self.handle_sub_field_delimiters(
+                    legacy_id, mapping, marc_field, condition_types, parameter
+                )
+            )
+        else:
+            values.append(marc_field.format_field() if marc_field else "")
+
+        if not mapping.get("applyRulesOnConcatenatedData", "") and mapping.get("subfield", []):
+            return " ".join(
+                dict.fromkeys(
+                    [
+                        self.apply_rule(legacy_id, x, condition_types, marc_field, parameter)
+                        for x in values
+                    ]
+                )
             )
-        elif mapping.get("subfield", []):
-            subfields = marc_field.get_subfields(*mapping["subfield"])
-            x = [
-                self.apply_rule(legacy_id, x, condition_types, marc_field, parameter)
-                for x in subfields
-            ]
-            return " ".join(set(x))
         else:
-            value1 = marc_field.format_field() if marc_field else ""
             return self.apply_rule(
-                legacy_id,
+                legacy_id, " ".join(values), condition_types, marc_field, parameter
+            )
+
+    def process_marc_field(
+        self,
+        folio_record: dict,
+        marc_field: Field,
+        ignored_subsequent_fields,
+        legacy_ids,
+    ):
+        if marc_field.tag == "880":
+            mappings = self.perform_proxy_mapping(marc_field)
+        else:
+            tags_to_ignore = {"880", "001", "008"}
+            mappings = (
+                self.mappings.get(marc_field.tag, {})
+                if marc_field.tag not in tags_to_ignore
+                else []
+            )
+        if mappings:
+            try:
+                self.map_field_according_to_mapping(marc_field, mappings, folio_record, legacy_ids)
+                if any(m.get("ignoreSubsequentFields", False) for m in mappings):
+                    ignored_subsequent_fields.add(marc_field.tag)
+            except Exception as ee:
+                logging.error(
+                    "map_field_according_to_mapping %s %s %s",
+                    marc_field.tag,
+                    marc_field.format_field(),
+                    json.dumps(mappings),
+                )
+                raise ee
+
+    def perform_proxy_mapping(self, marc_field):
+        proxy_mapping = next(iter(self.mappings.get("880", [])), [])
+        if "6" not in marc_field:
+            self.migration_report.add("Field880Mappings", i18n.t("Records without $6"))
+            return None
+        if not proxy_mapping or not proxy_mapping.get("fieldReplacementBy3Digits", False):
+            return None
+        if not marc_field["6"][:3] or len(marc_field["6"][:3]) != 3:
+            self.migration_report.add(
+                "Field880Mappings", i18n.t("Records with unexpected length in $6")
+            )
+            return None
+        first_three = marc_field["6"][:3]
+
+        target_field = next(
+            (
+                r.get("targetField", "")
+                for r in proxy_mapping.get("fieldReplacementRule", [])
+                if r["sourceDigits"] == first_three
+            ),
+            first_three,
+        )
+        self.migration_report.add(
+            "Field880Mappings",
+            i18n.t("Source digits")
+            + f": {marc_field['6']} "
+            + i18n.t("Target field")
+            + f": {target_field}",
+        )
+        mappings = self.mappings.get(target_field, {})
+        if not mappings:
+            self.migration_report.add(
+                "Field880Mappings",
+                i18n.t("Mapping not set up for target field")
+                + f": {target_field} ({marc_field['6']})",
             )
+        return mappings
+
+    def report_marc_stats(
+        self, marc_field: Field, bad_tags, legacy_ids, ignored_subsequent_fields
+    ):
+        self.migration_report.add("Trivia", i18n.t("Total number of Tags processed"))
+        self.report_source_and_links(marc_field)
+        self.report_bad_tags(marc_field, bad_tags, legacy_ids)
+        mapped = marc_field.tag in self.mappings
+        if marc_field.tag in ignored_subsequent_fields:
+            mapped = False
+        self.report_legacy_mapping(marc_field.tag, True, mapped)
+
+    def report_source_and_links(self, marc_field: Field):
+        if marc_field.is_control_field():
+            return
+        for subfield_2 in marc_field.get_subfields("2"):
+            self.migration_report.add(
+                "AuthoritySources",
+                i18n.t("Source of heading or term") + f": {subfield_2.split(' ')[0]}",
+            )
+        for subfield_0 in marc_field.get_subfields("0"):
+            code = ""
+            if "(" in subfield_0 and ")" in subfield_0:
+                code = subfield_0[subfield_0.find("(") + 1 : subfield_0.find(")")]
+                code = code.split(" ")[0]
+            elif url := urllib.parse.urlparse(subfield_0):
+                if url.hostname:
+                    code = subfield_0[: subfield_0.find(url.path)]
+            if code:
+                self.migration_report.add(
+                    "AuthoritySources", i18n.t("$0 base uri or source code") + f": {code}"
+                )
 
     def apply_rules(self, marc_field: pymarc.Field, mapping, legacy_ids):
         try:
             values = []
             value = ""
             if has_conditions(mapping):
-                value = self.get_value_from_condition(
-                    ",".join(legacy_ids), mapping, marc_field
-                )
+                value = self.get_value_from_condition(",".join(legacy_ids), mapping, marc_field)
             elif has_value_to_add(mapping):
                 value = mapping["rules"][0]["value"]
                 if value == "false":
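
perform_proxy_mapping resolves 880 (alternate graphic representation) fields through linking subfield $6, whose first three characters name the linked tag (e.g. "245-01" links to 245); an optional fieldReplacementRule can redirect those digits to another target. The lookup reduces to this sketch (the rule content is invented):

    def resolve_880_target(subfield_6: str, proxy_mapping: dict) -> str:
        # First three characters of $6 pick the linked tag; a replacement
        # rule may redirect those source digits to a different target field.
        first_three = subfield_6[:3]
        return next(
            (
                r.get("targetField", "")
                for r in proxy_mapping.get("fieldReplacementRule", [])
                if r["sourceDigits"] == first_three
            ),
            first_three,
        )

    proxy = {"fieldReplacementRule": [{"sourceDigits": "100", "targetField": "700"}]}
    assert resolve_880_target("100-02", proxy) == "700"
    assert resolve_880_target("245-01", proxy) == "245"
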
@@ -186,31 +377,44 @@ class RulesMapperBase(MapperBase):
                 return [True]
             else:
                 return [value]
-            elif not mapping.get("rules", []) or not mapping["rules"][0].get(
-
-
-
+            elif not mapping.get("rules", []) or not mapping["rules"][0].get("conditions", []):
+                values = self.handle_sub_field_delimiters(
+                    ",".join(legacy_ids), mapping, marc_field
+                )
+                value = " ".join(values)
             values = wrap(value, 3) if mapping.get("subFieldSplit", "") else [value]
             return values
         except TransformationProcessError as trpe:
             self.handle_transformation_process_error(self.parsed_records, trpe)
         except TransformationFieldMappingError as fme:
-            self.migration_report.add(
-            fme.data_value =
+            self.migration_report.add("FieldMappingErrors", fme.message)
+            fme.data_value = (
+                f"{fme.data_value} MARCField: {marc_field} Mapping: {json.dumps(mapping)}"
+            )
             fme.log_it()
             return []
         except TransformationRecordFailedError as trfe:
             trfe.data_value = (
-                f"{trfe.data_value} MARCField: {marc_field} "
-                f"Mapping: {json.dumps(mapping)}"
+                f"{trfe.data_value} MARCField: {marc_field} Mapping: {json.dumps(mapping)}"
             )
             trfe.log_it()
             self.migration_report.add_general_statistics(
-                "Records failed due to an error. See data issues log for details"
+                i18n.t("Records failed due to an error. See data issues log for details")
             )
         except Exception as exception:
             self.handle_generic_exception(self.parsed_records, exception)
 
+    def report_bad_tags(self, marc_field, bad_tags, legacy_ids):
+        if (
+            (not marc_field.tag.isnumeric())
+            and marc_field.tag != "LDR"
+            and marc_field.tag not in bad_tags
+        ):
+            self.migration_report.add("NonNumericTagsInRecord", marc_field.tag)
+            message = "Non-numeric tags in records"
+            Helper.log_data_issue(legacy_ids, message, marc_field.tag)
+            bad_tags.add(marc_field.tag)
+
     def add_value_to_target(self, rec, target_string, value):
         if not value:
             return
@@ -227,9 +431,7 @@ class RulesMapperBase(MapperBase):
                 sc_prop = sc_prop[target]  # set current property
             else:  # next level. take the properties from the items
                 sc_prop = schema_parent["items"]["properties"][target]
-            if
-                target not in rec and not schema_parent
-            ):  # have we added this already?
+            if target not in rec and not schema_parent:  # have we added this already?
                 if is_array_of_strings(sc_prop):
                     rec[target] = []
                     # break
@@ -284,12 +486,29 @@ class RulesMapperBase(MapperBase):
 
     def add_value_to_first_level_target(self, rec, target_string, value):
         sch = self.schema["properties"]
-
+        if (
+            self.task_configuration.migration_task_type == "BibsTransformer"
+            and self.task_configuration.parse_cataloged_date
+            and target_string == "catalogedDate"
+        ):
+            try:
+                value = [str(parse(value[0], fuzzy=True).date())]
+            except Exception as ee:
+                Helper.log_data_issue("", f"Could not parse catalogedDate: {ee}", value)
+                self.migration_report.add(
+                    "FieldMappingErrors", i18n.t("Could not parse catalogedDate")
+                )
         if not target_string or target_string not in sch:
-            raise
+            raise TransformationFieldMappingError(
+                "",
+                i18n.t("Target string '%{string}' not in Schema!", string=target_string)
+                + i18n.t("Check mapping file against the schema.")
+                + " "
+                + i18n.t("Target type")
+                + f": {sch.get(target_string,{}).get('type','')} "
+                + i18n.t("Value")
+                + f": {value}",
                 "",
-            f"Target string {target_string} not in Schema! Check mapping file against the schema."
-            f"Target type: {sch.get(target_string,{}).get('type','')} Value: {value}",
             )
 
         target_field = sch.get(target_string, {})
@@ -297,7 +516,6 @@ class RulesMapperBase(MapperBase):
             target_field.get("type", "") == "array"
             and target_field.get("items", {}).get("type", "") == "string"
         ):
-
             if target_string not in rec:
                 rec[target_string] = value
             else:
@@ -309,22 +527,81 @@ class RulesMapperBase(MapperBase):
         else:
             raise TransformationProcessError(
                 "",
-
+                (
+                    f"Edge! Target string: {target_string} "
+                    f"Target type: {sch.get(target_string,{}).get('type','')} Value: {value}"
+                ),
             )
 
+    def remove_from_id_map(self, former_ids: List[str]):
+        """removes the ID from the map in case parsing failed
+
+        Args:
+            former_ids (_type_): _description_
+        """
+        for former_id in [id for id in former_ids if id]:
+            if former_id in self.id_map:
+                del self.id_map[former_id]
+
     def create_entity(
-        self, entity_mappings, marc_field, entity_parent_key, index_or_legacy_id
+        self, entity_mappings, marc_field: Field, entity_parent_key, index_or_legacy_id
     ):
         entity = {}
+        parent_schema_prop = self.schema.get("properties", {}).get(entity_parent_key, {})
+        if parent_schema_prop.get("type", "") == "array":
+            req_entity_props = parent_schema_prop.get("items", {}).get("required", [])
+        elif parent_schema_prop.get("type", "") == "object":
+            req_entity_props = parent_schema_prop.get("required", [])
+        else:
+            req_entity_props = []
         for entity_mapping in entity_mappings:
             k = entity_mapping["target"].split(".")[-1]
-            if
-                marc_field,
-
-
-
+            if k == "authorityId" and (legacy_subfield_9 := marc_field.get("9")):
+                marc_field.add_subfield("0", legacy_subfield_9)
+                marc_field.delete_subfield("9")
+            if k == "authorityId" and (entity_subfields := entity_mapping.get("subfield", [])):
+                for subfield in entity_subfields:
+                    if subfield != "9":
+                        Helper.log_data_issue(
+                            index_or_legacy_id,
+                            f"authorityId mapping from ${subfield} is not supported. Data Import will fail. "
+                            "Use only $9 for authority id mapping in MARC-to-Instance mapping rules.",
+                            marc_field,
+                        )
+                entity_mapping["subfield"] = ["9"]
+            if my_values := [
+                v
+                for v in self.apply_rules(marc_field, entity_mapping, index_or_legacy_id)
+                if v != ""
+            ]:
+                if entity_parent_key != k:
+                    entity[k] = my_values[0]
                 else:
-                    entity
+                    entity = my_values[0]
+            elif "alternativeMapping" in entity_mapping:
+                alt_mapping = entity_mapping["alternativeMapping"]
+                alt_k = alt_mapping["target"].split(".")[-1]
+                if alt_values := [
+                    v
+                    for v in self.apply_rules(marc_field, alt_mapping, index_or_legacy_id)
+                    if v != ""
+                ]:
+                    if entity_parent_key != alt_k:
+                        entity[alt_k] = alt_values[0]
+                    else:
+                        entity = alt_values[0]
+        missing_required_props = [
+            req_entity_prop
+            for req_entity_prop in req_entity_props
+            if req_entity_prop not in entity
+        ]
+        if any(missing_required_props):
+            entity = {}
+            Helper.log_data_issue(
+                index_or_legacy_id,
+                f"Missing one or more required property in entity {entity_parent_key} ({missing_required_props})",
+                marc_field,
+            )
         return entity
 
     def handle_entity_mapping(
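
create_entity now reads the required property names for the entity's parent from the record schema and discards the whole entity when any are missing, logging a data issue instead. The pruning logic in isolation (schema fragment and entity are invented):

    schema = {
        "properties": {
            "electronicAccess": {"type": "array", "items": {"required": ["uri"]}},
        }
    }
    parent_prop = schema["properties"]["electronicAccess"]
    req_entity_props = parent_prop.get("items", {}).get("required", [])

    entity = {"linkText": "Online resource"}  # mapped entity missing the required "uri"
    missing = [p for p in req_entity_props if p not in entity]
    if missing:
        entity = {}  # dropped; the real code also logs the data issue
    assert entity == {}
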
@@ -335,96 +612,111 @@ class RulesMapperBase(MapperBase):
         legacy_ids,
     ):
         entity_mapping = mapping["entity"]
-
-
-
-
-
-
-
-
+        if entity_indicators_match(entity_mapping, marc_field):
+            entity_mapping = [x for x in entity_mapping if "indicators" not in x]
+            e_parent = entity_mapping[0]["target"].split(".")[0]
+            if mapping.get("entityPerRepeatedSubfield", False):
+                for temp_field in self.grouped(marc_field):
+                    entity = self.create_entity(entity_mapping, temp_field, e_parent, legacy_ids)
+                    if entity and (
+                        (isinstance(entity, dict) and all(entity.values()))
+                        or (isinstance(entity, list) and all(entity))
+                    ):
+                        self.add_entity_to_record(entity, e_parent, folio_record, self.schema)
+            else:
+                if mapping.get("ignoreSubsequentSubfields", False):
+                    marc_field = self.remove_repeated_subfields(marc_field)
+                entity = self.create_entity(entity_mapping, marc_field, e_parent, legacy_ids)
+                if e_parent in ["precedingTitles", "succeedingTitles"]:
+                    self.create_preceding_succeeding_titles(
+                        entity, e_parent, folio_record["id"], marc_field
+                    )
+                elif entity and (
+                    all(
+                        v
+                        for k, v in entity.items()
+                        if k not in ["staffOnly", "primary", "isbnValue", "issnValue"]
+                    )
+                    or e_parent in ["electronicAccess", "publication"]
+                    or (
+                        e_parent.startswith("holdingsStatements") and any(v for k, v in entity.items())
+                    )
                 ):
-            self.add_entity_to_record(
-
+                    self.add_entity_to_record(entity, e_parent, folio_record, self.schema)
+                else:
+                    sfs = " - ".join(
+                        f"{f[0]}:{('has_value' if f[1].strip() else 'empty')}" for f in marc_field
                     )
-
-
-
-
-
+                    pattern = " - ".join(f"{k}:'{bool(v)}'" for k, v in entity.items())
+                    self.migration_report.add(
+                        "IncompleteEntityMapping",
+                        f"{marc_field.tag} {sfs} ->>-->> {e_parent} {pattern} ",
+                    )
+                    # Experimental
+                    # self.add_entity_to_record(entity, e_parent, rec, self.schema)
+
+    def handle_suppression(
+        self, folio_record, file_def: FileDefinition, only_discovery_suppress: bool = False
+    ):
+        folio_record["discoverySuppress"] = file_def.discovery_suppressed
+        self.migration_report.add(
+            "Suppression",
+            i18n.t("Suppressed from discovery") + f' = {folio_record["discoverySuppress"]}',
+        )
+        if not only_discovery_suppress:
+            folio_record["staffSuppress"] = file_def.staff_suppressed
+            self.migration_report.add(
+                "Suppression", i18n.t("Staff suppressed") + f' = {folio_record["staffSuppress"]} '
             )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            or e_parent in ["electronicAccess", "publication"]
-            or (
-                e_parent.startswith("holdingsStatements")
-                and any(v for k, v in entity.items())
-            )
-        ):
-            self.add_entity_to_record(entity, e_parent, folio_record, self.schema)
+
+    def create_preceding_succeeding_titles(
+        self, entity, e_parent: str, identifier: str, marc_field: pymarc.Field
+    ):
+        if title := entity.get("title"):
+            self.migration_report.add(
+                "PrecedingSuccedingTitles", f"{e_parent} " + i18n.t("created")
+            )
+            # TODO: Make these uuids deterministic
+            new_entity = {
+                "id": str(uuid.uuid4()),
+                "title": title,
+                "identifiers": [],
+            }
+            if e_parent == "precedingTitles":
+                new_entity["succeedingInstanceId"] = identifier
             else:
-
-
-
+                new_entity["precedingInstanceId"] = identifier
+            if new_entity.get("isbnValue", ""):
+                new_entity["identifiers"].append(
+                    {
+                        "identifierTypeId": new_entity.get("isbnId"),
+                        "value": new_entity.get("isbnValue"),
+                    }
                 )
-
-
-
-
+            if new_entity.get("issnValue", ""):
+                new_entity["identifiers"].append(
+                    {
+                        "identifierTypeId": new_entity.get("issnId"),
+                        "value": new_entity.get("issnValue"),
+                    }
                 )
-
-            # self.add_entity_to_record(entity, e_parent, rec, self.schema)
-
-    def create_preceding_succeeding_titles(self, entity, e_parent, identifier):
-        self.migration_report.add(
-            Blurbs.PrecedingSuccedingTitles, f"{e_parent} created"
-        )
-        # TODO: Make these uuids deterministic
-        new_entity = {
-            "id": str(uuid.uuid4()),
-            "title": entity.get("title"),
-            "identifiers": [],
-        }
-        if e_parent == "precedingTitles":
-            new_entity["succeedingInstanceId"] = identifier
+            self.extradata_writer.write(e_parent, new_entity)
         else:
-
-
-            new_entity["identifiers"].append(
-                {
-                    "identifierTypeId": new_entity.get("isbnId"),
-                    "value": new_entity.get("isbnValue"),
-                }
+            Helper.log_data_issue(
+                identifier, f"Unable to create {e_parent} entity. Missing title.", marc_field
             )
-        if new_entity.get("issnValue", ""):
-            new_entity["identifiers"].append(
-                {
-                    "identifierTypeId": new_entity.get("issnId"),
-                    "value": new_entity.get("issnValue"),
-                }
-            )
-            logging.log(25, f"{e_parent}\t{json.dumps(new_entity)}")
 
     def apply_rule(self, legacy_id, value, condition_types, marc_field, parameter):
         v = value
         for condition_type in iter(condition_types):
-
-
-
+            try:
+                v = self.conditions.get_condition(
+                    condition_type, legacy_id, v, parameter, marc_field
+                )
+            except AttributeError as attr_error:
+                raise TransformationProcessError(
+                    legacy_id, attr_error, condition_type
+                ) from attr_error
         return v
 
     @staticmethod
@@ -439,38 +731,28 @@ class RulesMapperBase(MapperBase):
             rec[entity_parent_key] = entity
 
     @staticmethod
-    def
-
-
-            "folio-org", "mod-inventory-storage", "ramls/instance.json"
-        )
-        logging.info("done")
-        return instance_schema
+    def grouped(marc_field: Field):
+        """Groups the subfields
+        s -> (s0,s1,s2,...sn-1), (sn,sn+1,sn+2,...s2n-1), (s2n,s2n+1,s2n+2,...s3n-1), ...
 
-    @staticmethod
-    def fetch_holdings_schema():
-        logging.info("Fetching HoldingsRecord schema...")
-        holdings_record_schema = FolioClient.get_latest_from_github(
-            "folio-org", "mod-inventory-storage", "ramls/holdingsrecord.json"
-        )
-        logging.info("done")
-        return holdings_record_schema
 
-
-
-
-
-
-
+        Args:
+            marc_field (Field): _description_
+
+        Returns:
+            _type_: _description_
+        """
+        unique_subfields: list = []
+        repeated_subfields: list = []
+        results = []
         for sf, sf_vals in marc_field.subfields_as_dict().items():
             if len(sf_vals) == 1:
-                unique_subfields.
+                unique_subfields.append(Subfield(code=sf, value=sf_vals[0]))
             else:
-                for sf_val in sf_vals
-                    repeated_subfields.append([sf, sf_val])
+                repeated_subfields.extend([Subfield(code=sf, value=sf_val) for sf_val in sf_vals])
         if any(repeated_subfields):
             for repeated_subfield in repeated_subfields:
-                new_subfields = [repeated_subfield
+                new_subfields = [repeated_subfield]
                 new_subfields.extend(unique_subfields)
                 temp_field = Field(
                     tag=marc_field.tag,
@@ -489,10 +771,18 @@ class RulesMapperBase(MapperBase):
 
     @staticmethod
     def remove_repeated_subfields(marc_field: Field):
-        "
+        """Removes repeated subfields
+        s -> (s0,s1,s2,...sn-1), (sn,sn+1,sn+2,...s2n-1), (s2n,s2n+1,s2n+2,...s3n-1), ...
+
+        Args:
+            marc_field (Field): _description_
+
+        Returns:
+            _type_: _description_
+        """
         new_subfields = []
         for sf, sf_vals in marc_field.subfields_as_dict().items():
-            new_subfields.extend([sf, sf_vals[0]])
+            new_subfields.extend([Subfield(code=sf, value=sf_vals[0])])
         return Field(
             tag=marc_field.tag,
             indicators=marc_field.indicators,
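
grouped and remove_repeated_subfields are two strategies for fields with repeated subfield codes: grouped yields one temporary Field per repeated value, each combined with all the non-repeated subfields, while remove_repeated_subfields keeps only the first value of each code. With pymarc 5 style Subfield tuples (the field content is invented):

    from pymarc import Field, Subfield

    field = Field(
        tag="866",
        indicators=[" ", " "],
        subfields=[
            Subfield(code="a", value="v.1"),
            Subfield(code="a", value="v.2"),
            Subfield(code="z", value="note"),
        ],
    )
    assert field.subfields_as_dict() == {"a": ["v.1", "v.2"], "z": ["note"]}
    # grouped() would yield two 866 fields: ($a v.1, $z note) and ($a v.2, $z note);
    # remove_repeated_subfields() keeps a single field: ($a v.1, $z note).
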
@@ -500,90 +790,250 @@ class RulesMapperBase(MapperBase):
         )
 
     @staticmethod
+    def save_data_import_marc_record(
+        data_import_marc_file,
+        record_type: FOLIONamespaces,
+        marc_record: Record,
+        folio_record,
+    ):
+        """Saves the source marc_record to a file to be loaded via Data Import
+
+        Args:
+            srs_records_file (_type_): _description_
+            record_type (FOLIONamespaces): _description_
+            folio_client (FolioClient): _description_
+            marc_record (Record): _description_
+            folio_record (_type_): _description_
+            legacy_ids (List[str]): _description_
+            suppress (bool): _description_
+        """
+        marc_record.add_ordered_field(
+            Field(
+                tag="999",
+                indicators=["f", "f"],
+                subfields=[
+                    Subfield(code="i", value=folio_record["id"]),
+                ],
+            )
+        )
+        # Since they all should be UTF encoded, make the leader align.
+        try:
+            marc_record.leader[9] = "a"
+        except Exception as ee:
+            logging.exception(
+                "Something is wrong with the marc record's leader: %s, %s", marc_record.leader, ee
+            )
+        data_import_marc_file.write(marc_record.as_marc())
+
+
+    def map_statistical_codes(
+        self,
+        folio_record: dict,
+        file_def: FileDefinition,
+        marc_record: Record,
+    ):
+        """Map statistical codes to FOLIO instance
+
+        This method first calls the base class method to map statistical codes
+        from the file_def. Then, it checks to see if there are any MARC field
+        mappings defined in the task configuration. If so, it creates a list
+        of lists where the first element is the MARC field tag, and the remaining
+        elements are the subfields to be used for mapping. It then iterates
+        through the MARC fields, retrieves the values based on the subfields.
+        Finally, it adds the mapped codes to the folio_record's statisticalCodeIds.
+
+        Args:
+            legacy_ids (List[str]): The legacy IDs of the folio record
+            folio_record (dict): The Dictionary representation of the FOLIO record
+            marc_record (Record): The pymarc Record object
+            file_def (FileDefinition): The file definition object from which marc_record was read
+        """
+        super().map_statistical_codes(folio_record, file_def)
+        if self.task_configuration.statistical_code_mapping_fields:
+            stat_code_marc_fields = []
+            for mapping in self.task_configuration.statistical_code_mapping_fields:
+                stat_code_marc_fields.append(mapping.split("$"))
+            for field_map in stat_code_marc_fields:
+                mapped_codes = self.map_stat_codes_from_marc_field(field_map, marc_record, self.library_configuration.multi_field_delimiter)
+                folio_record['statisticalCodeIds'] = folio_record.get("statisticalCodeIds", []) + mapped_codes
+
+    @staticmethod
+    def map_stat_codes_from_marc_field(field_map: List[str], marc_record: Record, multi_field_delimiter: str="<delimiter>") -> List[str]:
+        """Map statistical codes from MARC field to FOLIO instance.
+
+        This function extracts statistical codes from a MARC field based on the provided field map.
+        It supports multiple subfields and uses a delimiter to handle concatenated values.
+
+        Args:
+            field_map (List[str]): A list where the first element is the MARC field tag, and the remaining elements are subfields to extract values from.
+            marc_record (Record): The MARC record to process.
+            multi_field_delimiter (str): A delimiter used to concatenate multiple subfield values that should be individual mapped values.
+
+        Returns:
+            str: A string of statistical codes extracted from the MARC field, formatted as "<field>_<subfield>:<value>".
+        """
+        field_values = []
+        if len(field_map) == 2:
+            subfields = []
+            for mf in marc_record.get_fields(field_map[0]):
+                subfields.extend(
+                    multi_field_delimiter.join(
+                        mf.get_subfields(field_map[1])
+                    ).split(multi_field_delimiter)
+                )
+            field_values.extend(
+                [
+                    f"{field_map[0]}_{field_map[1]}:{x}" for
+                    x in subfields
+                ]
+            )
+        elif len(field_map) > 2:
+            for mf in marc_record.get_fields(field_map[0]):
+                for sf in field_map[1:]:
+                    field_values.extend(
+                        [
+                            f"{field_map[0]}_{sf}:{x}" for x in multi_field_delimiter.join(
+                                mf.get_subfields(sf)
+                            ).split(multi_field_delimiter)
+                        ]
+                    )
+        elif field_map:
+            for mf in marc_record.get_fields(field_map[0]):
+                field_values.append(f"{field_map[0]}:{mf.value()}")
+        return field_values
+
     def save_source_record(
+        self,
         srs_records_file,
         record_type: FOLIONamespaces,
         folio_client: FolioClient,
         marc_record: Record,
         folio_record,
-
+        legacy_ids: List[str],
         suppress: bool,
     ):
-        """Saves the source Marc_record to the Source record Storage module
-
-
-
+        """Saves the source Marc_record to the Source record Storage module
+
+        Args:
+            srs_records_file (_type_): _description_
+            record_type (FOLIONamespaces): _description_
+            folio_client (FolioClient): _description_
+            marc_record (Record): _description_
+            folio_record (_type_): _description_
+            legacy_ids (List[str]): _description_
+            suppress (bool): _description_
+        """
+        srs_id = self.create_srs_id(record_type, legacy_ids[-1])
 
         marc_record.add_ordered_field(
             Field(
                 tag="999",
                 indicators=["f", "f"],
-                subfields=[
+                subfields=[
+                    Subfield(code="i", value=folio_record["id"]),
+                    Subfield(code="s", value=srs_id),
+                ],
             )
         )
         # Since they all should be UTF encoded, make the leader align.
         try:
-
-
-            marc_record.leader = temp_leader
-        except Exception:
+            marc_record.leader[9] = "a"
+        except Exception as ee:
             logging.exception(
-                "Something is wrong with the marc
-                marc_record.leader,
+                "Something is wrong with the marc record's leader: %s, %s", marc_record.leader, ee
             )
-        srs_record_string =
+        srs_record_string = self.get_srs_string(
             marc_record,
             folio_record,
             srs_id,
-            folio_client.get_metadata_construct(),
             suppress,
             record_type,
         )
         srs_records_file.write(f"{srs_record_string}\n")
 
-
-    def create_srs_id(record_type, okapi_url: str, legacy_id: str):
+    def create_srs_id(self, record_type, legacy_id: str):
         srs_types = {
             FOLIONamespaces.holdings: FOLIONamespaces.srs_records_holdingsrecord,
             FOLIONamespaces.instances: FOLIONamespaces.srs_records_bib,
-            FOLIONamespaces.
+            FOLIONamespaces.authorities: FOLIONamespaces.srs_records_auth,
             FOLIONamespaces.edifact: FOLIONamespaces.srs_records_edifact,
         }
 
         return str(
             FolioUUID(
-
+                self.base_string_for_folio_uuid,
                 srs_types.get(record_type),
-
+                legacy_id
             )
         )
 
+    @staticmethod
+    def get_bib_id_from_907y(marc_record: Record, index_or_legacy_id):
+        try:
+            return list(set(marc_record["907"].get_subfields("a", "y")))
+        except Exception as e:
+            raise TransformationRecordFailedError(
+                index_or_legacy_id,
+                (
+                    "907 $y and $a is missing is missing, although they is "
+                    "required for this legacy ILS choice"
+                ),
+                marc_record.as_json(),
+            ) from e
+
+    @staticmethod
+    def get_bib_id_from_990a(marc_record: Record, index_or_legacy_id):
+        res = {f["a"].strip() for f in marc_record.get_fields("990") if "a" in f}
+        if marc_record["001"].format_field().strip():
+            res.add(marc_record["001"].format_field().strip())
+        if any(res):
+            return list(res)
+        else:
+            raise TransformationRecordFailedError(
+                index_or_legacy_id,
+                "neither 990$a or 001 found in record.",
+                marc_record.as_json(),
+            )
+
+    @staticmethod
+    def get_bib_id_from_001(marc_record: Record, index_or_legacy_id):
+        try:
+            return [marc_record["001"].format_field().strip()]
+        except Exception as e:
+            raise TransformationRecordFailedError(
+                index_or_legacy_id,
+                "001 is missing, although it is required for Voyager migrations",
+                marc_record.as_json(),
+            ) from e
+
     @staticmethod
     def get_srs_string(
         marc_record: Record,
         folio_object: dict,
         srs_id,
-
-        suppress,
+        discovery_suppress: bool,
         record_type: FOLIONamespaces,
     ):
         record_types = {
             FOLIONamespaces.holdings: "MARC_HOLDING",
             FOLIONamespaces.instances: "MARC_BIB",
-            FOLIONamespaces.
+            FOLIONamespaces.authorities: "MARC_AUTHORITY",
             FOLIONamespaces.edifact: "EDIFACT",
         }
 
         id_holders = {
             FOLIONamespaces.instances: {
                 "instanceId": folio_object["id"],
-                "instanceHrid": folio_object
+                "instanceHrid": folio_object.get("hrid", ""),
             },
             FOLIONamespaces.holdings: {
                 "holdingsId": folio_object["id"],
-                "holdingsHrid": folio_object
+                "holdingsHrid": folio_object.get("hrid", ""),
+            },
+            FOLIONamespaces.authorities: {
+                "authorityId": folio_object["id"],
+                "authorityHrid": marc_record["001"].data,
             },
-            FOLIONamespaces.athorities: {},
             FOLIONamespaces.edifact: {},
         }
 
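
The statistical-code strings produced above are keyed as <field>_<subfield>:<value> (or <field>:<value> for a whole-field map) so they can be matched against a statistical-code mapping file; the "<delimiter>" default comes from the signature shown in the hunk. A usage sketch, assuming the package and pymarc are installed (the record content is invented):

    from pymarc import Field, Record, Subfield
    from folio_migration_tools.marc_rules_transformation.rules_mapper_base import RulesMapperBase

    record = Record()
    record.add_field(
        Field(
            tag="907",
            indicators=[" ", " "],
            subfields=[Subfield(code="a", value="ebook<delimiter>audio")],
        )
    )
    codes = RulesMapperBase.map_stat_codes_from_marc_field(["907", "a"], record)
    assert codes == ["907_a:ebook", "907_a:audio"]
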
@@ -598,9 +1048,8 @@ class RulesMapperBase(MapperBase):
             "recordType": record_types.get(record_type),
             "rawRecord": raw_record,
             "parsedRecord": parsed_record,
-            "additionalInfo": {"suppressDiscovery":
+            "additionalInfo": {"suppressDiscovery": discovery_suppress},
             "externalIdsHolder": id_holders.get(record_type),
-            "metadata": metadata_obj,
             "state": "ACTUAL",
             "leaderRecordStatus": parsed_record["content"]["leader"][5]
             if parsed_record["content"]["leader"][5] in [*"acdnposx"]
@@ -625,3 +1074,34 @@ def is_array_of_strings(schema_property):
 def is_array_of_objects(schema_property):
     sc_prop_type = schema_property.get("type", "string")
     return sc_prop_type == "array" and schema_property["items"]["type"] == "object"
+
+
+def entity_indicators_match(entity_mapping, marc_field):
+    """
+    Check if the indicators of the entity mapping match the indicators of the MARC field.
+    Entity mappings can limit the fields they are applied to by specifying indicator values that
+    must match the provided MARC field's indicators. If the entity mapping does not specify any
+    indicator values, it is assumed to match all MARC fields. Entity indicator values can be a
+    specific value or a wildcard "*", which matches any value.
+
+    This function compares the indicators of the entity mapping with the indicators of the MARC field.
+    If the entity does not specify any indicator values, the function returns True. If the entity does
+    specify indicator values, the function checks if the MARC field's indicators match the specified
+    values or if the specified values are wildcards. If both indicators match, the function returns True;
+    otherwise, it returns False.
+
+    Args:
+        entity_mapping (dict): _description_
+        marc_field (pymarc.Field): _description_
+
+    Returns:
+        bool: True if the indicators match, False otherwise.
+    """
+    if indicator_rule := [x["indicators"] for x in entity_mapping if "indicators" in x]:
+        return all(
+            [
+                (marc_field.indicator1 == indicator_rule[0]['ind1'] or indicator_rule[0]['ind1'] == "*"),
+                (marc_field.indicator2 == indicator_rule[0]['ind2'] or indicator_rule[0]['ind2'] == "*"),
+            ]
+        )
+    else:
+        return True