folio-migration-tools 1.2.1__py3-none-any.whl → 1.9.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- folio_migration_tools/__init__.py +11 -0
- folio_migration_tools/__main__.py +169 -85
- folio_migration_tools/circulation_helper.py +96 -59
- folio_migration_tools/config_file_load.py +66 -0
- folio_migration_tools/custom_dict.py +6 -4
- folio_migration_tools/custom_exceptions.py +21 -19
- folio_migration_tools/extradata_writer.py +46 -0
- folio_migration_tools/folder_structure.py +63 -66
- folio_migration_tools/helper.py +29 -21
- folio_migration_tools/holdings_helper.py +57 -34
- folio_migration_tools/i18n_config.py +9 -0
- folio_migration_tools/library_configuration.py +173 -13
- folio_migration_tools/mapper_base.py +317 -106
- folio_migration_tools/mapping_file_transformation/courses_mapper.py +203 -0
- folio_migration_tools/mapping_file_transformation/holdings_mapper.py +83 -69
- folio_migration_tools/mapping_file_transformation/item_mapper.py +98 -94
- folio_migration_tools/mapping_file_transformation/manual_fee_fines_mapper.py +352 -0
- folio_migration_tools/mapping_file_transformation/mapping_file_mapper_base.py +702 -223
- folio_migration_tools/mapping_file_transformation/notes_mapper.py +90 -0
- folio_migration_tools/mapping_file_transformation/order_mapper.py +492 -0
- folio_migration_tools/mapping_file_transformation/organization_mapper.py +389 -0
- folio_migration_tools/mapping_file_transformation/ref_data_mapping.py +38 -27
- folio_migration_tools/mapping_file_transformation/user_mapper.py +149 -361
- folio_migration_tools/marc_rules_transformation/conditions.py +650 -246
- folio_migration_tools/marc_rules_transformation/holdings_statementsparser.py +292 -130
- folio_migration_tools/marc_rules_transformation/hrid_handler.py +244 -0
- folio_migration_tools/marc_rules_transformation/loc_language_codes.xml +20846 -0
- folio_migration_tools/marc_rules_transformation/marc_file_processor.py +300 -0
- folio_migration_tools/marc_rules_transformation/marc_reader_wrapper.py +136 -0
- folio_migration_tools/marc_rules_transformation/rules_mapper_authorities.py +241 -0
- folio_migration_tools/marc_rules_transformation/rules_mapper_base.py +681 -201
- folio_migration_tools/marc_rules_transformation/rules_mapper_bibs.py +395 -429
- folio_migration_tools/marc_rules_transformation/rules_mapper_holdings.py +531 -100
- folio_migration_tools/migration_report.py +85 -38
- folio_migration_tools/migration_tasks/__init__.py +1 -3
- folio_migration_tools/migration_tasks/authority_transformer.py +119 -0
- folio_migration_tools/migration_tasks/batch_poster.py +911 -198
- folio_migration_tools/migration_tasks/bibs_transformer.py +121 -116
- folio_migration_tools/migration_tasks/courses_migrator.py +192 -0
- folio_migration_tools/migration_tasks/holdings_csv_transformer.py +252 -247
- folio_migration_tools/migration_tasks/holdings_marc_transformer.py +321 -115
- folio_migration_tools/migration_tasks/items_transformer.py +264 -84
- folio_migration_tools/migration_tasks/loans_migrator.py +506 -195
- folio_migration_tools/migration_tasks/manual_fee_fines_transformer.py +187 -0
- folio_migration_tools/migration_tasks/migration_task_base.py +364 -74
- folio_migration_tools/migration_tasks/orders_transformer.py +373 -0
- folio_migration_tools/migration_tasks/organization_transformer.py +451 -0
- folio_migration_tools/migration_tasks/requests_migrator.py +130 -62
- folio_migration_tools/migration_tasks/reserves_migrator.py +253 -0
- folio_migration_tools/migration_tasks/user_transformer.py +180 -139
- folio_migration_tools/task_configuration.py +46 -0
- folio_migration_tools/test_infrastructure/__init__.py +0 -0
- folio_migration_tools/test_infrastructure/mocked_classes.py +406 -0
- folio_migration_tools/transaction_migration/legacy_loan.py +148 -34
- folio_migration_tools/transaction_migration/legacy_request.py +65 -25
- folio_migration_tools/transaction_migration/legacy_reserve.py +47 -0
- folio_migration_tools/transaction_migration/transaction_result.py +12 -1
- folio_migration_tools/translations/en.json +476 -0
- folio_migration_tools-1.9.10.dist-info/METADATA +169 -0
- folio_migration_tools-1.9.10.dist-info/RECORD +67 -0
- {folio_migration_tools-1.2.1.dist-info → folio_migration_tools-1.9.10.dist-info}/WHEEL +1 -2
- folio_migration_tools-1.9.10.dist-info/entry_points.txt +3 -0
- folio_migration_tools/generate_schemas.py +0 -46
- folio_migration_tools/mapping_file_transformation/mapping_file_mapping_base_impl.py +0 -44
- folio_migration_tools/mapping_file_transformation/user_mapper_base.py +0 -212
- folio_migration_tools/marc_rules_transformation/bibs_processor.py +0 -163
- folio_migration_tools/marc_rules_transformation/holdings_processor.py +0 -284
- folio_migration_tools/report_blurbs.py +0 -219
- folio_migration_tools/transaction_migration/legacy_fee_fine.py +0 -36
- folio_migration_tools-1.2.1.dist-info/METADATA +0 -134
- folio_migration_tools-1.2.1.dist-info/RECORD +0 -50
- folio_migration_tools-1.2.1.dist-info/top_level.txt +0 -1
- {folio_migration_tools-1.2.1.dist-info → folio_migration_tools-1.9.10.dist-info/licenses}/LICENSE +0 -0
|
@@ -1,14 +1,26 @@
|
|
|
1
|
+
import copy
|
|
2
|
+
import json
|
|
1
3
|
import logging
|
|
4
|
+
import re
|
|
5
|
+
from typing import Dict, List, Set
|
|
2
6
|
|
|
7
|
+
import i18n
|
|
3
8
|
from folio_uuid.folio_namespaces import FOLIONamespaces
|
|
4
9
|
from folio_uuid.folio_uuid import FolioUUID
|
|
10
|
+
from folioclient import FolioClient
|
|
11
|
+
from pymarc import Optional
|
|
12
|
+
from pymarc.field import Field
|
|
13
|
+
from pymarc.record import Record
|
|
14
|
+
|
|
5
15
|
from folio_migration_tools.custom_exceptions import (
|
|
6
16
|
TransformationFieldMappingError,
|
|
7
17
|
TransformationProcessError,
|
|
8
18
|
TransformationRecordFailedError,
|
|
9
19
|
)
|
|
10
20
|
from folio_migration_tools.helper import Helper
|
|
21
|
+
from folio_migration_tools.holdings_helper import HoldingsHelper
|
|
11
22
|
from folio_migration_tools.library_configuration import (
|
|
23
|
+
FileDefinition,
|
|
12
24
|
HridHandling,
|
|
13
25
|
LibraryConfiguration,
|
|
14
26
|
)
|
|
@@ -19,101 +31,233 @@ from folio_migration_tools.marc_rules_transformation.holdings_statementsparser i
|
|
|
19
31
|
from folio_migration_tools.marc_rules_transformation.rules_mapper_base import (
|
|
20
32
|
RulesMapperBase,
|
|
21
33
|
)
|
|
22
|
-
from folio_migration_tools.report_blurbs import Blurbs
|
|
23
|
-
from pymarc.field import Field
|
|
24
|
-
from pymarc.record import Record
|
|
25
34
|
|
|
26
35
|
|
|
27
36
|
class RulesMapperHoldings(RulesMapperBase):
|
|
28
37
|
def __init__(
|
|
29
38
|
self,
|
|
30
|
-
|
|
31
|
-
instance_id_map,
|
|
39
|
+
folio_client: FolioClient,
|
|
32
40
|
location_map,
|
|
33
41
|
task_configuration,
|
|
34
42
|
library_configuration: LibraryConfiguration,
|
|
43
|
+
parent_id_map: dict,
|
|
44
|
+
boundwith_relationship_map_rows: List[Dict],
|
|
45
|
+
statistical_codes_map: Optional[Dict] = None,
|
|
35
46
|
):
|
|
36
|
-
self.instance_id_map = instance_id_map
|
|
37
|
-
self.task_configuration = task_configuration
|
|
38
47
|
self.conditions = Conditions(
|
|
39
|
-
|
|
48
|
+
folio_client,
|
|
40
49
|
self,
|
|
41
50
|
"holdings",
|
|
42
|
-
|
|
51
|
+
library_configuration.folio_release,
|
|
52
|
+
task_configuration.default_call_number_type_name,
|
|
53
|
+
)
|
|
54
|
+
self.folio = folio_client
|
|
55
|
+
super().__init__(
|
|
56
|
+
folio_client,
|
|
57
|
+
library_configuration,
|
|
58
|
+
task_configuration,
|
|
59
|
+
statistical_codes_map,
|
|
60
|
+
self.fetch_holdings_schema(folio_client),
|
|
61
|
+
self.conditions,
|
|
62
|
+
parent_id_map,
|
|
63
|
+
)
|
|
64
|
+
self.boundwith_relationship_map: Dict = self.setup_boundwith_relationship_map(
|
|
65
|
+
boundwith_relationship_map_rows
|
|
43
66
|
)
|
|
44
|
-
self.
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
self.schema = self.holdings_json_schema
|
|
48
|
-
self.holdings_id_map = {}
|
|
49
|
-
self.ref_data_dicts = {}
|
|
50
|
-
self.fallback_holdings_type_id = (
|
|
51
|
-
self.task_configuration.fallback_holdings_type_id
|
|
67
|
+
self.location_map = self.validate_location_map(
|
|
68
|
+
location_map,
|
|
69
|
+
self.folio_client.locations,
|
|
52
70
|
)
|
|
71
|
+
self.holdings_id_map: dict = {}
|
|
72
|
+
self.ref_data_dicts: dict = {}
|
|
73
|
+
self.fallback_holdings_type_id = self.task_configuration.fallback_holdings_type_id
|
|
74
|
+
self.setup_holdings_sources()
|
|
75
|
+
logging.info("Fetching mapping rules from the tenant")
|
|
76
|
+
rules_endpoint = "/mapping-rules/marc-holdings"
|
|
77
|
+
self.mappings = self.folio_client.folio_get_single_object(rules_endpoint)
|
|
53
78
|
|
|
54
|
-
def
|
|
55
|
-
""
|
|
79
|
+
def fix_853_bug_in_rules(self):
|
|
80
|
+
f852_mappings = self.mappings["852"]
|
|
81
|
+
new_852_mapping = []
|
|
82
|
+
for mapping in f852_mappings:
|
|
83
|
+
if "entity" in mapping:
|
|
84
|
+
for entity_mapping in mapping["entity"]:
|
|
85
|
+
if "." not in entity_mapping["target"]:
|
|
86
|
+
new_852_mapping.append(entity_mapping)
|
|
87
|
+
else:
|
|
88
|
+
raise TransformationProcessError(
|
|
89
|
+
"",
|
|
90
|
+
(
|
|
91
|
+
"Actual entity mapping found in 852 mappings. "
|
|
92
|
+
"Report this to the maintainers of this codebase"
|
|
93
|
+
),
|
|
94
|
+
json.dumps(entity_mapping),
|
|
95
|
+
)
|
|
96
|
+
self.mappings["852"] = new_852_mapping
|
|
97
|
+
|
|
98
|
+
def integrate_supplemental_mfhd_mappings(self, new_rules={}):
|
|
99
|
+
try:
|
|
100
|
+
self.mappings.update(new_rules)
|
|
101
|
+
self.fix_853_bug_in_rules()
|
|
102
|
+
except Exception as e:
|
|
103
|
+
raise TransformationProcessError(
|
|
104
|
+
"",
|
|
105
|
+
"Failed to integrate supplemental mfhd mappings",
|
|
106
|
+
str(e),
|
|
107
|
+
)
|
|
108
|
+
|
|
109
|
+
def prep_852_notes(self, marc_record: Record):
|
|
110
|
+
for field in marc_record.get_fields("852"):
|
|
111
|
+
field.subfields.sort(key=lambda x: x[0])
|
|
112
|
+
new_952 = Field(
|
|
113
|
+
tag="952",
|
|
114
|
+
indicators=["f", "f"],
|
|
115
|
+
subfields=field.subfields
|
|
116
|
+
)
|
|
117
|
+
marc_record.add_ordered_field(new_952)
|
|
118
|
+
|
|
119
|
+
def parse_record(
|
|
120
|
+
self, marc_record: Record, file_def: FileDefinition, legacy_ids: List[str]
|
|
121
|
+
) -> list[dict]:
|
|
122
|
+
"""Parses a mfhd recod into a FOLIO Inventory holdings object
|
|
56
123
|
Community mapping suggestion: https://tinyurl.com/3rh52e2x
|
|
57
|
-
This is the main function
|
|
124
|
+
This is the main function
|
|
125
|
+
|
|
126
|
+
Args:
|
|
127
|
+
marc_record (Record): _description_
|
|
128
|
+
file_def (FileDefinition): _description_
|
|
129
|
+
legacy_ids (List[str]): _description_
|
|
130
|
+
|
|
131
|
+
Raises:
|
|
132
|
+
TransformationRecordFailedError: _description_
|
|
133
|
+
|
|
134
|
+
Returns:
|
|
135
|
+
dict: _description_
|
|
136
|
+
"""
|
|
137
|
+
|
|
58
138
|
self.print_progress()
|
|
59
|
-
folio_holding =
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
ignored_subsequent_fields = set()
|
|
139
|
+
folio_holding = self.perform_initial_preparation(marc_record, legacy_ids)
|
|
140
|
+
self.prep_852_notes(marc_record)
|
|
141
|
+
self.migration_report.add("RecordStatus", marc_record.leader[5])
|
|
142
|
+
ignored_subsequent_fields: set = set()
|
|
64
143
|
num_852s = 0
|
|
65
144
|
for marc_field in marc_record:
|
|
66
145
|
try:
|
|
67
146
|
if marc_field.tag == "852":
|
|
68
147
|
num_852s += 1
|
|
148
|
+
if num_852s > 1:
|
|
149
|
+
continue
|
|
69
150
|
self.process_marc_field(
|
|
151
|
+
folio_holding,
|
|
70
152
|
marc_field,
|
|
71
153
|
ignored_subsequent_fields,
|
|
72
|
-
|
|
73
|
-
legacy_id,
|
|
154
|
+
legacy_ids,
|
|
74
155
|
)
|
|
75
156
|
except TransformationFieldMappingError as tfme:
|
|
76
157
|
tfme.log_it()
|
|
77
158
|
if num_852s > 1:
|
|
78
|
-
Helper.log_data_issue(
|
|
159
|
+
Helper.log_data_issue(legacy_ids, "More than 1 852 found", "")
|
|
79
160
|
|
|
80
|
-
folio_holding
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
161
|
+
self.perform_additional_mapping(marc_record, folio_holding, legacy_ids, file_def)
|
|
162
|
+
cleaned_folio_holding = self.validate_required_properties(
|
|
163
|
+
"-".join(folio_holding.get("formerIds")),
|
|
164
|
+
folio_holding,
|
|
165
|
+
self.schema,
|
|
166
|
+
FOLIONamespaces.holdings,
|
|
86
167
|
)
|
|
87
|
-
|
|
88
168
|
if not folio_holding.get("instanceId", ""):
|
|
89
169
|
raise TransformationRecordFailedError(
|
|
90
|
-
|
|
170
|
+
legacy_ids,
|
|
91
171
|
"No Instance id mapped. ",
|
|
92
|
-
folio_holding
|
|
172
|
+
folio_holding.get("formerIds", ["No former ids"]),
|
|
93
173
|
)
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
174
|
+
props_to_not_dedupe = (
|
|
175
|
+
[]
|
|
176
|
+
if self.task_configuration.deduplicate_holdings_statements
|
|
177
|
+
else [
|
|
178
|
+
"holdingsStatements",
|
|
179
|
+
"holdingsStatementsForIndexes",
|
|
180
|
+
"holdingsStatementsForSupplements",
|
|
181
|
+
]
|
|
100
182
|
)
|
|
101
|
-
self.dedupe_rec(cleaned_folio_holding)
|
|
102
|
-
self.
|
|
103
|
-
|
|
183
|
+
self.dedupe_rec(cleaned_folio_holding, props_to_not_dedupe)
|
|
184
|
+
self.report_folio_mapping(cleaned_folio_holding, self.schema)
|
|
185
|
+
if bw_instance_ids := self.boundwith_relationship_map.get(cleaned_folio_holding["id"], []):
|
|
186
|
+
return list(
|
|
187
|
+
self.create_bound_with_holdings(
|
|
188
|
+
cleaned_folio_holding,
|
|
189
|
+
bw_instance_ids,
|
|
190
|
+
self.task_configuration.holdings_type_uuid_for_boundwiths,
|
|
191
|
+
)
|
|
192
|
+
)
|
|
193
|
+
return [cleaned_folio_holding]
|
|
194
|
+
|
|
195
|
+
def set_instance_id_by_map(self, legacy_ids: list, folio_holding: dict, marc_record: Record):
|
|
196
|
+
if "004" not in marc_record:
|
|
197
|
+
raise TransformationProcessError(
|
|
198
|
+
"",
|
|
199
|
+
("No 004 in record. The tools only support bib-mfhd linking through 004"),
|
|
200
|
+
legacy_ids,
|
|
201
|
+
)
|
|
202
|
+
if len(marc_record.get_fields("004")) > 1:
|
|
203
|
+
Helper.log_data_issue(
|
|
204
|
+
legacy_ids,
|
|
205
|
+
"More than one linked bib (004) found in record. Using the first one",
|
|
206
|
+
[str(x) for x in marc_record.get_fields("004")],
|
|
207
|
+
)
|
|
208
|
+
legacy_instance_id = marc_record["004"].data.strip()
|
|
209
|
+
folio_holding["formerIds"].append(f"{self.bib_id_template}{legacy_instance_id}")
|
|
210
|
+
if legacy_instance_id in self.parent_id_map:
|
|
211
|
+
folio_holding["instanceId"] = self.parent_id_map[legacy_instance_id][1]
|
|
212
|
+
else:
|
|
213
|
+
raise TransformationRecordFailedError(
|
|
214
|
+
legacy_ids,
|
|
215
|
+
"Old instance id not in map",
|
|
216
|
+
marc_record["004"],
|
|
217
|
+
)
|
|
218
|
+
|
|
219
|
+
def perform_initial_preparation(self, marc_record: Record, legacy_ids):
|
|
220
|
+
folio_holding: dict = {}
|
|
221
|
+
folio_holding["id"] = str(
|
|
222
|
+
FolioUUID(
|
|
223
|
+
self.base_string_for_folio_uuid,
|
|
224
|
+
FOLIONamespaces.holdings,
|
|
225
|
+
str(legacy_ids[0]),
|
|
226
|
+
)
|
|
104
227
|
)
|
|
228
|
+
for legacy_id in legacy_ids:
|
|
229
|
+
self.add_legacy_id_to_admin_note(folio_holding, legacy_id)
|
|
230
|
+
folio_holding["formerIds"] = copy.copy(legacy_ids)
|
|
231
|
+
self.set_instance_id_by_map(legacy_ids, folio_holding, marc_record)
|
|
232
|
+
return folio_holding
|
|
105
233
|
|
|
106
|
-
|
|
107
|
-
|
|
234
|
+
def setup_holdings_sources(self):
|
|
235
|
+
holdings_sources = list(
|
|
236
|
+
self.folio_client.folio_get_all("/holdings-sources", "holdingsRecordsSources")
|
|
237
|
+
)
|
|
238
|
+
logging.info("Fetched %s holdingsRecordsSources from tenant", len(holdings_sources))
|
|
239
|
+
self.holdingssources = {n["name"].upper(): n["id"] for n in holdings_sources}
|
|
240
|
+
if "FOLIO" not in self.holdingssources:
|
|
241
|
+
raise TransformationProcessError("", "No holdings source with name FOLIO in tenant")
|
|
242
|
+
if "MARC" not in self.holdingssources:
|
|
243
|
+
raise TransformationProcessError("", "No holdings source with name MARC in tenant")
|
|
108
244
|
|
|
109
245
|
def process_marc_field(
|
|
110
246
|
self,
|
|
247
|
+
folio_holding: Dict,
|
|
111
248
|
marc_field: Field,
|
|
112
|
-
ignored_subsequent_fields,
|
|
113
|
-
|
|
114
|
-
index_or_legacy_ids,
|
|
249
|
+
ignored_subsequent_fields: Set,
|
|
250
|
+
index_or_legacy_ids: List[str],
|
|
115
251
|
):
|
|
116
|
-
|
|
252
|
+
"""This overwrites the implementation for Auth and instances
|
|
253
|
+
|
|
254
|
+
Args:
|
|
255
|
+
folio_holding (dict): _description_
|
|
256
|
+
marc_field (Field): _description_
|
|
257
|
+
ignored_subsequent_fields (_type_): _description_
|
|
258
|
+
index_or_legacy_ids (_type_): _description_
|
|
259
|
+
"""
|
|
260
|
+
self.migration_report.add("Trivia", i18n.t("Total number of Tags processed"))
|
|
117
261
|
if marc_field.tag not in self.mappings:
|
|
118
262
|
self.report_legacy_mapping(marc_field.tag, True, False)
|
|
119
263
|
elif marc_field.tag not in ignored_subsequent_fields:
|
|
@@ -126,27 +270,68 @@ class RulesMapperHoldings(RulesMapperBase):
|
|
|
126
270
|
ignored_subsequent_fields.add(marc_field.tag)
|
|
127
271
|
|
|
128
272
|
def perform_additional_mapping(
|
|
129
|
-
self, marc_record: Record, folio_holding,
|
|
273
|
+
self, marc_record: Record, folio_holding: Dict, legacy_ids: List[str], file_def: FileDefinition
|
|
130
274
|
):
|
|
131
|
-
"""
|
|
132
|
-
|
|
275
|
+
"""_summary_
|
|
276
|
+
|
|
277
|
+
Args:
|
|
278
|
+
marc_record (Record): _description_
|
|
279
|
+
folio_holding (_type_): _description_
|
|
280
|
+
legacy_ids (List[str]): _description_
|
|
281
|
+
file_def (FileDefinition): _description_
|
|
282
|
+
|
|
283
|
+
Raises:
|
|
284
|
+
TransformationRecordFailedError: _description_
|
|
285
|
+
"""
|
|
286
|
+
self.set_holdings_type(marc_record, folio_holding, legacy_ids)
|
|
133
287
|
self.set_default_call_number_type_if_empty(folio_holding)
|
|
134
|
-
self.pick_first_location_if_many(folio_holding,
|
|
135
|
-
self.parse_coded_holdings_statements(marc_record, folio_holding,
|
|
288
|
+
self.pick_first_location_if_many(folio_holding, legacy_ids)
|
|
289
|
+
self.parse_coded_holdings_statements(marc_record, folio_holding, legacy_ids)
|
|
290
|
+
self.add_mfhd_as_mrk_note(marc_record, folio_holding, legacy_ids)
|
|
291
|
+
self.add_mfhd_as_mrc_note(marc_record, folio_holding, legacy_ids)
|
|
292
|
+
HoldingsHelper.handle_notes(folio_holding)
|
|
293
|
+
if (
|
|
294
|
+
all([file_def.create_source_records, self.create_source_records])
|
|
295
|
+
or self.task_configuration.hrid_handling == HridHandling.preserve001
|
|
296
|
+
):
|
|
297
|
+
self.hrid_handler.handle_hrid(
|
|
298
|
+
FOLIONamespaces.holdings, folio_holding, marc_record, legacy_ids
|
|
299
|
+
)
|
|
300
|
+
else:
|
|
301
|
+
del folio_holding["hrid"]
|
|
302
|
+
if not folio_holding.get("instanceId", ""):
|
|
303
|
+
raise TransformationRecordFailedError(
|
|
304
|
+
"".join(folio_holding.get("formerIds", [])),
|
|
305
|
+
"Missing instance ids. Something is wrong.",
|
|
306
|
+
"",
|
|
307
|
+
)
|
|
308
|
+
self.handle_suppression(folio_holding, file_def, True)
|
|
309
|
+
# First, map statistical codes from MARC fields and FileDefinitions to FOLIO statistical codes.
|
|
310
|
+
# Then, convert the mapped statistical codes to their corresponding code IDs.
|
|
311
|
+
self.map_statistical_codes(folio_holding, file_def, marc_record)
|
|
312
|
+
self.map_statistical_code_ids(legacy_ids, folio_holding)
|
|
313
|
+
self.set_source_id(self.create_source_records, folio_holding, self.holdingssources, file_def)
|
|
136
314
|
|
|
137
|
-
def pick_first_location_if_many(self, folio_holding,
|
|
315
|
+
def pick_first_location_if_many(self, folio_holding: Dict, legacy_ids: List[str]):
|
|
138
316
|
if " " in folio_holding.get("permanentLocationId", ""):
|
|
139
317
|
Helper.log_data_issue(
|
|
140
|
-
|
|
318
|
+
legacy_ids,
|
|
141
319
|
"Space in permanentLocationId. Was this MFHD attached to multiple holdings?",
|
|
142
320
|
folio_holding["permanentLocationId"],
|
|
143
321
|
)
|
|
144
|
-
folio_holding["permanentLocationId"] = folio_holding[
|
|
145
|
-
|
|
146
|
-
]
|
|
322
|
+
folio_holding["permanentLocationId"] = folio_holding["permanentLocationId"].split(" ")[
|
|
323
|
+
0
|
|
324
|
+
]
|
|
325
|
+
|
|
326
|
+
@staticmethod
|
|
327
|
+
def set_source_id(create_source_records: bool, folio_rec: Dict, holdingssources: Dict, file_def: FileDefinition):
|
|
328
|
+
if file_def.create_source_records and create_source_records:
|
|
329
|
+
folio_rec["sourceId"] = holdingssources.get("MARC")
|
|
330
|
+
else:
|
|
331
|
+
folio_rec["sourceId"] = holdingssources.get("FOLIO")
|
|
147
332
|
|
|
148
333
|
def parse_coded_holdings_statements(
|
|
149
|
-
self, marc_record: Record, folio_holding,
|
|
334
|
+
self, marc_record: Record, folio_holding: Dict, legacy_ids: List[str]
|
|
150
335
|
):
|
|
151
336
|
# TODO: Should one be able to switch these things off?
|
|
152
337
|
a = {
|
|
@@ -157,56 +342,221 @@ class RulesMapperHoldings(RulesMapperBase):
|
|
|
157
342
|
for key, v in a.items():
|
|
158
343
|
try:
|
|
159
344
|
res = HoldingsStatementsParser.get_holdings_statements(
|
|
160
|
-
marc_record,
|
|
345
|
+
marc_record,
|
|
346
|
+
v[0],
|
|
347
|
+
v[1],
|
|
348
|
+
v[2],
|
|
349
|
+
legacy_ids,
|
|
350
|
+
self.task_configuration.deduplicate_holdings_statements,
|
|
161
351
|
)
|
|
162
|
-
|
|
352
|
+
if res["statements"]:
|
|
353
|
+
folio_holding[key] = res["statements"]
|
|
163
354
|
for mr in res["migration_report"]:
|
|
164
|
-
self.migration_report.add(
|
|
165
|
-
Blurbs.HoldingsStatementsParsing, f"{mr[0]} -- {mr[1]}"
|
|
166
|
-
)
|
|
355
|
+
self.migration_report.add("HoldingsStatementsParsing", f"{mr[0]} -- {mr[1]}")
|
|
167
356
|
except TransformationFieldMappingError as tfme:
|
|
168
357
|
Helper.log_data_issue(tfme.index_or_id, tfme.message, tfme.data_value)
|
|
169
|
-
self.migration_report.add(
|
|
358
|
+
self.migration_report.add("FieldMappingErrors", tfme.message)
|
|
359
|
+
self.collect_mrk_statement_notes(marc_record, folio_holding, legacy_ids)
|
|
360
|
+
|
|
361
|
+
def collect_mrk_statement_notes(self, marc_record, folio_holding, legacy_ids):
|
|
362
|
+
"""Collects MFHD holdings statements as MARC Maker field strings in a FOLIO holdings note
|
|
363
|
+
and adds them to the FOLIO holdings record.
|
|
364
|
+
|
|
365
|
+
This is done to preserve the information in the MARC record for future reference.
|
|
366
|
+
|
|
367
|
+
Args:
|
|
368
|
+
marc_record (Record): PyMARC record
|
|
369
|
+
folio_holding (Dict): FOLIO holdings record
|
|
370
|
+
|
|
371
|
+
"""
|
|
372
|
+
if self.task_configuration.include_mrk_statements:
|
|
373
|
+
mrk_statement_notes = []
|
|
374
|
+
for field in marc_record.get_fields("853", "854", "855", "863", "864", "865", "866", "867", "868"):
|
|
375
|
+
mrk_statement_notes.append(str(field))
|
|
376
|
+
if mrk_statement_notes:
|
|
377
|
+
folio_holding["notes"] = folio_holding.get("notes", []) + self.add_mrk_statements_note(mrk_statement_notes, legacy_ids)
|
|
378
|
+
|
|
379
|
+
def add_mrk_statements_note(self, mrk_statement_notes: List[str], legacy_ids) -> List[Dict]:
|
|
380
|
+
"""Creates a note from the MRK statements
|
|
381
|
+
|
|
382
|
+
Args:
|
|
383
|
+
mrk_statement_notes (List[str]): A list of MFHD holdings statements as MRK strings
|
|
384
|
+
|
|
385
|
+
Returns:
|
|
386
|
+
List: A list containing the FOLIO holdings note object (Dict)
|
|
387
|
+
"""
|
|
388
|
+
holdings_note_type_tuple = self.conditions.get_ref_data_tuple_by_name(
|
|
389
|
+
self.folio.holding_note_types, "holding_note_types", self.task_configuration.mrk_holdings_note_type
|
|
390
|
+
)
|
|
391
|
+
try:
|
|
392
|
+
holdings_note_type_id = holdings_note_type_tuple[0]
|
|
393
|
+
except Exception as ee:
|
|
394
|
+
logging.error(ee)
|
|
395
|
+
raise TransformationRecordFailedError(
|
|
396
|
+
legacy_ids,
|
|
397
|
+
f'Holdings note type mapping error.\tNote type name: {self.task_configuration.mrk_holdings_note_type}\t'
|
|
398
|
+
f"MFHD holdings statement note type not found in FOLIO.",
|
|
399
|
+
self.task_configuration.mrk_holdings_note_type,
|
|
400
|
+
) from ee
|
|
401
|
+
return [
|
|
402
|
+
{
|
|
403
|
+
"note": chunk,
|
|
404
|
+
"holdingsNoteTypeId": holdings_note_type_id,
|
|
405
|
+
"staffOnly": True,
|
|
406
|
+
} for chunk in self.split_mrk_by_max_note_size("\n".join(mrk_statement_notes))
|
|
407
|
+
]
|
|
408
|
+
|
|
409
|
+
@staticmethod
|
|
410
|
+
def split_mrk_by_max_note_size(s: str, max_chunk_size: int = 32000) -> List[str]:
|
|
411
|
+
lines = s.splitlines(keepends=True)
|
|
412
|
+
chunks = []
|
|
413
|
+
current_chunk = ""
|
|
414
|
+
for line in lines:
|
|
415
|
+
# If adding this line would exceed the limit, start a new chunk
|
|
416
|
+
if len(current_chunk) + len(line) > max_chunk_size:
|
|
417
|
+
if current_chunk:
|
|
418
|
+
chunks.append(current_chunk)
|
|
419
|
+
current_chunk = line
|
|
420
|
+
else:
|
|
421
|
+
current_chunk += line
|
|
422
|
+
if current_chunk:
|
|
423
|
+
chunks.append(current_chunk)
|
|
424
|
+
return chunks
|
|
425
|
+
|
|
426
|
+
def add_mfhd_as_mrk_note(self, marc_record: Record, folio_holding: Dict, legacy_ids: List[str]):
|
|
427
|
+
"""Adds the MFHD as a note to the holdings record
|
|
428
|
+
|
|
429
|
+
This is done to preserve the information in the MARC record for future reference.
|
|
430
|
+
|
|
431
|
+
Args:
|
|
432
|
+
marc_record (Record): PyMARC record
|
|
433
|
+
folio_holding (Dict): FOLIO holdings record
|
|
434
|
+
"""
|
|
435
|
+
if self.task_configuration.include_mfhd_mrk_as_note:
|
|
436
|
+
holdings_note_type_tuple = self.conditions.get_ref_data_tuple_by_name(
|
|
437
|
+
self.folio.holding_note_types, "holding_note_types", self.task_configuration.mfhd_mrk_note_type
|
|
438
|
+
)
|
|
439
|
+
try:
|
|
440
|
+
holdings_note_type_id = holdings_note_type_tuple[0]
|
|
441
|
+
except Exception as ee:
|
|
442
|
+
logging.error(ee)
|
|
443
|
+
raise TransformationRecordFailedError(
|
|
444
|
+
legacy_ids,
|
|
445
|
+
f'Holdings note type mapping error.\tNote type name: {self.task_configuration.mfhd_mrk_note_type}\t'
|
|
446
|
+
f"Note type not found in FOLIO.",
|
|
447
|
+
self.task_configuration.mfhd_mrk_note_type,
|
|
448
|
+
) from ee
|
|
449
|
+
folio_holding["notes"] = folio_holding.get("notes", []) + [
|
|
450
|
+
{
|
|
451
|
+
"note": chunk,
|
|
452
|
+
"holdingsNoteTypeId": holdings_note_type_id,
|
|
453
|
+
"staffOnly": True,
|
|
454
|
+
} for chunk in self.split_mrk_by_max_note_size(str(marc_record))
|
|
455
|
+
]
|
|
456
|
+
|
|
457
|
+
@staticmethod
|
|
458
|
+
def split_mrc_by_max_note_size(data: bytes, sep: bytes = b"\x1e", max_chunk_size: int = 32000) -> List[bytes]:
|
|
459
|
+
# Split data into segments, each ending with the separator (except possibly the last)
|
|
460
|
+
pattern = re.compile(b'(.*?' + re.escape(sep) + b'|.+?$)', re.DOTALL)
|
|
461
|
+
parts = [m.group(0) for m in pattern.finditer(data) if m.group(0)]
|
|
462
|
+
chunks = []
|
|
463
|
+
current_chunk = b""
|
|
464
|
+
for part in parts:
|
|
465
|
+
if len(current_chunk) + len(part) > max_chunk_size and current_chunk:
|
|
466
|
+
chunks.append(current_chunk)
|
|
467
|
+
current_chunk = part
|
|
468
|
+
else:
|
|
469
|
+
current_chunk += part
|
|
470
|
+
if current_chunk:
|
|
471
|
+
chunks.append(current_chunk)
|
|
472
|
+
return chunks
|
|
473
|
+
|
|
474
|
+
def add_mfhd_as_mrc_note(self, marc_record: Record, folio_holding: Dict, legacy_ids: List[str]):
|
|
475
|
+
"""Adds the MFHD as a note to the holdings record
|
|
476
|
+
|
|
477
|
+
This is done to preserve the information in the MARC record for future reference.
|
|
478
|
+
|
|
479
|
+
Args:
|
|
480
|
+
marc_record (Record): PyMARC record
|
|
481
|
+
folio_holding (Dict): FOLIO holdings record
|
|
482
|
+
"""
|
|
483
|
+
if self.task_configuration.include_mfhd_mrc_as_note:
|
|
484
|
+
holdings_note_type_tuple = self.conditions.get_ref_data_tuple_by_name(
|
|
485
|
+
self.folio.holding_note_types, "holding_note_types", self.task_configuration.mfhd_mrc_note_type
|
|
486
|
+
)
|
|
487
|
+
try:
|
|
488
|
+
holdings_note_type_id = holdings_note_type_tuple[0]
|
|
489
|
+
except Exception as ee:
|
|
490
|
+
logging.error(ee)
|
|
491
|
+
raise TransformationRecordFailedError(
|
|
492
|
+
legacy_ids,
|
|
493
|
+
f'Holdings note type mapping error.\tNote type name: {self.task_configuration.mfhd_mrc_note_type}\t'
|
|
494
|
+
f"Note type not found in FOLIO.",
|
|
495
|
+
self.task_configuration.mfhd_mrc_note_type,
|
|
496
|
+
) from ee
|
|
497
|
+
folio_holding["notes"] = folio_holding.get("notes", []) + [
|
|
498
|
+
{
|
|
499
|
+
"note": chunk.decode("utf-8"),
|
|
500
|
+
"holdingsNoteTypeId": holdings_note_type_id,
|
|
501
|
+
"staffOnly": True,
|
|
502
|
+
} for chunk in self.split_mrc_by_max_note_size(marc_record.as_marc())
|
|
503
|
+
]
|
|
170
504
|
|
|
171
505
|
def wrap_up(self):
|
|
172
506
|
logging.info("Mapper wrapping up")
|
|
173
|
-
|
|
174
|
-
self.
|
|
507
|
+
source_file_create_source_records = [
|
|
508
|
+
x.create_source_records for x in self.task_configuration.files
|
|
509
|
+
]
|
|
510
|
+
if all(source_file_create_source_records):
|
|
511
|
+
create_source_records = self.create_source_records
|
|
175
512
|
else:
|
|
176
|
-
logging.info(
|
|
513
|
+
logging.info(
|
|
514
|
+
"If all source files have create_source_records set to false, "
|
|
515
|
+
"this will override the task configuration setting"
|
|
516
|
+
)
|
|
517
|
+
create_source_records = any(source_file_create_source_records)
|
|
518
|
+
if self.task_configuration.update_hrid_settings:
|
|
519
|
+
if create_source_records:
|
|
520
|
+
logging.info("Storing HRID settings")
|
|
521
|
+
self.hrid_handler.store_hrid_settings()
|
|
522
|
+
else:
|
|
523
|
+
logging.info("NOT storing HRID settings since that is managed by FOLIO")
|
|
524
|
+
|
|
525
|
+
def fetch_holdings_schema(self, folio_client: FolioClient):
|
|
526
|
+
logging.info("Fetching HoldingsRecord schema...")
|
|
527
|
+
return folio_client.get_holdings_schema()
|
|
177
528
|
|
|
178
|
-
def set_holdings_type(self, marc_record: Record, folio_holding,
|
|
529
|
+
def set_holdings_type(self, marc_record: Record, folio_holding: Dict, legacy_ids: List[str]):
|
|
179
530
|
# Holdings type mapping
|
|
180
531
|
ldr06 = marc_record.leader[6]
|
|
181
532
|
# TODO: map this better
|
|
182
533
|
# type = type_map.get(ldr06, "Unknown")
|
|
183
534
|
if folio_holding.get("holdingsTypeId", ""):
|
|
184
535
|
self.migration_report.add(
|
|
185
|
-
|
|
186
|
-
|
|
536
|
+
"HoldingsTypeMapping",
|
|
537
|
+
i18n.t(
|
|
538
|
+
"Already set to %{value}. %{leader_key} was %{leader}",
|
|
539
|
+
value=folio_holding.get("holdingsTypeId"),
|
|
540
|
+
leader_key="LDR[06]",
|
|
541
|
+
leader=ldr06,
|
|
542
|
+
),
|
|
187
543
|
)
|
|
188
544
|
else:
|
|
189
|
-
|
|
190
|
-
"u": "Unknown",
|
|
191
|
-
"v": "Multi-part monograph",
|
|
192
|
-
"x": "Monograph",
|
|
193
|
-
"y": "Serial",
|
|
194
|
-
}
|
|
195
|
-
holdings_type = holdings_type_map.get(ldr06, "")
|
|
545
|
+
holdings_type = self.conditions.holdings_type_map.get(ldr06, "")
|
|
196
546
|
if t := self.conditions.get_ref_data_tuple_by_name(
|
|
197
547
|
self.conditions.holdings_types, "hold_types", holdings_type
|
|
198
548
|
):
|
|
199
549
|
folio_holding["holdingsTypeId"] = t[0]
|
|
200
550
|
self.migration_report.add(
|
|
201
|
-
|
|
551
|
+
"HoldingsTypeMapping",
|
|
202
552
|
f"{ldr06} -> {holdings_type} -> {t[1]} ({t[0]}",
|
|
203
553
|
)
|
|
204
554
|
if holdings_type == "Unknown":
|
|
205
555
|
Helper.log_data_issue(
|
|
206
|
-
|
|
556
|
+
legacy_ids,
|
|
207
557
|
(
|
|
208
|
-
|
|
209
|
-
"Check if this is correct"
|
|
558
|
+
i18n.t("blurbs.HoldingsTypeMapping.title") + " is 'unknown'. "
|
|
559
|
+
"(leader 06 is set to 'u') Check if this is correct"
|
|
210
560
|
),
|
|
211
561
|
ldr06,
|
|
212
562
|
)
|
|
@@ -218,23 +568,104 @@ class RulesMapperHoldings(RulesMapperBase):
|
|
|
218
568
|
)
|
|
219
569
|
folio_holding["holdingsTypeId"] = self.fallback_holdings_type_id
|
|
220
570
|
self.migration_report.add(
|
|
221
|
-
|
|
222
|
-
|
|
571
|
+
"HoldingsTypeMapping",
|
|
572
|
+
i18n.t("An Unmapped")
|
|
573
|
+
+ f" {ldr06} -> {holdings_type} -> "
|
|
574
|
+
+ i18n.t("Unmapped"),
|
|
223
575
|
)
|
|
224
576
|
Helper.log_data_issue(
|
|
225
|
-
|
|
226
|
-
(
|
|
577
|
+
legacy_ids,
|
|
578
|
+
(
|
|
579
|
+
i18n.t("blurbs.HoldingsTypeMapping.title", locale="en")
|
|
580
|
+
+ ". leader 06 was unmapped."
|
|
581
|
+
),
|
|
227
582
|
ldr06,
|
|
228
583
|
)
|
|
229
584
|
|
|
230
|
-
def set_default_call_number_type_if_empty(self, folio_holding):
|
|
585
|
+
def set_default_call_number_type_if_empty(self, folio_holding: Dict):
|
|
231
586
|
if not folio_holding.get("callNumberTypeId", ""):
|
|
232
|
-
folio_holding[
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
587
|
+
folio_holding["callNumberTypeId"] = self.conditions.default_call_number_type["id"]
|
|
588
|
+
|
|
589
|
+
def get_legacy_ids(self, marc_record: Record, idx: int) -> List[str]:
|
|
590
|
+
marc_path = self.task_configuration.legacy_id_marc_path
|
|
591
|
+
split = marc_path.split("$", maxsplit=1)
|
|
592
|
+
results = []
|
|
593
|
+
if not (split[0].isnumeric() and len(split[0]) == 3):
|
|
594
|
+
raise TransformationProcessError(
|
|
595
|
+
"",
|
|
596
|
+
(
|
|
597
|
+
"the marc field used for determining the legacy id is not numeric "
|
|
598
|
+
"or does not have the stipulated lenght of 3."
|
|
599
|
+
"Make sure the task configuration setting for 'legacyIdMarcPath' "
|
|
600
|
+
"is correct or make this piece of code more allowing"
|
|
601
|
+
),
|
|
602
|
+
marc_path,
|
|
603
|
+
)
|
|
604
|
+
elif len(split) == 1:
|
|
605
|
+
results.append(marc_record[split[0]].value())
|
|
606
|
+
elif len(split) == 2 and len(split[1]) == 1:
|
|
607
|
+
for field in marc_record.get_fields(split[0]):
|
|
608
|
+
if sf := field.get_subfields(split[1]):
|
|
609
|
+
results.append(sf[0])
|
|
610
|
+
else:
|
|
611
|
+
raise TransformationProcessError(
|
|
612
|
+
"",
|
|
613
|
+
("Something is wrong with 'legacyIdMarcPath' property in the settings"),
|
|
614
|
+
marc_path,
|
|
615
|
+
)
|
|
616
|
+
if not any(results):
|
|
617
|
+
raise TransformationRecordFailedError(
|
|
618
|
+
idx, f"No legacy id found in record from {marc_path}", ""
|
|
619
|
+
)
|
|
620
|
+
return results
|
|
621
|
+
|
|
622
|
+
def verity_boundwith_map_entry(self, entry: Dict):
|
|
623
|
+
if "MFHD_ID" not in entry or not entry.get("MFHD_ID", ""):
|
|
624
|
+
raise TransformationProcessError(
|
|
625
|
+
"", "Column MFHD_ID missing from Boundwith relationship map", ""
|
|
626
|
+
)
|
|
627
|
+
if "BIB_ID" not in entry or not entry.get("BIB_ID", ""):
|
|
628
|
+
raise TransformationProcessError(
|
|
629
|
+
"", "Column BIB_ID missing from Boundwith relationship map", ""
|
|
630
|
+
)
|
|
631
|
+
|
|
632
|
+
def setup_boundwith_relationship_map(self, boundwith_relationship_map_list: List[Dict]):
|
|
633
|
+
"""
|
|
634
|
+
Creates a map of MFHD_ID to BIB_ID for boundwith relationships.
|
|
635
|
+
|
|
636
|
+
Arguments:
|
|
637
|
+
boundwith_relationship_map: A list of dictionaries containing the MFHD_ID and BIB_ID.
|
|
638
|
+
|
|
639
|
+
Returns:
|
|
640
|
+
A dictionary mapping MFHD_ID to a list of BIB_IDs.
|
|
641
|
+
|
|
642
|
+
Raises:
|
|
643
|
+
TransformationProcessError: If MFHD_ID or BIB_ID is missing from the entry or if the instance_uuid is not in the parent_id_map.
|
|
644
|
+
TransformationRecordFailedError: If BIB_ID is not in the instance id map.
|
|
645
|
+
"""
|
|
646
|
+
new_map = {}
|
|
647
|
+
for idx, entry in enumerate(boundwith_relationship_map_list):
|
|
648
|
+
self.verity_boundwith_map_entry(entry)
|
|
649
|
+
mfhd_uuid = str(
|
|
650
|
+
FolioUUID(
|
|
651
|
+
self.base_string_for_folio_uuid,
|
|
652
|
+
FOLIONamespaces.holdings,
|
|
653
|
+
entry["MFHD_ID"],
|
|
654
|
+
)
|
|
655
|
+
)
|
|
656
|
+
try:
|
|
657
|
+
parent_id_tuple = self.get_bw_instance_id_map_tuple(entry)
|
|
658
|
+
new_map[mfhd_uuid] = new_map.get(mfhd_uuid, []) + [parent_id_tuple[1]]
|
|
659
|
+
except TransformationRecordFailedError as trfe:
|
|
660
|
+
self.handle_transformation_record_failed_error(idx, trfe)
|
|
661
|
+
return new_map
|
|
662
|
+
|
|
663
|
+
def get_bw_instance_id_map_tuple(self, entry: Dict):
|
|
664
|
+
try:
|
|
665
|
+
return self.parent_id_map[entry["BIB_ID"]]
|
|
666
|
+
except KeyError:
|
|
667
|
+
raise TransformationRecordFailedError(
|
|
668
|
+
entry["MFHD_ID"],
|
|
669
|
+
"Boundwith relationship map contains a BIB_ID id not in the instance id map. No boundwith holdings created for this BIB_ID.",
|
|
670
|
+
entry["BIB_ID"],
|
|
671
|
+
)
|