folio-migration-tools 1.2.1__py3-none-any.whl → 1.9.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- folio_migration_tools/__init__.py +11 -0
- folio_migration_tools/__main__.py +169 -85
- folio_migration_tools/circulation_helper.py +96 -59
- folio_migration_tools/config_file_load.py +66 -0
- folio_migration_tools/custom_dict.py +6 -4
- folio_migration_tools/custom_exceptions.py +21 -19
- folio_migration_tools/extradata_writer.py +46 -0
- folio_migration_tools/folder_structure.py +63 -66
- folio_migration_tools/helper.py +29 -21
- folio_migration_tools/holdings_helper.py +57 -34
- folio_migration_tools/i18n_config.py +9 -0
- folio_migration_tools/library_configuration.py +173 -13
- folio_migration_tools/mapper_base.py +317 -106
- folio_migration_tools/mapping_file_transformation/courses_mapper.py +203 -0
- folio_migration_tools/mapping_file_transformation/holdings_mapper.py +83 -69
- folio_migration_tools/mapping_file_transformation/item_mapper.py +98 -94
- folio_migration_tools/mapping_file_transformation/manual_fee_fines_mapper.py +352 -0
- folio_migration_tools/mapping_file_transformation/mapping_file_mapper_base.py +702 -223
- folio_migration_tools/mapping_file_transformation/notes_mapper.py +90 -0
- folio_migration_tools/mapping_file_transformation/order_mapper.py +492 -0
- folio_migration_tools/mapping_file_transformation/organization_mapper.py +389 -0
- folio_migration_tools/mapping_file_transformation/ref_data_mapping.py +38 -27
- folio_migration_tools/mapping_file_transformation/user_mapper.py +149 -361
- folio_migration_tools/marc_rules_transformation/conditions.py +650 -246
- folio_migration_tools/marc_rules_transformation/holdings_statementsparser.py +292 -130
- folio_migration_tools/marc_rules_transformation/hrid_handler.py +244 -0
- folio_migration_tools/marc_rules_transformation/loc_language_codes.xml +20846 -0
- folio_migration_tools/marc_rules_transformation/marc_file_processor.py +300 -0
- folio_migration_tools/marc_rules_transformation/marc_reader_wrapper.py +136 -0
- folio_migration_tools/marc_rules_transformation/rules_mapper_authorities.py +241 -0
- folio_migration_tools/marc_rules_transformation/rules_mapper_base.py +681 -201
- folio_migration_tools/marc_rules_transformation/rules_mapper_bibs.py +395 -429
- folio_migration_tools/marc_rules_transformation/rules_mapper_holdings.py +531 -100
- folio_migration_tools/migration_report.py +85 -38
- folio_migration_tools/migration_tasks/__init__.py +1 -3
- folio_migration_tools/migration_tasks/authority_transformer.py +119 -0
- folio_migration_tools/migration_tasks/batch_poster.py +911 -198
- folio_migration_tools/migration_tasks/bibs_transformer.py +121 -116
- folio_migration_tools/migration_tasks/courses_migrator.py +192 -0
- folio_migration_tools/migration_tasks/holdings_csv_transformer.py +252 -247
- folio_migration_tools/migration_tasks/holdings_marc_transformer.py +321 -115
- folio_migration_tools/migration_tasks/items_transformer.py +264 -84
- folio_migration_tools/migration_tasks/loans_migrator.py +506 -195
- folio_migration_tools/migration_tasks/manual_fee_fines_transformer.py +187 -0
- folio_migration_tools/migration_tasks/migration_task_base.py +364 -74
- folio_migration_tools/migration_tasks/orders_transformer.py +373 -0
- folio_migration_tools/migration_tasks/organization_transformer.py +451 -0
- folio_migration_tools/migration_tasks/requests_migrator.py +130 -62
- folio_migration_tools/migration_tasks/reserves_migrator.py +253 -0
- folio_migration_tools/migration_tasks/user_transformer.py +180 -139
- folio_migration_tools/task_configuration.py +46 -0
- folio_migration_tools/test_infrastructure/__init__.py +0 -0
- folio_migration_tools/test_infrastructure/mocked_classes.py +406 -0
- folio_migration_tools/transaction_migration/legacy_loan.py +148 -34
- folio_migration_tools/transaction_migration/legacy_request.py +65 -25
- folio_migration_tools/transaction_migration/legacy_reserve.py +47 -0
- folio_migration_tools/transaction_migration/transaction_result.py +12 -1
- folio_migration_tools/translations/en.json +476 -0
- folio_migration_tools-1.9.10.dist-info/METADATA +169 -0
- folio_migration_tools-1.9.10.dist-info/RECORD +67 -0
- {folio_migration_tools-1.2.1.dist-info → folio_migration_tools-1.9.10.dist-info}/WHEEL +1 -2
- folio_migration_tools-1.9.10.dist-info/entry_points.txt +3 -0
- folio_migration_tools/generate_schemas.py +0 -46
- folio_migration_tools/mapping_file_transformation/mapping_file_mapping_base_impl.py +0 -44
- folio_migration_tools/mapping_file_transformation/user_mapper_base.py +0 -212
- folio_migration_tools/marc_rules_transformation/bibs_processor.py +0 -163
- folio_migration_tools/marc_rules_transformation/holdings_processor.py +0 -284
- folio_migration_tools/report_blurbs.py +0 -219
- folio_migration_tools/transaction_migration/legacy_fee_fine.py +0 -36
- folio_migration_tools-1.2.1.dist-info/METADATA +0 -134
- folio_migration_tools-1.2.1.dist-info/RECORD +0 -50
- folio_migration_tools-1.2.1.dist-info/top_level.txt +0 -1
- {folio_migration_tools-1.2.1.dist-info → folio_migration_tools-1.9.10.dist-info/licenses}/LICENSE +0 -0
|
@@ -1,13 +1,18 @@
|
|
|
1
1
|
import csv
|
|
2
|
+
import itertools
|
|
2
3
|
import json
|
|
3
4
|
import logging
|
|
4
|
-
|
|
5
|
+
import re
|
|
6
|
+
import uuid
|
|
7
|
+
from functools import reduce
|
|
5
8
|
from pathlib import Path
|
|
9
|
+
from typing import Dict, List, Set
|
|
6
10
|
from uuid import UUID
|
|
7
|
-
import uuid
|
|
8
11
|
|
|
12
|
+
import i18n
|
|
9
13
|
from folio_uuid.folio_uuid import FOLIONamespaces, FolioUUID
|
|
10
14
|
from folioclient import FolioClient
|
|
15
|
+
|
|
11
16
|
from folio_migration_tools.custom_exceptions import (
|
|
12
17
|
TransformationFieldMappingError,
|
|
13
18
|
TransformationProcessError,
|
|
@@ -15,10 +20,8 @@ from folio_migration_tools.custom_exceptions import (
|
|
|
15
20
|
)
|
|
16
21
|
from folio_migration_tools.library_configuration import LibraryConfiguration
|
|
17
22
|
from folio_migration_tools.mapper_base import MapperBase
|
|
18
|
-
from folio_migration_tools.
|
|
19
|
-
|
|
20
|
-
)
|
|
21
|
-
from folio_migration_tools.report_blurbs import Blurbs
|
|
23
|
+
from folio_migration_tools.migration_report import MigrationReport
|
|
24
|
+
from folio_migration_tools.task_configuration import AbstractTaskConfiguration
|
|
22
25
|
|
|
23
26
|
empty_vals = ["Not mapped", None, ""]
|
|
24
27
|
|
|
@@ -32,57 +35,60 @@ class MappingFileMapperBase(MapperBase):
|
|
|
32
35
|
statistical_codes_map,
|
|
33
36
|
uuid_namespace: UUID,
|
|
34
37
|
library_configuration: LibraryConfiguration,
|
|
38
|
+
task_config: AbstractTaskConfiguration,
|
|
35
39
|
ignore_legacy_identifier=False,
|
|
36
40
|
):
|
|
37
|
-
super().__init__(library_configuration, folio_client)
|
|
41
|
+
super().__init__(library_configuration, task_config, folio_client)
|
|
38
42
|
self.uuid_namespace = uuid_namespace
|
|
39
43
|
self.ignore_legacy_identifier = ignore_legacy_identifier
|
|
40
44
|
self.schema = schema
|
|
45
|
+
self.unique_record_ids: Set[str] = set()
|
|
46
|
+
|
|
41
47
|
self.total_records = 0
|
|
42
|
-
self.folio_client = folio_client
|
|
43
|
-
self.use_map = True # Legacy
|
|
44
48
|
self.record_map = record_map
|
|
45
|
-
self.ref_data_dicts = {}
|
|
49
|
+
self.ref_data_dicts: Dict = {}
|
|
46
50
|
self.empty_vals = empty_vals
|
|
47
51
|
self.folio_keys = self.get_mapped_folio_properties_from_map(self.record_map)
|
|
48
52
|
self.field_map = self.setup_field_map(ignore_legacy_identifier)
|
|
49
53
|
self.validate_map()
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
+
try:
|
|
55
|
+
self.mapped_from_values = {
|
|
56
|
+
k["folio_field"]: k["value"]
|
|
57
|
+
for k in self.record_map["data"]
|
|
58
|
+
if k["value"] not in [None, ""] and k["folio_field"] != "legacyIdentifier"
|
|
59
|
+
}
|
|
60
|
+
except KeyError as ke:
|
|
61
|
+
raise TransformationProcessError(
|
|
62
|
+
"",
|
|
63
|
+
"Property missing from one of the settings in the record mapping file",
|
|
64
|
+
f"Property name: {ke}",
|
|
65
|
+
) from ke
|
|
66
|
+
|
|
54
67
|
logging.info(
|
|
55
68
|
"Mapped values:\n%s",
|
|
56
69
|
json.dumps(self.mapped_from_values, indent=4, sort_keys=True),
|
|
57
70
|
)
|
|
58
71
|
legacy_fields = set()
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
"/statistical-codes",
|
|
63
|
-
"statisticalCodes",
|
|
64
|
-
statistical_codes_map,
|
|
65
|
-
"code",
|
|
66
|
-
Blurbs.StatisticalCodeMapping,
|
|
67
|
-
)
|
|
68
|
-
logging.info("Statistical codes mapping set up")
|
|
69
|
-
else:
|
|
70
|
-
self.statistical_codes_mapping = None
|
|
71
|
-
logging.info("Statistical codes map is not set up")
|
|
72
|
-
self.mapped_from_legacy_data = {}
|
|
72
|
+
self.setup_statistical_codes_map(statistical_codes_map)
|
|
73
|
+
self.legacy_record_mappings: dict = {}
|
|
74
|
+
self.mapped_from_legacy_data: dict = {}
|
|
73
75
|
for k in self.record_map["data"]:
|
|
74
76
|
if (
|
|
75
77
|
k["legacy_field"] not in self.empty_vals
|
|
76
78
|
# or k["folio_field"] != "legacyIdentifier"
|
|
77
79
|
or k["value"] not in self.empty_vals
|
|
78
80
|
):
|
|
81
|
+
clean_folio_field = re.sub(r"\[\d+\]", "", k["folio_field"])
|
|
82
|
+
self.legacy_record_mappings[k["folio_field"]] = list(
|
|
83
|
+
self.get_map_entries_by_folio_prop_name(
|
|
84
|
+
clean_folio_field, self.record_map["data"]
|
|
85
|
+
)
|
|
86
|
+
)
|
|
79
87
|
legacy_fields.add(k["legacy_field"])
|
|
80
88
|
if not self.mapped_from_legacy_data.get(k["folio_field"]):
|
|
81
|
-
self.mapped_from_legacy_data[k["folio_field"]] =
|
|
82
|
-
|
|
83
|
-
self.mapped_from_legacy_data[k["folio_field"]].
|
|
84
|
-
k["legacy_field"]
|
|
85
|
-
)
|
|
89
|
+
self.mapped_from_legacy_data[k["folio_field"]] = [k["legacy_field"]]
|
|
90
|
+
elif k["legacy_field"] not in self.mapped_from_legacy_data[k["folio_field"]]:
|
|
91
|
+
self.mapped_from_legacy_data[k["folio_field"]].append(k["legacy_field"])
|
|
86
92
|
|
|
87
93
|
logging.info(
|
|
88
94
|
"Mapped legacy fields:\n%s",
|
|
@@ -97,6 +103,15 @@ class MappingFileMapperBase(MapperBase):
|
|
|
97
103
|
def setup_field_map(self, ignore_legacy_identifier):
|
|
98
104
|
field_map = {} # Map of folio_fields and source fields as an array
|
|
99
105
|
for k in self.record_map["data"]:
|
|
106
|
+
if "folio_field" not in k:
|
|
107
|
+
raise TransformationProcessError(
|
|
108
|
+
"", "Missing folio_field key in mapping", json.dumps(k)
|
|
109
|
+
)
|
|
110
|
+
if "legacy_field" not in k:
|
|
111
|
+
raise TransformationProcessError(
|
|
112
|
+
"", "Missing legacy_field key in mapping", json.dumps(k)
|
|
113
|
+
)
|
|
114
|
+
|
|
100
115
|
if not field_map.get(k["folio_field"]):
|
|
101
116
|
field_map[k["folio_field"]] = [k["legacy_field"]]
|
|
102
117
|
else:
|
|
@@ -109,10 +124,10 @@ class MappingFileMapperBase(MapperBase):
|
|
|
109
124
|
)
|
|
110
125
|
if not ignore_legacy_identifier:
|
|
111
126
|
try:
|
|
112
|
-
self.
|
|
127
|
+
self.legacy_id_property_names = field_map["legacyIdentifier"]
|
|
113
128
|
logging.info(
|
|
114
129
|
"Legacy identifier will be mapped from %s",
|
|
115
|
-
self.
|
|
130
|
+
",".join(self.legacy_id_property_names),
|
|
116
131
|
)
|
|
117
132
|
except Exception as exception:
|
|
118
133
|
raise TransformationProcessError(
|
|
@@ -137,6 +152,7 @@ class MappingFileMapperBase(MapperBase):
|
|
|
137
152
|
k["legacy_field"] not in empty_vals
|
|
138
153
|
# and k["folio_field"] != "legacyIdentifier"
|
|
139
154
|
or k.get("value", "") not in empty_vals
|
|
155
|
+
or isinstance(k.get("value", ""), bool)
|
|
140
156
|
)
|
|
141
157
|
]
|
|
142
158
|
|
|
@@ -149,39 +165,66 @@ class MappingFileMapperBase(MapperBase):
|
|
|
149
165
|
]
|
|
150
166
|
|
|
151
167
|
def instantiate_record(
|
|
152
|
-
self,
|
|
168
|
+
self,
|
|
169
|
+
legacy_object: dict,
|
|
170
|
+
index_or_id,
|
|
171
|
+
object_type: FOLIONamespaces,
|
|
172
|
+
accept_duplicate_ids: bool = False,
|
|
153
173
|
):
|
|
154
|
-
|
|
174
|
+
folio_object = {}
|
|
155
175
|
if self.ignore_legacy_identifier:
|
|
156
|
-
|
|
176
|
+
folio_object.update(
|
|
177
|
+
{
|
|
178
|
+
"id": str(uuid.uuid4()),
|
|
179
|
+
"type": "object",
|
|
180
|
+
}
|
|
181
|
+
)
|
|
182
|
+
return folio_object, index_or_id
|
|
157
183
|
|
|
158
|
-
|
|
159
|
-
|
|
184
|
+
if not (
|
|
185
|
+
legacy_id := " ".join(
|
|
186
|
+
legacy_object.get(li, "") for li in self.legacy_id_property_names
|
|
187
|
+
).strip()
|
|
188
|
+
):
|
|
160
189
|
raise TransformationRecordFailedError(
|
|
161
190
|
index_or_id,
|
|
162
191
|
"Could not get a value from legacy object from the property "
|
|
163
|
-
f"{self.
|
|
192
|
+
f"{self.legacy_id_property_names}. Check mapping and data",
|
|
164
193
|
)
|
|
165
|
-
|
|
194
|
+
generated_id = str(
|
|
195
|
+
FolioUUID(
|
|
196
|
+
self.base_string_for_folio_uuid,
|
|
197
|
+
object_type,
|
|
198
|
+
legacy_id,
|
|
199
|
+
)
|
|
200
|
+
)
|
|
201
|
+
if generated_id in self.unique_record_ids and not accept_duplicate_ids:
|
|
202
|
+
raise TransformationRecordFailedError(
|
|
203
|
+
index_or_id,
|
|
204
|
+
"Legacy id already generated.",
|
|
205
|
+
f"UUID: {generated_id}, seed: {legacy_id}",
|
|
206
|
+
)
|
|
207
|
+
else:
|
|
208
|
+
self.unique_record_ids.add(generated_id)
|
|
209
|
+
folio_object.update(
|
|
166
210
|
{
|
|
167
|
-
"id":
|
|
168
|
-
FolioUUID(
|
|
169
|
-
self.folio_client.okapi_url,
|
|
170
|
-
object_type,
|
|
171
|
-
legacy_id,
|
|
172
|
-
)
|
|
173
|
-
),
|
|
174
|
-
"metadata": self.folio_client.get_metadata_construct(),
|
|
211
|
+
"id": generated_id,
|
|
175
212
|
"type": "object",
|
|
176
|
-
}
|
|
177
|
-
legacy_id,
|
|
213
|
+
}
|
|
178
214
|
)
|
|
215
|
+
if object_type == FOLIONamespaces.holdings and hasattr(self, "holdings_sources"):
|
|
216
|
+
folio_object['sourceId'] = self.holdings_sources.get("FOLIO")
|
|
217
|
+
elif object_type == FOLIONamespaces.holdings and not hasattr(self, "holdings_sources"):
|
|
218
|
+
raise TransformationProcessError(
|
|
219
|
+
index_or_id,
|
|
220
|
+
"Holdings source not set in the mapper",
|
|
221
|
+
None
|
|
222
|
+
)
|
|
223
|
+
return folio_object, legacy_id
|
|
179
224
|
|
|
180
|
-
def
|
|
181
|
-
self, legacy_item: dict, folio_prop_name: str, index_or_id
|
|
182
|
-
):
|
|
225
|
+
def get_statistical_code(self, legacy_item: dict, folio_prop_name: str, index_or_id):
|
|
183
226
|
if self.statistical_codes_mapping:
|
|
184
|
-
return self.
|
|
227
|
+
return self.get_mapped_ref_data_value(
|
|
185
228
|
self.statistical_codes_mapping,
|
|
186
229
|
legacy_item,
|
|
187
230
|
index_or_id,
|
|
@@ -189,33 +232,68 @@ class MappingFileMapperBase(MapperBase):
|
|
|
189
232
|
True,
|
|
190
233
|
)
|
|
191
234
|
self.migration_report.add(
|
|
192
|
-
|
|
193
|
-
"Mapping not setup",
|
|
235
|
+
"StatisticalCodeMapping",
|
|
236
|
+
i18n.t("Mapping not setup"),
|
|
194
237
|
)
|
|
195
238
|
return ""
|
|
196
239
|
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
240
|
+
def get_prop(self, legacy_object, folio_prop_name, index_or_id, schema_default_value):
|
|
241
|
+
legacy_item_keys = self.mapped_from_legacy_data.get(folio_prop_name, [])
|
|
242
|
+
map_entries = list(
|
|
243
|
+
MappingFileMapperBase.get_map_entries_by_folio_prop_name(
|
|
244
|
+
folio_prop_name, self.record_map["data"]
|
|
245
|
+
)
|
|
201
246
|
)
|
|
247
|
+
if not any(map_entries):
|
|
248
|
+
return ""
|
|
249
|
+
elif len(map_entries) > 1:
|
|
250
|
+
self.migration_report.add(
|
|
251
|
+
"Details", i18n.t("%{props} were concatenated", props=legacy_item_keys)
|
|
252
|
+
)
|
|
253
|
+
return " ".join(
|
|
254
|
+
MappingFileMapperBase.get_legacy_value(
|
|
255
|
+
legacy_object,
|
|
256
|
+
map_entry,
|
|
257
|
+
self.migration_report,
|
|
258
|
+
index_or_id,
|
|
259
|
+
self.library_configuration.multi_field_delimiter,
|
|
260
|
+
)
|
|
261
|
+
for map_entry in map_entries
|
|
262
|
+
).strip()
|
|
263
|
+
else:
|
|
264
|
+
legacy_value = MappingFileMapperBase.get_legacy_value(
|
|
265
|
+
legacy_object,
|
|
266
|
+
map_entries[0],
|
|
267
|
+
self.migration_report,
|
|
268
|
+
index_or_id,
|
|
269
|
+
self.library_configuration.multi_field_delimiter,
|
|
270
|
+
)
|
|
271
|
+
if legacy_value or isinstance(legacy_value, bool):
|
|
272
|
+
return legacy_value
|
|
273
|
+
else:
|
|
274
|
+
self.migration_report.add(
|
|
275
|
+
"FolioDefaultValuesAdded",
|
|
276
|
+
i18n.t(
|
|
277
|
+
"%{schema_value} added to %{prop_name}",
|
|
278
|
+
schema_value=schema_default_value,
|
|
279
|
+
prop_name=folio_prop_name,
|
|
280
|
+
),
|
|
281
|
+
)
|
|
282
|
+
return schema_default_value
|
|
202
283
|
|
|
203
284
|
def do_map(
|
|
204
|
-
self,
|
|
285
|
+
self,
|
|
286
|
+
legacy_object,
|
|
287
|
+
index_or_id: str,
|
|
288
|
+
object_type: FOLIONamespaces,
|
|
289
|
+
accept_duplicate_ids=False,
|
|
205
290
|
) -> tuple[dict, str]:
|
|
206
|
-
|
|
207
291
|
folio_object, legacy_id = self.instantiate_record(
|
|
208
|
-
legacy_object, index_or_id, object_type
|
|
292
|
+
legacy_object, index_or_id, object_type, accept_duplicate_ids
|
|
209
293
|
)
|
|
210
|
-
for
|
|
294
|
+
for property_name, property in self.schema["properties"].items():
|
|
211
295
|
try:
|
|
212
|
-
self.
|
|
213
|
-
property_name_level1,
|
|
214
|
-
property_level1,
|
|
215
|
-
folio_object,
|
|
216
|
-
legacy_id,
|
|
217
|
-
legacy_object,
|
|
218
|
-
)
|
|
296
|
+
self.map_property(property_name, property, folio_object, legacy_id, legacy_object)
|
|
219
297
|
except TransformationFieldMappingError as data_error:
|
|
220
298
|
self.handle_transformation_field_mapping_error(legacy_id, data_error)
|
|
221
299
|
clean_folio_object = self.validate_required_properties(
|
|
@@ -223,119 +301,202 @@ class MappingFileMapperBase(MapperBase):
|
|
|
223
301
|
)
|
|
224
302
|
return (clean_folio_object, legacy_id)
|
|
225
303
|
|
|
226
|
-
def
|
|
227
|
-
self,
|
|
228
|
-
property_name_level1,
|
|
229
|
-
property_level1,
|
|
230
|
-
folio_object,
|
|
231
|
-
index_or_id,
|
|
232
|
-
legacy_object,
|
|
304
|
+
def map_property(
|
|
305
|
+
self, schema_property_name: str, schema_property, folio_object, index_or_id, legacy_object
|
|
233
306
|
):
|
|
234
|
-
if
|
|
235
|
-
property_name_level1, property_level1
|
|
236
|
-
):
|
|
307
|
+
if skip_property(schema_property_name, schema_property):
|
|
237
308
|
pass
|
|
238
|
-
elif
|
|
239
|
-
if "properties" in
|
|
309
|
+
elif schema_property.get("type", "") == "object":
|
|
310
|
+
if "properties" in schema_property:
|
|
240
311
|
self.map_object_props(
|
|
241
312
|
legacy_object,
|
|
242
|
-
|
|
243
|
-
|
|
313
|
+
schema_property_name,
|
|
314
|
+
schema_property,
|
|
244
315
|
folio_object,
|
|
245
316
|
index_or_id,
|
|
317
|
+
1,
|
|
246
318
|
)
|
|
247
|
-
elif
|
|
319
|
+
elif schema_property.get("type", "") == "array":
|
|
248
320
|
try:
|
|
249
|
-
if
|
|
321
|
+
if schema_property["items"].get("type", "") == "object":
|
|
250
322
|
self.map_objects_array_props(
|
|
251
323
|
legacy_object,
|
|
252
|
-
|
|
253
|
-
|
|
324
|
+
schema_property_name,
|
|
325
|
+
schema_property["items"]["properties"],
|
|
254
326
|
folio_object,
|
|
255
327
|
index_or_id,
|
|
256
|
-
|
|
328
|
+
schema_property["items"].get("required", []),
|
|
257
329
|
)
|
|
258
|
-
|
|
330
|
+
self.validate_object_items_in_array(
|
|
331
|
+
folio_object, schema_property_name, schema_property
|
|
332
|
+
)
|
|
333
|
+
|
|
334
|
+
elif schema_property["items"].get("type", "") in ["string", "number", "integer"]:
|
|
259
335
|
self.map_string_array_props(
|
|
260
336
|
legacy_object,
|
|
261
|
-
|
|
337
|
+
schema_property_name,
|
|
262
338
|
folio_object,
|
|
263
339
|
index_or_id,
|
|
264
340
|
)
|
|
265
341
|
else:
|
|
266
|
-
logging.info("Edge case %s",
|
|
342
|
+
logging.info("Edge case %s", schema_property_name)
|
|
267
343
|
|
|
268
344
|
except KeyError as schema_anomaly:
|
|
269
345
|
logging.error(
|
|
270
346
|
"Cannot create property '%s'. Unsupported schema format: %s",
|
|
271
|
-
|
|
347
|
+
schema_property_name,
|
|
272
348
|
schema_anomaly,
|
|
273
349
|
)
|
|
274
350
|
|
|
275
351
|
else: # Basic property
|
|
276
352
|
self.map_basic_props(
|
|
277
|
-
legacy_object,
|
|
353
|
+
legacy_object, schema_property_name, folio_object, index_or_id, schema_property
|
|
278
354
|
)
|
|
279
355
|
|
|
356
|
+
@staticmethod
|
|
357
|
+
def get_legacy_value(
|
|
358
|
+
legacy_object: dict,
|
|
359
|
+
mapping_file_entry: dict,
|
|
360
|
+
migration_report: MigrationReport,
|
|
361
|
+
index_or_id: str = "",
|
|
362
|
+
multi_field_delimiter="",
|
|
363
|
+
):
|
|
364
|
+
# Mapping from value fields has preceedence and does not get involved in post processing
|
|
365
|
+
if mapping_file_entry.get("value", "") or isinstance(
|
|
366
|
+
mapping_file_entry.get("value", ""), bool
|
|
367
|
+
):
|
|
368
|
+
value_mapped_value = mapping_file_entry.get("value")
|
|
369
|
+
migration_report.add(
|
|
370
|
+
"DefaultValuesAdded",
|
|
371
|
+
i18n.t(
|
|
372
|
+
"%{value} added to %{entry}",
|
|
373
|
+
value=value_mapped_value,
|
|
374
|
+
entry=mapping_file_entry.get("folio_field", ""),
|
|
375
|
+
),
|
|
376
|
+
)
|
|
377
|
+
return value_mapped_value
|
|
378
|
+
|
|
379
|
+
# Value mapped from the Legacy field(s)
|
|
380
|
+
value = legacy_object.get(mapping_file_entry["legacy_field"], "")
|
|
381
|
+
|
|
382
|
+
if value and mapping_file_entry.get("rules", {}).get("replaceValues", {}):
|
|
383
|
+
if multi_field_delimiter and multi_field_delimiter in value:
|
|
384
|
+
replaced_split_values = [
|
|
385
|
+
mapping_file_entry["rules"]["replaceValues"].get(sv, "")
|
|
386
|
+
for sv in value.split(multi_field_delimiter)
|
|
387
|
+
]
|
|
388
|
+
replaced_val = multi_field_delimiter.join(replaced_split_values)
|
|
389
|
+
else:
|
|
390
|
+
replaced_val = mapping_file_entry["rules"]["replaceValues"].get(value, "")
|
|
391
|
+
|
|
392
|
+
if replaced_val or isinstance(replaced_val, bool):
|
|
393
|
+
migration_report.add(
|
|
394
|
+
"FieldMappingDetails",
|
|
395
|
+
(
|
|
396
|
+
f"Replaced {value} in {mapping_file_entry['legacy_field']} "
|
|
397
|
+
f"with {replaced_val}"
|
|
398
|
+
),
|
|
399
|
+
)
|
|
400
|
+
value = replaced_val
|
|
401
|
+
if value and mapping_file_entry.get("rules", {}).get("regexGetFirstMatchOrEmpty", ""):
|
|
402
|
+
my_pattern = (
|
|
403
|
+
f'{mapping_file_entry.get("rules", {}).get("regexGetFirstMatchOrEmpty")}|$'
|
|
404
|
+
)
|
|
405
|
+
value = re.findall(my_pattern, value)[0]
|
|
406
|
+
if not value and mapping_file_entry.get("fallback_legacy_field", ""):
|
|
407
|
+
migration_report.add(
|
|
408
|
+
"FieldMappingDetails",
|
|
409
|
+
(
|
|
410
|
+
f"Added fallback value from {mapping_file_entry['fallback_legacy_field']} "
|
|
411
|
+
f"instead of {mapping_file_entry['legacy_field']}"
|
|
412
|
+
),
|
|
413
|
+
)
|
|
414
|
+
value = legacy_object.get(
|
|
415
|
+
mapping_file_entry.get("fallback_legacy_field", ""), ""
|
|
416
|
+
).strip()
|
|
417
|
+
if not value and mapping_file_entry.get("fallback_value", ""):
|
|
418
|
+
migration_report.add(
|
|
419
|
+
"FieldMappingDetails",
|
|
420
|
+
(
|
|
421
|
+
f"Added fallback value {mapping_file_entry['fallback_value']} "
|
|
422
|
+
f"instead of empty {mapping_file_entry['legacy_field']}"
|
|
423
|
+
),
|
|
424
|
+
)
|
|
425
|
+
value = mapping_file_entry.get("fallback_value", "")
|
|
426
|
+
return value
|
|
427
|
+
|
|
280
428
|
@staticmethod
|
|
281
429
|
def get_legacy_vals(legacy_item, legacy_item_keys):
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
if
|
|
286
|
-
|
|
430
|
+
result_list = []
|
|
431
|
+
for legacy_item_key in legacy_item_keys:
|
|
432
|
+
val = legacy_item.get(legacy_item_key, "")
|
|
433
|
+
if val not in ["", None]:
|
|
434
|
+
result_list.append(val)
|
|
435
|
+
return result_list
|
|
287
436
|
|
|
288
437
|
def map_object_props(
|
|
289
438
|
self,
|
|
290
439
|
legacy_object,
|
|
291
|
-
|
|
292
|
-
|
|
440
|
+
schema_property_name: str,
|
|
441
|
+
schema_property,
|
|
293
442
|
folio_object,
|
|
294
443
|
index_or_id,
|
|
444
|
+
level: int,
|
|
295
445
|
):
|
|
296
|
-
temp_object = {}
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
"properties"
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
elif
|
|
309
|
-
"""
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
446
|
+
temp_object: dict = {}
|
|
447
|
+
for child_property_name, child_property in schema_property["properties"].items():
|
|
448
|
+
sub_prop_path = f"{schema_property_name}.{child_property_name}"
|
|
449
|
+
if "properties" in child_property:
|
|
450
|
+
self.map_object_props(
|
|
451
|
+
legacy_object,
|
|
452
|
+
sub_prop_path,
|
|
453
|
+
child_property,
|
|
454
|
+
folio_object,
|
|
455
|
+
index_or_id,
|
|
456
|
+
level + 1,
|
|
457
|
+
)
|
|
458
|
+
elif (
|
|
459
|
+
child_property.get("type", "") == "array"
|
|
460
|
+
and child_property.get("items", {}).get("type", "") == "object"
|
|
461
|
+
and child_property.get("items", {}).get("properties", "")
|
|
462
|
+
):
|
|
463
|
+
self.map_objects_array_props(
|
|
464
|
+
legacy_object,
|
|
465
|
+
f"{schema_property_name}.{child_property_name}",
|
|
466
|
+
child_property["items"]["properties"],
|
|
467
|
+
folio_object,
|
|
468
|
+
index_or_id,
|
|
469
|
+
[],
|
|
470
|
+
)
|
|
471
|
+
self.validate_object_items_in_array(
|
|
472
|
+
legacy_object,
|
|
473
|
+
child_property_name,
|
|
474
|
+
child_property["items"]["properties"],
|
|
475
|
+
)
|
|
476
|
+
|
|
477
|
+
elif child_property.get("type", "") == "array" and child_property.get("items", {}).get(
|
|
478
|
+
"type", ""
|
|
479
|
+
) in ["string", "number", "integer"]:
|
|
480
|
+
self.map_string_array_props(
|
|
481
|
+
legacy_object,
|
|
482
|
+
f"{schema_property_name}.{child_property_name}",
|
|
483
|
+
folio_object,
|
|
484
|
+
index_or_id,
|
|
485
|
+
)
|
|
486
|
+
elif child_property.get("type", "") in ["string", "number", "integer"]:
|
|
487
|
+
path = sub_prop_path.split("].")[-1]
|
|
488
|
+
if p := self.get_prop(
|
|
489
|
+
legacy_object, sub_prop_path, index_or_id, child_property.get("default", "")
|
|
490
|
+
):
|
|
491
|
+
set_deep(folio_object, f"{path}", p)
|
|
492
|
+
# temp_object[child_property_name] = p
|
|
493
|
+
elif p := self.get_prop(
|
|
494
|
+
legacy_object, sub_prop_path, index_or_id, child_property.get("default", "")
|
|
495
|
+
):
|
|
496
|
+
set_deep(folio_object, sub_prop_path, p)
|
|
337
497
|
if temp_object:
|
|
338
|
-
folio_object
|
|
498
|
+
set_deep(folio_object, schema_property_name, temp_object)
|
|
499
|
+
# folio_object[schema_property_name] = temp_object
|
|
339
500
|
|
|
340
501
|
def map_objects_array_props(
|
|
341
502
|
self,
|
|
@@ -347,75 +508,233 @@ class MappingFileMapperBase(MapperBase):
|
|
|
347
508
|
required: list[str],
|
|
348
509
|
):
|
|
349
510
|
resulting_array = []
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
511
|
+
i = 0
|
|
512
|
+
while True:
|
|
513
|
+
keys_to_map = {
|
|
514
|
+
k.rsplit(".", 1)[0] for k in self.folio_keys if k.startswith(f"{prop_name}[{i}")
|
|
515
|
+
}
|
|
516
|
+
if not any(keys_to_map):
|
|
517
|
+
break
|
|
518
|
+
for _ in keys_to_map:
|
|
519
|
+
temp_object = {}
|
|
520
|
+
multi_field_props: List[str] = []
|
|
521
|
+
for sub_prop_name, sub_prop in (
|
|
522
|
+
(k, p)
|
|
523
|
+
for k, p in sub_properties.items()
|
|
524
|
+
if not p.get("folio:isVirtual", False)
|
|
525
|
+
):
|
|
526
|
+
prop_path = f"{prop_name}[{i}].{sub_prop_name}"
|
|
527
|
+
if prop_path in self.folio_keys:
|
|
528
|
+
# We have reached the end of the prop path?
|
|
529
|
+
res = self.get_prop(
|
|
530
|
+
legacy_object,
|
|
531
|
+
prop_path,
|
|
532
|
+
index_or_id,
|
|
533
|
+
sub_properties[sub_prop_name].get("default", ""),
|
|
534
|
+
)
|
|
535
|
+
self.report_legacy_mapping(
|
|
536
|
+
self.legacy_basic_property(prop_path), True, True
|
|
537
|
+
)
|
|
538
|
+
|
|
539
|
+
if (
|
|
540
|
+
isinstance(res, str)
|
|
541
|
+
and self.library_configuration.multi_field_delimiter in res
|
|
542
|
+
):
|
|
543
|
+
for delim_value in res.split(
|
|
544
|
+
self.library_configuration.multi_field_delimiter
|
|
545
|
+
):
|
|
546
|
+
if delim_value not in empty_vals:
|
|
547
|
+
self.validate_enums(
|
|
548
|
+
delim_value,
|
|
549
|
+
sub_prop,
|
|
550
|
+
sub_prop_name,
|
|
551
|
+
index_or_id,
|
|
552
|
+
required,
|
|
553
|
+
)
|
|
554
|
+
multi_field_props.append(sub_prop_name)
|
|
555
|
+
else:
|
|
556
|
+
self.validate_enums(res, sub_prop, sub_prop_name, index_or_id, required)
|
|
557
|
+
|
|
558
|
+
if res or isinstance(res, bool):
|
|
559
|
+
temp_object[sub_prop_name] = res
|
|
560
|
+
|
|
561
|
+
elif (
|
|
562
|
+
sub_prop_name in sub_properties
|
|
563
|
+
and sub_properties[sub_prop_name].get("type", "") == "array"
|
|
564
|
+
and sub_properties[sub_prop_name]["items"].get("type", "") == "object"
|
|
565
|
+
):
|
|
566
|
+
self.map_objects_array_props(
|
|
567
|
+
legacy_object,
|
|
568
|
+
prop_path,
|
|
569
|
+
sub_properties[sub_prop_name]["items"]["properties"],
|
|
570
|
+
folio_object,
|
|
571
|
+
index_or_id,
|
|
572
|
+
[],
|
|
573
|
+
)
|
|
574
|
+
elif (
|
|
575
|
+
sub_prop_name in sub_properties
|
|
576
|
+
and sub_properties[sub_prop_name].get("type", "") == "array"
|
|
577
|
+
and sub_properties[sub_prop_name]["items"].get("type", "")
|
|
578
|
+
in ["string", "number", "integer"]
|
|
579
|
+
):
|
|
580
|
+
# We have not reached the end of the prop path
|
|
581
|
+
for array_path in [p for p in self.folio_keys if p.startswith(prop_path)]:
|
|
582
|
+
res = self.get_prop(
|
|
583
|
+
legacy_object,
|
|
584
|
+
array_path,
|
|
585
|
+
index_or_id,
|
|
586
|
+
sub_properties[sub_prop_name].get("default", ""),
|
|
587
|
+
)
|
|
588
|
+
self.validate_enums(
|
|
589
|
+
res, sub_prop, sub_prop_name, index_or_id, required
|
|
590
|
+
)
|
|
591
|
+
if res or isinstance(res, bool):
|
|
592
|
+
self.add_values_to_string_array(
|
|
593
|
+
sub_prop_name,
|
|
594
|
+
temp_object,
|
|
595
|
+
res,
|
|
596
|
+
self.library_configuration.multi_field_delimiter,
|
|
597
|
+
)
|
|
598
|
+
|
|
599
|
+
elif sub_prop.get("type", "") == "object" and "properties" in sub_prop:
|
|
600
|
+
self.map_object_props(
|
|
601
|
+
legacy_object, prop_path, sub_prop, temp_object, index_or_id, 0
|
|
602
|
+
)
|
|
603
|
+
i = i + 1
|
|
604
|
+
|
|
605
|
+
if any(multi_field_props):
|
|
606
|
+
resulting_array.extend(
|
|
607
|
+
self.split_obj_by_delim(
|
|
608
|
+
self.library_configuration.multi_field_delimiter,
|
|
609
|
+
temp_object,
|
|
610
|
+
multi_field_props,
|
|
365
611
|
)
|
|
366
|
-
temp_object[prop] = res
|
|
367
|
-
|
|
368
|
-
if temp_object != {} and all(
|
|
369
|
-
(
|
|
370
|
-
v or (isinstance(v, bool))
|
|
371
|
-
for k, v in temp_object.items()
|
|
372
|
-
if k in required
|
|
373
612
|
)
|
|
374
|
-
|
|
613
|
+
else:
|
|
375
614
|
resulting_array.append(temp_object)
|
|
615
|
+
|
|
376
616
|
if any(resulting_array):
|
|
377
|
-
folio_object
|
|
617
|
+
set_deep2(folio_object, prop_name, resulting_array)
|
|
618
|
+
|
|
619
|
+
@staticmethod
|
|
620
|
+
def split_obj_by_delim(delimiter: str, folio_obj: dict, delimited_props: List[str]):
|
|
621
|
+
non_split_props = [(k, v) for k, v in folio_obj.items() if k not in delimited_props]
|
|
622
|
+
delimited_props = map(lambda x: [x, *folio_obj[x].split(delimiter)], delimited_props)
|
|
623
|
+
zipped = list(zip(*delimited_props))
|
|
624
|
+
res = []
|
|
625
|
+
for (prop_name_idx, prop_name), (value_idx, ra) in itertools.product(
|
|
626
|
+
enumerate(zipped[0]), enumerate(zipped[1:])
|
|
627
|
+
):
|
|
628
|
+
if prop_name_idx == 0:
|
|
629
|
+
res.append({prop_name: ra[prop_name_idx]})
|
|
630
|
+
else:
|
|
631
|
+
res[value_idx][prop_name] = ra[prop_name_idx]
|
|
632
|
+
for r in res:
|
|
633
|
+
r.update(non_split_props)
|
|
634
|
+
return res
|
|
378
635
|
|
|
379
636
|
def map_string_array_props(self, legacy_object, prop, folio_object, index_or_id):
    """Map every legacy field bound under ``prop`` into a FOLIO string array.

    For each mapped key whose path starts with ``prop``, the legacy value is
    appended to the array (splitting on the multi-field delimiter), and the
    legacy mapping is reported even when the value turned out to be empty.
    """
    matching_keys = [key for key in self.folio_keys if key.startswith(prop)]
    for folio_key in matching_keys:
        if folio_key not in self.folio_keys or not self.has_property(legacy_object, folio_key):
            continue
        if value := self.get_prop(legacy_object, folio_key, index_or_id, ""):
            self.add_values_to_string_array(
                prop,
                folio_object,
                value,
                self.library_configuration.multi_field_delimiter,
            )
        self.report_legacy_mapping(self.legacy_basic_property(folio_key), True, True)
|
|
395
648
|
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
649
|
+
@staticmethod
def add_values_to_string_array(prop, folio_object, mapped_prop_value, delimiter: str):
    """Add one or more mapped values to the string array at ``prop``.

    A string containing ``delimiter`` contributes several values. When the
    array already exists (truthy at ``prop``), new values are appended to it
    in place, unless the incoming value is already present; otherwise the
    array is created via set_deep.

    NOTE: set_deep never overwrites an existing entry, so mutating the
    existing list in place is what actually updates folio_object. The
    previous implementation passed the return values of list.extend /
    list.append (always None) to set_deep — harmless only because of that
    no-overwrite behavior. This version makes the in-place mutation explicit.
    """
    is_delimited = isinstance(mapped_prop_value, str) and delimiter in mapped_prop_value
    new_values = mapped_prop_value.split(delimiter) if is_delimited else [mapped_prop_value]
    existing = get_deep(folio_object, prop) if in_deep(folio_object, prop) else None
    if existing:
        # Deduplicate on the raw incoming value, as before
        if mapped_prop_value not in existing:
            existing.extend(new_values)
    else:
        # No (truthy) values in the array previously
        set_deep(folio_object, prop, new_values)
|
|
667
|
+
|
|
668
|
+
def map_basic_props(
    self, legacy_object, property_name, folio_object, index_or_id, schema_property
):
    """Map a scalar legacy value onto folio_object[property_name].

    Skips silently when there is no mapping match in the source record or
    when the mapped value is empty (booleans are always kept); enum values
    are validated before assignment.
    """
    if not self.has_basic_property(legacy_object, property_name):  # is there a match in the csv?
        return
    value = self.get_prop(
        legacy_object, property_name, index_or_id, schema_property.get("default", "")
    )
    if not value and not isinstance(value, bool):
        return
    self.validate_enums(
        value,
        schema_property,
        property_name,
        index_or_id,
        self.schema.get("required", []),
    )
    folio_object[property_name] = value
    self.report_legacy_mapping(self.legacy_basic_property(property_name), True, True)
|
|
685
|
+
|
|
686
|
+
@staticmethod
|
|
687
|
+
def _get_delimited_file_reader(source_file, file_name: Path):
|
|
688
|
+
"""
|
|
689
|
+
First, let's count:
|
|
690
|
+
* The total number of rows in the source file
|
|
691
|
+
* The total number of empty rows in the source file
|
|
692
|
+
|
|
693
|
+
Then, we'll return those counts and a csv.DictReader
|
|
694
|
+
|
|
695
|
+
Args:
|
|
696
|
+
source_file (_type_): _description_
|
|
697
|
+
file_name (Path): _description_
|
|
698
|
+
|
|
699
|
+
Returns:
|
|
700
|
+
(int, int, DictReader): total rows, empty rows, dict reader
|
|
701
|
+
"""
|
|
702
|
+
empty_rows = 0
|
|
703
|
+
total_rows = -1 # Do not count header row
|
|
704
|
+
if str(file_name).endswith("tsv"):
|
|
705
|
+
delimiter = "\t"
|
|
706
|
+
else:
|
|
707
|
+
delimiter = ","
|
|
708
|
+
for line in source_file:
|
|
709
|
+
if not "".join(line.strip().split(delimiter)): # check for empty rows
|
|
710
|
+
empty_rows += 1
|
|
711
|
+
total_rows += 1
|
|
712
|
+
source_file.seek(0) # Set file position back to start
|
|
403
713
|
if str(file_name).endswith("tsv"):
|
|
404
|
-
|
|
714
|
+
dict_reader = csv.DictReader(source_file, dialect="tsv")
|
|
405
715
|
else:
|
|
406
|
-
|
|
407
|
-
|
|
716
|
+
dict_reader = csv.DictReader(source_file)
|
|
717
|
+
return total_rows, empty_rows, dict_reader
|
|
718
|
+
|
|
719
|
+
def get_objects(self, source_file, file_name: Path):
    """Yield one dict per row of a delimited source file.

    Logs row statistics and records them in the migration report before
    streaming the rows. Any exception raised while reading is logged with
    the offending row number and re-raised.
    """
    total_rows, empty_rows, reader = self._get_delimited_file_reader(source_file, file_name)
    logging.info("Source data file contains %d rows", total_rows)
    logging.info("Source data file contains %d empty rows", empty_rows)
    self.migration_report.set(
        "GeneralStatistics", "Number of rows in {}".format(file_name.name), total_rows
    )
    self.migration_report.set(
        "GeneralStatistics",
        "Number of empty rows in {}".format(file_name.name),
        empty_rows,
    )
    try:
        yield from reader
    except Exception as exception:
        logging.error("%s at row %s", exception, reader.line_num)
        # Bare raise keeps the original traceback. The previous
        # `raise exception from exception` chained the exception to itself,
        # which is meaningless and clutters the reported cause.
        raise
|
|
414
736
|
|
|
415
737
|
def has_property(self, legacy_object, folio_prop_name: str):
|
|
416
|
-
if not self.use_map:
|
|
417
|
-
return folio_prop_name in legacy_object
|
|
418
|
-
|
|
419
738
|
legacy_keys = self.field_map.get(folio_prop_name, [])
|
|
420
739
|
return (
|
|
421
740
|
any(legacy_keys)
|
|
@@ -424,29 +743,33 @@ class MappingFileMapperBase(MapperBase):
|
|
|
424
743
|
)
|
|
425
744
|
|
|
426
745
|
def has_basic_property(self, legacy_object, folio_prop_name):
|
|
427
|
-
if not self.use_map:
|
|
428
|
-
return folio_prop_name in legacy_object
|
|
429
|
-
|
|
430
746
|
if folio_prop_name not in self.folio_keys:
|
|
431
747
|
return False
|
|
432
|
-
|
|
748
|
+
if folio_prop_name in self.mapped_from_values:
|
|
749
|
+
return True
|
|
750
|
+
legacy_mappings = self.legacy_record_mappings.get(folio_prop_name, [])
|
|
751
|
+
|
|
752
|
+
return any(legacy_mappings) and any(
|
|
753
|
+
legacy_mapping not in empty_vals for legacy_mapping in legacy_mappings
|
|
754
|
+
)
|
|
755
|
+
|
|
756
|
+
@staticmethod
|
|
757
|
+
def get_map_entries_by_folio_prop_name(folio_prop_name, data):
|
|
433
758
|
return (
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
759
|
+
k
|
|
760
|
+
for k in data
|
|
761
|
+
if k["folio_field"] == folio_prop_name
|
|
762
|
+
and any(
|
|
763
|
+
is_set_or_bool_or_numeric(k.get(key, ""))
|
|
764
|
+
for key in ("value", "legacy_field", "fallback_legacy_field", "fallback_value")
|
|
765
|
+
)
|
|
437
766
|
)
|
|
438
767
|
|
|
439
768
|
def legacy_basic_property(self, folio_prop):
    """Return the first legacy field mapped to folio_prop, or "" when none is."""
    if folio_prop not in self.folio_keys:
        return ""
    matches = (
        entry["legacy_field"]
        for entry in self.record_map["data"]
        if entry["folio_field"] == folio_prop
    )
    return next(matches, "")
|
|
452
775
|
|
|
@@ -460,13 +783,11 @@ class MappingFileMapperBase(MapperBase):
|
|
|
460
783
|
if any(missing_keys_in_record):
|
|
461
784
|
raise TransformationProcessError(
|
|
462
785
|
"",
|
|
463
|
-
(
|
|
464
|
-
"There are mapped legacy fields that are not in the legacy record"
|
|
465
|
-
),
|
|
786
|
+
("There are mapped legacy fields that are not in the legacy record"),
|
|
466
787
|
missing_keys_in_record,
|
|
467
788
|
)
|
|
468
789
|
else:
|
|
469
|
-
logging.info("All
|
|
790
|
+
logging.info("All mapped legacy fields are in the legacy object")
|
|
470
791
|
|
|
471
792
|
def get_ref_data_tuple_by_code(self, ref_data, ref_name, code):
    """Resolve a reference-data (id, name) tuple by its "code" key.

    Thin wrapper around get_ref_data_tuple with key_type fixed to "code".
    """
    return self.get_ref_data_tuple(ref_data, ref_name, code, "code")
|
|
@@ -476,22 +797,180 @@ class MappingFileMapperBase(MapperBase):
|
|
|
476
797
|
|
|
477
798
|
def get_ref_data_tuple(self, ref_data, ref_name, key_value, key_type):
    """Look up the (id, name) tuple matching key_value in ref_data.

    Lookups are cached in self.ref_data_dicts per (ref_name, key_type);
    key_value is matched case-insensitively and stripped. Returns an empty
    tuple when there is no match.
    """
    cache_key = f"{ref_name}{key_type}"
    needle = key_value.lower().strip()
    if hit := self.ref_data_dicts.get(cache_key, {}).get(needle, ()):
        return hit
    # Cache miss: (re)build the lookup table for this ref data set
    lookup = {entry[key_type].lower(): (entry["id"], entry["name"]) for entry in ref_data}
    self.ref_data_dicts[cache_key] = lookup
    return lookup.get(needle, ())
|
|
486
805
|
|
|
806
|
+
def validate_enums(
    self,
    mapped_value,
    mapped_schema_property,
    mapped_schema_property_name,
    index_or_id,
    required,
):
    """Validate mapped_value against enum constraints of a schema property.

    Raises:
        TransformationRecordFailedError: when a non-empty value is not among
            the allowed enum (or items enum) values, or when a required enum
            property is empty.
    """
    has_enum = "enum" in mapped_schema_property
    items_enum = mapped_schema_property.get("items", {}).get("enum")
    if (
        # Scalar enum: a non-empty value must be one of the allowed values
        (has_enum and mapped_value and mapped_value not in mapped_schema_property["enum"])
        # A required enum property must not be empty
        or (has_enum and mapped_schema_property_name in required and not mapped_value)
        # Array of enums: a non-empty value must be one of the item enum values
        or (items_enum and mapped_value and mapped_value not in items_enum)
    ):
        # Fall back to the items-level enum for the message. The previous code
        # read mapped_schema_property["enum"] unconditionally, raising KeyError
        # (instead of the intended error) when only "items.enum" was violated.
        allowed_values = mapped_schema_property.get("enum", items_enum)
        raise TransformationRecordFailedError(
            index_or_id,
            f"Allowed values for {mapped_schema_property_name} "
            f"are {allowed_values} "
            f"Forbidden enum value found: ",
            mapped_value,
        )
|
|
838
|
+
|
|
839
|
+
def is_uuid(self, value):
    """Return True when value parses as a UUID, and False when it does not.

    Args:
        value: any value; it is stringified before parsing

    Returns:
        bool: True/False
    """
    try:
        uuid.UUID(str(value))
        return True
    except ValueError:
        return False
|
|
853
|
+
|
|
854
|
+
def validate_object_items_in_array(self, folio_object, schema_property_name, schema_property):
    """Drop array items that lack a required field; remove the array if none survive.

    Each removed item is counted in the migration report under
    "IncompleteSubPropertyRemoved".
    """
    kept = []
    for entry in folio_object.get(schema_property_name, []):
        # schema_property["items"] is read per item on purpose: when the array
        # is absent or empty, the schema is never touched (as before).
        complete = all(
            entry.get(field) or isinstance(entry.get(field), bool)
            for field in schema_property["items"].get("required", [])
        )
        if complete:
            kept.append(entry)
        else:
            self.migration_report.add(
                "IncompleteSubPropertyRemoved",
                f"{schema_property_name}",
            )
    if kept:
        folio_object[schema_property_name] = kept
    else:
        folio_object.pop(schema_property_name, [])
|
|
871
|
+
|
|
872
|
+
|
|
873
|
+
def skip_property(property_name, property):
    """Return True when a schema property should not be mapped.

    Skipped: server-managed fields (metadata, id, lastCheckIn), derived
    "effective*" fields, virtual properties, and deprecated properties.
    """
    if property_name in ("metadata", "id", "lastCheckIn"):
        return True
    if property_name.startswith("effective"):
        return True
    if property.get("folio:isVirtual", False):
        return True
    return property.get("description", "") == "Deprecated"
|
|
494
880
|
|
|
495
881
|
|
|
496
882
|
def weird_division(number, divisor):
    """Divide number by divisor, returning 0 instead of raising on a falsy divisor."""
    if not divisor:
        return 0
    return number / divisor
|
|
884
|
+
|
|
885
|
+
|
|
886
|
+
def set_deep(dictionary, key, value):
    """Set a nested property addressed by dot notation ("a.b.c").

    Intermediate dicts are created as needed. NOTE: the leaf is written via
    setdefault, so an existing value at the full address is deliberately left
    untouched (callers rely on this no-overwrite behavior).

    Args:
        dictionary: a python dictionary ({"a":{"b":{"c":"value"}}})
        key: a dot notated address string (a.b.c)
        value: the value to set
    """
    *parents, leaf = key.split(".")
    node = dictionary
    for part in parents:
        node = node.setdefault(part, {})
    node.setdefault(leaf, value)
|
|
901
|
+
|
|
902
|
+
|
|
903
|
+
def set_deep2(dictionary, key, value):
    """sets a nested property in a dict given a dot notated address

    Unlike set_deep, this variant overwrites existing leaves and handles an
    "[n]" index in the FIRST path segment (e.g. "notes[1].staffOnly"): that
    segment becomes a list of dicts and the leaf is written into the dict at
    the given index.

    Args:
        dictionary (_type_): a python dictionary ({"a":{"b":{"c":"value"}}})
        key (_type_): A string of dot notated address (a.b.c)
        value (_type_): the value to set

    """
    dd = dictionary
    keys = key.split(".")
    latest = keys.pop()
    name = ""  # set only when the first segment carries an "[n]" index
    number = 0  # the parsed array index from that segment
    for k in keys:
        # Only the first path segment may carry an array index
        if k == keys[0] and k.endswith("]"):
            m = re.search(r"\[([\d]+)\]", k)
            number = int(m[1])
            name = k.split("[")[0]
            dd = dd.setdefault(name, [{}])
        else:
            dd = dd.setdefault(k, {})
    if name and keys and keys[0].startswith(name):
        # dd is the list created above; at most one slot is appended here, so
        # this assumes indexes arrive in order (0, then 1, ...) — TODO confirm
        if len(dd) <= number:
            dd.append({})
        dd[number][latest] = value
    elif latest in dd:
        # Leaf already exists: merge a list `value` element-wise into the
        # existing list, updating dicts in place and inserting otherwise
        for i in range(len(value)):
            if len(dd[latest]) > i and dd[latest][i] and isinstance(dd[latest][i], dict):
                dd[latest][i].update(value[i])
            else:
                dd[latest].insert(i, value[i])

    else:
        dd[latest] = value
|
|
938
|
+
|
|
939
|
+
|
|
940
|
+
def get_deep(dictionary, keys, default=None):
    """Return a nested value addressed by dot notation ("a.b.c").

    Args:
        dictionary: a python dictionary ({"a":{"b":{"c":"value"}}})
        keys: a dot notated address string (a.b.c)
        default: value returned when a path segment is missing

    Returns:
        the addressed value, or default when the path cannot be followed
    """
    current = dictionary
    for part in keys.split("."):
        if not isinstance(current, dict):
            return default
        current = current.get(part, default)
    return current
|
|
956
|
+
|
|
957
|
+
|
|
958
|
+
def in_deep(dictionary, keys):
    """Check whether a dot-notated address resolves to a truthy value.

    Args:
        dictionary: a python dictionary ({"a":{"b":{"c":"value"}}})
        keys: a dot notated address string (a.b.c)

    Returns:
        the addressed value when present, False otherwise (note: a present
        but falsy value is indistinguishable from an absent one)
    """
    current = dictionary
    for part in keys.split("."):
        if not isinstance(current, dict):
            return False
        current = current.get(part, False)
    return current
|
|
973
|
+
|
|
974
|
+
|
|
975
|
+
def is_set_or_bool_or_numeric(any_value):
    """True for numeric/boolean values and for strings that are not "empty" sentinels."""
    if isinstance(any_value, str):
        return any_value.strip() not in empty_vals
    return isinstance(any_value, (int, float, complex))
|