folio-migration-tools 1.2.1__py3-none-any.whl → 1.9.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- folio_migration_tools/__init__.py +11 -0
- folio_migration_tools/__main__.py +169 -85
- folio_migration_tools/circulation_helper.py +96 -59
- folio_migration_tools/config_file_load.py +66 -0
- folio_migration_tools/custom_dict.py +6 -4
- folio_migration_tools/custom_exceptions.py +21 -19
- folio_migration_tools/extradata_writer.py +46 -0
- folio_migration_tools/folder_structure.py +63 -66
- folio_migration_tools/helper.py +29 -21
- folio_migration_tools/holdings_helper.py +57 -34
- folio_migration_tools/i18n_config.py +9 -0
- folio_migration_tools/library_configuration.py +173 -13
- folio_migration_tools/mapper_base.py +317 -106
- folio_migration_tools/mapping_file_transformation/courses_mapper.py +203 -0
- folio_migration_tools/mapping_file_transformation/holdings_mapper.py +83 -69
- folio_migration_tools/mapping_file_transformation/item_mapper.py +98 -94
- folio_migration_tools/mapping_file_transformation/manual_fee_fines_mapper.py +352 -0
- folio_migration_tools/mapping_file_transformation/mapping_file_mapper_base.py +702 -223
- folio_migration_tools/mapping_file_transformation/notes_mapper.py +90 -0
- folio_migration_tools/mapping_file_transformation/order_mapper.py +492 -0
- folio_migration_tools/mapping_file_transformation/organization_mapper.py +389 -0
- folio_migration_tools/mapping_file_transformation/ref_data_mapping.py +38 -27
- folio_migration_tools/mapping_file_transformation/user_mapper.py +149 -361
- folio_migration_tools/marc_rules_transformation/conditions.py +650 -246
- folio_migration_tools/marc_rules_transformation/holdings_statementsparser.py +292 -130
- folio_migration_tools/marc_rules_transformation/hrid_handler.py +244 -0
- folio_migration_tools/marc_rules_transformation/loc_language_codes.xml +20846 -0
- folio_migration_tools/marc_rules_transformation/marc_file_processor.py +300 -0
- folio_migration_tools/marc_rules_transformation/marc_reader_wrapper.py +136 -0
- folio_migration_tools/marc_rules_transformation/rules_mapper_authorities.py +241 -0
- folio_migration_tools/marc_rules_transformation/rules_mapper_base.py +681 -201
- folio_migration_tools/marc_rules_transformation/rules_mapper_bibs.py +395 -429
- folio_migration_tools/marc_rules_transformation/rules_mapper_holdings.py +531 -100
- folio_migration_tools/migration_report.py +85 -38
- folio_migration_tools/migration_tasks/__init__.py +1 -3
- folio_migration_tools/migration_tasks/authority_transformer.py +119 -0
- folio_migration_tools/migration_tasks/batch_poster.py +911 -198
- folio_migration_tools/migration_tasks/bibs_transformer.py +121 -116
- folio_migration_tools/migration_tasks/courses_migrator.py +192 -0
- folio_migration_tools/migration_tasks/holdings_csv_transformer.py +252 -247
- folio_migration_tools/migration_tasks/holdings_marc_transformer.py +321 -115
- folio_migration_tools/migration_tasks/items_transformer.py +264 -84
- folio_migration_tools/migration_tasks/loans_migrator.py +506 -195
- folio_migration_tools/migration_tasks/manual_fee_fines_transformer.py +187 -0
- folio_migration_tools/migration_tasks/migration_task_base.py +364 -74
- folio_migration_tools/migration_tasks/orders_transformer.py +373 -0
- folio_migration_tools/migration_tasks/organization_transformer.py +451 -0
- folio_migration_tools/migration_tasks/requests_migrator.py +130 -62
- folio_migration_tools/migration_tasks/reserves_migrator.py +253 -0
- folio_migration_tools/migration_tasks/user_transformer.py +180 -139
- folio_migration_tools/task_configuration.py +46 -0
- folio_migration_tools/test_infrastructure/__init__.py +0 -0
- folio_migration_tools/test_infrastructure/mocked_classes.py +406 -0
- folio_migration_tools/transaction_migration/legacy_loan.py +148 -34
- folio_migration_tools/transaction_migration/legacy_request.py +65 -25
- folio_migration_tools/transaction_migration/legacy_reserve.py +47 -0
- folio_migration_tools/transaction_migration/transaction_result.py +12 -1
- folio_migration_tools/translations/en.json +476 -0
- folio_migration_tools-1.9.10.dist-info/METADATA +169 -0
- folio_migration_tools-1.9.10.dist-info/RECORD +67 -0
- {folio_migration_tools-1.2.1.dist-info → folio_migration_tools-1.9.10.dist-info}/WHEEL +1 -2
- folio_migration_tools-1.9.10.dist-info/entry_points.txt +3 -0
- folio_migration_tools/generate_schemas.py +0 -46
- folio_migration_tools/mapping_file_transformation/mapping_file_mapping_base_impl.py +0 -44
- folio_migration_tools/mapping_file_transformation/user_mapper_base.py +0 -212
- folio_migration_tools/marc_rules_transformation/bibs_processor.py +0 -163
- folio_migration_tools/marc_rules_transformation/holdings_processor.py +0 -284
- folio_migration_tools/report_blurbs.py +0 -219
- folio_migration_tools/transaction_migration/legacy_fee_fine.py +0 -36
- folio_migration_tools-1.2.1.dist-info/METADATA +0 -134
- folio_migration_tools-1.2.1.dist-info/RECORD +0 -50
- folio_migration_tools-1.2.1.dist-info/top_level.txt +0 -1
- {folio_migration_tools-1.2.1.dist-info → folio_migration_tools-1.9.10.dist-info/licenses}/LICENSE +0 -0
|
@@ -1,21 +1,34 @@
|
|
|
1
1
|
import csv
|
|
2
|
-
|
|
2
|
+
import io
|
|
3
|
+
import json
|
|
3
4
|
import logging
|
|
4
|
-
|
|
5
|
+
import os
|
|
5
6
|
import sys
|
|
6
7
|
import time
|
|
7
8
|
from abc import abstractmethod
|
|
8
|
-
import
|
|
9
|
+
from datetime import datetime, timezone
|
|
10
|
+
from genericpath import isfile
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Annotated, List, Optional
|
|
9
13
|
|
|
10
|
-
|
|
14
|
+
import folioclient
|
|
11
15
|
from folio_uuid.folio_namespaces import FOLIONamespaces
|
|
12
16
|
from folioclient import FolioClient
|
|
13
|
-
from
|
|
17
|
+
from pydantic import Field
|
|
18
|
+
|
|
19
|
+
from folio_migration_tools import library_configuration, task_configuration
|
|
14
20
|
from folio_migration_tools.custom_exceptions import (
|
|
15
21
|
TransformationProcessError,
|
|
16
22
|
TransformationRecordFailedError,
|
|
17
23
|
)
|
|
24
|
+
from folio_migration_tools.extradata_writer import ExtradataWriter
|
|
18
25
|
from folio_migration_tools.folder_structure import FolderStructure
|
|
26
|
+
from folio_migration_tools.marc_rules_transformation.marc_file_processor import (
|
|
27
|
+
MarcFileProcessor,
|
|
28
|
+
)
|
|
29
|
+
from folio_migration_tools.marc_rules_transformation.marc_reader_wrapper import (
|
|
30
|
+
MARCReaderWrapper,
|
|
31
|
+
)
|
|
19
32
|
|
|
20
33
|
|
|
21
34
|
class MigrationTaskBase:
|
|
@@ -27,18 +40,31 @@ class MigrationTaskBase:
|
|
|
27
40
|
def __init__(
|
|
28
41
|
self,
|
|
29
42
|
library_configuration: library_configuration.LibraryConfiguration,
|
|
30
|
-
task_configuration,
|
|
43
|
+
task_configuration: task_configuration.AbstractTaskConfiguration,
|
|
44
|
+
folio_client: folioclient.FolioClient,
|
|
31
45
|
use_logging: bool = True,
|
|
32
46
|
):
|
|
33
|
-
|
|
34
47
|
logging.info("MigrationTaskBase init")
|
|
48
|
+
self.start_datetime = datetime.now(timezone.utc)
|
|
35
49
|
self.task_configuration = task_configuration
|
|
36
|
-
self.
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
library_configuration.
|
|
40
|
-
library_configuration.okapi_password,
|
|
50
|
+
logging.info(self.task_configuration.json(indent=4))
|
|
51
|
+
self.folio_client: FolioClient = folio_client
|
|
52
|
+
self.ecs_tenant_id = (
|
|
53
|
+
task_configuration.ecs_tenant_id or library_configuration.ecs_tenant_id
|
|
41
54
|
)
|
|
55
|
+
self.ecs_tenant_header = (
|
|
56
|
+
{"x-okapi-tenant": self.ecs_tenant_id} if self.ecs_tenant_id else {}
|
|
57
|
+
)
|
|
58
|
+
self.folio_client.okapi_headers.update(self.ecs_tenant_header)
|
|
59
|
+
self.central_folder_structure: Optional[FolderStructure] = None
|
|
60
|
+
if library_configuration.is_ecs and library_configuration.ecs_central_iteration_identifier:
|
|
61
|
+
self.central_folder_structure = FolderStructure(
|
|
62
|
+
library_configuration.base_folder,
|
|
63
|
+
FOLIONamespaces.instances,
|
|
64
|
+
task_configuration.name,
|
|
65
|
+
library_configuration.ecs_central_iteration_identifier,
|
|
66
|
+
library_configuration.add_time_stamp_to_file_names,
|
|
67
|
+
)
|
|
42
68
|
self.folder_structure: FolderStructure = FolderStructure(
|
|
43
69
|
library_configuration.base_folder,
|
|
44
70
|
self.get_object_type(),
|
|
@@ -51,6 +77,8 @@ class MigrationTaskBase:
|
|
|
51
77
|
self.object_type = self.get_object_type()
|
|
52
78
|
try:
|
|
53
79
|
self.folder_structure.setup_migration_file_structure()
|
|
80
|
+
if self.central_folder_structure:
|
|
81
|
+
self.central_folder_structure.setup_migration_file_structure()
|
|
54
82
|
# Initiate Worker
|
|
55
83
|
except FileNotFoundError as fne:
|
|
56
84
|
logging.error(fne)
|
|
@@ -59,6 +87,9 @@ class MigrationTaskBase:
|
|
|
59
87
|
logging.critical("Halting...")
|
|
60
88
|
sys.exit(1)
|
|
61
89
|
self.num_exeptions: int = 0
|
|
90
|
+
self.extradata_writer = ExtradataWriter(
|
|
91
|
+
self.folder_structure.transformation_extra_data_path
|
|
92
|
+
)
|
|
62
93
|
if use_logging:
|
|
63
94
|
self.setup_logging()
|
|
64
95
|
self.folder_structure.log_folder_structure()
|
|
@@ -68,32 +99,118 @@ class MigrationTaskBase:
|
|
|
68
99
|
def wrap_up(self):
|
|
69
100
|
raise NotImplementedError()
|
|
70
101
|
|
|
102
|
+
def clean_out_empty_logs(self):
|
|
103
|
+
if (
|
|
104
|
+
self.folder_structure.data_issue_file_path.is_file()
|
|
105
|
+
and os.stat(self.folder_structure.data_issue_file_path).st_size == 0
|
|
106
|
+
):
|
|
107
|
+
logging.info("Removing data issues file since it is empty")
|
|
108
|
+
os.remove(self.folder_structure.data_issue_file_path)
|
|
109
|
+
logging.info("Removed data issues file since it was empty")
|
|
110
|
+
|
|
111
|
+
if (
|
|
112
|
+
self.folder_structure.failed_marc_recs_file.is_file()
|
|
113
|
+
and os.stat(self.folder_structure.failed_marc_recs_file).st_size == 0
|
|
114
|
+
):
|
|
115
|
+
os.remove(self.folder_structure.failed_marc_recs_file)
|
|
116
|
+
logging.info("Removed empty failed marc records file since it was empty")
|
|
117
|
+
|
|
71
118
|
@abstractmethod
|
|
72
119
|
def do_work(self):
|
|
73
120
|
raise NotImplementedError
|
|
74
121
|
|
|
75
122
|
@staticmethod
|
|
76
|
-
def
|
|
123
|
+
def check_source_files(
|
|
124
|
+
source_path: Path, file_defs: list[library_configuration.FileDefinition]
|
|
125
|
+
) -> None:
|
|
126
|
+
"""Lists the source data files. Special case since we use the Items folder for holdings
|
|
127
|
+
|
|
128
|
+
Args:
|
|
129
|
+
source_path (Path): _description_
|
|
130
|
+
file_defs (list[library_configuration.FileDefinition]): _description_
|
|
131
|
+
|
|
132
|
+
Raises:
|
|
133
|
+
TransformationProcessError: _description_
|
|
134
|
+
|
|
135
|
+
"""
|
|
136
|
+
files = [
|
|
137
|
+
source_path / f.file_name
|
|
138
|
+
for f in file_defs
|
|
139
|
+
if isfile(source_path / f.file_name)
|
|
140
|
+
]
|
|
141
|
+
ret_str = ", ".join(f.file_name for f in file_defs)
|
|
142
|
+
|
|
143
|
+
if files and len(files) < len(file_defs):
|
|
144
|
+
raise TransformationProcessError(
|
|
145
|
+
"",
|
|
146
|
+
f"Some files listed in task configuration not found in {source_path}."
|
|
147
|
+
f"Listed files: {ret_str}",
|
|
148
|
+
)
|
|
149
|
+
if not any(files):
|
|
150
|
+
raise TransformationProcessError(
|
|
151
|
+
"",
|
|
152
|
+
f"None of the files listed in task configuration found in {source_path}."
|
|
153
|
+
f"Listed files: {ret_str}",
|
|
154
|
+
)
|
|
155
|
+
logging.info("Files to process:")
|
|
156
|
+
for filename in files:
|
|
157
|
+
logging.info("\t%s", filename)
|
|
158
|
+
|
|
159
|
+
def load_instance_id_map(self, raise_if_empty=True) -> dict:
|
|
160
|
+
"""
|
|
161
|
+
This method handles loading instance id maps for holdings and other transformations that require it.
|
|
162
|
+
This is in the base class because multiple tasks need it. It exists because instances in an ECS environment
|
|
163
|
+
are transformed for the central and data tenants separately, but the data tenants need to know about
|
|
164
|
+
the central tenant instance ids. This is a bit of a hack, but it works for now.
|
|
165
|
+
"""
|
|
166
|
+
map_files = []
|
|
167
|
+
instance_id_map = {}
|
|
168
|
+
if self.library_configuration.is_ecs and self.central_folder_structure:
|
|
169
|
+
logging.info(
|
|
170
|
+
"Loading ECS central tenant instance id map from %s", self.central_folder_structure.instance_id_map_path
|
|
171
|
+
)
|
|
172
|
+
instance_id_map = self.load_id_map(
|
|
173
|
+
self.central_folder_structure.instance_id_map_path,
|
|
174
|
+
raise_if_empty=False,
|
|
175
|
+
)
|
|
176
|
+
map_files.append(str(self.central_folder_structure.instance_id_map_path))
|
|
177
|
+
logging.info(
|
|
178
|
+
"Loading member tenant isntance id map from %s",
|
|
179
|
+
self.folder_structure.instance_id_map_path
|
|
180
|
+
)
|
|
181
|
+
instance_id_map = self.load_id_map(
|
|
182
|
+
self.folder_structure.instance_id_map_path,
|
|
183
|
+
raise_if_empty=False,
|
|
184
|
+
existing_id_map=instance_id_map,
|
|
185
|
+
)
|
|
186
|
+
map_files.append(str(self.folder_structure.instance_id_map_path))
|
|
187
|
+
if not any(instance_id_map) and raise_if_empty:
|
|
188
|
+
map_file_paths = ", ".join(map_files)
|
|
189
|
+
raise TransformationProcessError("", "Instance id map is empty", map_file_paths)
|
|
190
|
+
return instance_id_map
|
|
191
|
+
|
|
192
|
+
@staticmethod
|
|
193
|
+
def load_id_map(map_path, raise_if_empty=False, existing_id_map={}):
|
|
77
194
|
if not isfile(map_path):
|
|
78
|
-
logging.
|
|
195
|
+
logging.warning(
|
|
79
196
|
"No legacy id map found at %s. Will build one from scratch", map_path
|
|
80
197
|
)
|
|
81
198
|
return {}
|
|
82
|
-
id_map =
|
|
83
|
-
loaded_rows =
|
|
199
|
+
id_map = existing_id_map
|
|
200
|
+
loaded_rows = len(id_map)
|
|
84
201
|
with open(map_path) as id_map_file:
|
|
85
202
|
for index, json_string in enumerate(id_map_file, start=1):
|
|
86
203
|
loaded_rows = index
|
|
87
204
|
# {"legacy_id", "folio_id","suppressed"}
|
|
88
|
-
|
|
89
|
-
if loaded_rows %
|
|
205
|
+
map_tuple = json.loads(json_string)
|
|
206
|
+
if loaded_rows % 500000 == 0:
|
|
90
207
|
print(
|
|
91
|
-
f"{loaded_rows + 1} ids loaded to map {
|
|
208
|
+
f"{loaded_rows + 1} ids loaded to map. Last Id: {map_tuple[0]} ",
|
|
92
209
|
end="\r",
|
|
93
210
|
)
|
|
94
211
|
|
|
95
|
-
id_map[
|
|
96
|
-
logging.info("Loaded %s migrated IDs", loaded_rows)
|
|
212
|
+
id_map[map_tuple[0]] = map_tuple
|
|
213
|
+
logging.info("Loaded %s migrated IDs from %s", loaded_rows, id_map_file.name)
|
|
97
214
|
if not any(id_map) and raise_if_empty:
|
|
98
215
|
raise TransformationProcessError("", "Legacy id map is empty", map_path)
|
|
99
216
|
return id_map
|
|
@@ -104,15 +221,6 @@ class MigrationTaskBase:
|
|
|
104
221
|
|
|
105
222
|
def setup_logging(self):
|
|
106
223
|
debug = self.library_configuration.log_level_debug
|
|
107
|
-
DATA_OUTPUT_LVL_NUM = 25
|
|
108
|
-
logging.addLevelName(DATA_OUTPUT_LVL_NUM, "DATA_OUTPUT")
|
|
109
|
-
|
|
110
|
-
def data_output(self, message, *args, **kws):
|
|
111
|
-
if self.isEnabledFor(DATA_OUTPUT_LVL_NUM):
|
|
112
|
-
# Yes, logger takes its '*args' as 'args'.
|
|
113
|
-
self._log(DATA_OUTPUT_LVL_NUM, message, args, **kws)
|
|
114
|
-
|
|
115
|
-
logging.Logger.data_output = data_output
|
|
116
224
|
|
|
117
225
|
DATA_ISSUE_LVL_NUM = 26
|
|
118
226
|
logging.addLevelName(DATA_ISSUE_LVL_NUM, "DATA_ISSUES")
|
|
@@ -123,49 +231,41 @@ class MigrationTaskBase:
|
|
|
123
231
|
self._log(DATA_ISSUE_LVL_NUM, message, args, **kws)
|
|
124
232
|
|
|
125
233
|
logging.Logger.data_issues = data_issues
|
|
126
|
-
|
|
127
234
|
logger = logging.getLogger()
|
|
235
|
+
logger.propogate = True
|
|
128
236
|
logger.handlers = []
|
|
129
237
|
formatter = logging.Formatter(
|
|
130
|
-
"%(asctime)s\t%(levelname)s\t%(message)s\t%(
|
|
238
|
+
"%(asctime)s\t%(levelname)s\t%(message)s\t%(task_configuration_name)s"
|
|
131
239
|
)
|
|
132
240
|
stream_handler = logging.StreamHandler()
|
|
133
|
-
stream_handler.addFilter(ExcludeLevelFilter(25))
|
|
134
241
|
stream_handler.addFilter(ExcludeLevelFilter(26))
|
|
135
|
-
|
|
242
|
+
stream_handler.addFilter(TaskNameFilter(self.task_configuration.name))
|
|
136
243
|
if debug:
|
|
137
244
|
logger.setLevel(logging.DEBUG)
|
|
138
245
|
stream_handler.setLevel(logging.DEBUG)
|
|
246
|
+
logging.getLogger("httpx").setLevel(logging.DEBUG)
|
|
139
247
|
else:
|
|
140
248
|
logger.setLevel(logging.INFO)
|
|
141
249
|
stream_handler.setLevel(logging.INFO)
|
|
142
250
|
stream_handler.addFilter(
|
|
143
251
|
ExcludeLevelFilter(30)
|
|
144
|
-
) #
|
|
252
|
+
) # Exclude warnings from pymarc
|
|
145
253
|
stream_handler.setFormatter(formatter)
|
|
146
254
|
logger.addHandler(stream_handler)
|
|
147
255
|
|
|
148
|
-
file_formatter = logging.Formatter(
|
|
256
|
+
file_formatter = logging.Formatter(
|
|
257
|
+
"%(asctime)s\t%(message)s\t%(task_configuration_name)s\t%(filename)s:%(lineno)d"
|
|
258
|
+
)
|
|
149
259
|
file_handler = logging.FileHandler(
|
|
150
260
|
filename=self.folder_structure.transformation_log_path, mode="w"
|
|
151
261
|
)
|
|
152
|
-
file_handler.addFilter(ExcludeLevelFilter(25))
|
|
153
262
|
file_handler.addFilter(ExcludeLevelFilter(26))
|
|
263
|
+
file_handler.addFilter(TaskNameFilter(self.task_configuration.name))
|
|
154
264
|
# file_handler.addFilter(LevelFilter(0, 20))
|
|
155
265
|
file_handler.setFormatter(file_formatter)
|
|
156
266
|
file_handler.setLevel(logging.INFO)
|
|
157
267
|
logging.getLogger().addHandler(file_handler)
|
|
158
268
|
|
|
159
|
-
# Data file formatter
|
|
160
|
-
data_file_formatter = logging.Formatter("%(message)s")
|
|
161
|
-
data_file_handler = logging.FileHandler(
|
|
162
|
-
filename=str(self.folder_structure.transformation_extra_data_path), mode="w"
|
|
163
|
-
)
|
|
164
|
-
data_file_handler.addFilter(LevelFilter(25))
|
|
165
|
-
data_file_handler.setFormatter(data_file_formatter)
|
|
166
|
-
data_file_handler.setLevel(25)
|
|
167
|
-
logging.getLogger().addHandler(data_file_handler)
|
|
168
|
-
|
|
169
269
|
# Data issue file formatter
|
|
170
270
|
data_issue_file_formatter = logging.Formatter("%(message)s")
|
|
171
271
|
data_issue_file_handler = logging.FileHandler(
|
|
@@ -180,26 +280,19 @@ class MigrationTaskBase:
|
|
|
180
280
|
def setup_records_map(self, mapping_file_path):
|
|
181
281
|
with open(mapping_file_path) as mapping_file:
|
|
182
282
|
field_map = json.load(mapping_file)
|
|
183
|
-
logging.info(
|
|
283
|
+
logging.info(
|
|
284
|
+
"%s fields present in record mapping file", len(field_map["data"])
|
|
285
|
+
)
|
|
184
286
|
mapped_fields = (
|
|
185
287
|
f
|
|
186
288
|
for f in field_map["data"]
|
|
187
289
|
if f["legacy_field"] and f["legacy_field"] != "Not mapped"
|
|
188
290
|
)
|
|
189
291
|
logging.info(
|
|
190
|
-
"%s
|
|
292
|
+
"%s fields mapped in record mapping file", len(list(mapped_fields))
|
|
191
293
|
)
|
|
192
294
|
return field_map
|
|
193
295
|
|
|
194
|
-
@staticmethod
|
|
195
|
-
def add_common_arguments(parser: PromptParser):
|
|
196
|
-
|
|
197
|
-
"""parser.add_argument("okapi_url", help="OKAPI base url")
|
|
198
|
-
parser.add_argument("tenant_id", help="id of the FOLIO tenant.")
|
|
199
|
-
parser.add_argument("username", help="the api user")
|
|
200
|
-
parser.add_argument("base_folder", help="path base folder", type=str)
|
|
201
|
-
parser.add_argument("--password", help="the api users password", secure=True)"""
|
|
202
|
-
|
|
203
296
|
def log_and_exit_if_too_many_errors(
|
|
204
297
|
self, error: TransformationRecordFailedError, idx
|
|
205
298
|
):
|
|
@@ -222,52 +315,239 @@ class MigrationTaskBase:
|
|
|
222
315
|
f"{num_processed:,} records processed. Recs/sec: {elapsed_formatted} "
|
|
223
316
|
)
|
|
224
317
|
|
|
225
|
-
def
|
|
318
|
+
def do_work_marc_transformer(
|
|
226
319
|
self,
|
|
320
|
+
):
|
|
321
|
+
logging.info("Starting....")
|
|
322
|
+
if self.folder_structure.failed_marc_recs_file.is_file():
|
|
323
|
+
os.remove(self.folder_structure.failed_marc_recs_file)
|
|
324
|
+
logging.info("Removed failed marc records file to prevent duplicating data")
|
|
325
|
+
with open(
|
|
326
|
+
self.folder_structure.created_objects_path, "w+"
|
|
327
|
+
) as created_records_file:
|
|
328
|
+
self.processor = MarcFileProcessor(
|
|
329
|
+
self.mapper, self.folder_structure, created_records_file
|
|
330
|
+
)
|
|
331
|
+
for file_def in self.task_configuration.files:
|
|
332
|
+
MARCReaderWrapper.process_single_file(
|
|
333
|
+
file_def,
|
|
334
|
+
self.processor,
|
|
335
|
+
self.folder_structure.failed_marc_recs_file,
|
|
336
|
+
self.folder_structure,
|
|
337
|
+
)
|
|
338
|
+
|
|
339
|
+
@staticmethod
|
|
340
|
+
def validate_ref_data_mapping_lines(lines, num_of_columns):
|
|
341
|
+
"""
|
|
342
|
+
Helper method to validate the structure of individual lines in a mapping file.
|
|
343
|
+
|
|
344
|
+
Args:
|
|
345
|
+
lines (list): List of lines in the mapping file
|
|
346
|
+
num_of_columns (int): Number of columns expected in each line
|
|
347
|
+
|
|
348
|
+
Returns:
|
|
349
|
+
tuple: A tuple containing a list of invalid lines and a list of valid lines
|
|
350
|
+
"""
|
|
351
|
+
invalid_lines = []
|
|
352
|
+
valid_lines = []
|
|
353
|
+
for idx, row in enumerate(lines, start=2):
|
|
354
|
+
if not row.strip():
|
|
355
|
+
if idx == len(lines) + 1:
|
|
356
|
+
continue
|
|
357
|
+
else:
|
|
358
|
+
invalid_lines.append(str(idx))
|
|
359
|
+
else:
|
|
360
|
+
line_length = len(row.split("\t"))
|
|
361
|
+
if line_length != num_of_columns:
|
|
362
|
+
invalid_lines.append(str(idx))
|
|
363
|
+
else:
|
|
364
|
+
valid_lines.append(str(idx))
|
|
365
|
+
return invalid_lines, valid_lines
|
|
366
|
+
|
|
367
|
+
@staticmethod
|
|
368
|
+
def verify_ref_data_mapping_file_structure(map_file: io.TextIOBase):
|
|
369
|
+
"""
|
|
370
|
+
Helper method to validate the structure of a mapping file.
|
|
371
|
+
|
|
372
|
+
Args:
|
|
373
|
+
map_file (io.TextIOBase): The mapping file to validate
|
|
374
|
+
|
|
375
|
+
Raises:
|
|
376
|
+
TransformationProcessError: If the mapping file has rows with different number of columns
|
|
377
|
+
|
|
378
|
+
Returns:
|
|
379
|
+
None
|
|
380
|
+
"""
|
|
381
|
+
current_pos = map_file.tell()
|
|
382
|
+
try:
|
|
383
|
+
map_file.seek(0)
|
|
384
|
+
num_of_columns = len(map_file.readline().split("\t"))
|
|
385
|
+
lines = map_file.readlines()
|
|
386
|
+
invalid_lines, valid_lines = MigrationTaskBase.validate_ref_data_mapping_lines(
|
|
387
|
+
lines, num_of_columns
|
|
388
|
+
)
|
|
389
|
+
if invalid_lines:
|
|
390
|
+
raise TransformationProcessError(
|
|
391
|
+
"",
|
|
392
|
+
(
|
|
393
|
+
f"Mapping file {map_file.name} has rows with different number "
|
|
394
|
+
f"of columns ({'Row' if len(invalid_lines) == 1 else 'Rows'} {', '.join(invalid_lines)})"
|
|
395
|
+
),
|
|
396
|
+
)
|
|
397
|
+
if not valid_lines:
|
|
398
|
+
raise TransformationProcessError(
|
|
399
|
+
"", f"Map has no rows: {map_file.name}"
|
|
400
|
+
)
|
|
401
|
+
finally:
|
|
402
|
+
map_file.seek(current_pos)
|
|
403
|
+
|
|
404
|
+
@staticmethod
|
|
405
|
+
def load_ref_data_mapping_file(
|
|
227
406
|
folio_property_name: str,
|
|
228
407
|
map_file_path: Path,
|
|
229
408
|
folio_keys,
|
|
230
409
|
required: bool = True,
|
|
231
410
|
):
|
|
411
|
+
"""
|
|
412
|
+
Helper method to load a reference data mapping file.
|
|
413
|
+
|
|
414
|
+
Args:
|
|
415
|
+
folio_property_name (str): The name of the property in FOLIO
|
|
416
|
+
map_file_path (Path): The path to the mapping file
|
|
417
|
+
folio_keys (list): A list of FOLIO keys
|
|
418
|
+
required (bool): Whether the property is required or not
|
|
419
|
+
"""
|
|
232
420
|
if (
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
421
|
+
(
|
|
422
|
+
folio_property_name in folio_keys
|
|
423
|
+
or required
|
|
424
|
+
or folio_property_name.startswith("statisticalCodeIds")
|
|
425
|
+
or folio_property_name.startswith("locationMap")
|
|
426
|
+
or folio_property_name.startswith("fundsMap")
|
|
427
|
+
)
|
|
428
|
+
and map_file_path.is_file()
|
|
236
429
|
):
|
|
237
430
|
try:
|
|
238
431
|
with open(map_file_path) as map_file:
|
|
432
|
+
# Validate the structure of the mapping file
|
|
433
|
+
MigrationTaskBase.verify_ref_data_mapping_file_structure(map_file)
|
|
239
434
|
ref_data_map = list(csv.DictReader(map_file, dialect="tsv"))
|
|
240
435
|
logging.info(
|
|
241
436
|
"Found %s rows in %s map",
|
|
242
437
|
len(ref_data_map),
|
|
243
438
|
folio_property_name,
|
|
244
439
|
)
|
|
440
|
+
if not any(ref_data_map[0].keys()):
|
|
441
|
+
raise TransformationProcessError(
|
|
442
|
+
"",
|
|
443
|
+
(
|
|
444
|
+
f"{folio_property_name} not mapped in legacy->folio mapping file "
|
|
445
|
+
f"({map_file_path}). Did you map this field, "
|
|
446
|
+
"but forgot to add a mapping file?"
|
|
447
|
+
),
|
|
448
|
+
)
|
|
245
449
|
logging.info(
|
|
246
|
-
"%s will be used for
|
|
247
|
-
",".join(ref_data_map[0].keys()),
|
|
450
|
+
"%s will be used for determining %s",
|
|
451
|
+
", ".join(ref_data_map[0].keys()),
|
|
248
452
|
folio_property_name,
|
|
249
453
|
)
|
|
250
454
|
return ref_data_map
|
|
251
455
|
except Exception as exception:
|
|
252
|
-
raise
|
|
253
|
-
|
|
254
|
-
(
|
|
255
|
-
f"{folio_property_name} not mapped in legacy->folio mapping file "
|
|
256
|
-
f"({map_file_path}) ({exception}). Did you map this field, "
|
|
257
|
-
"but forgot to add a mapping file?"
|
|
258
|
-
),
|
|
259
|
-
) from exception
|
|
456
|
+
raise exception
|
|
457
|
+
|
|
260
458
|
else:
|
|
261
459
|
logging.info("No mapping setup for %s", folio_property_name)
|
|
262
460
|
logging.info("%s will have default mapping if any ", folio_property_name)
|
|
263
461
|
logging.info(
|
|
264
|
-
"Add a file named %s and add the field to "
|
|
265
|
-
"the item.mapping.json file.",
|
|
462
|
+
"Add a file named %s and add the field to the field mapping json file.",
|
|
266
463
|
map_file_path,
|
|
267
464
|
)
|
|
268
465
|
return None
|
|
269
466
|
|
|
270
467
|
|
|
468
|
+
class MarcTaskConfigurationBase(task_configuration.AbstractTaskConfiguration):
|
|
469
|
+
"""
|
|
470
|
+
Base class for MARC task configurations.
|
|
471
|
+
|
|
472
|
+
Attributes:
|
|
473
|
+
files (List[library_configuration.FileDefinition]):
|
|
474
|
+
List of MARC21 files to be processed.
|
|
475
|
+
|
|
476
|
+
create_source_records (bool):
|
|
477
|
+
Controls whether or not to retain the MARC records in Source Record Storage.
|
|
478
|
+
Default is False, meaning MARC records will not be retained.
|
|
479
|
+
|
|
480
|
+
hrid_handling (library_configuration.HridHandling):
|
|
481
|
+
Determines how HRIDs are handled.
|
|
482
|
+
- 'default': FOLIO generates HRIDs and moves existing 001 fields into a 035 field, concatenated with the 003 field.
|
|
483
|
+
- 'preserve001': Keeps the 001 fields in place and uses them as HRIDs.
|
|
484
|
+
Default is 'default'.
|
|
485
|
+
|
|
486
|
+
deactivate035_from001 (bool):
|
|
487
|
+
Disables the default FOLIO functionality of moving the previous 001 field into a 035 field, prefixed with the value from 003.
|
|
488
|
+
Default is False, meaning the functionality remains active.
|
|
489
|
+
"""
|
|
490
|
+
|
|
491
|
+
files: Annotated[
|
|
492
|
+
List[library_configuration.FileDefinition],
|
|
493
|
+
Field(
|
|
494
|
+
title="Source files",
|
|
495
|
+
description=("List of MARC21 files with bibliographic records."),
|
|
496
|
+
),
|
|
497
|
+
]
|
|
498
|
+
create_source_records: Annotated[
|
|
499
|
+
bool,
|
|
500
|
+
Field(
|
|
501
|
+
title="Create source records",
|
|
502
|
+
description=(
|
|
503
|
+
"Controls whether or not to retain the MARC records in "
|
|
504
|
+
"Source Record Storage."
|
|
505
|
+
),
|
|
506
|
+
),
|
|
507
|
+
] = False
|
|
508
|
+
hrid_handling: Annotated[
|
|
509
|
+
library_configuration.HridHandling,
|
|
510
|
+
Field(
|
|
511
|
+
title="HRID Handling",
|
|
512
|
+
description=(
|
|
513
|
+
"Setting to default will make FOLIO generate HRIDs and move the existing "
|
|
514
|
+
"001:s into a 035, concatenated with the 003. Choosing preserve001 means "
|
|
515
|
+
"the 001:s will remain in place, and that they will also become the HRIDs"
|
|
516
|
+
),
|
|
517
|
+
),
|
|
518
|
+
] = library_configuration.HridHandling.default
|
|
519
|
+
deactivate035_from001: Annotated[
|
|
520
|
+
bool,
|
|
521
|
+
Field(
|
|
522
|
+
title="Create 035 from 001 and 003",
|
|
523
|
+
description=(
|
|
524
|
+
"This deactivates the FOLIO default functionality of moving the previous 001 "
|
|
525
|
+
"into a 035, prefixed with the value from 003"
|
|
526
|
+
),
|
|
527
|
+
),
|
|
528
|
+
] = False
|
|
529
|
+
statistical_codes_map_file_name: Annotated[
|
|
530
|
+
Optional[str],
|
|
531
|
+
Field(
|
|
532
|
+
title="Statistical code map file name",
|
|
533
|
+
description=(
|
|
534
|
+
"Path to the file containing the mapping of statistical codes. "
|
|
535
|
+
"The file should be in TSV format with legacy_stat_code and folio_code columns."
|
|
536
|
+
),
|
|
537
|
+
),
|
|
538
|
+
] = ""
|
|
539
|
+
statistical_code_mapping_fields: Annotated[
|
|
540
|
+
List[str],
|
|
541
|
+
Field(
|
|
542
|
+
title="Statistical code mapping fields",
|
|
543
|
+
description=(
|
|
544
|
+
"List of fields + subfields to be used for mapping statistical codes. "
|
|
545
|
+
"Subfields should be delimited by a \"$\" (eg. 907$a). Single repeating subfields "
|
|
546
|
+
"will be treated as unique values. Multiple subfields will be concatenated together with a space."
|
|
547
|
+
),
|
|
548
|
+
),
|
|
549
|
+
] = []
|
|
550
|
+
|
|
271
551
|
class ExcludeLevelFilter(logging.Filter):
|
|
272
552
|
def __init__(self, level):
|
|
273
553
|
super().__init__()
|
|
@@ -277,6 +557,16 @@ class ExcludeLevelFilter(logging.Filter):
|
|
|
277
557
|
return record.levelno != self.level
|
|
278
558
|
|
|
279
559
|
|
|
560
|
+
class TaskNameFilter(logging.Filter):
|
|
561
|
+
def __init__(self, task_configuration_name):
|
|
562
|
+
super().__init__()
|
|
563
|
+
self.task_configuration_name = task_configuration_name
|
|
564
|
+
|
|
565
|
+
def filter(self, record):
|
|
566
|
+
record.task_configuration_name = self.task_configuration_name
|
|
567
|
+
return True
|
|
568
|
+
|
|
569
|
+
|
|
280
570
|
class LevelFilter(logging.Filter):
|
|
281
571
|
def __init__(self, level):
|
|
282
572
|
super().__init__()
|