folio-migration-tools 1.10.1-py3-none-any.whl → 1.10.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- folio_migration_tools/__init__.py +10 -2
- folio_migration_tools/__main__.py +7 -0
- folio_migration_tools/circulation_helper.py +23 -8
- folio_migration_tools/colors.py +7 -0
- folio_migration_tools/config_file_load.py +7 -0
- folio_migration_tools/custom_dict.py +17 -0
- folio_migration_tools/custom_exceptions.py +40 -4
- folio_migration_tools/extradata_writer.py +12 -0
- folio_migration_tools/folder_structure.py +16 -0
- folio_migration_tools/helper.py +7 -0
- folio_migration_tools/holdings_helper.py +11 -5
- folio_migration_tools/i18n_config.py +6 -0
- folio_migration_tools/library_configuration.py +19 -5
- folio_migration_tools/mapper_base.py +15 -0
- folio_migration_tools/mapping_file_transformation/__init__.py +1 -0
- folio_migration_tools/mapping_file_transformation/courses_mapper.py +17 -0
- folio_migration_tools/mapping_file_transformation/holdings_mapper.py +19 -0
- folio_migration_tools/mapping_file_transformation/item_mapper.py +24 -0
- folio_migration_tools/mapping_file_transformation/manual_fee_fines_mapper.py +18 -0
- folio_migration_tools/mapping_file_transformation/mapping_file_mapper_base.py +26 -9
- folio_migration_tools/mapping_file_transformation/notes_mapper.py +16 -0
- folio_migration_tools/mapping_file_transformation/order_mapper.py +40 -27
- folio_migration_tools/mapping_file_transformation/organization_mapper.py +40 -33
- folio_migration_tools/mapping_file_transformation/ref_data_mapping.py +17 -0
- folio_migration_tools/mapping_file_transformation/user_mapper.py +16 -0
- folio_migration_tools/marc_rules_transformation/__init__.py +1 -0
- folio_migration_tools/marc_rules_transformation/conditions.py +49 -36
- folio_migration_tools/marc_rules_transformation/holdings_statementsparser.py +9 -3
- folio_migration_tools/marc_rules_transformation/hrid_handler.py +16 -1
- folio_migration_tools/marc_rules_transformation/marc_file_processor.py +15 -1
- folio_migration_tools/marc_rules_transformation/marc_reader_wrapper.py +7 -0
- folio_migration_tools/marc_rules_transformation/rules_mapper_base.py +35 -29
- folio_migration_tools/marc_rules_transformation/rules_mapper_bibs.py +23 -18
- folio_migration_tools/marc_rules_transformation/rules_mapper_holdings.py +46 -27
- folio_migration_tools/migration_report.py +14 -6
- folio_migration_tools/migration_tasks/__init__.py +2 -0
- folio_migration_tools/migration_tasks/batch_poster.py +41 -19
- folio_migration_tools/migration_tasks/bibs_transformer.py +16 -0
- folio_migration_tools/migration_tasks/courses_migrator.py +15 -0
- folio_migration_tools/migration_tasks/holdings_csv_transformer.py +18 -3
- folio_migration_tools/migration_tasks/holdings_marc_transformer.py +17 -0
- folio_migration_tools/migration_tasks/inventory_batch_poster.py +424 -0
- folio_migration_tools/migration_tasks/items_transformer.py +16 -0
- folio_migration_tools/migration_tasks/loans_migrator.py +17 -2
- folio_migration_tools/migration_tasks/manual_fee_fines_transformer.py +16 -0
- folio_migration_tools/migration_tasks/marc_import.py +407 -0
- folio_migration_tools/migration_tasks/migration_task_base.py +49 -17
- folio_migration_tools/migration_tasks/orders_transformer.py +16 -0
- folio_migration_tools/migration_tasks/organization_transformer.py +17 -2
- folio_migration_tools/migration_tasks/requests_migrator.py +15 -0
- folio_migration_tools/migration_tasks/reserves_migrator.py +15 -0
- folio_migration_tools/migration_tasks/user_importer.py +347 -0
- folio_migration_tools/migration_tasks/user_transformer.py +16 -0
- folio_migration_tools/task_configuration.py +7 -0
- folio_migration_tools/transaction_migration/__init__.py +1 -0
- folio_migration_tools/transaction_migration/legacy_loan.py +16 -0
- folio_migration_tools/transaction_migration/legacy_request.py +14 -0
- folio_migration_tools/transaction_migration/legacy_reserve.py +14 -0
- folio_migration_tools/transaction_migration/transaction_result.py +16 -0
- {folio_migration_tools-1.10.1.dist-info → folio_migration_tools-1.10.3.dist-info}/METADATA +1 -1
- folio_migration_tools-1.10.3.dist-info/RECORD +66 -0
- folio_migration_tools-1.10.1.dist-info/RECORD +0 -63
- {folio_migration_tools-1.10.1.dist-info → folio_migration_tools-1.10.3.dist-info}/WHEEL +0 -0
- {folio_migration_tools-1.10.1.dist-info → folio_migration_tools-1.10.3.dist-info}/entry_points.txt +0 -0

folio_migration_tools/migration_tasks/marc_import.py (new file)
@@ -0,0 +1,407 @@
+"""MARCImportTask module for FOLIO MARC data import operations.
+
+This module provides an adapter that wraps folio_data_import.MARCImportJob
+to conform to the folio_migration_tools MigrationTaskBase interface.
+It supports importing MARC records directly into FOLIO using the Data Import
+APIs (change-manager), bypassing the need for SRS record creation during MARC transformation.
+
+This provides an alternative workflow for MARC record loading, using FOLIO's
+native Data Import capabilities.
+"""
+
+import asyncio
+import json
+import logging
+from pathlib import Path
+from typing import Annotated, Dict, List
+
+from folio_data_import._progress import RichProgressReporter
+from folio_data_import.MARCDataImport import MARCImportJob as FDIMARCImportJob
+from folio_uuid.folio_namespaces import FOLIONamespaces
+from pydantic import Field
+
+from folio_migration_tools.library_configuration import (
+    FileDefinition,
+    LibraryConfiguration,
+)
+from folio_migration_tools.migration_report import MigrationReport
+from folio_migration_tools.migration_tasks.migration_task_base import MigrationTaskBase
+from folio_migration_tools.task_configuration import AbstractTaskConfiguration
+
+
+class MARCImportTask(MigrationTaskBase):
+    """MARCImportTask.
+
+    An adapter that wraps folio_data_import.MARCImportJob to provide MARC import
+    functionality via FOLIO's Data Import APIs while conforming to the
+    MigrationTaskBase interface.
+
+    This implementation:
+    - Imports MARC records using the change-manager APIs
+    - Uses configurable Data Import job profiles
+    - Supports MARC record preprocessing
+    - Handles large files with optional splitting
+    - Tracks job IDs for monitoring in FOLIO
+
+    Parents:
+        MigrationTaskBase: Base class for all migration tasks
+
+    Raises:
+        TransformationProcessError: When a critical error occurs during processing
+        FileNotFoundError: When input files are not found
+    """
+
+    class TaskConfiguration(AbstractTaskConfiguration):
+        """Task configuration for MARCImportTask."""
+
+        name: Annotated[
+            str,
+            Field(
+                title="Task name",
+                description="The name of the task",
+            ),
+        ]
+        migration_task_type: Annotated[
+            str,
+            Field(
+                title="Migration task type",
+                description="The type of migration task",
+            ),
+        ]
+        files: Annotated[
+            List[FileDefinition],
+            Field(
+                title="List of files",
+                description=(
+                    "List of MARC files to be imported. Files should be in binary MARC "
+                    "format (.mrc). Located in the source_data folder."
+                ),
+            ),
+        ]
+        import_profile_name: Annotated[
+            str,
+            Field(
+                title="Import profile name",
+                description=(
+                    "The name of the Data Import job profile to use in FOLIO. "
+                    "This profile determines how MARC records are processed and "
+                    "what FOLIO records are created/updated."
+                ),
+            ),
+        ]
+        batch_size: Annotated[
+            int,
+            Field(
+                title="Batch size",
+                description="Number of MARC records to include in each batch sent to FOLIO",
+                ge=1,
+                le=1000,
+            ),
+        ] = 10
+        batch_delay: Annotated[
+            float,
+            Field(
+                title="Batch delay",
+                description=(
+                    "Number of seconds to wait between record batches. "
+                    "Use this to throttle requests if needed."
+                ),
+                ge=0.0,
+            ),
+        ] = 0.0
+        marc_record_preprocessors: Annotated[
+            List[str],
+            Field(
+                title="MARC record preprocessors",
+                description=(
+                    "List of preprocessor names to apply to each record before import. "
+                    "Preprocessors can modify MARC records before they are sent to FOLIO."
+                ),
+            ),
+        ] = []
+        preprocessors_args: Annotated[
+            Dict[str, Dict] | str,
+            Field(
+                title="Preprocessor arguments",
+                description=(
+                    "Dictionary of arguments to pass to the MARC record preprocessors. "
+                    "Keys are preprocessor names, values are dicts of arguments."
+                ),
+            ),
+        ] = {}
+        split_files: Annotated[
+            bool,
+            Field(
+                title="Split files",
+                description=(
+                    "Split each file into smaller jobs of size split_size. "
+                    "Useful for very large files that may timeout or be difficult to monitor."
+                ),
+            ),
+        ] = False
+        split_size: Annotated[
+            int,
+            Field(
+                title="Split size",
+                description="Number of records to include in each split file",
+                ge=1,
+            ),
+        ] = 1000
+        split_offset: Annotated[
+            int,
+            Field(
+                title="Split offset",
+                description=(
+                    "Number of split files to skip before starting processing. "
+                    "Useful for resuming a partially completed import."
+                ),
+                ge=0,
+            ),
+        ] = 0
+        show_file_names_in_data_import_logs: Annotated[
+            bool,
+            Field(
+                title="Show file names in Data Import logs",
+                description=(
+                    "If true, set the file name for each job in the Data Import logs. "
+                    "This makes it easier to identify jobs in the FOLIO UI."
+                ),
+            ),
+        ] = False
+        let_summary_fail: Annotated[
+            bool,
+            Field(
+                title="Let summary fail",
+                description=(
+                    "If true, do not retry or fail the import if the final job summary "
+                    "cannot be retrieved. Useful when FOLIO is under heavy load."
+                ),
+            ),
+        ] = False
+        skip_summary: Annotated[
+            bool,
+            Field(
+                title="Skip summary",
+                description=(
+                    "If true, skip fetching the final job summary after import. "
+                    "The import will complete but detailed statistics won't be available."
+                ),
+            ),
+        ] = False
+        no_progress: Annotated[
+            bool,
+            Field(
+                title="No progress",
+                description=(
+                    "Disable progress reporting in the console output. "
+                    "Set to true for non-interactive/CI environments."
+                ),
+            ),
+        ] = False
+
+    task_configuration: TaskConfiguration
+
+    @staticmethod
+    def get_object_type() -> FOLIONamespaces:
+        return FOLIONamespaces.instances  # MARC imports primarily create instances
+
+    def __init__(
+        self,
+        task_config: TaskConfiguration,
+        library_config: LibraryConfiguration,
+        folio_client,
+        use_logging: bool = True,
+    ):
+        """Initialize MarcImport for MARC record import via Data Import APIs.
+
+        Args:
+            task_config (TaskConfiguration): MARC import configuration.
+            library_config (LibraryConfiguration): Library configuration.
+            folio_client: FOLIO API client.
+            use_logging (bool): Whether to set up task logging.
+        """
+        super().__init__(library_config, task_config, folio_client, use_logging)
+        self.migration_report = MigrationReport()
+        self.total_records_sent = 0
+        self.job_ids: List[str] = []
+        self.files_processed: List[str] = []
+
+        logging.info("MARCImportTask initialized")
+        logging.info("Import profile: %s", self.task_configuration.import_profile_name)
+        logging.info("Batch size: %s", self.task_configuration.batch_size)
+        logging.info("Results folder: %s", self.folder_structure.results_folder)
+
+    def _create_fdi_config(self, file_paths: List[Path]) -> FDIMARCImportJob.Config:
+        """Create a folio_data_import.MARCImportJob.Config from our TaskConfiguration.
+
+        Args:
+            file_paths: List of file paths to process
+
+        Returns:
+            FDIMARCImportJob.Config: Configuration for the underlying MARCImportJob
+
+        Note:
+            The folio_data_import MARCImportJob places error files (bad_marc_records_*.mrc,
+            failed_batches_*.mrc) in the parent directory of the first MARC file. Since
+            we're reading from results_folder, error files will also be created there.
+        """
+        # Convert preprocessor list to comma-separated string for folio_data_import
+        # folio_data_import expects List[Callable], str (comma-separated), or None
+        preprocessors_str = (
+            ",".join(self.task_configuration.marc_record_preprocessors)
+            if self.task_configuration.marc_record_preprocessors
+            else None
+        )
+        if isinstance(self.task_configuration.preprocessors_args, str):
+            with open(
+                self.folder_structure.mapping_files_folder
+                / self.task_configuration.preprocessors_args,
+                "r",
+            ) as f:
+                preprocessors_args = json.load(f)
+        else:
+            preprocessors_args = self.task_configuration.preprocessors_args
+
+        return FDIMARCImportJob.Config(
+            marc_files=file_paths,
+            import_profile_name=self.task_configuration.import_profile_name,
+            batch_size=self.task_configuration.batch_size,
+            batch_delay=self.task_configuration.batch_delay,
+            marc_record_preprocessors=preprocessors_str,
+            preprocessors_args=preprocessors_args,
+            no_progress=self.task_configuration.no_progress,
+            no_summary=self.task_configuration.skip_summary,
+            let_summary_fail=self.task_configuration.let_summary_fail,
+            split_files=self.task_configuration.split_files,
+            split_size=self.task_configuration.split_size,
+            split_offset=self.task_configuration.split_offset,
+            job_ids_file_path=self.folder_structure.results_folder / "marc_import_job_ids.txt",
+            show_file_names_in_data_import_logs=(
+                self.task_configuration.show_file_names_in_data_import_logs
+            ),
+        )
+
+    async def _do_work_async(self) -> None:
+        """Async implementation of the work logic."""
+        file_paths: List[Path] = []
+        for file_def in self.task_configuration.files:
+            path = self.folder_structure.results_folder / file_def.file_name
+            if not path.exists():
+                logging.error("File not found: %s", path)
+                raise FileNotFoundError(f"File not found: {path}")
+            file_paths.append(path)
+            self.files_processed.append(file_def.file_name)
+            logging.info("Will process file: %s", path)
+
+        # Create the folio_data_import MARCImportJob config
+        fdi_config = self._create_fdi_config(file_paths)
+
+        # Create progress reporter
+        if self.task_configuration.no_progress:
+            from folio_data_import._progress import NoOpProgressReporter
+
+            reporter = NoOpProgressReporter()
+        else:
+            reporter = RichProgressReporter(enabled=True)
+
+        # Create and run the importer
+        # folio_data_import handles its own error files and progress reporting
+        importer = FDIMARCImportJob(
+            folio_client=self.folio_client,
+            config=fdi_config,
+            reporter=reporter,
+        )
+
+        await importer.do_work()
+        await importer.wrap_up()
+
+        # Capture stats and job IDs from the importer
+        self.total_records_sent = importer.total_records_sent
+        self.job_ids = importer.job_ids
+
+        # Note: Detailed stats (created/updated/discarded/error) are retrieved from
+        # the job summary by folio_data_import and logged via log_job_summary().
+        # We don't have direct access to those stats as they're logged, not returned.
+
+    def do_work(self) -> None:
+        """Main work method that processes MARC files and imports them to FOLIO.
+
+        This method reads MARC records from the configured files and imports them
+        to FOLIO using the Data Import APIs via folio_data_import.MARCImportJob.
+        """
+        logging.info("Starting MARCImportTask work...")
+
+        try:
+            # Run the async work in an event loop
+            asyncio.run(self._do_work_async())
+        except FileNotFoundError as e:
+            logging.error("File not found: %s", e)
+            raise
+        except Exception as e:
+            logging.error("Error during MARC import: %s", e)
+            raise
+
+        logging.info("MARCImportTask work complete")
+
+    def _translate_stats_to_migration_report(self) -> None:
+        """Translate MARC import stats to MigrationReport format.
+
+        Note:
+            Detailed stats (created, updated, discarded, error) are retrieved from
+            the FOLIO job summary and logged by folio_data_import's log_job_summary().
+            We report what we can track directly: records sent and job IDs.
+        """
+        # General statistics
+        self.migration_report.set(
+            "GeneralStatistics",
+            "Records sent to Data Import",
+            self.total_records_sent,
+        )
+
+        self.migration_report.set(
+            "GeneralStatistics",
+            "Data Import jobs created",
+            len(self.job_ids),
+        )
+
+        # Add file information
+        for file_name in self.files_processed:
+            self.migration_report.add("FilesProcessed", file_name)
+
+    def wrap_up(self) -> None:
+        """Finalize the migration task and write reports.
+
+        This method translates statistics to the MigrationReport format and writes
+        both markdown and JSON reports. Error files created by folio_data_import
+        (bad_marc_records_*.mrc, failed_batches_*.mrc) are already in results_folder
+        since that's where we read the input files from.
+        """
+        logging.info("Done. Wrapping up MARCImportTask")
+
+        # Translate stats to migration report
+        self._translate_stats_to_migration_report()
+
+        # Log summary
+        logging.info("=" * 60)
+        logging.info("MARCImportTask Summary")
+        logging.info("=" * 60)
+        logging.info("Records sent to Data Import: %d", self.total_records_sent)
+        logging.info("Files processed: %d", len(self.files_processed))
+        logging.info("Data Import jobs created: %d", len(self.job_ids))
+
+        # Write markdown report
+        with open(self.folder_structure.migration_reports_file, "w+") as report_file:
+            self.migration_report.write_migration_report(
+                "MARC Data Import report",
+                report_file,
+                self.start_datetime,
+            )
+
+        # Write raw JSON report
+        with open(self.folder_structure.migration_reports_raw_file, "w") as raw_report_file:
+            self.migration_report.write_json_report(raw_report_file)
+
+        # Clean up empty log files
+        self.clean_out_empty_logs()
+
+        logging.info("MARCImportTask wrap up complete")
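
The new MARCImportTask is the largest addition in this release. As a rough illustration of how the task shown above might be configured, here is a sketch based only on the class and fields in this diff: the task name, file name, and profile name are placeholder values, FileDefinition is assumed to accept a file_name argument, and the migration_task_type string is assumed to match the class name as it does for the package's other tasks. Real runs normally go through the tool's task runner and configuration file rather than direct instantiation.

# Hypothetical configuration sketch for the new MARCImportTask (values are placeholders).
from folio_migration_tools.library_configuration import FileDefinition
from folio_migration_tools.migration_tasks.marc_import import MARCImportTask

marc_import_config = MARCImportTask.TaskConfiguration(
    name="import_bibs",                    # task name used in logs and reports
    migration_task_type="MARCImportTask",  # assumed to match the class name
    files=[FileDefinition(file_name="bibs.mrc")],
    import_profile_name="Default - Create instance and SRS MARC Bib",  # any existing Data Import job profile
    batch_size=50,     # records per batch sent to change-manager (1-1000)
    split_files=True,  # break large files into jobs of split_size records
    split_size=1000,
)

# The task is then run like any other migration task: construct it with a
# LibraryConfiguration and a FolioClient, call do_work(), then wrap_up().
# task = MARCImportTask(marc_import_config, library_config, folio_client)
# task.do_work()
# task.wrap_up()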

folio_migration_tools/migration_tasks/migration_task_base.py
@@ -1,3 +1,14 @@
+"""Base class and utilities for all migration tasks.
+
+This module provides the abstract MigrationTaskBase class that all migration tasks
+must inherit from. It handles common functionality including:
+- Folder structure setup and management
+- Logging configuration
+- Reference data mapping file loading and validation
+- MARC file processing workflows
+- Error tracking and reporting
+"""
+
 import csv
 import io
 import json
@@ -44,11 +55,18 @@ class MigrationTaskBase:
         folio_client: folioclient.FolioClient,
         use_logging: bool = True,
     ):
-
+        """Initialize base migration task with configurations and FOLIO client.
+
+        Args:
+            library_configuration: Library-specific configuration.
+            task_configuration: Task-specific configuration.
+            folio_client: FOLIO API client.
+            use_logging (bool): Whether to set up task logging.
+        """
         self.start_datetime = datetime.now(timezone.utc)
         self.task_configuration = task_configuration
-        logging.info(self.task_configuration.model_dump_json(indent=4))
         self.folio_client: FolioClient = folio_client
+        self.library_configuration = library_configuration
         self.ecs_tenant_id = (
             task_configuration.ecs_tenant_id or library_configuration.ecs_tenant_id
         )
@@ -72,7 +90,6 @@ class MigrationTaskBase:
             library_configuration.add_time_stamp_to_file_names,
         )

-        self.library_configuration = library_configuration
         self.object_type = self.get_object_type()
         try:
             self.folder_structure.setup_migration_file_structure()
@@ -89,8 +106,13 @@ class MigrationTaskBase:
         self.extradata_writer = ExtradataWriter(
             self.folder_structure.transformation_extra_data_path
         )
+
+        # Setup logging after folder_structure, library_configuration, and task_configuration
+        # are initialized since setup_logging depends on all three
         if use_logging:
             self.setup_logging()
+        logging.info("MigrationTaskBase init")
+        logging.info(self.task_configuration.model_dump_json(indent=4))
         self.folder_structure.log_folder_structure()
         logging.info("MigrationTaskBase init done")

@@ -123,7 +145,7 @@ class MigrationTaskBase:
     def check_source_files(
         source_path: Path, file_defs: list[library_configuration.FileDefinition]
     ) -> None:
-        """Lists the source data files. Special case since we use the Items folder for holdings
+        """Lists the source data files. Special case since we use the Items folder for holdings.

         Args:
             source_path (Path): _description_
@@ -153,12 +175,11 @@ class MigrationTaskBase:
             logging.info("\t%s", filename)

     def load_instance_id_map(self, raise_if_empty=True) -> dict:
+        """Load instance ID maps for holdings and other transformations.
+
+        Handles ECS environments where instances are transformed for central and
+        data tenants separately, but data tenants need central tenant instance IDs.
         """
-        This method handles loading instance id maps for holdings and other transformations that require it.
-        This is in the base class because multiple tasks need it. It exists because instances in an ECS environment
-        are transformed for the central and data tenants separately, but the data tenants need to know about
-        the central tenant instance ids. This is a bit of a hack, but it works for now.
-        """  # noqa: E501
         map_files = []
         instance_id_map = {}
         if self.library_configuration.is_ecs and self.central_folder_structure:
@@ -321,8 +342,7 @@ class MigrationTaskBase:

     @staticmethod
     def validate_ref_data_mapping_lines(lines, num_of_columns):
-        """
-        Helper method to validate the structure of individual lines in a mapping file.
+        """Helper method to validate the structure of individual lines in a mapping file.

         Args:
             lines (list): List of lines in the mapping file
@@ -349,8 +369,7 @@ class MigrationTaskBase:

     @staticmethod
     def verify_ref_data_mapping_file_structure(map_file: io.TextIOBase):
-        """
-        Helper method to validate the structure of a mapping file.
+        """Helper method to validate the structure of a mapping file.

         Args:
             map_file (io.TextIOBase): The mapping file to validate
@@ -390,8 +409,7 @@ class MigrationTaskBase:
         folio_keys,
         required: bool = True,
     ):
-        """
-        Helper method to load a reference data mapping file.
+        """Helper method to load a reference data mapping file.

         Args:
             folio_property_name (str): The name of the property in FOLIO
@@ -445,8 +463,7 @@ class MigrationTaskBase:


 class MarcTaskConfigurationBase(task_configuration.AbstractTaskConfiguration):
-    """
-    Base class for MARC task configurations.
+    """Base class for MARC task configurations.

     Attributes:
         files (List[library_configuration.FileDefinition]):
@@ -530,6 +547,11 @@ class MarcTaskConfigurationBase(task_configuration.AbstractTaskConfiguration):

 class ExcludeLevelFilter(logging.Filter):
     def __init__(self, level):
+        """Initialize filter to exclude logs of a specific level.
+
+        Args:
+            level: Logging level to exclude.
+        """
         super().__init__()
         self.level = level

@@ -539,6 +561,11 @@ class ExcludeLevelFilter(logging.Filter):

 class TaskNameFilter(logging.Filter):
     def __init__(self, task_configuration_name):
+        """Initialize filter to add task name to log records.
+
+        Args:
+            task_configuration_name (str): Name of the task configuration to add to logs.
+        """
         super().__init__()
         self.task_configuration_name = task_configuration_name

@@ -549,6 +576,11 @@ class TaskNameFilter(logging.Filter):

 class LevelFilter(logging.Filter):
     def __init__(self, level):
+        """Initialize filter to include only logs of a specific level.
+
+        Args:
+            level: Logging level to include.
+        """
         super().__init__()
         self.level = level


folio_migration_tools/migration_tasks/orders_transformer.py
@@ -1,3 +1,9 @@
+"""Purchase orders transformation task.
+
+Transforms purchase order data from CSV files to FOLIO Orders. Handles composite
+orders with embedded purchase order lines, acquisition units, and vendor references.
+"""
+
 import csv
 import ctypes
 import json
@@ -38,6 +44,8 @@ csv.field_size_limit(int(ctypes.c_ulong(-1).value // 2))
 # Read files and do some work
 class OrdersTransformer(MigrationTaskBase):
     class TaskConfiguration(AbstractTaskConfiguration):
+        """Task configuration for OrdersTransformer."""
+
         name: Annotated[
             str,
             Field(
@@ -136,6 +144,14 @@ class OrdersTransformer(MigrationTaskBase):
         folio_client,
         use_logging: bool = True,
     ):
+        """Initialize OrdersTransformer for purchase order transformations.
+
+        Args:
+            task_config (TaskConfiguration): Orders transformation configuration.
+            library_config (LibraryConfiguration): Library configuration.
+            folio_client: FOLIO API client.
+            use_logging (bool): Whether to set up task logging.
+        """
         csv.register_dialect("tsv", delimiter="\t")

         super().__init__(library_config, task_config, folio_client, use_logging)

folio_migration_tools/migration_tasks/organization_transformer.py
@@ -1,3 +1,9 @@
+"""Organization records transformation task.
+
+Transforms organization/vendor data from CSV files to FOLIO Organizations. Handles
+embedded interfaces, contacts, and categories with extradata object creation.
+"""
+
 import csv
 import ctypes
 import json
@@ -37,6 +43,8 @@ csv.field_size_limit(int(ctypes.c_ulong(-1).value // 2))
 # Read files and do some work
 class OrganizationTransformer(MigrationTaskBase):
     class TaskConfiguration(AbstractTaskConfiguration):
+        """Task configuration for OrganizationTransformer."""
+
         name: Annotated[
             str,
             Field(
@@ -111,6 +119,14 @@ class OrganizationTransformer(MigrationTaskBase):
         folio_client,
         use_logging: bool = True,
     ):
+        """Initialize OrganizationTransformer for organization transformations.
+
+        Args:
+            task_configuration (TaskConfiguration): Organizations transformation config.
+            library_config (LibraryConfiguration): Library configuration.
+            folio_client: FOLIO API client.
+            use_logging (bool): Whether to set up task logging.
+        """
         csv.register_dialect("tsv", delimiter="\t")

         super().__init__(library_config, task_configuration, folio_client, use_logging)
@@ -401,8 +417,7 @@ class OrganizationTransformer(MigrationTaskBase):
         return record

     def create_referenced_extradata_object(self, embedded_object, extradata_object_type):
-        """
-        and returns the UUID.
+        """Create an extradata object from an embedded object and return its UUID.

         Args:
             embedded_object (_type_): _description_

folio_migration_tools/migration_tasks/requests_migrator.py
@@ -1,3 +1,9 @@
+"""Request records migration task.
+
+Migrates patron requests from legacy ILS to FOLIO. Validates patron and item
+barcodes, handles request types and statuses, and maintains request dates.
+"""
+
 import csv
 import json
 import logging
@@ -26,6 +32,8 @@ from folio_migration_tools.transaction_migration.legacy_request import LegacyReq

 class RequestsMigrator(MigrationTaskBase):
     class TaskConfiguration(AbstractTaskConfiguration):
+        """Task configuration for RequestsMigrator."""
+
         name: Annotated[
             str,
             Field(
@@ -88,6 +96,13 @@ class RequestsMigrator(MigrationTaskBase):
         library_config: LibraryConfiguration,
         folio_client,
     ):
+        """Initialize RequestsMigrator for migrating circulation requests.
+
+        Args:
+            task_configuration (TaskConfiguration): Requests migration configuration.
+            library_config (LibraryConfiguration): Library configuration.
+            folio_client: FOLIO API client.
+        """
         csv.register_dialect("tsv", delimiter="\t")
         self.migration_report = MigrationReport()
         self.valid_legacy_requests = []