folio-migration-tools 1.2.1__py3-none-any.whl → 1.9.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. folio_migration_tools/__init__.py +11 -0
  2. folio_migration_tools/__main__.py +169 -85
  3. folio_migration_tools/circulation_helper.py +96 -59
  4. folio_migration_tools/config_file_load.py +66 -0
  5. folio_migration_tools/custom_dict.py +6 -4
  6. folio_migration_tools/custom_exceptions.py +21 -19
  7. folio_migration_tools/extradata_writer.py +46 -0
  8. folio_migration_tools/folder_structure.py +63 -66
  9. folio_migration_tools/helper.py +29 -21
  10. folio_migration_tools/holdings_helper.py +57 -34
  11. folio_migration_tools/i18n_config.py +9 -0
  12. folio_migration_tools/library_configuration.py +173 -13
  13. folio_migration_tools/mapper_base.py +317 -106
  14. folio_migration_tools/mapping_file_transformation/courses_mapper.py +203 -0
  15. folio_migration_tools/mapping_file_transformation/holdings_mapper.py +83 -69
  16. folio_migration_tools/mapping_file_transformation/item_mapper.py +98 -94
  17. folio_migration_tools/mapping_file_transformation/manual_fee_fines_mapper.py +352 -0
  18. folio_migration_tools/mapping_file_transformation/mapping_file_mapper_base.py +702 -223
  19. folio_migration_tools/mapping_file_transformation/notes_mapper.py +90 -0
  20. folio_migration_tools/mapping_file_transformation/order_mapper.py +492 -0
  21. folio_migration_tools/mapping_file_transformation/organization_mapper.py +389 -0
  22. folio_migration_tools/mapping_file_transformation/ref_data_mapping.py +38 -27
  23. folio_migration_tools/mapping_file_transformation/user_mapper.py +149 -361
  24. folio_migration_tools/marc_rules_transformation/conditions.py +650 -246
  25. folio_migration_tools/marc_rules_transformation/holdings_statementsparser.py +292 -130
  26. folio_migration_tools/marc_rules_transformation/hrid_handler.py +244 -0
  27. folio_migration_tools/marc_rules_transformation/loc_language_codes.xml +20846 -0
  28. folio_migration_tools/marc_rules_transformation/marc_file_processor.py +300 -0
  29. folio_migration_tools/marc_rules_transformation/marc_reader_wrapper.py +136 -0
  30. folio_migration_tools/marc_rules_transformation/rules_mapper_authorities.py +241 -0
  31. folio_migration_tools/marc_rules_transformation/rules_mapper_base.py +681 -201
  32. folio_migration_tools/marc_rules_transformation/rules_mapper_bibs.py +395 -429
  33. folio_migration_tools/marc_rules_transformation/rules_mapper_holdings.py +531 -100
  34. folio_migration_tools/migration_report.py +85 -38
  35. folio_migration_tools/migration_tasks/__init__.py +1 -3
  36. folio_migration_tools/migration_tasks/authority_transformer.py +119 -0
  37. folio_migration_tools/migration_tasks/batch_poster.py +911 -198
  38. folio_migration_tools/migration_tasks/bibs_transformer.py +121 -116
  39. folio_migration_tools/migration_tasks/courses_migrator.py +192 -0
  40. folio_migration_tools/migration_tasks/holdings_csv_transformer.py +252 -247
  41. folio_migration_tools/migration_tasks/holdings_marc_transformer.py +321 -115
  42. folio_migration_tools/migration_tasks/items_transformer.py +264 -84
  43. folio_migration_tools/migration_tasks/loans_migrator.py +506 -195
  44. folio_migration_tools/migration_tasks/manual_fee_fines_transformer.py +187 -0
  45. folio_migration_tools/migration_tasks/migration_task_base.py +364 -74
  46. folio_migration_tools/migration_tasks/orders_transformer.py +373 -0
  47. folio_migration_tools/migration_tasks/organization_transformer.py +451 -0
  48. folio_migration_tools/migration_tasks/requests_migrator.py +130 -62
  49. folio_migration_tools/migration_tasks/reserves_migrator.py +253 -0
  50. folio_migration_tools/migration_tasks/user_transformer.py +180 -139
  51. folio_migration_tools/task_configuration.py +46 -0
  52. folio_migration_tools/test_infrastructure/__init__.py +0 -0
  53. folio_migration_tools/test_infrastructure/mocked_classes.py +406 -0
  54. folio_migration_tools/transaction_migration/legacy_loan.py +148 -34
  55. folio_migration_tools/transaction_migration/legacy_request.py +65 -25
  56. folio_migration_tools/transaction_migration/legacy_reserve.py +47 -0
  57. folio_migration_tools/transaction_migration/transaction_result.py +12 -1
  58. folio_migration_tools/translations/en.json +476 -0
  59. folio_migration_tools-1.9.10.dist-info/METADATA +169 -0
  60. folio_migration_tools-1.9.10.dist-info/RECORD +67 -0
  61. {folio_migration_tools-1.2.1.dist-info → folio_migration_tools-1.9.10.dist-info}/WHEEL +1 -2
  62. folio_migration_tools-1.9.10.dist-info/entry_points.txt +3 -0
  63. folio_migration_tools/generate_schemas.py +0 -46
  64. folio_migration_tools/mapping_file_transformation/mapping_file_mapping_base_impl.py +0 -44
  65. folio_migration_tools/mapping_file_transformation/user_mapper_base.py +0 -212
  66. folio_migration_tools/marc_rules_transformation/bibs_processor.py +0 -163
  67. folio_migration_tools/marc_rules_transformation/holdings_processor.py +0 -284
  68. folio_migration_tools/report_blurbs.py +0 -219
  69. folio_migration_tools/transaction_migration/legacy_fee_fine.py +0 -36
  70. folio_migration_tools-1.2.1.dist-info/METADATA +0 -134
  71. folio_migration_tools-1.2.1.dist-info/RECORD +0 -50
  72. folio_migration_tools-1.2.1.dist-info/top_level.txt +0 -1
  73. {folio_migration_tools-1.2.1.dist-info → folio_migration_tools-1.9.10.dist-info/licenses}/LICENSE +0 -0
@@ -1,21 +1,34 @@
1
1
  import csv
2
- from genericpath import isfile
2
+ import io
3
+ import json
3
4
  import logging
4
- from pathlib import Path
5
+ import os
5
6
  import sys
6
7
  import time
7
8
  from abc import abstractmethod
8
- import json
9
+ from datetime import datetime, timezone
10
+ from genericpath import isfile
11
+ from pathlib import Path
12
+ from typing import Annotated, List, Optional
9
13
 
10
- from argparse_prompt import PromptParser
14
+ import folioclient
11
15
  from folio_uuid.folio_namespaces import FOLIONamespaces
12
16
  from folioclient import FolioClient
13
- from folio_migration_tools import library_configuration
17
+ from pydantic import Field
18
+
19
+ from folio_migration_tools import library_configuration, task_configuration
14
20
  from folio_migration_tools.custom_exceptions import (
15
21
  TransformationProcessError,
16
22
  TransformationRecordFailedError,
17
23
  )
24
+ from folio_migration_tools.extradata_writer import ExtradataWriter
18
25
  from folio_migration_tools.folder_structure import FolderStructure
26
+ from folio_migration_tools.marc_rules_transformation.marc_file_processor import (
27
+ MarcFileProcessor,
28
+ )
29
+ from folio_migration_tools.marc_rules_transformation.marc_reader_wrapper import (
30
+ MARCReaderWrapper,
31
+ )
19
32
 
20
33
 
21
34
  class MigrationTaskBase:
@@ -27,18 +40,31 @@ class MigrationTaskBase:
27
40
  def __init__(
28
41
  self,
29
42
  library_configuration: library_configuration.LibraryConfiguration,
30
- task_configuration,
43
+ task_configuration: task_configuration.AbstractTaskConfiguration,
44
+ folio_client: folioclient.FolioClient,
31
45
  use_logging: bool = True,
32
46
  ):
33
-
34
47
  logging.info("MigrationTaskBase init")
48
+ self.start_datetime = datetime.now(timezone.utc)
35
49
  self.task_configuration = task_configuration
36
- self.folio_client: FolioClient = FolioClient(
37
- library_configuration.okapi_url,
38
- library_configuration.tenant_id,
39
- library_configuration.okapi_username,
40
- library_configuration.okapi_password,
50
+ logging.info(self.task_configuration.json(indent=4))
51
+ self.folio_client: FolioClient = folio_client
52
+ self.ecs_tenant_id = (
53
+ task_configuration.ecs_tenant_id or library_configuration.ecs_tenant_id
41
54
  )
55
+ self.ecs_tenant_header = (
56
+ {"x-okapi-tenant": self.ecs_tenant_id} if self.ecs_tenant_id else {}
57
+ )
58
+ self.folio_client.okapi_headers.update(self.ecs_tenant_header)
59
+ self.central_folder_structure: Optional[FolderStructure] = None
60
+ if library_configuration.is_ecs and library_configuration.ecs_central_iteration_identifier:
61
+ self.central_folder_structure = FolderStructure(
62
+ library_configuration.base_folder,
63
+ FOLIONamespaces.instances,
64
+ task_configuration.name,
65
+ library_configuration.ecs_central_iteration_identifier,
66
+ library_configuration.add_time_stamp_to_file_names,
67
+ )
42
68
  self.folder_structure: FolderStructure = FolderStructure(
43
69
  library_configuration.base_folder,
44
70
  self.get_object_type(),
@@ -51,6 +77,8 @@ class MigrationTaskBase:
51
77
  self.object_type = self.get_object_type()
52
78
  try:
53
79
  self.folder_structure.setup_migration_file_structure()
80
+ if self.central_folder_structure:
81
+ self.central_folder_structure.setup_migration_file_structure()
54
82
  # Initiate Worker
55
83
  except FileNotFoundError as fne:
56
84
  logging.error(fne)
@@ -59,6 +87,9 @@ class MigrationTaskBase:
59
87
  logging.critical("Halting...")
60
88
  sys.exit(1)
61
89
  self.num_exeptions: int = 0
90
+ self.extradata_writer = ExtradataWriter(
91
+ self.folder_structure.transformation_extra_data_path
92
+ )
62
93
  if use_logging:
63
94
  self.setup_logging()
64
95
  self.folder_structure.log_folder_structure()
@@ -68,32 +99,118 @@ class MigrationTaskBase:
68
99
  def wrap_up(self):
69
100
  raise NotImplementedError()
70
101
 
102
+ def clean_out_empty_logs(self):
103
+ if (
104
+ self.folder_structure.data_issue_file_path.is_file()
105
+ and os.stat(self.folder_structure.data_issue_file_path).st_size == 0
106
+ ):
107
+ logging.info("Removing data issues file since it is empty")
108
+ os.remove(self.folder_structure.data_issue_file_path)
109
+ logging.info("Removed data issues file since it was empty")
110
+
111
+ if (
112
+ self.folder_structure.failed_marc_recs_file.is_file()
113
+ and os.stat(self.folder_structure.failed_marc_recs_file).st_size == 0
114
+ ):
115
+ os.remove(self.folder_structure.failed_marc_recs_file)
116
+ logging.info("Removed empty failed marc records file since it was empty")
117
+
71
118
  @abstractmethod
72
119
  def do_work(self):
73
120
  raise NotImplementedError
74
121
 
75
122
  @staticmethod
76
- def load_id_map(map_path, raise_if_empty=False):
123
+ def check_source_files(
124
+ source_path: Path, file_defs: list[library_configuration.FileDefinition]
125
+ ) -> None:
126
+ """Lists the source data files. Special case since we use the Items folder for holdings
127
+
128
+ Args:
129
+ source_path (Path): _description_
130
+ file_defs (list[library_configuration.FileDefinition]): _description_
131
+
132
+ Raises:
133
+ TransformationProcessError: _description_
134
+
135
+ """
136
+ files = [
137
+ source_path / f.file_name
138
+ for f in file_defs
139
+ if isfile(source_path / f.file_name)
140
+ ]
141
+ ret_str = ", ".join(f.file_name for f in file_defs)
142
+
143
+ if files and len(files) < len(file_defs):
144
+ raise TransformationProcessError(
145
+ "",
146
+ f"Some files listed in task configuration not found in {source_path}."
147
+ f"Listed files: {ret_str}",
148
+ )
149
+ if not any(files):
150
+ raise TransformationProcessError(
151
+ "",
152
+ f"None of the files listed in task configuration found in {source_path}."
153
+ f"Listed files: {ret_str}",
154
+ )
155
+ logging.info("Files to process:")
156
+ for filename in files:
157
+ logging.info("\t%s", filename)
158
+
159
+ def load_instance_id_map(self, raise_if_empty=True) -> dict:
160
+ """
161
+ This method handles loading instance id maps for holdings and other transformations that require it.
162
+ This is in the base class because multiple tasks need it. It exists because instances in an ECS environment
163
+ are transformed for the central and data tenants separately, but the data tenants need to know about
164
+ the central tenant instance ids. This is a bit of a hack, but it works for now.
165
+ """
166
+ map_files = []
167
+ instance_id_map = {}
168
+ if self.library_configuration.is_ecs and self.central_folder_structure:
169
+ logging.info(
170
+ "Loading ECS central tenant instance id map from %s", self.central_folder_structure.instance_id_map_path
171
+ )
172
+ instance_id_map = self.load_id_map(
173
+ self.central_folder_structure.instance_id_map_path,
174
+ raise_if_empty=False,
175
+ )
176
+ map_files.append(str(self.central_folder_structure.instance_id_map_path))
177
+ logging.info(
178
+ "Loading member tenant isntance id map from %s",
179
+ self.folder_structure.instance_id_map_path
180
+ )
181
+ instance_id_map = self.load_id_map(
182
+ self.folder_structure.instance_id_map_path,
183
+ raise_if_empty=False,
184
+ existing_id_map=instance_id_map,
185
+ )
186
+ map_files.append(str(self.folder_structure.instance_id_map_path))
187
+ if not any(instance_id_map) and raise_if_empty:
188
+ map_file_paths = ", ".join(map_files)
189
+ raise TransformationProcessError("", "Instance id map is empty", map_file_paths)
190
+ return instance_id_map
191
+
192
+ @staticmethod
193
+ def load_id_map(map_path, raise_if_empty=False, existing_id_map={}):
77
194
  if not isfile(map_path):
78
- logging.warn(
195
+ logging.warning(
79
196
  "No legacy id map found at %s. Will build one from scratch", map_path
80
197
  )
81
198
  return {}
82
- id_map = {}
83
- loaded_rows = 0
199
+ id_map = existing_id_map
200
+ loaded_rows = len(id_map)
84
201
  with open(map_path) as id_map_file:
85
202
  for index, json_string in enumerate(id_map_file, start=1):
86
203
  loaded_rows = index
87
204
  # {"legacy_id", "folio_id","suppressed"}
88
- map_object = json.loads(json_string)
89
- if loaded_rows % 50000 == 0:
205
+ map_tuple = json.loads(json_string)
206
+ if loaded_rows % 500000 == 0:
90
207
  print(
91
- f"{loaded_rows + 1} ids loaded to map {map_object['legacy_id']}",
208
+ f"{loaded_rows + 1} ids loaded to map. Last Id: {map_tuple[0]} ",
92
209
  end="\r",
93
210
  )
94
211
 
95
- id_map[map_object["legacy_id"]] = map_object
96
- logging.info("Loaded %s migrated IDs", loaded_rows)
212
+ id_map[map_tuple[0]] = map_tuple
213
+ logging.info("Loaded %s migrated IDs from %s", loaded_rows, id_map_file.name)
97
214
  if not any(id_map) and raise_if_empty:
98
215
  raise TransformationProcessError("", "Legacy id map is empty", map_path)
99
216
  return id_map
@@ -104,15 +221,6 @@ class MigrationTaskBase:
104
221
 
105
222
  def setup_logging(self):
106
223
  debug = self.library_configuration.log_level_debug
107
- DATA_OUTPUT_LVL_NUM = 25
108
- logging.addLevelName(DATA_OUTPUT_LVL_NUM, "DATA_OUTPUT")
109
-
110
- def data_output(self, message, *args, **kws):
111
- if self.isEnabledFor(DATA_OUTPUT_LVL_NUM):
112
- # Yes, logger takes its '*args' as 'args'.
113
- self._log(DATA_OUTPUT_LVL_NUM, message, args, **kws)
114
-
115
- logging.Logger.data_output = data_output
116
224
 
117
225
  DATA_ISSUE_LVL_NUM = 26
118
226
  logging.addLevelName(DATA_ISSUE_LVL_NUM, "DATA_ISSUES")
@@ -123,49 +231,41 @@ class MigrationTaskBase:
123
231
  self._log(DATA_ISSUE_LVL_NUM, message, args, **kws)
124
232
 
125
233
  logging.Logger.data_issues = data_issues
126
-
127
234
  logger = logging.getLogger()
235
+ logger.propogate = True
128
236
  logger.handlers = []
129
237
  formatter = logging.Formatter(
130
- "%(asctime)s\t%(levelname)s\t%(message)s\t%(filename)s:%(lineno)d"
238
+ "%(asctime)s\t%(levelname)s\t%(message)s\t%(task_configuration_name)s"
131
239
  )
132
240
  stream_handler = logging.StreamHandler()
133
- stream_handler.addFilter(ExcludeLevelFilter(25))
134
241
  stream_handler.addFilter(ExcludeLevelFilter(26))
135
-
242
+ stream_handler.addFilter(TaskNameFilter(self.task_configuration.name))
136
243
  if debug:
137
244
  logger.setLevel(logging.DEBUG)
138
245
  stream_handler.setLevel(logging.DEBUG)
246
+ logging.getLogger("httpx").setLevel(logging.DEBUG)
139
247
  else:
140
248
  logger.setLevel(logging.INFO)
141
249
  stream_handler.setLevel(logging.INFO)
142
250
  stream_handler.addFilter(
143
251
  ExcludeLevelFilter(30)
144
- ) # Loose warnings from pymarc
252
+ ) # Exclude warnings from pymarc
145
253
  stream_handler.setFormatter(formatter)
146
254
  logger.addHandler(stream_handler)
147
255
 
148
- file_formatter = logging.Formatter("%(message)s")
256
+ file_formatter = logging.Formatter(
257
+ "%(asctime)s\t%(message)s\t%(task_configuration_name)s\t%(filename)s:%(lineno)d"
258
+ )
149
259
  file_handler = logging.FileHandler(
150
260
  filename=self.folder_structure.transformation_log_path, mode="w"
151
261
  )
152
- file_handler.addFilter(ExcludeLevelFilter(25))
153
262
  file_handler.addFilter(ExcludeLevelFilter(26))
263
+ file_handler.addFilter(TaskNameFilter(self.task_configuration.name))
154
264
  # file_handler.addFilter(LevelFilter(0, 20))
155
265
  file_handler.setFormatter(file_formatter)
156
266
  file_handler.setLevel(logging.INFO)
157
267
  logging.getLogger().addHandler(file_handler)
158
268
 
159
- # Data file formatter
160
- data_file_formatter = logging.Formatter("%(message)s")
161
- data_file_handler = logging.FileHandler(
162
- filename=str(self.folder_structure.transformation_extra_data_path), mode="w"
163
- )
164
- data_file_handler.addFilter(LevelFilter(25))
165
- data_file_handler.setFormatter(data_file_formatter)
166
- data_file_handler.setLevel(25)
167
- logging.getLogger().addHandler(data_file_handler)
168
-
169
269
  # Data issue file formatter
170
270
  data_issue_file_formatter = logging.Formatter("%(message)s")
171
271
  data_issue_file_handler = logging.FileHandler(
@@ -180,26 +280,19 @@ class MigrationTaskBase:
180
280
  def setup_records_map(self, mapping_file_path):
181
281
  with open(mapping_file_path) as mapping_file:
182
282
  field_map = json.load(mapping_file)
183
- logging.info("%s fields in mapping file map", len(field_map["data"]))
283
+ logging.info(
284
+ "%s fields present in record mapping file", len(field_map["data"])
285
+ )
184
286
  mapped_fields = (
185
287
  f
186
288
  for f in field_map["data"]
187
289
  if f["legacy_field"] and f["legacy_field"] != "Not mapped"
188
290
  )
189
291
  logging.info(
190
- "%s Mapped fields in mapping file map", len(list(mapped_fields))
292
+ "%s fields mapped in record mapping file", len(list(mapped_fields))
191
293
  )
192
294
  return field_map
193
295
 
194
- @staticmethod
195
- def add_common_arguments(parser: PromptParser):
196
-
197
- """parser.add_argument("okapi_url", help="OKAPI base url")
198
- parser.add_argument("tenant_id", help="id of the FOLIO tenant.")
199
- parser.add_argument("username", help="the api user")
200
- parser.add_argument("base_folder", help="path base folder", type=str)
201
- parser.add_argument("--password", help="the api users password", secure=True)"""
202
-
203
296
  def log_and_exit_if_too_many_errors(
204
297
  self, error: TransformationRecordFailedError, idx
205
298
  ):
@@ -222,52 +315,239 @@ class MigrationTaskBase:
222
315
  f"{num_processed:,} records processed. Recs/sec: {elapsed_formatted} "
223
316
  )
224
317
 
225
- def load_ref_data_mapping_file(
318
+ def do_work_marc_transformer(
226
319
  self,
320
+ ):
321
+ logging.info("Starting....")
322
+ if self.folder_structure.failed_marc_recs_file.is_file():
323
+ os.remove(self.folder_structure.failed_marc_recs_file)
324
+ logging.info("Removed failed marc records file to prevent duplicating data")
325
+ with open(
326
+ self.folder_structure.created_objects_path, "w+"
327
+ ) as created_records_file:
328
+ self.processor = MarcFileProcessor(
329
+ self.mapper, self.folder_structure, created_records_file
330
+ )
331
+ for file_def in self.task_configuration.files:
332
+ MARCReaderWrapper.process_single_file(
333
+ file_def,
334
+ self.processor,
335
+ self.folder_structure.failed_marc_recs_file,
336
+ self.folder_structure,
337
+ )
338
+
339
+ @staticmethod
340
+ def validate_ref_data_mapping_lines(lines, num_of_columns):
341
+ """
342
+ Helper method to validate the structure of individual lines in a mapping file.
343
+
344
+ Args:
345
+ lines (list): List of lines in the mapping file
346
+ num_of_columns (int): Number of columns expected in each line
347
+
348
+ Returns:
349
+ tuple: A tuple containing a list of invalid lines and a list of valid lines
350
+ """
351
+ invalid_lines = []
352
+ valid_lines = []
353
+ for idx, row in enumerate(lines, start=2):
354
+ if not row.strip():
355
+ if idx == len(lines) + 1:
356
+ continue
357
+ else:
358
+ invalid_lines.append(str(idx))
359
+ else:
360
+ line_length = len(row.split("\t"))
361
+ if line_length != num_of_columns:
362
+ invalid_lines.append(str(idx))
363
+ else:
364
+ valid_lines.append(str(idx))
365
+ return invalid_lines, valid_lines
366
+
367
+ @staticmethod
368
+ def verify_ref_data_mapping_file_structure(map_file: io.TextIOBase):
369
+ """
370
+ Helper method to validate the structure of a mapping file.
371
+
372
+ Args:
373
+ map_file (io.TextIOBase): The mapping file to validate
374
+
375
+ Raises:
376
+ TransformationProcessError: If the mapping file has rows with different number of columns
377
+
378
+ Returns:
379
+ None
380
+ """
381
+ current_pos = map_file.tell()
382
+ try:
383
+ map_file.seek(0)
384
+ num_of_columns = len(map_file.readline().split("\t"))
385
+ lines = map_file.readlines()
386
+ invalid_lines, valid_lines = MigrationTaskBase.validate_ref_data_mapping_lines(
387
+ lines, num_of_columns
388
+ )
389
+ if invalid_lines:
390
+ raise TransformationProcessError(
391
+ "",
392
+ (
393
+ f"Mapping file {map_file.name} has rows with different number "
394
+ f"of columns ({'Row' if len(invalid_lines) == 1 else 'Rows'} {', '.join(invalid_lines)})"
395
+ ),
396
+ )
397
+ if not valid_lines:
398
+ raise TransformationProcessError(
399
+ "", f"Map has no rows: {map_file.name}"
400
+ )
401
+ finally:
402
+ map_file.seek(current_pos)
403
+
404
+ @staticmethod
405
+ def load_ref_data_mapping_file(
227
406
  folio_property_name: str,
228
407
  map_file_path: Path,
229
408
  folio_keys,
230
409
  required: bool = True,
231
410
  ):
411
+ """
412
+ Helper method to load a reference data mapping file.
413
+
414
+ Args:
415
+ folio_property_name (str): The name of the property in FOLIO
416
+ map_file_path (Path): The path to the mapping file
417
+ folio_keys (list): A list of FOLIO keys
418
+ required (bool): Whether the property is required or not
419
+ """
232
420
  if (
233
- folio_property_name in folio_keys
234
- or required
235
- or folio_property_name.startswith("statisticalCodeIds")
421
+ (
422
+ folio_property_name in folio_keys
423
+ or required
424
+ or folio_property_name.startswith("statisticalCodeIds")
425
+ or folio_property_name.startswith("locationMap")
426
+ or folio_property_name.startswith("fundsMap")
427
+ )
428
+ and map_file_path.is_file()
236
429
  ):
237
430
  try:
238
431
  with open(map_file_path) as map_file:
432
+ # Validate the structure of the mapping file
433
+ MigrationTaskBase.verify_ref_data_mapping_file_structure(map_file)
239
434
  ref_data_map = list(csv.DictReader(map_file, dialect="tsv"))
240
435
  logging.info(
241
436
  "Found %s rows in %s map",
242
437
  len(ref_data_map),
243
438
  folio_property_name,
244
439
  )
440
+ if not any(ref_data_map[0].keys()):
441
+ raise TransformationProcessError(
442
+ "",
443
+ (
444
+ f"{folio_property_name} not mapped in legacy->folio mapping file "
445
+ f"({map_file_path}). Did you map this field, "
446
+ "but forgot to add a mapping file?"
447
+ ),
448
+ )
245
449
  logging.info(
246
- "%s will be used for determinig %s",
247
- ",".join(ref_data_map[0].keys()),
450
+ "%s will be used for determining %s",
451
+ ", ".join(ref_data_map[0].keys()),
248
452
  folio_property_name,
249
453
  )
250
454
  return ref_data_map
251
455
  except Exception as exception:
252
- raise TransformationProcessError(
253
- "",
254
- (
255
- f"{folio_property_name} not mapped in legacy->folio mapping file "
256
- f"({map_file_path}) ({exception}). Did you map this field, "
257
- "but forgot to add a mapping file?"
258
- ),
259
- ) from exception
456
+ raise exception
457
+
260
458
  else:
261
459
  logging.info("No mapping setup for %s", folio_property_name)
262
460
  logging.info("%s will have default mapping if any ", folio_property_name)
263
461
  logging.info(
264
- "Add a file named %s and add the field to "
265
- "the item.mapping.json file.",
462
+ "Add a file named %s and add the field to the field mapping json file.",
266
463
  map_file_path,
267
464
  )
268
465
  return None
269
466
 
270
467
 
468
+ class MarcTaskConfigurationBase(task_configuration.AbstractTaskConfiguration):
469
+ """
470
+ Base class for MARC task configurations.
471
+
472
+ Attributes:
473
+ files (List[library_configuration.FileDefinition]):
474
+ List of MARC21 files to be processed.
475
+
476
+ create_source_records (bool):
477
+ Controls whether or not to retain the MARC records in Source Record Storage.
478
+ Default is False, meaning MARC records will not be retained.
479
+
480
+ hrid_handling (library_configuration.HridHandling):
481
+ Determines how HRIDs are handled.
482
+ - 'default': FOLIO generates HRIDs and moves existing 001 fields into a 035 field, concatenated with the 003 field.
483
+ - 'preserve001': Keeps the 001 fields in place and uses them as HRIDs.
484
+ Default is 'default'.
485
+
486
+ deactivate035_from001 (bool):
487
+ Disables the default FOLIO functionality of moving the previous 001 field into a 035 field, prefixed with the value from 003.
488
+ Default is False, meaning the functionality remains active.
489
+ """
490
+
491
+ files: Annotated[
492
+ List[library_configuration.FileDefinition],
493
+ Field(
494
+ title="Source files",
495
+ description=("List of MARC21 files with bibliographic records."),
496
+ ),
497
+ ]
498
+ create_source_records: Annotated[
499
+ bool,
500
+ Field(
501
+ title="Create source records",
502
+ description=(
503
+ "Controls whether or not to retain the MARC records in "
504
+ "Source Record Storage."
505
+ ),
506
+ ),
507
+ ] = False
508
+ hrid_handling: Annotated[
509
+ library_configuration.HridHandling,
510
+ Field(
511
+ title="HRID Handling",
512
+ description=(
513
+ "Setting to default will make FOLIO generate HRIDs and move the existing "
514
+ "001:s into a 035, concatenated with the 003. Choosing preserve001 means "
515
+ "the 001:s will remain in place, and that they will also become the HRIDs"
516
+ ),
517
+ ),
518
+ ] = library_configuration.HridHandling.default
519
+ deactivate035_from001: Annotated[
520
+ bool,
521
+ Field(
522
+ title="Create 035 from 001 and 003",
523
+ description=(
524
+ "This deactivates the FOLIO default functionality of moving the previous 001 "
525
+ "into a 035, prefixed with the value from 003"
526
+ ),
527
+ ),
528
+ ] = False
529
+ statistical_codes_map_file_name: Annotated[
530
+ Optional[str],
531
+ Field(
532
+ title="Statistical code map file name",
533
+ description=(
534
+ "Path to the file containing the mapping of statistical codes. "
535
+ "The file should be in TSV format with legacy_stat_code and folio_code columns."
536
+ ),
537
+ ),
538
+ ] = ""
539
+ statistical_code_mapping_fields: Annotated[
540
+ List[str],
541
+ Field(
542
+ title="Statistical code mapping fields",
543
+ description=(
544
+ "List of fields + subfields to be used for mapping statistical codes. "
545
+ "Subfields should be delimited by a \"$\" (eg. 907$a). Single repeating subfields "
546
+ "will be treated as unique values. Multiple subfields will be concatenated together with a space."
547
+ ),
548
+ ),
549
+ ] = []
550
+
271
551
  class ExcludeLevelFilter(logging.Filter):
272
552
  def __init__(self, level):
273
553
  super().__init__()
@@ -277,6 +557,16 @@ class ExcludeLevelFilter(logging.Filter):
277
557
  return record.levelno != self.level
278
558
 
279
559
 
560
+ class TaskNameFilter(logging.Filter):
561
+ def __init__(self, task_configuration_name):
562
+ super().__init__()
563
+ self.task_configuration_name = task_configuration_name
564
+
565
+ def filter(self, record):
566
+ record.task_configuration_name = self.task_configuration_name
567
+ return True
568
+
569
+
280
570
  class LevelFilter(logging.Filter):
281
571
  def __init__(self, level):
282
572
  super().__init__()