folio-migration-tools 1.9.0rc2__py3-none-any.whl → 1.9.0rc4__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (23)
  1. folio_migration_tools/mapping_file_transformation/holdings_mapper.py +1 -1
  2. folio_migration_tools/mapping_file_transformation/order_mapper.py +8 -4
  3. folio_migration_tools/marc_rules_transformation/rules_mapper_holdings.py +22 -2
  4. folio_migration_tools/migration_tasks/batch_poster.py +143 -26
  5. folio_migration_tools/migration_tasks/courses_migrator.py +54 -8
  6. folio_migration_tools/migration_tasks/holdings_csv_transformer.py +102 -14
  7. folio_migration_tools/migration_tasks/holdings_marc_transformer.py +46 -4
  8. folio_migration_tools/migration_tasks/items_transformer.py +133 -20
  9. folio_migration_tools/migration_tasks/loans_migrator.py +61 -9
  10. folio_migration_tools/migration_tasks/migration_task_base.py +104 -11
  11. folio_migration_tools/migration_tasks/orders_transformer.py +107 -14
  12. folio_migration_tools/migration_tasks/organization_transformer.py +79 -14
  13. folio_migration_tools/migration_tasks/requests_migrator.py +56 -7
  14. folio_migration_tools/migration_tasks/reserves_migrator.py +26 -4
  15. folio_migration_tools/migration_tasks/user_transformer.py +88 -18
  16. folio_migration_tools/task_configuration.py +2 -2
  17. folio_migration_tools/transaction_migration/legacy_loan.py +13 -1
  18. folio_migration_tools/transaction_migration/legacy_reserve.py +3 -5
  19. {folio_migration_tools-1.9.0rc2.dist-info → folio_migration_tools-1.9.0rc4.dist-info}/METADATA +1 -1
  20. {folio_migration_tools-1.9.0rc2.dist-info → folio_migration_tools-1.9.0rc4.dist-info}/RECORD +23 -23
  21. {folio_migration_tools-1.9.0rc2.dist-info → folio_migration_tools-1.9.0rc4.dist-info}/WHEEL +1 -1
  22. {folio_migration_tools-1.9.0rc2.dist-info → folio_migration_tools-1.9.0rc4.dist-info}/LICENSE +0 -0
  23. {folio_migration_tools-1.9.0rc2.dist-info → folio_migration_tools-1.9.0rc4.dist-info}/entry_points.txt +0 -0
@@ -1,4 +1,3 @@
1
- '''Main "script."'''
2
1
  import csv
3
2
  import ctypes
4
3
  import json
@@ -36,26 +35,136 @@ csv.field_size_limit(int(ctypes.c_ulong(-1).value // 2))
36
35
 
37
36
  class ItemsTransformer(MigrationTaskBase):
38
37
  class TaskConfiguration(AbstractTaskConfiguration):
39
- name: str
40
- migration_task_type: str
41
- hrid_handling: HridHandling
42
- files: List[FileDefinition]
43
- items_mapping_file_name: str
44
- location_map_file_name: str
45
- default_call_number_type_name: str
46
- temp_location_map_file_name: Optional[str] = ""
47
- material_types_map_file_name: str
48
- loan_types_map_file_name: str
49
- temp_loan_types_map_file_name: Optional[str] = ""
50
- statistical_codes_map_file_name: Optional[str] = ""
51
- item_statuses_map_file_name: str
52
- call_number_type_map_file_name: str
53
- reset_hrid_settings: Optional[bool] = False
38
+ name: Annotated[
39
+ str,
40
+ Field(
41
+ title="Task name",
42
+ description="Name of the task.",
43
+ ),
44
+ ]
45
+ migration_task_type: Annotated[
46
+ str,
47
+ Field(
48
+ title="Migration task type",
49
+ description="Type of migration task.",
50
+ ),
51
+ ]
52
+ hrid_handling: Annotated[
53
+ HridHandling,
54
+ Field(
55
+ title="HRID handling",
56
+ description=(
57
+ "Determining how the HRID generation "
58
+ "should be handled."
59
+ ),
60
+ ),
61
+ ]
62
+ files: Annotated[
63
+ List[FileDefinition],
64
+ Field(
65
+ title="Files",
66
+ description="List of files.",
67
+ ),
68
+ ]
69
+ items_mapping_file_name: Annotated[
70
+ str,
71
+ Field(
72
+ title="Items mapping file name",
73
+ description="File name for items mapping.",
74
+ ),
75
+ ]
76
+ location_map_file_name: Annotated[
77
+ str,
78
+ Field(
79
+ title="Location map file name",
80
+ description="File name for location map.",
81
+ ),
82
+ ]
83
+ default_call_number_type_name: Annotated[
84
+ str,
85
+ Field(
86
+ title="Default call number type name",
87
+ description="Default name for call number type.",
88
+ ),
89
+ ]
90
+ temp_location_map_file_name: Annotated[
91
+ Optional[str],
92
+ Field(
93
+ title="Temporary location map file name",
94
+ description=(
95
+ "Temporary file name for location map. "
96
+ "Empty string by default."
97
+ ),
98
+ ),
99
+ ] = ""
100
+ material_types_map_file_name: Annotated[
101
+ str,
102
+ Field(
103
+ title="Material types map file name",
104
+ description="File name for material types map.",
105
+ ),
106
+ ]
107
+ loan_types_map_file_name: Annotated[
108
+ str,
109
+ Field(
110
+ title="Loan types map file name",
111
+ description="File name for loan types map.",
112
+ ),
113
+ ]
114
+ temp_loan_types_map_file_name: Annotated[
115
+ Optional[str],
116
+ Field(
117
+ title="Temporary loan types map file name",
118
+ description=(
119
+ "File name for temporary loan types map. "
120
+ "Empty string by default."
121
+ ),
122
+ ),
123
+ ] = ""
124
+ statistical_codes_map_file_name: Annotated[
125
+ Optional[str],
126
+ Field(
127
+ title="Statistical codes map file name",
128
+ description=(
129
+ "File name for statistical codes map. "
130
+ "Empty string by default."
131
+ ),
132
+ ),
133
+ ] = ""
134
+ item_statuses_map_file_name: Annotated[
135
+ str,
136
+ Field(
137
+ title="Item statuses map file name",
138
+ description="File name for item statuses map.",
139
+ ),
140
+ ]
141
+ call_number_type_map_file_name: Annotated[
142
+ str,
143
+ Field(
144
+ title="Call number type map file name",
145
+ description="File name for call number type map.",
146
+ ),
147
+ ]
148
+ reset_hrid_settings: Annotated[
149
+ Optional[bool],
150
+ Field(
151
+ title="Reset HRID settings",
152
+ description=(
153
+ "At the end of the run "
154
+ "reset FOLIO with the HRID settings. "
155
+ "By default is False."
156
+ ),
157
+ ),
158
+ ] = False
54
159
  update_hrid_settings: Annotated[
55
160
  bool,
56
161
  Field(
57
162
  title="Update HRID settings",
58
- description="At the end of the run, update FOLIO with the HRID settings",
163
+ description=(
164
+ "At the end of the run "
165
+ "update FOLIO with the HRID settings. "
166
+ "By default is True."
167
+ ),
59
168
  ),
60
169
  ] = True
61
170
  boundwith_relationship_file_path: Annotated[
@@ -63,8 +172,10 @@ class ItemsTransformer(MigrationTaskBase):
63
172
  Field(
64
173
  title="Boundwith relationship file path",
65
174
  description=(
66
- "Path to a file outlining Boundwith relationships, in the style of Voyager."
67
- " A TSV file with MFHD_ID and BIB_ID headers and values"
175
+ "Path to a file outlining Boundwith relationships, "
176
+ "in the style of Voyager. "
177
+ "A TSV file with MFHD_ID and BIB_ID headers and values. "
178
+ "By default is empty string."
68
179
  ),
69
180
  ),
70
181
  ] = ""
@@ -73,7 +184,9 @@ class ItemsTransformer(MigrationTaskBase):
73
184
  Field(
74
185
  title="Prevent permanent location map default",
75
186
  description=(
76
- "Prevent the default mapping of permanent location to the default location."
187
+ "Prevent the default mapping of permanent location "
188
+ "to the default location. "
189
+ "By default is False."
77
190
  ),
78
191
  ),
79
192
  ] = False
@@ -6,9 +6,10 @@ import sys
6
6
  import time
7
7
  import traceback
8
8
  from datetime import datetime, timedelta
9
- from typing import Optional
9
+ from typing import Annotated, Optional
10
10
  from urllib.error import HTTPError
11
11
  from zoneinfo import ZoneInfo
12
+ from pydantic import Field
12
13
 
13
14
  import i18n
14
15
  from dateutil import parser as du_parser
@@ -35,13 +36,64 @@ from folio_migration_tools.transaction_migration.transaction_result import (
35
36
 
36
37
  class LoansMigrator(MigrationTaskBase):
37
38
  class TaskConfiguration(AbstractTaskConfiguration):
38
- name: str
39
- migration_task_type: str
40
- open_loans_files: list[FileDefinition]
41
- fallback_service_point_id: str
42
- starting_row: Optional[int] = 1
43
- item_files: Optional[list[FileDefinition]] = []
44
- patron_files: Optional[list[FileDefinition]] = []
39
+ name: Annotated[
40
+ str,
41
+ Field(
42
+ title="Task name",
43
+ description="The name of the task.",
44
+ ),
45
+ ]
46
+ migration_task_type: Annotated[
47
+ str,
48
+ Field(
49
+ title="Migration task type",
50
+ description="The type of the migration task.",
51
+ ),
52
+ ]
53
+ open_loans_files: Annotated[
54
+ Optional[list[FileDefinition]],
55
+ Field(
56
+ title="Open loans files",
57
+ description="List of files containing open loan data."
58
+ ),
59
+ ]
60
+ fallback_service_point_id: Annotated[
61
+ str,
62
+ Field(
63
+ title="Fallback service point ID",
64
+ description="Identifier of the fallback service point.",
65
+ ),
66
+ ]
67
+ starting_row: Annotated[
68
+ Optional[int],
69
+ Field(
70
+ title="Starting row",
71
+ description=(
72
+ "The starting row for data processing. "
73
+ "By default is 1."
74
+ ),
75
+ ),
76
+ ] = 1
77
+ item_files: Annotated[
78
+ Optional[list[FileDefinition]],
79
+ Field(
80
+ title="Item files",
81
+ description=(
82
+ "List of files containing item data. "
83
+ "By default is empty list."
84
+ ),
85
+ ),
86
+ ] = []
87
+ patron_files: Annotated[
88
+ Optional[list[FileDefinition]],
89
+ Field(
90
+ title="Patron files",
91
+ description=(
92
+ "List of files containing patron data. "
93
+ "By default is empty list."
94
+ ),
95
+ ),
96
+ ] = []
45
97
 
46
98
  @staticmethod
47
99
  def get_object_type() -> FOLIONamespaces:
@@ -729,7 +781,7 @@ def timings(t0, t0func, num_objects):
729
781
 
730
782
 
731
783
  def print_smtp_warning():
732
- s = """
784
+ s = r"""
733
785
  _____ __ __ _____ ______ ___
734
786
  / ____| | \/ | |_ _| | __ | |__ \\
735
787
  | (___ | \ / | | | | |__|_| ) |
@@ -1,4 +1,5 @@
1
1
  import csv
2
+ import io
2
3
  import json
3
4
  import logging
4
5
  import os
@@ -119,7 +120,11 @@ class MigrationTaskBase:
119
120
  TransformationProcessError: _description_
120
121
 
121
122
  """
122
- files = [source_path / f.file_name for f in file_defs if isfile(source_path / f.file_name)]
123
+ files = [
124
+ source_path / f.file_name
125
+ for f in file_defs
126
+ if isfile(source_path / f.file_name)
127
+ ]
123
128
  ret_str = ", ".join(f.file_name for f in file_defs)
124
129
 
125
130
  if files and len(files) < len(file_defs):
@@ -141,7 +146,9 @@ class MigrationTaskBase:
141
146
  @staticmethod
142
147
  def load_id_map(map_path, raise_if_empty=False):
143
148
  if not isfile(map_path):
144
- logging.warn("No legacy id map found at %s. Will build one from scratch", map_path)
149
+ logging.warn(
150
+ "No legacy id map found at %s. Will build one from scratch", map_path
151
+ )
145
152
  return {}
146
153
  id_map = {}
147
154
  loaded_rows = 0
@@ -194,7 +201,9 @@ class MigrationTaskBase:
194
201
  else:
195
202
  logger.setLevel(logging.INFO)
196
203
  stream_handler.setLevel(logging.INFO)
197
- stream_handler.addFilter(ExcludeLevelFilter(30)) # Exclude warnings from pymarc
204
+ stream_handler.addFilter(
205
+ ExcludeLevelFilter(30)
206
+ ) # Exclude warnings from pymarc
198
207
  stream_handler.setFormatter(formatter)
199
208
  logger.addHandler(stream_handler)
200
209
 
@@ -225,16 +234,22 @@ class MigrationTaskBase:
225
234
  def setup_records_map(self, mapping_file_path):
226
235
  with open(mapping_file_path) as mapping_file:
227
236
  field_map = json.load(mapping_file)
228
- logging.info("%s fields present in record mapping file", len(field_map["data"]))
237
+ logging.info(
238
+ "%s fields present in record mapping file", len(field_map["data"])
239
+ )
229
240
  mapped_fields = (
230
241
  f
231
242
  for f in field_map["data"]
232
243
  if f["legacy_field"] and f["legacy_field"] != "Not mapped"
233
244
  )
234
- logging.info("%s fields mapped in record mapping file", len(list(mapped_fields)))
245
+ logging.info(
246
+ "%s fields mapped in record mapping file", len(list(mapped_fields))
247
+ )
235
248
  return field_map
236
249
 
237
- def log_and_exit_if_too_many_errors(self, error: TransformationRecordFailedError, idx):
250
+ def log_and_exit_if_too_many_errors(
251
+ self, error: TransformationRecordFailedError, idx
252
+ ):
238
253
  self.num_exeptions += 1
239
254
  error.log_it()
240
255
  if self.num_exeptions / (1 + idx) > 0.2 and self.num_exeptions > 5000:
@@ -250,7 +265,9 @@ class MigrationTaskBase:
250
265
  if num_processed > 1 and num_processed % 10000 == 0:
251
266
  elapsed = num_processed / (time.time() - start_time)
252
267
  elapsed_formatted = "{0:.4g}".format(elapsed)
253
- logging.info(f"{num_processed:,} records processed. Recs/sec: {elapsed_formatted} ")
268
+ logging.info(
269
+ f"{num_processed:,} records processed. Recs/sec: {elapsed_formatted} "
270
+ )
254
271
 
255
272
  def do_work_marc_transformer(
256
273
  self,
@@ -259,7 +276,9 @@ class MigrationTaskBase:
259
276
  if self.folder_structure.failed_marc_recs_file.is_file():
260
277
  os.remove(self.folder_structure.failed_marc_recs_file)
261
278
  logging.info("Removed failed marc records file to prevent duplicating data")
262
- with open(self.folder_structure.created_objects_path, "w+") as created_records_file:
279
+ with open(
280
+ self.folder_structure.created_objects_path, "w+"
281
+ ) as created_records_file:
263
282
  self.processor = MarcFileProcessor(
264
283
  self.mapper, self.folder_structure, created_records_file
265
284
  )
@@ -271,13 +290,87 @@ class MigrationTaskBase:
271
290
  self.folder_structure,
272
291
  )
273
292
 
293
+ @staticmethod
294
+ def validate_ref_data_mapping_lines(lines, num_of_columns):
295
+ """
296
+ Helper method to validate the structure of individual lines in a mapping file.
297
+
298
+ Args:
299
+ lines (list): List of lines in the mapping file
300
+ num_of_columns (int): Number of columns expected in each line
301
+
302
+ Returns:
303
+ tuple: A tuple containing a list of invalid lines and a list of valid lines
304
+ """
305
+ invalid_lines = []
306
+ valid_lines = []
307
+ for idx, row in enumerate(lines, start=2):
308
+ if not row.strip():
309
+ if idx == len(lines) + 1:
310
+ continue
311
+ else:
312
+ invalid_lines.append(str(idx))
313
+ else:
314
+ line_length = len(row.split("\t"))
315
+ if line_length != num_of_columns:
316
+ invalid_lines.append(str(idx))
317
+ else:
318
+ valid_lines.append(str(idx))
319
+ return invalid_lines, valid_lines
320
+
321
+ @staticmethod
322
+ def verify_ref_data_mapping_file_structure(map_file: io.TextIOBase):
323
+ """
324
+ Helper method to validate the structure of a mapping file.
325
+
326
+ Args:
327
+ map_file (io.TextIOBase): The mapping file to validate
328
+
329
+ Raises:
330
+ TransformationProcessError: If the mapping file has rows with different number of columns
331
+
332
+ Returns:
333
+ None
334
+ """
335
+ current_pos = map_file.tell()
336
+ try:
337
+ map_file.seek(0)
338
+ num_of_columns = len(map_file.readline().split("\t"))
339
+ lines = map_file.readlines()
340
+ invalid_lines, valid_lines = MigrationTaskBase.validate_ref_data_mapping_lines(
341
+ lines, num_of_columns
342
+ )
343
+ if invalid_lines:
344
+ raise TransformationProcessError(
345
+ "",
346
+ (
347
+ f"Mapping file {map_file.name} has rows with different number "
348
+ f"of columns ({'Row' if len(invalid_lines) == 1 else 'Rows'} {', '.join(invalid_lines)})"
349
+ ),
350
+ )
351
+ if not valid_lines:
352
+ raise TransformationProcessError(
353
+ "", f"Map has no rows: {map_file.name}"
354
+ )
355
+ finally:
356
+ map_file.seek(current_pos)
357
+
358
+ @staticmethod
274
359
  def load_ref_data_mapping_file(
275
- self,
276
360
  folio_property_name: str,
277
361
  map_file_path: Path,
278
362
  folio_keys,
279
363
  required: bool = True,
280
364
  ):
365
+ """
366
+ Helper method to load a reference data mapping file.
367
+
368
+ Args:
369
+ folio_property_name (str): The name of the property in FOLIO
370
+ map_file_path (Path): The path to the mapping file
371
+ folio_keys (list): A list of FOLIO keys
372
+ required (bool): Whether the property is required or not
373
+ """
281
374
  if (
282
375
  folio_property_name in folio_keys
283
376
  or required
@@ -286,9 +379,9 @@ class MigrationTaskBase:
286
379
  ):
287
380
  try:
288
381
  with open(map_file_path) as map_file:
382
+ # Validate the structure of the mapping file
383
+ MigrationTaskBase.verify_ref_data_mapping_file_structure(map_file)
289
384
  ref_data_map = list(csv.DictReader(map_file, dialect="tsv"))
290
- if not ref_data_map:
291
- raise TransformationProcessError("", f"Map has no rows: {map_file_path}")
292
385
  logging.info(
293
386
  "Found %s rows in %s map",
294
387
  len(ref_data_map),
@@ -5,7 +5,8 @@ import logging
5
5
  import sys
6
6
  import time
7
7
  from os.path import isfile
8
- from typing import List, Optional
8
+ from typing import List, Optional, Annotated
9
+ from pydantic import Field
9
10
 
10
11
  import i18n
11
12
  from deepdiff import DeepDiff
@@ -26,7 +27,9 @@ from folio_migration_tools.mapping_file_transformation.mapping_file_mapper_base
26
27
  from folio_migration_tools.mapping_file_transformation.order_mapper import (
27
28
  CompositeOrderMapper,
28
29
  )
29
- from folio_migration_tools.migration_tasks.migration_task_base import MigrationTaskBase
30
+ from folio_migration_tools.migration_tasks.migration_task_base import (
31
+ MigrationTaskBase,
32
+ )
30
33
  from folio_migration_tools.task_configuration import AbstractTaskConfiguration
31
34
 
32
35
  csv.field_size_limit(int(ctypes.c_ulong(-1).value // 2))
@@ -35,18 +38,108 @@ csv.field_size_limit(int(ctypes.c_ulong(-1).value // 2))
35
38
  # Read files and do some work
36
39
  class OrdersTransformer(MigrationTaskBase):
37
40
  class TaskConfiguration(AbstractTaskConfiguration):
38
- name: str
39
- migration_task_type: str
40
- files: List[FileDefinition]
41
- orders_mapping_file_name: str
42
- organizations_code_map_file_name: str
43
- acquisition_method_map_file_name: str
44
- payment_status_map_file_name: Optional[str] = ""
45
- receipt_status_map_file_name: Optional[str] = ""
46
- workflow_status_map_file_name: Optional[str] = ""
47
- location_map_file_name: Optional[str] = ""
48
- funds_map_file_name: Optional[str] = ""
49
- funds_expense_class_map_file_name: Optional[str] = ""
41
+ name: Annotated[
42
+ str,
43
+ Field(
44
+ title="Task name",
45
+ description="The name of the task.",
46
+ ),
47
+ ]
48
+ migration_task_type: Annotated[
49
+ str,
50
+ Field(
51
+ title="Migration task type",
52
+ description="Type of the migration task.",
53
+ ),
54
+ ]
55
+ files: Annotated[
56
+ List[FileDefinition],
57
+ Field(
58
+ title="Files",
59
+ description="List of the files.",
60
+ ),
61
+ ]
62
+ orders_mapping_file_name: Annotated[
63
+ str,
64
+ Field(
65
+ title="Orders Mapping File Name",
66
+ description="File name for orders mapping.",
67
+ ),
68
+ ]
69
+ organizations_code_map_file_name: Annotated[
70
+ str,
71
+ Field(
72
+ title="Organizations Code Map File Name",
73
+ description="File name for organizations code mapping.",
74
+ ),
75
+ ]
76
+ acquisition_method_map_file_name: Annotated[
77
+ str,
78
+ Field(
79
+ title="Acquisition Method Map File Name",
80
+ description="File name for acquisition method mapping.",
81
+ ),
82
+ ]
83
+ payment_status_map_file_name: Annotated[
84
+ Optional[str],
85
+ Field(
86
+ title="Payment Status Map File Name",
87
+ description=(
88
+ "File name for payment status mapping. "
89
+ "By default is empty string."
90
+ ),
91
+ ),
92
+ ] = ""
93
+ receipt_status_map_file_name: Annotated[
94
+ Optional[str],
95
+ Field(
96
+ title="Receipt Status Map File Name",
97
+ description=(
98
+ "File name for receipt status mapping. "
99
+ "By default is empty string."
100
+ ),
101
+ ),
102
+ ] = ""
103
+ workflow_status_map_file_name: Annotated[
104
+ Optional[str],
105
+ Field(
106
+ title="Workflow Status Map File Name",
107
+ description=(
108
+ "File name for workflow status mapping. "
109
+ "By default is empty string."
110
+ ),
111
+ ),
112
+ ] = ""
113
+ location_map_file_name: Annotated[
114
+ Optional[str],
115
+ Field(
116
+ title="Location Map File Name",
117
+ description=(
118
+ "File name for location mapping. "
119
+ "By default is empty string."
120
+ ),
121
+ ),
122
+ ] = ""
123
+ funds_map_file_name: Annotated[
124
+ Optional[str],
125
+ Field(
126
+ title="Funds Map File Name",
127
+ description=(
128
+ "File name for funds mapping. "
129
+ "By default is empty string."
130
+ ),
131
+ ),
132
+ ] = ""
133
+ funds_expense_class_map_file_name: Annotated[
134
+ Optional[str],
135
+ Field(
136
+ title="Funds Expense Class Map File Name",
137
+ description=(
138
+ "File name for funds expense class mapping. "
139
+ "By default is empty string."
140
+ ),
141
+ ),
142
+ ] = ""
50
143
 
51
144
  @staticmethod
52
145
  def get_object_type() -> FOLIONamespaces: