folio-migration-tools 1.2.1__py3-none-any.whl → 1.9.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. folio_migration_tools/__init__.py +11 -0
  2. folio_migration_tools/__main__.py +169 -85
  3. folio_migration_tools/circulation_helper.py +96 -59
  4. folio_migration_tools/config_file_load.py +66 -0
  5. folio_migration_tools/custom_dict.py +6 -4
  6. folio_migration_tools/custom_exceptions.py +21 -19
  7. folio_migration_tools/extradata_writer.py +46 -0
  8. folio_migration_tools/folder_structure.py +63 -66
  9. folio_migration_tools/helper.py +29 -21
  10. folio_migration_tools/holdings_helper.py +57 -34
  11. folio_migration_tools/i18n_config.py +9 -0
  12. folio_migration_tools/library_configuration.py +173 -13
  13. folio_migration_tools/mapper_base.py +317 -106
  14. folio_migration_tools/mapping_file_transformation/courses_mapper.py +203 -0
  15. folio_migration_tools/mapping_file_transformation/holdings_mapper.py +83 -69
  16. folio_migration_tools/mapping_file_transformation/item_mapper.py +98 -94
  17. folio_migration_tools/mapping_file_transformation/manual_fee_fines_mapper.py +352 -0
  18. folio_migration_tools/mapping_file_transformation/mapping_file_mapper_base.py +702 -223
  19. folio_migration_tools/mapping_file_transformation/notes_mapper.py +90 -0
  20. folio_migration_tools/mapping_file_transformation/order_mapper.py +492 -0
  21. folio_migration_tools/mapping_file_transformation/organization_mapper.py +389 -0
  22. folio_migration_tools/mapping_file_transformation/ref_data_mapping.py +38 -27
  23. folio_migration_tools/mapping_file_transformation/user_mapper.py +149 -361
  24. folio_migration_tools/marc_rules_transformation/conditions.py +650 -246
  25. folio_migration_tools/marc_rules_transformation/holdings_statementsparser.py +292 -130
  26. folio_migration_tools/marc_rules_transformation/hrid_handler.py +244 -0
  27. folio_migration_tools/marc_rules_transformation/loc_language_codes.xml +20846 -0
  28. folio_migration_tools/marc_rules_transformation/marc_file_processor.py +300 -0
  29. folio_migration_tools/marc_rules_transformation/marc_reader_wrapper.py +136 -0
  30. folio_migration_tools/marc_rules_transformation/rules_mapper_authorities.py +241 -0
  31. folio_migration_tools/marc_rules_transformation/rules_mapper_base.py +681 -201
  32. folio_migration_tools/marc_rules_transformation/rules_mapper_bibs.py +395 -429
  33. folio_migration_tools/marc_rules_transformation/rules_mapper_holdings.py +531 -100
  34. folio_migration_tools/migration_report.py +85 -38
  35. folio_migration_tools/migration_tasks/__init__.py +1 -3
  36. folio_migration_tools/migration_tasks/authority_transformer.py +119 -0
  37. folio_migration_tools/migration_tasks/batch_poster.py +911 -198
  38. folio_migration_tools/migration_tasks/bibs_transformer.py +121 -116
  39. folio_migration_tools/migration_tasks/courses_migrator.py +192 -0
  40. folio_migration_tools/migration_tasks/holdings_csv_transformer.py +252 -247
  41. folio_migration_tools/migration_tasks/holdings_marc_transformer.py +321 -115
  42. folio_migration_tools/migration_tasks/items_transformer.py +264 -84
  43. folio_migration_tools/migration_tasks/loans_migrator.py +506 -195
  44. folio_migration_tools/migration_tasks/manual_fee_fines_transformer.py +187 -0
  45. folio_migration_tools/migration_tasks/migration_task_base.py +364 -74
  46. folio_migration_tools/migration_tasks/orders_transformer.py +373 -0
  47. folio_migration_tools/migration_tasks/organization_transformer.py +451 -0
  48. folio_migration_tools/migration_tasks/requests_migrator.py +130 -62
  49. folio_migration_tools/migration_tasks/reserves_migrator.py +253 -0
  50. folio_migration_tools/migration_tasks/user_transformer.py +180 -139
  51. folio_migration_tools/task_configuration.py +46 -0
  52. folio_migration_tools/test_infrastructure/__init__.py +0 -0
  53. folio_migration_tools/test_infrastructure/mocked_classes.py +406 -0
  54. folio_migration_tools/transaction_migration/legacy_loan.py +148 -34
  55. folio_migration_tools/transaction_migration/legacy_request.py +65 -25
  56. folio_migration_tools/transaction_migration/legacy_reserve.py +47 -0
  57. folio_migration_tools/transaction_migration/transaction_result.py +12 -1
  58. folio_migration_tools/translations/en.json +476 -0
  59. folio_migration_tools-1.9.10.dist-info/METADATA +169 -0
  60. folio_migration_tools-1.9.10.dist-info/RECORD +67 -0
  61. {folio_migration_tools-1.2.1.dist-info → folio_migration_tools-1.9.10.dist-info}/WHEEL +1 -2
  62. folio_migration_tools-1.9.10.dist-info/entry_points.txt +3 -0
  63. folio_migration_tools/generate_schemas.py +0 -46
  64. folio_migration_tools/mapping_file_transformation/mapping_file_mapping_base_impl.py +0 -44
  65. folio_migration_tools/mapping_file_transformation/user_mapper_base.py +0 -212
  66. folio_migration_tools/marc_rules_transformation/bibs_processor.py +0 -163
  67. folio_migration_tools/marc_rules_transformation/holdings_processor.py +0 -284
  68. folio_migration_tools/report_blurbs.py +0 -219
  69. folio_migration_tools/transaction_migration/legacy_fee_fine.py +0 -36
  70. folio_migration_tools-1.2.1.dist-info/METADATA +0 -134
  71. folio_migration_tools-1.2.1.dist-info/RECORD +0 -50
  72. folio_migration_tools-1.2.1.dist-info/top_level.txt +0 -1
  73. {folio_migration_tools-1.2.1.dist-info → folio_migration_tools-1.9.10.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,451 @@
1
+ import csv
2
+ import ctypes
3
+ import json
4
+ import logging
5
+ import sys
6
+ import time
7
+ import uuid
8
+ from hashlib import sha1
9
+ from os.path import isfile
10
+ from typing import List, Optional, Annotated
11
+ from pydantic import Field
12
+
13
+ import i18n
14
+ from folio_uuid.folio_namespaces import FOLIONamespaces
15
+
16
+ from folio_migration_tools.custom_exceptions import (
17
+ TransformationProcessError,
18
+ TransformationRecordFailedError,
19
+ )
20
+ from folio_migration_tools.helper import Helper
21
+ from folio_migration_tools.library_configuration import (
22
+ FileDefinition,
23
+ LibraryConfiguration,
24
+ )
25
+ from folio_migration_tools.mapping_file_transformation.mapping_file_mapper_base import (
26
+ MappingFileMapperBase,
27
+ )
28
+ from folio_migration_tools.mapping_file_transformation.organization_mapper import (
29
+ OrganizationMapper,
30
+ )
31
+ from folio_migration_tools.migration_tasks.migration_task_base import MigrationTaskBase
32
+ from folio_migration_tools.task_configuration import AbstractTaskConfiguration
33
+
34
# Raise the csv module's per-field size limit to half of the largest value a
# C unsigned long can hold on this platform.
csv.field_size_limit(ctypes.c_ulong(-1).value // 2)
35
+
36
+
37
+ # Read files and do some work
38
+ class OrganizationTransformer(MigrationTaskBase):
39
class TaskConfiguration(AbstractTaskConfiguration):
    # Settings for one organization transformation task.
    #
    # `name`, `migration_task_type`, `files` and `organization_map_path` have
    # no default and must be supplied. The four reference-data map paths
    # default to an empty string.
    name: Annotated[
        str,
        Field(
            description=(
                "Name of this migration task. The name is being used to call the specific "
                "task, and to distinguish tasks of similar types"
            ),
        ),
    ]
    migration_task_type: Annotated[
        str,
        Field(
            title="Migration task type",
            description=(
                "The type of migration task you want to perform"
            ),
        ),
    ]
    # The description used to read "List of MARC21 files with holdings
    # records", which was copied from another task and is wrong here.
    files: Annotated[
        List[FileDefinition],
        Field(
            title="Source files",
            description=(
                "List of source files with organization records"
            ),
        ),
    ]
    organization_map_path: Annotated[
        str,
        Field(
            title="Organization map path",
            description=(
                "Path to the organization map file"
            ),
        ),
    ]
    organization_types_map_path: Annotated[
        Optional[str],
        Field(
            title="Organization types map path",
            description=(
                "Path to the organization types map file. By default is empty string"
            ),
        ),
    ] = ""
    address_categories_map_path: Annotated[
        Optional[str],
        Field(
            title="Address categories map path",
            description=(
                "Path to the address categories map file. By default is empty string"
            ),
        ),
    ] = ""
    email_categories_map_path: Annotated[
        Optional[str],
        Field(
            title="Email categories map path",
            description=(
                "Path to the email categories map file. By default is empty string"
            ),
        ),
    ] = ""
    phone_categories_map_path: Annotated[
        Optional[str],
        Field(
            title="Phone categories map path",
            description=(
                "Path to the phone categories map file. By default is empty string"
            ),
        ),
    ] = ""
112
+
113
@staticmethod
def get_object_type() -> FOLIONamespaces:
    """Return the FOLIONamespaces member for organizations."""
    object_type = FOLIONamespaces.organizations
    return object_type
116
+
117
def __init__(
    self,
    task_configuration: TaskConfiguration,
    library_config: LibraryConfiguration,
    folio_client,
    use_logging: bool = True,
):
    """Set up the task: source files, record map, id map and the mapper.

    Args:
        task_configuration: Settings of this task (source files and map paths).
        library_config: Library-wide configuration, passed on to the base class.
        folio_client: FOLIO client, passed on to the base class.
        use_logging: Passed on to the base class.
    """
    csv.register_dialect("tsv", delimiter="\t")

    super().__init__(library_config, task_configuration, folio_client, use_logging)
    self.object_type_name = self.get_object_type().name
    self.task_configuration = task_configuration
    self.files = self.list_source_files()
    self.total_records = 0

    mapping_files_folder = self.folder_structure.mapping_files_folder
    self.organization_map = self.setup_records_map(
        mapping_files_folder / self.task_configuration.organization_map_path
    )

    self.results_path = self.folder_structure.created_objects_path
    self.failed_files: List[str] = []
    self.organizations_id_map = self.load_id_map(
        self.folder_structure.organizations_id_map_path
    )

    self.folio_keys = MappingFileMapperBase.get_mapped_folio_properties_from_map(
        self.organization_map
    )

    # (FOLIO property, configured map path) pairs, in the positional order
    # in which OrganizationMapper receives the loaded maps.
    ref_data_sources = (
        ("organizationTypes", self.task_configuration.organization_types_map_path),
        ("addresses[0].categories[0]", self.task_configuration.address_categories_map_path),
        ("emails[0].categories[0]", self.task_configuration.email_categories_map_path),
        ("phoneNumbers[0].categories[0]", self.task_configuration.phone_categories_map_path),
    )
    ref_data_maps = [
        self.load_ref_data_mapping_file(
            folio_property,
            # The paths are Optional[str]; an explicit None is treated like
            # the "" default so the path join does not raise TypeError.
            mapping_files_folder / (map_path or ""),
            self.folio_keys,
            False,
        )
        for folio_property, map_path in ref_data_sources
    ]

    self.mapper = OrganizationMapper(
        self.folio_client,
        self.library_configuration,
        self.task_configuration,
        self.organization_map,
        *ref_data_maps,
    )

    self.embedded_extradata_object_cache: set = set()
    self.interfaces_cache: dict = {}
185
+
186
def list_source_files(self):
    """Return the paths of the configured source files that exist on disk.

    Files are looked up in ``<data_folder>/<object_type_name>``. Configured
    files that do not exist are left out of the result.

    Returns:
        list: One path per existing source file, in configuration order.

    Raises:
        TransformationProcessError: If none of the configured files exists.
    """
    source_folder = self.folder_structure.data_folder / self.object_type_name
    files = [
        source_folder / f.file_name
        for f in self.task_configuration.files
        if isfile(source_folder / f.file_name)
    ]
    if not files:
        ret_str = ",".join(f.file_name for f in self.task_configuration.files)
        # The folder part of this message used to be a plain string without
        # the f prefix, so the placeholders were printed literally.
        raise TransformationProcessError(
            f"Files {ret_str} not found in {source_folder}"
        )
    logging.info("Files to process:")
    for filename in files:
        logging.info("\t%s", filename)
    return files
202
+
203
def process_single_file(self, filename):
    """Transform every legacy record of one source file into a FOLIO organization.

    Each record is mapped, its embedded interfaces and contacts are turned
    into extradata objects, its notes are mapped, the record is cleaned and
    then written to the created-objects file. Errors on individual records
    are passed to the mapper's error handlers.

    Args:
        filename: Path of the source file to read.
    """
    # Opening with "w+" on every call truncated the shared results file, so
    # with several source files only the last file's records survived. Only
    # the first file processed by this task instance truncates; later ones
    # append.
    is_first_file = not getattr(self, "_results_file_started", False)
    with open(filename, encoding="utf-8-sig") as records_file, open(
        self.folder_structure.created_objects_path, "w+" if is_first_file else "a+"
    ) as results_file:
        self._results_file_started = True
        self.mapper.migration_report.add_general_statistics(
            i18n.t("Number of files processed")
        )
        start = time.time()
        records_processed = 0
        for idx, record in enumerate(self.mapper.get_objects(records_file, filename)):
            records_processed += 1
            try:
                if idx == 0:
                    logging.info("First legacy record:")
                    logging.info(json.dumps(record, indent=4))

                folio_rec, legacy_id = self.mapper.do_map(
                    record, f"row {idx}", FOLIONamespaces.organizations
                )
                self.mapper.report_folio_mapping(folio_rec, self.mapper.organization_schema)

                # Create extradata and clean the record up
                folio_rec = self.handle_embedded_extradata_objects(folio_rec)
                self.mapper.notes_mapper.map_notes(
                    record,
                    legacy_id,
                    folio_rec["id"],
                    FOLIONamespaces.organizations,
                )
                folio_rec = self.clean_org(folio_rec)
                self.organizations_id_map[legacy_id] = self.mapper.get_id_map_tuple(
                    legacy_id, folio_rec, self.object_type
                )

                Helper.write_to_file(results_file, folio_rec)
                # Counted only after a successful write, so records that
                # failed are no longer reported as created.
                self.mapper.migration_report.add_general_statistics(
                    i18n.t("Number of organizations created")
                )

                if idx == 0:
                    logging.info("First FOLIO record:")
                    logging.info(json.dumps(folio_rec, indent=4))

            except TransformationProcessError as process_error:
                self.mapper.handle_transformation_process_error(idx, process_error)
            except TransformationRecordFailedError as error:
                self.mapper.handle_transformation_record_failed_error(idx, error)
            except Exception as exception:
                self.mapper.handle_generic_exception(idx, exception)

            self.mapper.migration_report.add_general_statistics(
                i18n.t("Number of objects in source data file")
            )

            # TODO Rewrite to base % value on number of rows in file
            if idx > 1 and idx % 50 == 0:
                records_per_second = idx / (time.time() - start)
                rate_formatted = "{0:.4g}".format(records_per_second)
                logging.info(  # pylint: disable=logging-fstring-interpolation
                    f"{idx:,} records processed. Recs/sec: {rate_formatted} "
                )

        # Accumulate across source files; the total used to be overwritten
        # with the count of the most recent file.
        self.total_records += records_processed

    logging.info(  # pylint: disable=logging-fstring-interpolation
        f"Done processing {filename} containing {records_processed:,} records. "
        f"Total records processed: {self.total_records:,}"
    )
271
+
272
def do_work(self):
    """Process every source file in ``self.files``, stopping at the first failure.

    If processing a file raises an Exception, the error is logged, the file
    is added to the migration report under "FailedFiles" and the process
    exits.

    Raises:
        SystemExit: With exit status 1 when a file fails.
    """
    logging.info("Getting started!")
    for file in self.files:
        logging.info("Processing %s", file)
        try:
            self.process_single_file(file)
        except Exception as ee:
            error_str = (
                f"Processing of {file} failed:\n{ee}. "
                "Check source files for empty rows or missing reference data"
            )
            logging.exception(error_str)
            self.mapper.migration_report.add("FailedFiles", f"{file} - {ee}")
            # A bare sys.exit() reports success (status 0) to the caller;
            # a failed file must produce a non-zero status.
            sys.exit(1)
286
+
287
def wrap_up(self):
    """Flush extradata, write the migration and mapping reports, save the id map.

    Finishes by calling ``clean_out_empty_logs``.
    """
    logging.info("Done. Transformer wrapping up...")
    self.extradata_writer.flush()

    with open(self.folder_structure.migration_reports_file, "w") as report_handle:
        logging.info(
            "Writing migration- and mapping report to %s",
            self.folder_structure.migration_reports_file,
        )
        report_title = i18n.t("Organization transformation report")
        self.mapper.migration_report.write_migration_report(
            report_title, report_handle, self.start_datetime
        )

        # The mapping report goes into the same file, after the migration report.
        Helper.print_mapping_report(
            report_handle,
            self.total_records,
            self.mapper.mapped_folio_fields,
            self.mapper.mapped_legacy_fields,
        )

    id_map_path = self.folder_structure.organizations_id_map_path
    self.mapper.save_id_map_file(id_map_path, self.organizations_id_map)
    self.clean_out_empty_logs()

    logging.info("All done!")
314
+
315
def clean_org(self, record):
    """Clean the addresses and interfaces of an organization record.

    Each clean-up step runs only when the record holds a non-empty value
    under the corresponding key. The record is returned after the steps ran.
    """
    needs_address_cleanup = bool(record.get("addresses"))
    if needs_address_cleanup:
        self.clean_addresses(record)

    needs_uri_validation = bool(record.get("interfaces"))
    if needs_uri_validation:
        self.validate_uri(record)

    return record
322
+
323
def clean_addresses(self, record):
    """Drop empty addresses and make sure one remaining address is primary.

    An address is empty when every value other than ``isPrimary`` is falsy.
    If none of the remaining addresses has ``isPrimary`` set to True, the
    first remaining one is made primary.

    Args:
        record: Organization record; its "addresses" list is replaced in place.

    Returns:
        The same record object.
    """
    addresses = record.get("addresses") or []
    if not addresses:
        # Nothing to clean; indexing the first address would raise IndexError.
        return record

    kept_addresses = [
        address
        for address in addresses
        if any(value for key, value in address.items() if key != "isPrimary")
    ]

    # The primary flag is decided among the addresses that are kept.
    # Previously the flag could be put on an empty address that was then
    # removed, or an empty primary address could suppress the promotion, so
    # the record ended up without any primary address.
    if kept_addresses and not any(
        address.get("isPrimary") is True for address in kept_addresses
    ):
        kept_addresses[0]["isPrimary"] = True

    record["addresses"] = kept_addresses

    return record
346
+
347
def validate_uri(self, record):
    """Remove interfaces whose URI does not start with an accepted scheme.

    Interfaces without a "uri" key are kept. Every removed interface is
    added to the migration report and logged as a data issue. The record's
    "interfaces" list is replaced and the record is returned.
    """
    accepted_schemes = ("ftp://", "sftp://", "http://", "https://")
    kept = []

    for candidate in record.get("interfaces"):
        has_uri = "uri" in candidate
        if has_uri and not candidate.get("uri", "").startswith(accepted_schemes):
            self.mapper.migration_report.add(
                "MalformedInterfaceUri",
                i18n.t("Interfaces"),
            )
            Helper.log_data_issue(
                f"{record['code']}",
                f"INTERFACE FAILED Malformed interface URI: {candidate['uri']}",
                candidate,
            )
            continue
        kept.append(candidate)

    record["interfaces"] = kept

    return record
368
+
369
def handle_embedded_extradata_objects(self, record):
    """Replace embedded interfaces and contacts with ids of extradata objects.

    Each interface is written as an extradata object. Its
    "interfaceCredential", when present with a "username", is written as a
    separate object that carries the interface's id. Contacts are written
    only when they have both a first and a last name; other contacts are
    left out. The record is returned with the two lists replaced by id lists.
    """
    if record.get("interfaces"):
        interface_ids = []
        for interface in record["interfaces"]:
            # The credential is detached before the interface itself is written.
            credential = interface.pop("interfaceCredential", None)

            new_interface_id = self.create_referenced_extradata_object(
                interface, "interfaces"
            )
            interface_ids.append(new_interface_id)

            if credential and "username" in credential:
                credential["interfaceId"] = new_interface_id
                self.create_referenced_extradata_object(credential, "interfaceCredential")

        record["interfaces"] = interface_ids

    if record.get("contacts"):
        record["contacts"] = [
            self.create_referenced_extradata_object(contact, "contacts")
            for contact in record["contacts"]
            if contact.get("firstName") and contact.get("lastName")
        ]

    # TODO Do the same for notes as for Contacts/Interfaces?
    # Check implementation for Users.

    return record
409
+
410
def create_referenced_extradata_object(self, embedded_object, extradata_object_type):
    """Write an embedded object as an extradata object and return its new UUID.

    The object is hashed before an id is assigned. If an object with the
    same hash was written earlier, that is reported and logged as a data
    issue; the object is still written again under a new UUID.

    Args:
        embedded_object: JSON-serializable dict; its "id" key is set to the
            new UUID.
        extradata_object_type: Type name passed to the extradata writer,
            for example "interfaces" or "contacts".

    Returns:
        str: The random UUID assigned to the written object.
    """
    embedded_object_hash = sha1(
        json.dumps(embedded_object, sort_keys=True).encode("utf-8"), usedforsecurity=False
    ).hexdigest()

    # The cache is a set, so a direct membership test replaces the former
    # linear scan over all cached hashes.
    if embedded_object_hash in self.embedded_extradata_object_cache:
        self.mapper.migration_report.add_general_statistics(
            i18n.t("Number of reoccurring identical %{type}", type=extradata_object_type)
        )
        # NOTE(review): nothing in this class sets `legacy_id`; fall back to
        # an empty id so a duplicate cannot raise AttributeError. Confirm
        # whether the base class provides the attribute.
        legacy_id = getattr(self, "legacy_id", "")
        Helper.log_data_issue(
            f"{legacy_id}",
            f"Identical {extradata_object_type} objects found in multiple organizations",
            embedded_object,
        )

    extradata_object_uuid = str(uuid.uuid4())
    embedded_object["id"] = extradata_object_uuid

    self.extradata_writer.write(extradata_object_type, embedded_object)
    self.embedded_extradata_object_cache.add(embedded_object_hash)

    self.mapper.migration_report.add_general_statistics(
        i18n.t("Number of linked %{type} created", type=extradata_object_type)
    )

    return extradata_object_uuid