folio-migration-tools 1.2.1__py3-none-any.whl → 1.9.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. folio_migration_tools/__init__.py +11 -0
  2. folio_migration_tools/__main__.py +169 -85
  3. folio_migration_tools/circulation_helper.py +96 -59
  4. folio_migration_tools/config_file_load.py +66 -0
  5. folio_migration_tools/custom_dict.py +6 -4
  6. folio_migration_tools/custom_exceptions.py +21 -19
  7. folio_migration_tools/extradata_writer.py +46 -0
  8. folio_migration_tools/folder_structure.py +63 -66
  9. folio_migration_tools/helper.py +29 -21
  10. folio_migration_tools/holdings_helper.py +57 -34
  11. folio_migration_tools/i18n_config.py +9 -0
  12. folio_migration_tools/library_configuration.py +173 -13
  13. folio_migration_tools/mapper_base.py +317 -106
  14. folio_migration_tools/mapping_file_transformation/courses_mapper.py +203 -0
  15. folio_migration_tools/mapping_file_transformation/holdings_mapper.py +83 -69
  16. folio_migration_tools/mapping_file_transformation/item_mapper.py +98 -94
  17. folio_migration_tools/mapping_file_transformation/manual_fee_fines_mapper.py +352 -0
  18. folio_migration_tools/mapping_file_transformation/mapping_file_mapper_base.py +702 -223
  19. folio_migration_tools/mapping_file_transformation/notes_mapper.py +90 -0
  20. folio_migration_tools/mapping_file_transformation/order_mapper.py +492 -0
  21. folio_migration_tools/mapping_file_transformation/organization_mapper.py +389 -0
  22. folio_migration_tools/mapping_file_transformation/ref_data_mapping.py +38 -27
  23. folio_migration_tools/mapping_file_transformation/user_mapper.py +149 -361
  24. folio_migration_tools/marc_rules_transformation/conditions.py +650 -246
  25. folio_migration_tools/marc_rules_transformation/holdings_statementsparser.py +292 -130
  26. folio_migration_tools/marc_rules_transformation/hrid_handler.py +244 -0
  27. folio_migration_tools/marc_rules_transformation/loc_language_codes.xml +20846 -0
  28. folio_migration_tools/marc_rules_transformation/marc_file_processor.py +300 -0
  29. folio_migration_tools/marc_rules_transformation/marc_reader_wrapper.py +136 -0
  30. folio_migration_tools/marc_rules_transformation/rules_mapper_authorities.py +241 -0
  31. folio_migration_tools/marc_rules_transformation/rules_mapper_base.py +681 -201
  32. folio_migration_tools/marc_rules_transformation/rules_mapper_bibs.py +395 -429
  33. folio_migration_tools/marc_rules_transformation/rules_mapper_holdings.py +531 -100
  34. folio_migration_tools/migration_report.py +85 -38
  35. folio_migration_tools/migration_tasks/__init__.py +1 -3
  36. folio_migration_tools/migration_tasks/authority_transformer.py +119 -0
  37. folio_migration_tools/migration_tasks/batch_poster.py +911 -198
  38. folio_migration_tools/migration_tasks/bibs_transformer.py +121 -116
  39. folio_migration_tools/migration_tasks/courses_migrator.py +192 -0
  40. folio_migration_tools/migration_tasks/holdings_csv_transformer.py +252 -247
  41. folio_migration_tools/migration_tasks/holdings_marc_transformer.py +321 -115
  42. folio_migration_tools/migration_tasks/items_transformer.py +264 -84
  43. folio_migration_tools/migration_tasks/loans_migrator.py +506 -195
  44. folio_migration_tools/migration_tasks/manual_fee_fines_transformer.py +187 -0
  45. folio_migration_tools/migration_tasks/migration_task_base.py +364 -74
  46. folio_migration_tools/migration_tasks/orders_transformer.py +373 -0
  47. folio_migration_tools/migration_tasks/organization_transformer.py +451 -0
  48. folio_migration_tools/migration_tasks/requests_migrator.py +130 -62
  49. folio_migration_tools/migration_tasks/reserves_migrator.py +253 -0
  50. folio_migration_tools/migration_tasks/user_transformer.py +180 -139
  51. folio_migration_tools/task_configuration.py +46 -0
  52. folio_migration_tools/test_infrastructure/__init__.py +0 -0
  53. folio_migration_tools/test_infrastructure/mocked_classes.py +406 -0
  54. folio_migration_tools/transaction_migration/legacy_loan.py +148 -34
  55. folio_migration_tools/transaction_migration/legacy_request.py +65 -25
  56. folio_migration_tools/transaction_migration/legacy_reserve.py +47 -0
  57. folio_migration_tools/transaction_migration/transaction_result.py +12 -1
  58. folio_migration_tools/translations/en.json +476 -0
  59. folio_migration_tools-1.9.10.dist-info/METADATA +169 -0
  60. folio_migration_tools-1.9.10.dist-info/RECORD +67 -0
  61. {folio_migration_tools-1.2.1.dist-info → folio_migration_tools-1.9.10.dist-info}/WHEEL +1 -2
  62. folio_migration_tools-1.9.10.dist-info/entry_points.txt +3 -0
  63. folio_migration_tools/generate_schemas.py +0 -46
  64. folio_migration_tools/mapping_file_transformation/mapping_file_mapping_base_impl.py +0 -44
  65. folio_migration_tools/mapping_file_transformation/user_mapper_base.py +0 -212
  66. folio_migration_tools/marc_rules_transformation/bibs_processor.py +0 -163
  67. folio_migration_tools/marc_rules_transformation/holdings_processor.py +0 -284
  68. folio_migration_tools/report_blurbs.py +0 -219
  69. folio_migration_tools/transaction_migration/legacy_fee_fine.py +0 -36
  70. folio_migration_tools-1.2.1.dist-info/METADATA +0 -134
  71. folio_migration_tools-1.2.1.dist-info/RECORD +0 -50
  72. folio_migration_tools-1.2.1.dist-info/top_level.txt +0 -1
  73. {folio_migration_tools-1.2.1.dist-info → folio_migration_tools-1.9.10.dist-info/licenses}/LICENSE +0 -0
@@ -1,14 +1,20 @@
-from fileinput import filename
+import asyncio
+import copy
 import json
 import logging
+import re
 import sys
 import time
 import traceback
-from datetime import datetime
+from datetime import datetime, timezone
+from typing import Annotated, List, Optional
 from uuid import uuid4
-from typing import List, Optional
-import requests
+
+import httpx
+import i18n
 from folio_uuid.folio_namespaces import FOLIONamespaces
+from pydantic import Field
+
 from folio_migration_tools.custom_exceptions import (
     TransformationProcessError,
     TransformationRecordFailedError,
@@ -17,8 +23,9 @@ from folio_migration_tools.library_configuration import (
     FileDefinition,
     LibraryConfiguration,
 )
+from folio_migration_tools.migration_report import MigrationReport
 from folio_migration_tools.migration_tasks.migration_task_base import MigrationTaskBase
-from pydantic import BaseModel
+from folio_migration_tools.task_configuration import AbstractTaskConfiguration
 
 
 def write_failed_batch_to_file(batch, file):
@@ -27,13 +34,169 @@ def write_failed_batch_to_file(batch, file):
 
 
 class BatchPoster(MigrationTaskBase):
-    class TaskConfiguration(BaseModel):
-        name: str
-        migration_task_type: str
-        object_type: str
-        files: List[FileDefinition]
-        batch_size: int
-        rerun_failed_records: Optional[bool] = True
+    """BatchPoster
+
+    Parents:
+        MigrationTaskBase (_type_): _description_
+
+    Raises:
+        ee: _description_
+        TransformationRecordFailedError: _description_
+        TransformationProcessError: _description_
+        TransformationRecordFailedError: _description_
+        TransformationRecordFailedError: _description_
+        TransformationProcessError: _description_
+
+    Returns:
+        _type_: _description_
+    """
+
+    class TaskConfiguration(AbstractTaskConfiguration):
+        name: Annotated[
+            str,
+            Field(
+                title="Task name",
+                description="The name of the task",
+            ),
+        ]
+        migration_task_type: Annotated[
+            str,
+            Field(
+                title="Migration task type",
+                description="The type of migration task",
+            ),
+        ]
+        object_type: Annotated[
+            str,
+            Field(
+                title="Object type",
+                description=(
+                    "The type of object being migrated"
+                    "Examples of possible values: "
+                    "'Extradata', 'SRS', Instances', 'Holdings', 'Items'"
+                ),
+            ),
+        ]
+        files: Annotated[
+            List[FileDefinition],
+            Field(
+                title="List of files",
+                description="List of files to be processed",
+            ),
+        ]
+        batch_size: Annotated[
+            int,
+            Field(
+                title="Batch size",
+                description="The batch size for processing files",
+            ),
+        ]
+        rerun_failed_records: Annotated[
+            bool,
+            Field(
+                title="Rerun failed records",
+                description=(
+                    "Toggles whether or not BatchPoster should try to rerun "
+                    "failed batches or just leave the failing records on disk."
+                ),
+            ),
+        ] = True
+        use_safe_inventory_endpoints: Annotated[
+            bool,
+            Field(
+                title="Use safe inventory endpoints",
+                description=(
+                    "Toggles the use of the safe/unsafe Inventory storage "
+                    "endpoints. Unsafe circumvents the Optimistic locking "
+                    "in FOLIO. Defaults to True (using the 'safe' options)"
+                ),
+            ),
+        ] = True
+        extradata_endpoints: Annotated[
+            dict,
+            Field(
+                title="Extradata endpoints",
+                description=(
+                    "A dictionary of extradata endpoints. "
+                    "The key is the object type and the value is the endpoint"
+                ),
+            ),
+        ] = {}
+        upsert: Annotated[
+            bool,
+            Field(
+                title="Upsert",
+                description=(
+                    "Toggles whether or not to use the upsert feature "
+                    "of the Inventory storage endpoints. Defaults to False"
+                ),
+            ),
+        ] = False
+        preserve_statistical_codes: Annotated[
+            bool,
+            Field(
+                title="Preserve statistical codes",
+                description=(
+                    "Toggles whether or not to preserve statistical codes "
+                    "during the upsert process. Defaults to False"
+                ),
+            ),
+        ] = False
+        preserve_administrative_notes: Annotated[
+            bool,
+            Field(
+                title="Preserve administrative notes",
+                description=(
+                    "Toggles whether or not to preserve administrative notes "
+                    "during the upsert process. Defaults to False"
+                ),
+            ),
+        ] = False
+        preserve_temporary_locations: Annotated[
+            bool,
+            Field(
+                title="Preserve temporary locations",
+                description=(
+                    "Toggles whether or not to preserve temporary locations "
+                    "on items during the upsert process. Defaults to False"
+                ),
+            ),
+        ] = False
+        preserve_temporary_loan_types: Annotated[
+            bool,
+            Field(
+                title="Preserve temporary loan types",
+                description=(
+                    "Toggles whether or not to preserve temporary loan types "
+                    "on items during the upsert process. Defaults to False"
+                ),
+            ),
+        ] = False
+        preserve_item_status: Annotated[
+            bool,
+            Field(
+                title="Preserve item status",
+                description=(
+                    "Toggles whether or not to preserve item status "
+                    "on items during the upsert process. Defaults to False"
+                ),
+            ),
+        ] = True
+        patch_existing_records: Annotated[bool, Field(
+            title="Patch existing records",
+            description=(
+                "Toggles whether or not to patch existing records "
+                "during the upsert process. Defaults to False"
+            ),
+        )] = False
+        patch_paths: Annotated[List[str], Field(
+            title="Patch paths",
+            description=(
+                "A list of fields in JSON Path notation to patch during the upsert process (leave off the $). If empty, all fields will be patched. Examples: ['statisticalCodeIds', 'administrativeNotes', 'instanceStatusId']"
+            ),
+        )] = []
+
+    task_configuration: TaskConfiguration
 
     @staticmethod
     def get_object_type() -> FOLIONamespaces:
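
Note: the expanded TaskConfiguration above is the user-facing surface of this release. As a minimal sketch (not part of the diff), a task exercising the new upsert options might be built like this; the task name, file name, and values are invented, and AbstractTaskConfiguration may require additional fields not shown here:

    from folio_migration_tools.library_configuration import FileDefinition
    from folio_migration_tools.migration_tasks.batch_poster import BatchPoster

    # Hypothetical configuration, for illustration only
    task_config = BatchPoster.TaskConfiguration(
        name="post_instances",
        migration_task_type="BatchPoster",
        object_type="Instances",
        files=[FileDefinition(file_name="folio_instances.json")],
        batch_size=250,
        upsert=True,  # opt in; defaults to False
        use_safe_inventory_endpoints=True,  # keep FOLIO's optimistic locking
        patch_existing_records=True,
        patch_paths=["statisticalCodeIds", "administrativeNotes"],
    )

With upsert=True and patch_paths set, broadly only the listed fields of an existing record are overwritten and the rest is preserved; see the patch_record logic later in this diff.
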
@@ -43,98 +206,321 @@ class BatchPoster(MigrationTaskBase):
         self,
         task_config: TaskConfiguration,
         library_config: LibraryConfiguration,
+        folio_client,
         use_logging: bool = True,
     ):
-        super().__init__(library_config, task_config, use_logging)
-        self.task_config = task_config
-        self.failed_ids = []
+        super().__init__(library_config, task_config, folio_client, use_logging)
+        self.migration_report = MigrationReport()
+        self.performing_rerun = False
+        self.failed_ids: list = []
         self.first_batch = True
-        self.api_path = list_objects(self.task_config.object_type)
+        self.api_info = get_api_info(
+            self.task_configuration.object_type,
+            self.task_configuration.use_safe_inventory_endpoints,
+        )
+        self.query_params = {}
+        if self.api_info["supports_upsert"]:
+            self.query_params["upsert"] = self.task_configuration.upsert
+        elif self.task_configuration.upsert and not self.api_info["supports_upsert"]:
+            logging.info(
+                "Upsert is not supported for this object type. Query parameter will not be set.")
         self.snapshot_id = str(uuid4())
-        self.failed_objects = []
-        self.batch_size = self.task_config.batch_size
+        self.failed_objects: list = []
+        self.batch_size = self.task_configuration.batch_size
         logging.info("Batch size is %s", self.batch_size)
         self.processed = 0
         self.failed_batches = 0
-        self.failed_records = 0
         self.users_created = 0
         self.users_updated = 0
-        self.users_per_group = {}
-        self.failed_fields = set()
+        self.users_per_group: dict = {}
+        self.failed_fields: set = set()
         self.num_failures = 0
         self.num_posted = 0
+        self.okapi_headers = self.folio_client.okapi_headers
+        self.http_client = None
+        self.starting_record_count_in_folio: Optional[int] = None
+        self.finished_record_count_in_folio: Optional[int] = None
 
     def do_work(self):
-        try:
-            batch = []
-            if self.task_config.object_type == "SRS":
-                self.create_snapshot()
-            with open(self.folder_structure.failed_recs_path, "w") as failed_recs_file:
-                for file_def in self.task_config.files:
-                    path = self.folder_structure.results_folder / file_def.file_name
-                    with open(path) as rows:
-                        logging.info("Running %s", path)
-                        last_row = ""
-                        for num_records, row in enumerate(rows, start=1):
-                            last_row = row
-                            if row.strip():
-                                try:
-                                    if self.task_config.object_type == "Extradata":
-                                        self.post_extra_data(
-                                            row, num_records, failed_recs_file
-                                        )
-                                    else:
-                                        json_rec = json.loads(row.split("\t")[-1])
-                                        if self.task_config.object_type == "SRS":
-                                            json_rec["snapshotId"] = self.snapshot_id
-                                        if num_records == 1:
-                                            logging.info(
-                                                json.dumps(json_rec, indent=True)
+        with self.folio_client.get_folio_http_client() as httpx_client:
+            self.http_client = httpx_client
+            with open(
+                self.folder_structure.failed_recs_path, "w", encoding='utf-8'
+            ) as failed_recs_file:
+                self.get_starting_record_count()
+                try:
+                    batch = []
+                    if self.task_configuration.object_type == "SRS":
+                        self.create_snapshot()
+                    for idx, file_def in enumerate(self.task_configuration.files):
+                        path = self.folder_structure.results_folder / file_def.file_name
+                        with open(path) as rows:
+                            logging.info("Running %s", path)
+                            last_row = ""
+                            for self.processed, row in enumerate(rows, start=1):
+                                last_row = row
+                                if row.strip():
+                                    try:
+                                        if self.task_configuration.object_type == "Extradata":
+                                            self.post_extra_data(
+                                                row, self.processed, failed_recs_file
+                                            )
+                                        elif not self.api_info["is_batch"]:
+                                            self.post_single_records(
+                                                row, self.processed, failed_recs_file
                                             )
-                                        batch.append(json_rec)
-                                        if len(batch) == int(self.batch_size):
-                                            self.post_batch(
-                                                batch, failed_recs_file, num_records
+                                        else:
+                                            batch = self.post_record_batch(
+                                                batch, failed_recs_file, row
                                             )
-                                            batch = []
-                                except UnicodeDecodeError as unicode_error:
-                                    self.handle_unicode_error(unicode_error, last_row)
-                                except TransformationProcessError as tpe:
-                                    self.handle_generic_exception(
-                                        tpe,
-                                        last_row,
-                                        batch,
-                                        num_records,
-                                        failed_recs_file,
-                                    )
-                                    logging.critical("Halting %s", tpe)
-                                except TransformationRecordFailedError as exception:
-                                    self.handle_generic_exception(
-                                        exception,
-                                        last_row,
-                                        batch,
-                                        num_records,
-                                        failed_recs_file,
-                                    )
-                                    batch = []
-
-            if self.task_config.object_type != "Extradata" and any(batch):
-                try:
-                    self.post_batch(batch, failed_recs_file, num_records)
-                except Exception as exception:
-                    self.handle_generic_exception(
-                        exception, last_row, batch, num_records, failed_recs_file
-                    )
-            logging.info("Done posting %s records. ", (num_records))
-        except Exception as ee:
-            if self.task_config.object_type == "SRS":
-                self.commit_snapshot()
-            raise ee
+                                    except UnicodeDecodeError as unicode_error:
+                                        self.handle_unicode_error(unicode_error, last_row)
+                                    except TransformationProcessError as tpe:
+                                        self.handle_generic_exception(
+                                            tpe,
+                                            last_row,
+                                            batch,
+                                            self.processed,
+                                            failed_recs_file,
+                                        )
+                                        batch = []
+                                        raise
+                                    except TransformationRecordFailedError as exception:
+                                        self.handle_generic_exception(
+                                            exception,
+                                            last_row,
+                                            batch,
+                                            self.processed,
+                                            failed_recs_file,
+                                        )
+                                        batch = []
+                except (FileNotFoundError, PermissionError) as ose:
+                    logging.error("Error reading file: %s", ose)
+
+                except Exception as ee:
+                    if "idx" in locals() and self.task_configuration.files[idx:]:
+                        for file_def in self.task_configuration.files[idx:]:
+                            path = self.folder_structure.results_folder / file_def.file_name
+                            try:
+                                with open(path, "r") as failed_file:
+                                    failed_file.seek(self.processed)
+                                    failed_recs_file.write(failed_file.read())
+                                    self.processed = 0
+                            except (FileNotFoundError, PermissionError) as ose:
+                                logging.error("Error reading file: %s", ose)
+                    raise ee
+                finally:
+                    if self.task_configuration.object_type != "Extradata" and any(batch):
+                        try:
+                            self.post_batch(batch, failed_recs_file, self.processed)
+                        except Exception as exception:
+                            self.handle_generic_exception(
+                                exception, last_row, batch, self.processed, failed_recs_file
+                            )
+                    logging.info("Done posting %s records. ", self.processed)
+                    if self.task_configuration.object_type == "SRS":
+                        self.commit_snapshot()
+
+    @staticmethod
+    def set_consortium_source(json_rec):
+        if json_rec['source'] == 'MARC':
+            json_rec['source'] = 'CONSORTIUM-MARC'
+        elif json_rec['source'] == 'FOLIO':
+            json_rec['source'] = 'CONSORTIUM-FOLIO'
+
+    def set_version(self, batch, query_api, object_type) -> None:
+        """
+        Synchronous wrapper for set_version_async
+        """
+        try:
+            loop = asyncio.get_running_loop()
+        except RuntimeError:
+            loop = asyncio.new_event_loop()
+            asyncio.set_event_loop(loop)
+            loop.run_until_complete(self.set_version_async(batch, query_api, object_type))
+            asyncio.set_event_loop(None)  # Reset the event loop
+        else:
+            loop.run_until_complete(self.set_version_async(batch, query_api, object_type))
+
+    async def set_version_async(self, batch, query_api, object_type) -> None:
+        """
+        Fetches the current version of the records in the batch if the record exists in FOLIO
+
+        Args:
+            batch (list): List of records to fetch versions for
+            query_api (str): The query API endpoint to use
+            object_type (str): The key in the API response that contains the records
+
+        Returns:
+            None
+        """
+        fetch_batch_size = 90
+        fetch_tasks = []
+        existing_records = {}
+        async with httpx.AsyncClient(base_url=self.folio_client.gateway_url) as client:
+            for i in range(0, len(batch), fetch_batch_size):
+                batch_slice = batch[i:i + fetch_batch_size]
+                fetch_tasks.append(
+                    self.get_with_retry(
+                        client,
+                        query_api,
+                        params={
+                            "query": (
+                                "id==("
+                                f"{' OR '.join([r['id'] for r in batch_slice if 'id' in r])})"
+                            ),
+                            "limit": fetch_batch_size
+                        },
+                    )
+                )
+
+            responses = await asyncio.gather(*fetch_tasks)
+
+        for response in responses:
+            self.collect_existing_records_for_upsert(object_type, response, existing_records)
+        for record in batch:
+            if record["id"] in existing_records:
+                self.prepare_record_for_upsert(record, existing_records[record["id"]])
+
+    def patch_record(self, new_record: dict, existing_record: dict, patch_paths: List[str]):
+        """
+        Updates new_record with values from existing_record according to patch_paths.
+
+        Args:
+            new_record (dict): The new record to be updated.
+            existing_record (dict): The existing record to patch from.
+            patch_paths (List[str]): List of fields in JSON Path notation (e.g., ['statisticalCodeIds', 'administrativeNotes', 'instanceStatusId']) to patch during the upsert process. If empty, all fields will be patched.
+        """
+        updates = {}
+        updates.update(existing_record)
+        keep_existing = {}
+        self.handle_upsert_for_administrative_notes(updates, keep_existing)
+        self.handle_upsert_for_statistical_codes(updates, keep_existing)
+        if not patch_paths:
+            keep_new = new_record
+        else:
+            keep_new = extract_paths(new_record, patch_paths)
+            if "instanceStatusId" in new_record:
+                updates["instanceStatusId"] = new_record["instanceStatusId"]
+        deep_update(updates, keep_new)
+        for key, value in keep_existing.items():
+            if isinstance(value, list) and key in keep_new:
+                updates[key] = list(dict.fromkeys(updates.get(key, []) + value))
+            elif key not in keep_new:
+                updates[key] = value
+        new_record.clear()
+        new_record.update(updates)
+
+    @staticmethod
+    def collect_existing_records_for_upsert(object_type: str, response: httpx.Response, existing_records: dict):
+        if response.status_code == 200:
+            response_json = response.json()
+            for record in response_json[object_type]:
+                existing_records[record["id"]] = record
+        else:
+            logging.error(
+                "Failed to fetch current records. HTTP %s\t%s",
+                response.status_code,
+                response.text,
+            )
+
+    def handle_upsert_for_statistical_codes(self, updates: dict, keep_existing: dict):
+        if not self.task_configuration.preserve_statistical_codes:
+            updates["statisticalCodeIds"] = []
+            keep_existing["statisticalCodeIds"] = []
+        else:
+            keep_existing["statisticalCodeIds"] = updates.pop("statisticalCodeIds", [])
+            updates["statisticalCodeIds"] = []
+
+    def handle_upsert_for_administrative_notes(self, updates: dict, keep_existing: dict):
+        if not self.task_configuration.preserve_administrative_notes:
+            updates["administrativeNotes"] = []
+            keep_existing["administrativeNotes"] = []
+        else:
+            keep_existing["administrativeNotes"] = updates.pop("administrativeNotes", [])
+            updates["administrativeNotes"] = []
+
+    def handle_upsert_for_temporary_locations(self, updates: dict, keep_existing: dict):
+        if self.task_configuration.preserve_temporary_locations:
+            keep_existing["temporaryLocationId"] = updates.pop("temporaryLocationId", None)
+
+    def handle_upsert_for_temporary_loan_types(self, updates: dict, keep_existing: dict):
+        if self.task_configuration.preserve_temporary_loan_types:
+            keep_existing["temporaryLoanTypeId"] = updates.pop("temporaryLoanTypeId", None)
+
+    def keep_existing_fields(self, updates: dict, existing_record: dict):
+        keep_existing_fields = ["hrid", "lastCheckIn"]
+        if self.task_configuration.preserve_item_status:
+            keep_existing_fields.append("status")
+        for key in keep_existing_fields:
+            if key in existing_record:
+                updates[key] = existing_record[key]
+
+    def prepare_record_for_upsert(self, new_record: dict, existing_record: dict):
+        if "source" in existing_record and "MARC" in existing_record["source"]:
+            if self.task_configuration.patch_paths:
+                logging.debug(
+                    "Record %s is a MARC record, patch_paths will be ignored",
+                    existing_record["id"],
+                )
+            self.patch_record(new_record, existing_record, ["statisticalCodeIds", "administrativeNotes", "instanceStatusId"])
+        elif self.task_configuration.patch_existing_records:
+            self.patch_record(new_record, existing_record, self.task_configuration.patch_paths)
+        else:
+            updates = {
+                "_version": existing_record["_version"],
+            }
+            self.keep_existing_fields(updates, existing_record)
+            keep_new = {k: v for k, v in new_record.items() if k in ["statisticalCodeIds", "administrativeNotes"]}
+            keep_existing = {}
+            self.handle_upsert_for_statistical_codes(existing_record, keep_existing)
+            self.handle_upsert_for_administrative_notes(existing_record, keep_existing)
+            self.handle_upsert_for_temporary_locations(existing_record, keep_existing)
+            self.handle_upsert_for_temporary_loan_types(existing_record, keep_existing)
+            for k, v in keep_existing.items():
+                if isinstance(v, list) and k in keep_new:
+                    keep_new[k] = list(dict.fromkeys(v + keep_new.get(k, [])))
+                elif k not in keep_new:
+                    keep_new[k] = v
+            updates.update(keep_new)
+            new_record.update(updates)
+
+    async def get_with_retry(self, client: httpx.AsyncClient, url: str, params=None):
+        if params is None:
+            params = {}
+        retries = 3
+        for attempt in range(retries):
+            try:
+                response = await client.get(
+                    url, params=params, headers=self.folio_client.okapi_headers)
+                response.raise_for_status()
+                return response
+            except httpx.HTTPError as e:
+                if attempt < retries - 1:
+                    logging.warning(f"Retrying due to {e}")
+                    await asyncio.sleep(2 ** attempt)
+                else:
+                    logging.error(f"Failed to connect after {retries} attempts: {e}")
+                    raise
+
+    def post_record_batch(self, batch, failed_recs_file, row):
+        json_rec = json.loads(row.split("\t")[-1])
+        if self.task_configuration.object_type == "ShadowInstances":
+            self.set_consortium_source(json_rec)
+        if self.task_configuration.object_type == "SRS":
+            json_rec["snapshotId"] = self.snapshot_id
+        if self.processed == 1:
+            logging.info(json.dumps(json_rec, indent=True))
+        batch.append(json_rec)
+        if len(batch) == int(self.batch_size):
+            self.post_batch(batch, failed_recs_file, self.processed)
+            batch = []
+        return batch
 
     def post_extra_data(self, row: str, num_records: int, failed_recs_file):
         (object_name, data) = row.split("\t")
-        endpoint = get_extradata_endpoint(object_name)
-        url = f"{self.folio_client.okapi_url}/{endpoint}"
+        endpoint = self.get_extradata_endpoint(self.task_configuration, object_name, data)
+        url = f"{self.folio_client.gateway_url}/{endpoint}"
         body = data
         response = self.post_objects(url, body)
         if response.status_code == 201:
@@ -142,19 +528,66 @@ class BatchPoster(MigrationTaskBase):
         elif response.status_code == 422:
             self.num_failures += 1
             error_msg = json.loads(response.text)["errors"][0]["message"]
-            logging.error(
-                "Row %s\tHTTP %s\t %s", num_records, response.status_code, error_msg
-            )
-            if (
-                "id value already exists"
-                not in json.loads(response.text)["errors"][0]["message"]
-            ):
+            logging.error("Row %s\tHTTP %s\t %s", num_records, response.status_code, error_msg)
+            if "id value already exists" not in json.loads(response.text)["errors"][0]["message"]:
                 failed_recs_file.write(row)
         else:
             self.num_failures += 1
-            logging.error(
-                "Row %s\tHTTP %s\t%s", num_records, response.status_code, response.text
+            logging.error("Row %s\tHTTP %s\t%s", num_records, response.status_code, response.text)
+            failed_recs_file.write(row)
+        if num_records % 50 == 0:
+            logging.info(
+                "%s records posted successfully. %s failed",
+                self.num_posted,
+                self.num_failures,
             )
+
+    @staticmethod
+    def get_extradata_endpoint(
+        task_configuration: TaskConfiguration, object_name: str, string_object: str
+    ):
+        object_types = {
+            "precedingSucceedingTitles": "preceding-succeeding-titles",
+            "precedingTitles": "preceding-succeeding-titles",
+            "succeedingTitles": "preceding-succeeding-titles",
+            "boundwithPart": "inventory-storage/bound-with-parts",
+            "notes": "notes",
+            "course": "coursereserves/courses",
+            "courselisting": "coursereserves/courselistings",
+            "contacts": "organizations-storage/contacts",
+            "interfaces": "organizations-storage/interfaces",
+            "account": "accounts",
+            "feefineaction": "feefineactions",
+            "bankInfo": "organizations/banking-information",
+        }
+        object_types.update(task_configuration.extradata_endpoints)
+        if object_name == "instructor":
+            instructor = json.loads(string_object)
+            return f'coursereserves/courselistings/{instructor["courseListingId"]}/instructors'
+
+        if object_name == "interfaceCredential":
+            credential = json.loads(string_object)
+            return f'organizations-storage/interfaces/{credential["interfaceId"]}/credentials'
+
+        return object_types[object_name]
+
+    def post_single_records(self, row: str, num_records: int, failed_recs_file):
+        if self.api_info["is_batch"]:
+            raise TypeError("This record type supports batch processing, use post_batch method")
+        api_endpoint = self.api_info.get("api_endpoint")
+        url = f"{self.folio_client.gateway_url}{api_endpoint}"
+        response = self.post_objects(url, row)
+        if response.status_code == 201:
+            self.num_posted += 1
+        elif response.status_code == 422:
+            self.num_failures += 1
+            error_msg = json.loads(response.text)["errors"][0]["message"]
+            logging.error("Row %s\tHTTP %s\t %s", num_records, response.status_code, error_msg)
+            if "id value already exists" not in json.loads(response.text)["errors"][0]["message"]:
+                failed_recs_file.write(row)
+        else:
+            self.num_failures += 1
+            logging.error("Row %s\tHTTP %s\t%s", num_records, response.status_code, response.text)
             failed_recs_file.write(row)
         if num_records % 50 == 0:
             logging.info(
@@ -164,21 +597,23 @@ class BatchPoster(MigrationTaskBase):
             )
 
     def post_objects(self, url, body):
-        return requests.post(
-            url, headers=self.folio_client.okapi_headers, data=body.encode("utf-8")
-        )
+        if self.http_client and not self.http_client.is_closed:
+            return self.http_client.post(
+                url, data=body.encode("utf-8"), headers=self.folio_client.okapi_headers
+            )
+        else:
+            return httpx.post(
+                url, headers=self.okapi_headers, data=body.encode("utf-8"), timeout=None
+            )
 
-    def handle_generic_exception(
-        self, exception, last_row, batch, num_records, failed_recs_file
-    ):
+    def handle_generic_exception(self, exception, last_row, batch, num_records, failed_recs_file):
         logging.error("%s", exception)
+        self.migration_report.add("Details", i18n.t("Generic exceptions (see log for details)"))
         # logging.error("Failed row: %s", last_row)
         self.failed_batches += 1
-        self.failed_records += len(batch)
+        self.num_failures += len(batch)
         write_failed_batch_to_file(batch, failed_recs_file)
-        logging.info(
-            "Resetting batch...Number of failed batches: %s", self.failed_batches
-        )
+        logging.info("Resetting batch...Number of failed batches: %s", self.failed_batches)
         batch = []
         if self.failed_batches > 50000:
             logging.error("Exceeded number of failed batches at row %s", num_records)
@@ -186,6 +621,7 @@ class BatchPoster(MigrationTaskBase):
             sys.exit(1)
 
     def handle_unicode_error(self, unicode_error, last_row):
+        self.migration_report.add("Details", i18n.t("Encoding errors"))
        logging.info("=========ERROR==============")
        logging.info(
            "%s Posting failed. Encoding error reading file",
@@ -193,15 +629,21 @@
         )
         logging.info(
             "Failing row, either the one shown here or the next row in %s",
-            self.task_config.file.file_name,
+            self.task_configuration.file.file_name,
         )
         logging.info(last_row)
         logging.info("=========Stack trace==============")
-        traceback.logging.info_exc()
-        logging.info("=======================", flush=True)
+        traceback.logging.info_exc()  # type: ignore
+        logging.info("=======================")
 
     def post_batch(self, batch, failed_recs_file, num_records, recursion_depth=0):
+        if self.query_params.get("upsert", False) and self.api_info.get("query_endpoint", ""):
+            self.set_version(batch, self.api_info['query_endpoint'], self.api_info['object_name'])
         response = self.do_post(batch)
+        if response.status_code == 401:
+            logging.error("Authorization failed (%s). Fetching new auth token...", response.text)
+            self.folio_client.login()
+            response = self.do_post(batch)
         if response.status_code == 201:
             logging.info(
                 (
@@ -210,7 +652,7 @@ class BatchPoster(MigrationTaskBase):
                     "Batch Size: %s Request size: %s "
                 ),
                 num_records,
-                self.failed_records,
+                self.num_failures,
                 response.elapsed.total_seconds(),
                 len(batch),
                 get_req_size(response),
@@ -219,7 +661,8 @@ class BatchPoster(MigrationTaskBase):
             json_report = json.loads(response.text)
             self.users_created += json_report.get("createdRecords", 0)
             self.users_updated += json_report.get("updatedRecords", 0)
-            self.failed_records += json_report.get("failedRecords", 0)
+            self.num_posted = self.users_updated + self.users_created
+            self.num_failures += json_report.get("failedRecords", 0)
             if json_report.get("failedRecords", 0) > 0:
                 logging.error(
                     "%s users in batch failed to load",
@@ -227,7 +670,7 @@ class BatchPoster(MigrationTaskBase):
                 )
                 write_failed_batch_to_file(batch, failed_recs_file)
             if json_report.get("failedUsers", []):
-                logging.error("Errormessage: %s", json_report.get("error", []))
+                logging.error("Error message: %s", json_report.get("error", []))
                 for failed_user in json_report.get("failedUsers"):
                     logging.error(
                         "User failed. %s\t%s\t%s",
@@ -235,6 +678,7 @@ class BatchPoster(MigrationTaskBase):
                         failed_user.get("externalSystemId", ""),
                         failed_user.get("errorMessage", ""),
                     )
+                    self.migration_report.add("Details", failed_user.get("errorMessage", ""))
             logging.info(
                 (
                     "Posting successful! Total rows: %s Total failed: %s "
@@ -242,7 +686,7 @@ class BatchPoster(MigrationTaskBase):
                     "Message from server: %s"
                 ),
                 num_records,
-                self.failed_records,
+                self.num_failures,
                 self.users_created,
                 self.users_updated,
                 response.elapsed.total_seconds(),
@@ -256,18 +700,16 @@ class BatchPoster(MigrationTaskBase):
                 "",
                 f"HTTP {response.status_code}\t"
                 f"Request size: {get_req_size(response)}"
-                f"{datetime.utcnow().isoformat()} UTC\n",
+                f"{datetime.now(timezone.utc).isoformat()}\n",
                 json.dumps(resp, indent=4),
             )
         elif response.status_code == 400:
             # Likely a json parsing error
             logging.error(response.text)
-            raise TransformationProcessError(
-                "", "HTTP 400. Somehting is wrong. Quitting"
-            )
-        elif self.task_config.object_type == "SRS" and response.status_code == 500:
+            raise TransformationProcessError("", "HTTP 400. Something is wrong. Quitting")
+        elif self.task_configuration.object_type == "SRS" and response.status_code >= 500:
             logging.info(
-                "Post failed. Size: %s Waiting 30 seconds until reposting. Number of tries: %s of 5 before failing batch",
+                "Post failed. Size: %s Waiting 30s until reposting. Number of tries: %s of 5",
                 get_req_size(response),
                 recursion_depth,
             )
@@ -278,201 +720,390 @@ class BatchPoster(MigrationTaskBase):
                     "",
                     f"HTTP {response.status_code}\t"
                     f"Request size: {get_req_size(response)}"
-                    f"{datetime.utcnow().isoformat()} UTC\n",
+                    f"{datetime.now(timezone.utc).isoformat()}\n",
                     response.text,
                 )
             else:
-                self.post_batch(
-                    batch, failed_recs_file, num_records, recursion_depth + 1
-                )
+                self.post_batch(batch, failed_recs_file, num_records, recursion_depth + 1)
+        elif (
+            response.status_code == 413 and "DB_ALLOW_SUPPRESS_OPTIMISTIC_LOCKING" in response.text
+        ):
+            logging.error(response.text)
+            raise TransformationProcessError("", response.text, "")
+
         else:
             try:
                 logging.info(response.text)
                 resp = json.dumps(response, indent=4)
-            except Exception:
-                logging.exception("something unexpected happened")
+            except TypeError:
+                resp = response
+            except Exception as e:
+                logging.exception(f"something unexpected happened, {e}")
                 resp = response
             raise TransformationRecordFailedError(
                 "",
                 f"HTTP {response.status_code}\t"
                 f"Request size: {get_req_size(response)}"
-                f"{datetime.utcnow().isoformat()} UTC\n",
+                f"{datetime.now(timezone.utc).isoformat()}\n",
                 resp,
             )
 
     def do_post(self, batch):
-        kind = list_objects(self.task_config.object_type)
-        path = kind["api_endpoint"]
-        url = self.folio_client.okapi_url + path
-        if kind["object_name"] == "users":
-            payload = {kind["object_name"]: list(batch), "totalRecords": len(batch)}
-        elif kind["total_records"]:
+        path = self.api_info["api_endpoint"]
+        url = self.folio_client.gateway_url + path
+        if self.api_info["object_name"] == "users":
+            payload = {self.api_info["object_name"]: list(batch), "totalRecords": len(batch)}
+        elif self.api_info["total_records"]:
             payload = {"records": list(batch), "totalRecords": len(batch)}
         else:
-            payload = {kind["object_name"]: batch}
-        return requests.post(
-            url, data=json.dumps(payload), headers=self.folio_client.okapi_headers
-        )
+            payload = {self.api_info["object_name"]: batch}
+        if self.http_client and not self.http_client.is_closed:
+            return self.http_client.post(
+                url,
+                json=payload,
+                headers=self.folio_client.okapi_headers,
+                params=self.query_params
+            )
+        else:
+            return httpx.post(
+                url,
+                headers=self.okapi_headers,
+                json=payload,
+                params=self.query_params,
+                timeout=None)
+
+    def get_current_record_count_in_folio(self):
+        if "query_endpoint" in self.api_info:
+            url = f"{self.folio_client.gateway_url}{self.api_info['query_endpoint']}"
+            query_params = {"query": "cql.allRecords=1", "limit": 0}
+            if self.http_client and not self.http_client.is_closed:
+                res = self.http_client.get(
+                    url,
+                    headers=self.folio_client.okapi_headers,
+                    params=query_params
+                )
+            else:
+                res = httpx.get(url, headers=self.okapi_headers, params=query_params, timeout=None)
+            try:
+                res.raise_for_status()
+                return res.json()["totalRecords"]
+            except httpx.HTTPStatusError:
+                logging.error("Failed to get current record count. HTTP %s", res.status_code)
+                return 0
+            except KeyError:
+                logging.error(
+                    "Failed to get current record count. "
+                    f"No 'totalRecords' in response: {res.json()}"
+                )
+                return 0
+        else:
+            raise ValueError(
+                "No 'query_endpoint' available for %s. Cannot get current record count.",
+                self.task_configuration.object_type
+            )
+
+    def get_starting_record_count(self):
+        if "query_endpoint" in self.api_info and not self.starting_record_count_in_folio:
+            logging.info("Getting starting record count in FOLIO")
+            self.starting_record_count_in_folio = self.get_current_record_count_in_folio()
+        else:
+            logging.info(
+                "No query_endpoint available for %s. Cannot get starting record count.",
+                self.task_configuration.object_type
+            )
+
+    def get_finished_record_count(self):
+        if "query_endpoint" in self.api_info:
+            logging.info("Getting finished record count in FOLIO")
+            self.finished_record_count_in_folio = self.get_current_record_count_in_folio()
+        else:
+            logging.info(
+                "No query_endpoint available for %s. Cannot get ending record count.",
+                self.task_configuration.object_type
+            )
 
     def wrap_up(self):
         logging.info("Done. Wrapping up")
-        if self.task_config.object_type == "SRS":
+        self.extradata_writer.flush()
+        if self.task_configuration.object_type == "SRS":
             self.commit_snapshot()
-        if self.task_config.object_type != "Extradata":
+        if self.task_configuration.object_type != "Extradata":
             logging.info(
                 (
                     "Failed records: %s failed records in %s "
                     "failed batches. Failed records saved to %s"
                 ),
-                self.failed_records,
+                self.num_failures,
                 self.failed_batches,
                 self.folder_structure.failed_recs_path,
             )
-
         else:
-            logging.info(
-                "Done posting % records. % failed", self.num_posted, self.num_failures
+            logging.info("Done posting %s records. %s failed", self.num_posted, self.num_failures)
+        if self.starting_record_count_in_folio:
+            self.get_finished_record_count()
+            total_on_server = (
+                self.finished_record_count_in_folio - self.starting_record_count_in_folio
+            )
+            discrepancy = self.processed - self.num_failures - total_on_server
+            if discrepancy != 0:
+                logging.error(
+                    (
+                        "Discrepancy in record count. "
+                        "Starting record count: %s. Finished record count: %s. "
+                        "Records posted: %s. Discrepancy: %s"
+                    ),
+                    self.starting_record_count_in_folio,
+                    self.finished_record_count_in_folio,
+                    self.num_posted - self.num_failures,
+                    discrepancy,
+                )
+        else:
+            discrepancy = 0
+        run = "second time" if self.performing_rerun else "first time"
+        self.migration_report.set("GeneralStatistics", f"Records processed {run}", self.processed)
+        self.migration_report.set("GeneralStatistics", f"Records posted {run}", self.num_posted)
+        self.migration_report.set("GeneralStatistics", f"Failed to post {run}", self.num_failures)
+        if discrepancy:
+            self.migration_report.set(
+                "GeneralStatistics",
+                f"Discrepancy in record count {run}",
+                discrepancy,
             )
-
         self.rerun_run()
+        with open(self.folder_structure.migration_reports_file, "w+") as report_file:
+            self.migration_report.write_migration_report(
+                f"{self.task_configuration.object_type} loading report",
+                report_file,
+                self.start_datetime,
+            )
+        self.clean_out_empty_logs()
 
     def rerun_run(self):
-        if self.task_config.rerun_failed_records and (
-            self.failed_records > 0 or self.num_failures > 0
-        ):
+        if self.task_configuration.rerun_failed_records and (self.num_failures > 0):
             logging.info(
-                "Rerunning the failed records from the load with a batchsize of 1"
+                "Rerunning the %s failed records from the load with a batchsize of 1",
+                self.num_failures,
             )
             try:
-                self.task_config.batch_size = 1
-                self.task_config.files = [
-                    FileDefinition(
-                        file_name=str(self.folder_structure.failed_recs_path.name)
-                    )
+                self.task_configuration.batch_size = 1
+                self.task_configuration.files = [
+                    FileDefinition(file_name=str(self.folder_structure.failed_recs_path.name))
                 ]
-                self.task_config.rerun_failed_records = False
-                self.__init__(self.task_config, self.library_configuration)
+                temp_report = copy.deepcopy(self.migration_report)
+                temp_start = self.start_datetime
+                self.task_configuration.rerun_failed_records = False
+                self.__init__(
+                    self.task_configuration,
+                    self.library_configuration,
+                    self.folio_client)
+                self.performing_rerun = True
+                self.migration_report = temp_report
+                self.start_datetime = temp_start
                 self.do_work()
                 self.wrap_up()
                 logging.info("Done rerunning the posting")
             except Exception as ee:
-                logging.exception("Happed during rerun")
+                logging.exception("Occurred during rerun")
                 raise TransformationProcessError("Error during rerun") from ee
+        elif not self.task_configuration.rerun_failed_records and (self.num_failures > 0):
+            logging.info(
+                (
+                    "Task configured to not rerun failed records. "
+                    " File with failed records is located at %s"
+                ),
+                str(self.folder_structure.failed_recs_path),
+            )
 
     def create_snapshot(self):
         snapshot = {
             "jobExecutionId": self.snapshot_id,
             "status": "PARSING_IN_PROGRESS",
-            "processingStartedDate": datetime.utcnow().isoformat(
-                timespec="milliseconds"
-            ),
+            "processingStartedDate": datetime.now(timezone.utc).isoformat(timespec="milliseconds"),
         }
         try:
-            url = f"{self.folio_client.okapi_url}/source-storage/snapshots"
-            res = requests.post(
-                url, data=json.dumps(snapshot), headers=self.folio_client.okapi_headers
-            )
+            url = f"{self.folio_client.gateway_url}/source-storage/snapshots"
+            if self.http_client and not self.http_client.is_closed:
+                res = self.http_client.post(
+                    url, json=snapshot, headers=self.folio_client.okapi_headers
+                )
+            else:
+                res = httpx.post(url, headers=self.okapi_headers, json=snapshot, timeout=None)
             res.raise_for_status()
             logging.info("Posted Snapshot to FOLIO: %s", json.dumps(snapshot, indent=4))
-            get_url = f"{self.folio_client.okapi_url}/source-storage/snapshots/{self.snapshot_id}"
-            getted = False
-            while not getted:
+            get_url = (
+                f"{self.folio_client.gateway_url}/source-storage/snapshots/{self.snapshot_id}"
+            )
+            got = False
+            while not got:
                 logging.info("Sleeping while waiting for the snapshot to get created")
                 time.sleep(5)
-                res = requests.get(get_url, headers=self.folio_client.okapi_headers)
+                if self.http_client and not self.http_client.is_closed:
+                    res = self.http_client.get(get_url, headers=self.folio_client.okapi_headers)
+                else:
+                    res = httpx.get(get_url, headers=self.okapi_headers, timeout=None)
                 if res.status_code == 200:
-                    getted = True
+                    got = True
                 else:
                     logging.info(res.status_code)
-        except Exception:
-            logging.exception("Could not post the snapshot")
+        except httpx.HTTPStatusError as exc:
+            logging.exception("HTTP error occurred while posting the snapshot: %s", exc)
+            sys.exit(1)
+        except Exception as exc:
+            logging.exception("Could not post the snapshot: %s", exc)
             sys.exit(1)
 
     def commit_snapshot(self):
         snapshot = {"jobExecutionId": self.snapshot_id, "status": "COMMITTED"}
         try:
-            url = f"{self.folio_client.okapi_url}/source-storage/snapshots/{self.snapshot_id}"
-            res = requests.put(
-                url, data=json.dumps(snapshot), headers=self.folio_client.okapi_headers
-            )
+            url = f"{self.folio_client.gateway_url}/source-storage/snapshots/{self.snapshot_id}"
+            if self.http_client and not self.http_client.is_closed:
+                res = self.http_client.put(
+                    url, json=snapshot, headers=self.folio_client.okapi_headers
+                )
+            else:
+                res = httpx.put(url, headers=self.okapi_headers, json=snapshot, timeout=None)
             res.raise_for_status()
-            logging.info(
-                "Posted Committed snapshot to FOLIO: %s", json.dumps(snapshot, indent=4)
-            )
-        except Exception:
+            logging.info("Posted Committed snapshot to FOLIO: %s", json.dumps(snapshot, indent=4))
+        except httpx.HTTPStatusError as exc:
+            logging.exception("HTTP error occurred while posting the snapshot: %s", exc)
+            sys.exit(1)
+        except Exception as exc:
             logging.exception(
-                "Could not commit snapshot with id %s. Post the following to /source-storage/snapshots/%s:",
+                "Could not commit snapshot with id %s. Post this to /source-storage/snapshots/%s:",
                 self.snapshot_id,
                 self.snapshot_id,
+                exc,
             )
             logging.info("%s", json.dumps(snapshot, indent=4))
             sys.exit(1)
 
 
-def list_objects(object_type: str):
+def get_api_info(object_type: str, use_safe: bool = True):
     choices = {
         "Extradata": {
             "object_name": "",
             "api_endpoint": "",
             "total_records": False,
             "addSnapshotId": False,
+            "supports_upsert": False,
         },
         "Items": {
             "object_name": "items",
-            "api_endpoint": "/item-storage/batch/synchronous?upsert=true",
+            "api_endpoint": (
+                "/item-storage/batch/synchronous"
+                if use_safe
+                else "/item-storage/batch/synchronous-unsafe"
+            ),
+            "query_endpoint": "/item-storage/items",
+            "is_batch": True,
             "total_records": False,
             "addSnapshotId": False,
+            "supports_upsert": True,
         },
         "Holdings": {
             "object_name": "holdingsRecords",
-            "api_endpoint": "/holdings-storage/batch/synchronous?upsert=true",
+            "api_endpoint": (
+                "/holdings-storage/batch/synchronous"
+                if use_safe
+                else "/holdings-storage/batch/synchronous-unsafe"
+            ),
+            "query_endpoint": "/holdings-storage/holdings",
+            "is_batch": True,
             "total_records": False,
             "addSnapshotId": False,
+            "supports_upsert": True,
         },
         "Instances": {
             "object_name": "instances",
-            "api_endpoint": "/instance-storage/batch/synchronous?upsert=true",
+            "api_endpoint": (
+                "/instance-storage/batch/synchronous"
+                if use_safe
+                else "/instance-storage/batch/synchronous-unsafe"
+            ),
+            "query_endpoint": "/instance-storage/instances",
+            "is_batch": True,
+            "total_records": False,
+            "addSnapshotId": False,
+            "supports_upsert": True,
+        },
+        "ShadowInstances": {
+            "object_name": "instances",
+            "api_endpoint": (
+                "/instance-storage/batch/synchronous"
+                if use_safe
+                else "/instance-storage/batch/synchronous-unsafe"
+            ),
+            "is_batch": True,
+            "total_records": False,
+            "addSnapshotId": False,
+            "supports_upsert": True,
+        },
+        "Authorities": {
+            "object_name": "",
+            "api_endpoint": "/authority-storage/authorities",
+            "is_batch": False,
             "total_records": False,
             "addSnapshotId": False,
+            "supports_upsert": False,
         },
         "SRS": {
             "object_name": "records",
             "api_endpoint": "/source-storage/batch/records",
+            "is_batch": True,
             "total_records": True,
             "addSnapshotId": True,
+            "supports_upsert": False,
         },
         "Users": {
             "object_name": "users",
             "api_endpoint": "/user-import",
+            "is_batch": True,
             "total_records": True,
             "addSnapshotId": False,
+            "supports_upsert": False,
+        },
+        "Organizations": {
+            "object_name": "",
+            "api_endpoint": "/organizations/organizations",
+            "is_batch": False,
+            "total_records": False,
+            "addSnapshotId": False,
+            "supports_upsert": False,
+        },
+        "Orders": {
+            "object_name": "",
+            "api_endpoint": "/orders/composite-orders",
+            "is_batch": False,
+            "total_records": False,
+            "addSnapshotId": False,
+            "supports_upsert": False,
         },
     }
 
     try:
         return choices[object_type]
     except KeyError:
-        key_string = ",".join(choices.keys())
-        logging.error(f"Wrong type. Only one of {key_string} are allowed")
+        key_string = ", ".join(choices.keys())
+        logging.error(
+            f"Wrong type. Only one of {key_string} are allowed, "
+            f"received {object_type=} instead"
+        )
         logging.error("Halting")
         sys.exit(1)
 
 
 def chunks(records, number_of_chunks):
-    """Yield successive n-sized chunks from lst."""
-    for i in range(0, len(records), number_of_chunks):
-        yield records[i : i + number_of_chunks]
+    """Yield successive n-sized chunks from lst.
 
+    Args:
+        records (_type_): _description_
+        number_of_chunks (_type_): _description_
 
-def get_extradata_endpoint(object_name):
-    object_types = {
-        "precedingSucceedingTitles": "preceding-succeeding-titles",
-        "precedingTitles": "preceding-succeeding-titles",
-        "succeedingTitles": "preceding-succeeding-titles",
-        "boundwithPart": "inventory-storage/bound-with-parts",
-        "notes": "notes",
-    }
-    return object_types[object_name]
+    Yields:
+        _type_: _description_
+    """
+    for i in range(0, len(records), number_of_chunks):
+        yield records[i: i + number_of_chunks]
 
 
 def get_human_readable(size, precision=2):
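
Note: get_api_info (which replaces list_objects in this hunk) is a plain dictionary lookup, so its behavior can be shown directly. A short usage sketch based only on the definitions above (the import path is the module shown in this diff):

    from folio_migration_tools.migration_tasks.batch_poster import get_api_info

    api_info = get_api_info("Items")  # use_safe defaults to True
    assert api_info["api_endpoint"] == "/item-storage/batch/synchronous"
    assert api_info["supports_upsert"] is True

    unsafe = get_api_info("Items", use_safe=False)  # bypasses optimistic locking
    assert unsafe["api_endpoint"] == "/item-storage/batch/synchronous-unsafe"

    # An unknown type, e.g. get_api_info("Bogus"), logs the allowed
    # object types and terminates via sys.exit(1).
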
@@ -484,9 +1115,91 @@ def get_human_readable(size, precision=2):
     return "%.*f%s" % (precision, size, suffixes[suffix_index])
 
 
-def get_req_size(response):
+def get_req_size(response: httpx.Response):
     size = response.request.method
-    size += response.request.url
+    size += str(response.request.url)
     size += "\r\n".join(f"{k}{v}" for k, v in response.request.headers.items())
-    size += response.request.body or []
+    size += response.request.content.decode("utf-8") or ""
     return get_human_readable(len(size.encode("utf-8")))
+
+def parse_path(path):
+    """
+    Parses a path like 'foo.bar[0].baz' into ['foo', 'bar', 0, 'baz']
+    """
+    tokens = []
+    # Split by dot, then extract indices
+    for part in path.split('.'):
+        # Find all [index] parts
+        matches = re.findall(r'([^\[\]]+)|\[(\d+)\]', part)
+        for name, idx in matches:
+            if name:
+                tokens.append(name)
+            if idx:
+                tokens.append(int(idx))
+    return tokens
+
+def get_by_path(data, path):
+    keys = parse_path(path)
+    for key in keys:
+        data = data[key]
+    return data
+
+def set_by_path(data, path, value):
+    keys = parse_path(path)
+    for i, key in enumerate(keys[:-1]):
+        next_key = keys[i + 1]
+        if isinstance(key, int):
+            while len(data) <= key:
+                data.append({} if not isinstance(next_key, int) else [])
+            data = data[key]
+        else:
+            if key not in data or not isinstance(data[key], (dict, list)):
+                data[key] = {} if not isinstance(next_key, int) else []
+            data = data[key]
+    last_key = keys[-1]
+    if isinstance(last_key, int):
+        while len(data) <= last_key:
+            data.append(None)
+        data[last_key] = value
+    else:
+        data[last_key] = value
+
+def extract_paths(data, paths):
+    result = {}
+    for path in paths:
+        try:
+            value = get_by_path(data, path)
+            set_by_path(result, path, value)
+        except KeyError:
+            continue
+    return result
+
+def deep_update(target, patch):
+    """
+    Recursively update target dict/list with values from patch dict/list.
+    For lists, only non-None values in patch are merged into target.
+    """
+    if isinstance(patch, dict):
+        for k, v in patch.items():
+            if (
+                k in target
+                and isinstance(target[k], (dict, list))
+                and isinstance(v, (dict, list))
+            ):
+                deep_update(target[k], v)
+            else:
+                target[k] = v
+    elif isinstance(patch, list):
+        for i, v in enumerate(patch):
+            if v is None:
+                continue  # Skip None values, leave target unchanged
+            if i < len(target):
+                if isinstance(target[i], (dict, list)) and isinstance(v, (dict, list)):
+                    deep_update(target[i], v)
+                else:
+                    target[i] = v
+            else:
+                # Only append if not None
+                target.append(v)
+    else:
+        return patch
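
Note: the diff ends with the JSON-path helpers that power the patch_paths option. As a quick end-to-end illustration, extract_paths pulls only the configured fields out of an incoming record and deep_update merges them into the existing one. The helper calls are exactly the functions defined above; the sample records and paths are invented:

    from folio_migration_tools.migration_tasks.batch_poster import (
        deep_update,
        extract_paths,
    )

    # Hypothetical records, for illustration only
    existing = {
        "_version": 3,
        "instanceStatusId": "old-status",
        "administrativeNotes": ["keep me"],
    }
    incoming = {
        "instanceStatusId": "new-status",
        "contributors": [{"name": "Doe, Jane"}],
    }

    # Pull out just the fields named in patch_paths (list indices supported)
    subset = extract_paths(incoming, ["instanceStatusId", "contributors[0].name"])
    # subset == {"instanceStatusId": "new-status",
    #            "contributors": [{"name": "Doe, Jane"}]}

    # Merge the subset into the existing record; untouched fields survive
    deep_update(existing, subset)
    # existing == {"_version": 3, "instanceStatusId": "new-status",
    #              "administrativeNotes": ["keep me"],
    #              "contributors": [{"name": "Doe, Jane"}]}

In the task itself, patch_record layers the preserve_* handling on top of this merge, and the record is then posted with the existing record's current _version so FOLIO's optimistic locking still applies.
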