folio-migration-tools 1.10.0__py3-none-any.whl → 1.10.0b2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. folio_migration_tools/__main__.py +0 -9
  2. folio_migration_tools/circulation_helper.py +5 -6
  3. folio_migration_tools/folder_structure.py +6 -16
  4. folio_migration_tools/helper.py +6 -7
  5. folio_migration_tools/holdings_helper.py +3 -4
  6. folio_migration_tools/library_configuration.py +0 -12
  7. folio_migration_tools/mapper_base.py +6 -7
  8. folio_migration_tools/mapping_file_transformation/user_mapper.py +0 -4
  9. folio_migration_tools/marc_rules_transformation/conditions.py +29 -0
  10. folio_migration_tools/marc_rules_transformation/marc_file_processor.py +9 -19
  11. folio_migration_tools/marc_rules_transformation/rules_mapper_authorities.py +242 -0
  12. folio_migration_tools/marc_rules_transformation/rules_mapper_base.py +15 -10
  13. folio_migration_tools/marc_rules_transformation/rules_mapper_bibs.py +2 -3
  14. folio_migration_tools/marc_rules_transformation/rules_mapper_holdings.py +5 -6
  15. folio_migration_tools/migration_report.py +6 -17
  16. folio_migration_tools/migration_tasks/authority_transformer.py +118 -0
  17. folio_migration_tools/migration_tasks/batch_poster.py +298 -219
  18. folio_migration_tools/migration_tasks/bibs_transformer.py +2 -2
  19. folio_migration_tools/migration_tasks/holdings_csv_transformer.py +9 -9
  20. folio_migration_tools/migration_tasks/holdings_marc_transformer.py +3 -3
  21. folio_migration_tools/migration_tasks/items_transformer.py +4 -6
  22. folio_migration_tools/migration_tasks/loans_migrator.py +18 -19
  23. folio_migration_tools/migration_tasks/manual_fee_fines_transformer.py +3 -3
  24. folio_migration_tools/migration_tasks/migration_task_base.py +5 -13
  25. folio_migration_tools/migration_tasks/orders_transformer.py +3 -4
  26. folio_migration_tools/migration_tasks/requests_migrator.py +9 -10
  27. folio_migration_tools/migration_tasks/reserves_migrator.py +4 -5
  28. folio_migration_tools/migration_tasks/user_transformer.py +5 -15
  29. folio_migration_tools/translations/en.json +7 -0
  30. {folio_migration_tools-1.10.0.dist-info → folio_migration_tools-1.10.0b2.dist-info}/METADATA +2 -3
  31. {folio_migration_tools-1.10.0.dist-info → folio_migration_tools-1.10.0b2.dist-info}/RECORD +33 -32
  32. folio_migration_tools/i18n_cache.py +0 -79
  33. {folio_migration_tools-1.10.0.dist-info → folio_migration_tools-1.10.0b2.dist-info}/WHEEL +0 -0
  34. {folio_migration_tools-1.10.0.dist-info → folio_migration_tools-1.10.0b2.dist-info}/entry_points.txt +0 -0
@@ -4,15 +4,14 @@ import json
  import logging
  import re
  import sys
+ import time
  import traceback
  from datetime import datetime, timezone
- from typing import TYPE_CHECKING, Annotated, List, Optional
+ from typing import Annotated, List, Optional
  from uuid import uuid4

- import folioclient
-
- if TYPE_CHECKING:
-     from httpx import Response
+ import httpx
+ import i18n
  from folio_uuid.folio_namespaces import FOLIONamespaces
  from pydantic import Field

@@ -20,7 +19,6 @@ from folio_migration_tools.custom_exceptions import (
      TransformationProcessError,
      TransformationRecordFailedError,
  )
- from folio_migration_tools.i18n_cache import i18n_t
  from folio_migration_tools.library_configuration import (
      FileDefinition,
      LibraryConfiguration,
@@ -75,7 +73,7 @@ class BatchPoster(MigrationTaskBase):
              description=(
                  "The type of object being migrated"
                  "Examples of possible values: "
-                 "'Extradata', 'Instances', 'Holdings', 'Items'"
+                 "'Extradata', 'SRS', Instances', 'Holdings', 'Items'"
              ),
          ),
      ]
@@ -247,80 +245,90 @@ class BatchPoster(MigrationTaskBase):
          self.failed_fields: set = set()
          self.num_failures = 0
          self.num_posted = 0
+         self.okapi_headers = self.folio_client.okapi_headers
+         self.http_client = None
          self.starting_record_count_in_folio: Optional[int] = None
          self.finished_record_count_in_folio: Optional[int] = None

      def do_work(self):  # noqa: C901
-         with open(
-             self.folder_structure.failed_recs_path, "w", encoding="utf-8"
-         ) as failed_recs_file:
-             self.get_starting_record_count()
-             try:
-                 batch = []
-                 for idx, file_def in enumerate(self.task_configuration.files):  # noqa: B007
-                     path = self.folder_structure.results_folder / file_def.file_name
-                     with open(path) as rows:
-                         logging.info("Running %s", path)
-                         last_row = ""
-                         for self.processed, row in enumerate(rows, start=1):
-                             last_row = row
-                             if row.strip():
-                                 try:
-                                     if self.task_configuration.object_type == "Extradata":
-                                         self.post_extra_data(row, self.processed, failed_recs_file)
-                                     elif not self.api_info["is_batch"]:
-                                         self.post_single_records(
-                                             row, self.processed, failed_recs_file
+         with self.folio_client.get_folio_http_client() as httpx_client:
+             self.http_client = httpx_client
+             with open(
+                 self.folder_structure.failed_recs_path, "w", encoding="utf-8"
+             ) as failed_recs_file:
+                 self.get_starting_record_count()
+                 try:
+                     batch = []
+                     if self.task_configuration.object_type == "SRS":
+                         self.create_snapshot()
+                     for idx, file_def in enumerate(self.task_configuration.files):  # noqa: B007
+                         path = self.folder_structure.results_folder / file_def.file_name
+                         with open(path) as rows:
+                             logging.info("Running %s", path)
+                             last_row = ""
+                             for self.processed, row in enumerate(rows, start=1):
+                                 last_row = row
+                                 if row.strip():
+                                     try:
+                                         if self.task_configuration.object_type == "Extradata":
+                                             self.post_extra_data(
+                                                 row, self.processed, failed_recs_file
+                                             )
+                                         elif not self.api_info["is_batch"]:
+                                             self.post_single_records(
+                                                 row, self.processed, failed_recs_file
+                                             )
+                                         else:
+                                             batch = self.post_record_batch(
+                                                 batch, failed_recs_file, row
+                                             )
+                                     except UnicodeDecodeError as unicode_error:
+                                         self.handle_unicode_error(unicode_error, last_row)
+                                     except TransformationProcessError as tpe:
+                                         self.handle_generic_exception(
+                                             tpe,
+                                             last_row,
+                                             batch,
+                                             self.processed,
+                                             failed_recs_file,
                                          )
-                                 else:
-                                     batch = self.post_record_batch(
-                                         batch, failed_recs_file, row
+                                         batch = []
+                                         raise
+                                     except TransformationRecordFailedError as exception:
+                                         self.handle_generic_exception(
+                                             exception,
+                                             last_row,
+                                             batch,
+                                             self.processed,
+                                             failed_recs_file,
                                          )
-                             except UnicodeDecodeError as unicode_error:
-                                 self.handle_unicode_error(unicode_error, last_row)
-                             except TransformationProcessError as tpe:
-                                 self.handle_generic_exception(
-                                     tpe,
-                                     last_row,
-                                     batch,
-                                     self.processed,
-                                     failed_recs_file,
-                                 )
-                                 batch = []
-                                 raise
-                             except TransformationRecordFailedError as exception:
-                                 self.handle_generic_exception(
-                                     exception,
-                                     last_row,
-                                     batch,
-                                     self.processed,
-                                     failed_recs_file,
-                                 )
-                                 batch = []
-             except (FileNotFoundError, PermissionError) as ose:
-                 logging.error("Error reading file: %s", ose)
-
-             except Exception as ee:
-                 if "idx" in locals() and self.task_configuration.files[idx:]:
-                     for file_def in self.task_configuration.files[idx:]:
-                         path = self.folder_structure.results_folder / file_def.file_name
+                                         batch = []
+                 except (FileNotFoundError, PermissionError) as ose:
+                     logging.error("Error reading file: %s", ose)
+
+                 except Exception as ee:
+                     if "idx" in locals() and self.task_configuration.files[idx:]:
+                         for file_def in self.task_configuration.files[idx:]:
+                             path = self.folder_structure.results_folder / file_def.file_name
+                             try:
+                                 with open(path, "r") as failed_file:
+                                     failed_file.seek(self.processed)
+                                     failed_recs_file.write(failed_file.read())
+                                 self.processed = 0
+                             except (FileNotFoundError, PermissionError) as ose:
+                                 logging.error("Error reading file: %s", ose)
+                     raise ee
+                 finally:
+                     if self.task_configuration.object_type != "Extradata" and any(batch):
                          try:
-                             with open(path, "r") as failed_file:
-                                 failed_file.seek(self.processed)
-                                 failed_recs_file.write(failed_file.read())
-                             self.processed = 0
-                         except (FileNotFoundError, PermissionError) as ose:
-                             logging.error("Error reading file: %s", ose)
-                 raise ee
-             finally:
-                 if self.task_configuration.object_type != "Extradata" and any(batch):
-                     try:
-                         self.post_batch(batch, failed_recs_file, self.processed)
-                     except Exception as exception:
-                         self.handle_generic_exception(
-                             exception, last_row, batch, self.processed, failed_recs_file
-                         )
-                 logging.info("Done posting %s records. ", self.processed)
+                             self.post_batch(batch, failed_recs_file, self.processed)
+                         except Exception as exception:
+                             self.handle_generic_exception(
+                                 exception, last_row, batch, self.processed, failed_recs_file
+                             )
+                     logging.info("Done posting %s records. ", self.processed)
+                     if self.task_configuration.object_type == "SRS":
+                         self.commit_snapshot()

      @staticmethod
      def set_consortium_source(json_rec):
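Note: the reworked do_work wraps the entire posting loop in one shared httpx client and, for the new "SRS" object type, brackets it with a source-record-storage snapshot: create_snapshot() runs before the first file is read and commit_snapshot() runs in the finally block. A minimal runnable sketch of that ordering, with the poster internals stubbed out (run_srs_job and post_batch are illustrative stand-ins, not package API):

    # Sketch: create snapshot -> stamp and post records -> commit snapshot.
    from uuid import uuid4


    def run_srs_job(records, post_batch):
        snapshot_id = str(uuid4())
        print(f"create snapshot {snapshot_id} (status PARSING_IN_PROGRESS)")
        try:
            batch = []
            for rec in records:
                rec["snapshotId"] = snapshot_id  # every SRS record points at the job's snapshot
                batch.append(rec)
            post_batch(batch)
        finally:
            print(f"commit snapshot {snapshot_id} (status COMMITTED)")


    run_srs_job([{"id": "rec-1"}, {"id": "rec-2"}], lambda b: print(f"posted {len(b)} records"))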
@@ -358,26 +366,26 @@ class BatchPoster(MigrationTaskBase):
          fetch_batch_size = 90
          fetch_tasks = []
          existing_records = {}
-
-         for i in range(0, len(batch), fetch_batch_size):
-             batch_slice = batch[i : i + fetch_batch_size]
-             fetch_tasks.append(
-                 self.get_with_retry(
-                     query_api,
-                     params={
-                         "query": (
-                             f"id==({' OR '.join([r['id'] for r in batch_slice if 'id' in r])})"
-                         ),
-                         "limit": fetch_batch_size,
-                     },
+         async with httpx.AsyncClient(base_url=self.folio_client.gateway_url) as client:
+             for i in range(0, len(batch), fetch_batch_size):
+                 batch_slice = batch[i : i + fetch_batch_size]
+                 fetch_tasks.append(
+                     self.get_with_retry(
+                         client,
+                         query_api,
+                         params={
+                             "query": (
+                                 f"id==({' OR '.join([r['id'] for r in batch_slice if 'id' in r])})"
+                             ),
+                             "limit": fetch_batch_size,
+                         },
+                     )
                  )
-             )
-
-         responses = await asyncio.gather(*fetch_tasks)

-         for response in responses:
-             self.collect_existing_records_for_upsert(object_type, response, existing_records)
+             responses = await asyncio.gather(*fetch_tasks)

+             for response in responses:
+                 self.collect_existing_records_for_upsert(object_type, response, existing_records)
          for record in batch:
              if record["id"] in existing_records:
                  self.prepare_record_for_upsert(record, existing_records[record["id"]])
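Note: set_version now opens its own httpx.AsyncClient and fetches existing records in slices of 90 ids, each slice becoming one CQL id==(...) query gathered concurrently. A standalone sketch of the query-building step (build_id_queries is a hypothetical helper, not part of the package):

    # Sketch: slice a batch into CQL "id==(a OR b ...)" queries, 90 ids per request.
    def build_id_queries(batch, fetch_batch_size=90):
        queries = []
        for i in range(0, len(batch), fetch_batch_size):
            batch_slice = batch[i : i + fetch_batch_size]
            ids = [r["id"] for r in batch_slice if "id" in r]
            queries.append(f"id==({' OR '.join(ids)})")
        return queries


    print(build_id_queries([{"id": "a"}, {"id": "b"}], fetch_batch_size=1))
    # ['id==(a)', 'id==(b)']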
@@ -413,18 +421,18 @@ class BatchPoster(MigrationTaskBase):

      @staticmethod
      def collect_existing_records_for_upsert(
-         object_type: str, response_json: dict, existing_records: dict
+         object_type: str, response: httpx.Response, existing_records: dict
      ):
-         """
-         Collects existing records from API response into existing_records dict.
-
-         Args:
-             object_type: The key in response containing the records array
-             response_json: Parsed JSON response from API
-             existing_records: Dict to populate with {record_id: record_data}
-         """
-         for record in response_json.get(object_type, []):
-             existing_records[record["id"]] = record
+         if response.status_code == 200:
+             response_json = response.json()
+             for record in response_json[object_type]:
+                 existing_records[record["id"]] = record
+         else:
+             logging.error(
+                 "Failed to fetch current records. HTTP %s\t%s",
+                 response.status_code,
+                 response.text,
+             )

      def handle_upsert_for_statistical_codes(self, updates: dict, keep_existing: dict):
          if not self.task_configuration.preserve_statistical_codes:
@@ -502,66 +510,31 @@ class BatchPoster(MigrationTaskBase):
          updates.update(keep_new)
          new_record.update(updates)

-     async def get_with_retry(self, url: str, params=None):
-         """
-         Wrapper around folio_get_async with selective retry logic.
-
-         Retries on:
-         - Connection errors (FolioConnectionError): Always retry
-         - Server errors (5xx): Transient failures
-         - Rate limiting (429): Too many requests
-
-         Does NOT retry on:
-         - Client errors (4xx except 429): Bad request, won't succeed on retry
-         """
+     async def get_with_retry(self, client: httpx.AsyncClient, url: str, params=None):
          if params is None:
              params = {}
          retries = 3
-
          for attempt in range(retries):
              try:
-                 return await self.folio_client.folio_get_async(url, query_params=params)
-
-             except folioclient.FolioConnectionError as e:
-                 # Network/connection errors - always retry
+                 response = await client.get(
+                     url, params=params, headers=self.folio_client.okapi_headers
+                 )
+                 response.raise_for_status()
+                 return response
+             except httpx.HTTPError as e:
                  if attempt < retries - 1:
-                     wait_time = 2**attempt
-                     logging.warning(
-                         f"Connection error, retrying in {wait_time}s "
-                         f"(attempt {attempt + 1}/{retries}): {e}"
-                     )
-                     await asyncio.sleep(wait_time)
-                 else:
-                     logging.error(f"Connection failed after {retries} attempts: {e}")
-                     raise
-
-             except folioclient.FolioHTTPError as e:
-                 # HTTP errors - selective retry based on status code
-                 status_code = e.response.status_code
-                 should_retry = status_code >= 500 or status_code == 429
-
-                 if should_retry and attempt < retries - 1:
-                     # Longer wait for rate limiting
-                     wait_time = 5 if status_code == 429 else 2**attempt
-                     logging.warning(
-                         f"HTTP {status_code} error, retrying in {wait_time}s "
-                         f"(attempt {attempt + 1}/{retries}): {e}"
-                     )
-                     await asyncio.sleep(wait_time)
+                     logging.warning(f"Retrying due to {e}")
+                     await asyncio.sleep(2**attempt)
                  else:
-                     # Either not retryable or out of attempts
-                     if should_retry:
-                         logging.error(
-                             f"HTTP {status_code} error persisted after {retries} attempts: {e}"
-                         )
-                     else:
-                         logging.error(f"HTTP {status_code} error (not retryable): {e}")
+                     logging.error(f"Failed to connect after {retries} attempts: {e}")
                      raise

      def post_record_batch(self, batch, failed_recs_file, row):
          json_rec = json.loads(row.split("\t")[-1])
          if self.task_configuration.object_type == "ShadowInstances":
              self.set_consortium_source(json_rec)
+         if self.task_configuration.object_type == "SRS":
+             json_rec["snapshotId"] = self.snapshot_id
          if self.processed == 1:
              logging.info(json.dumps(json_rec, indent=True))
          batch.append(json_rec)
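Note: the 1.10.0b2 side collapses the selective retry policy (connection errors always; only 5xx and 429 among HTTP errors) into a single httpx.HTTPError handler with plain exponential backoff. A runnable sketch of that simpler loop, with the network call replaced by a stand-in and the sleep scaled down (get_with_retry_sketch and flaky are illustrative, not package API):

    import asyncio


    async def get_with_retry_sketch(fetch, retries=3):
        for attempt in range(retries):
            try:
                return await fetch()
            except Exception as e:  # the real code catches httpx.HTTPError
                if attempt < retries - 1:
                    # real code: await asyncio.sleep(2**attempt); scaled down here
                    await asyncio.sleep(0.01 * 2**attempt)
                else:
                    raise


    async def main():
        calls = {"n": 0}

        async def flaky():
            calls["n"] += 1
            if calls["n"] < 3:
                raise RuntimeError("boom")
            return "ok"

        print(await get_with_retry_sketch(flaky))  # succeeds on the third attempt


    asyncio.run(main())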
@@ -572,29 +545,22 @@ class BatchPoster(MigrationTaskBase):

      def post_extra_data(self, row: str, num_records: int, failed_recs_file):
          (object_name, data) = row.split("\t")
-         url = self.get_extradata_endpoint(self.task_configuration, object_name, data)
+         endpoint = self.get_extradata_endpoint(self.task_configuration, object_name, data)
+         url = f"{self.folio_client.gateway_url}/{endpoint}"
          body = data
-         try:
-             _ = self.folio_client.folio_post(url, payload=body)
+         response = self.post_objects(url, body)
+         if response.status_code == 201:
              self.num_posted += 1
-         except folioclient.FolioHTTPError as fhe:
-             if fhe.response.status_code == 422:
-                 self.num_failures += 1
-                 error_msg = json.loads(fhe.response.text)["errors"][0]["message"]
-                 logging.error(
-                     "Row %s\tHTTP %s\t %s", num_records, fhe.response.status_code, error_msg
-                 )
-                 if (
-                     "id value already exists"
-                     not in json.loads(fhe.response.text)["errors"][0]["message"]
-                 ):
-                     failed_recs_file.write(row)
-             else:
-                 self.num_failures += 1
-                 logging.error(
-                     "Row %s\tHTTP %s\t%s", num_records, fhe.response.status_code, fhe.response.text
-                 )
+         elif response.status_code == 422:
+             self.num_failures += 1
+             error_msg = json.loads(response.text)["errors"][0]["message"]
+             logging.error("Row %s\tHTTP %s\t %s", num_records, response.status_code, error_msg)
+             if "id value already exists" not in json.loads(response.text)["errors"][0]["message"]:
                  failed_recs_file.write(row)
+         else:
+             self.num_failures += 1
+             logging.error("Row %s\tHTTP %s\t%s", num_records, response.status_code, response.text)
+             failed_recs_file.write(row)
          if num_records % 50 == 0:
              logging.info(
                  "%s records posted successfully. %s failed",
@@ -634,41 +600,41 @@ class BatchPoster(MigrationTaskBase):
      def post_single_records(self, row: str, num_records: int, failed_recs_file):
          if self.api_info["is_batch"]:
              raise TypeError("This record type supports batch processing, use post_batch method")
-         url = self.api_info.get("api_endpoint")
-         try:
-             _ = self.folio_client.folio_post(url, payload=row)
+         api_endpoint = self.api_info.get("api_endpoint")
+         url = f"{self.folio_client.gateway_url}{api_endpoint}"
+         response = self.post_objects(url, row)
+         if response.status_code == 201:
              self.num_posted += 1
-         except folioclient.FolioHTTPError as fhe:
-             if fhe.response.status_code == 422:
-                 self.num_failures += 1
-                 error_msg = json.loads(fhe.response.text)["errors"][0]["message"]
-                 logging.error(
-                     "Row %s\tHTTP %s\t %s", num_records, fhe.response.status_code, error_msg
-                 )
-                 if (
-                     "id value already exists"
-                     not in json.loads(fhe.response.text)["errors"][0]["message"]
-                 ):
-                     failed_recs_file.write(row)
-             else:
-                 self.num_failures += 1
-                 logging.error(
-                     "Row %s\tHTTP %s\t%s",
-                     num_records,
-                     fhe.response.status_code,
-                     fhe.response.text,
-                 )
+         elif response.status_code == 422:
+             self.num_failures += 1
+             error_msg = json.loads(response.text)["errors"][0]["message"]
+             logging.error("Row %s\tHTTP %s\t %s", num_records, response.status_code, error_msg)
+             if "id value already exists" not in json.loads(response.text)["errors"][0]["message"]:
                  failed_recs_file.write(row)
-         if num_records % 50 == 0:
-             logging.info(
-                 "%s records posted successfully. %s failed",
-                 self.num_posted,
-                 self.num_failures,
-             )
+         else:
+             self.num_failures += 1
+             logging.error("Row %s\tHTTP %s\t%s", num_records, response.status_code, response.text)
+             failed_recs_file.write(row)
+         if num_records % 50 == 0:
+             logging.info(
+                 "%s records posted successfully. %s failed",
+                 self.num_posted,
+                 self.num_failures,
+             )
+
+     def post_objects(self, url, body):
+         if self.http_client and not self.http_client.is_closed:
+             return self.http_client.post(
+                 url, data=body.encode("utf-8"), headers=self.folio_client.okapi_headers
+             )
+         else:
+             return httpx.post(
+                 url, headers=self.okapi_headers, data=body.encode("utf-8"), timeout=None
+             )

      def handle_generic_exception(self, exception, last_row, batch, num_records, failed_recs_file):
          logging.error("%s", exception)
-         self.migration_report.add("Details", i18n_t("Generic exceptions (see log for details)"))
+         self.migration_report.add("Details", i18n.t("Generic exceptions (see log for details)"))
          # logging.error("Failed row: %s", last_row)
          self.failed_batches += 1
          self.num_failures += len(batch)
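Note: post_objects is the new shared posting primitive used by post_extra_data and post_single_records: it reuses the long-lived client opened in do_work when that client is still open, and falls back to a one-shot httpx.post otherwise. A self-contained sketch of the pattern (post_with_fallback, url, and headers are placeholders; calling it requires a reachable server):

    import httpx


    def post_with_fallback(http_client, url, body, headers):
        # Prefer the shared connection-pooled client while it is open.
        if http_client and not http_client.is_closed:
            return http_client.post(url, data=body.encode("utf-8"), headers=headers)
        # Fall back to a one-shot request with no timeout, as the diff does.
        return httpx.post(url, data=body.encode("utf-8"), headers=headers, timeout=None)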
@@ -681,7 +647,7 @@ class BatchPoster(MigrationTaskBase):
          sys.exit(1)

      def handle_unicode_error(self, unicode_error, last_row):
-         self.migration_report.add("Details", i18n_t("Encoding errors"))
+         self.migration_report.add("Details", i18n.t("Encoding errors"))
          logging.info("=========ERROR==============")
          logging.info(
              "%s Posting failed. Encoding error reading file",
@@ -696,7 +662,7 @@ class BatchPoster(MigrationTaskBase):
          traceback.logging.info_exc()  # type: ignore
          logging.info("=======================")

-     def post_batch(self, batch, failed_recs_file, num_records):
+     def post_batch(self, batch, failed_recs_file, num_records, recursion_depth=0):
          if self.query_params.get("upsert", False) and self.api_info.get("query_endpoint", ""):
              self.set_version(batch, self.api_info["query_endpoint"], self.api_info["object_name"])
          response = self.do_post(batch)
@@ -767,6 +733,24 @@ class BatchPoster(MigrationTaskBase):
              # Likely a json parsing error
              logging.error(response.text)
              raise TransformationProcessError("", "HTTP 400. Something is wrong. Quitting")
+         elif self.task_configuration.object_type == "SRS" and response.status_code >= 500:
+             logging.info(
+                 "Post failed. Size: %s Waiting 30s until reposting. Number of tries: %s of 5",
+                 get_req_size(response),
+                 recursion_depth,
+             )
+             logging.info(response.text)
+             time.sleep(30)
+             if recursion_depth > 4:
+                 raise TransformationRecordFailedError(
+                     "",
+                     f"HTTP {response.status_code}\t"
+                     f"Request size: {get_req_size(response)}"
+                     f"{datetime.now(timezone.utc).isoformat()}\n",
+                     response.text,
+                 )
+             else:
+                 self.post_batch(batch, failed_recs_file, num_records, recursion_depth + 1)
          elif (
              response.status_code == 413 and "DB_ALLOW_SUPPRESS_OPTIMISTIC_LOCKING" in response.text
          ):
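Note: for SRS batches, a 5xx response now triggers a 30-second wait and a recursive re-post, capped at five tries via the new recursion_depth parameter. A runnable sketch with shortened timing and the poster call replaced by a stub (post_batch_sketch is illustrative, not the package API):

    import time


    def post_batch_sketch(attempt_post, recursion_depth=0, wait_seconds=0.01):
        status = attempt_post()
        if status >= 500:
            time.sleep(wait_seconds)  # the real code sleeps 30s
            if recursion_depth > 4:
                raise RuntimeError(f"HTTP {status} after 5 tries")
            return post_batch_sketch(attempt_post, recursion_depth + 1, wait_seconds)
        return status


    attempts = iter([503, 503, 201])
    print(post_batch_sketch(lambda: next(attempts)))  # 201 on the third try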
@@ -791,36 +775,50 @@ class BatchPoster(MigrationTaskBase):
          )

      def do_post(self, batch):
-         with self.folio_client.get_folio_http_client() as http_client:
-             url = self.api_info["api_endpoint"]
-             if self.api_info["object_name"] == "users":
-                 payload = {self.api_info["object_name"]: list(batch), "totalRecords": len(batch)}
-             elif self.api_info["total_records"]:
-                 payload = {"records": list(batch), "totalRecords": len(batch)}
-             else:
-                 payload = {self.api_info["object_name"]: batch}
-             return http_client.post(
+         path = self.api_info["api_endpoint"]
+         url = self.folio_client.gateway_url + path
+         if self.api_info["object_name"] == "users":
+             payload = {self.api_info["object_name"]: list(batch), "totalRecords": len(batch)}
+         elif self.api_info["total_records"]:
+             payload = {"records": list(batch), "totalRecords": len(batch)}
+         else:
+             payload = {self.api_info["object_name"]: batch}
+         if self.http_client and not self.http_client.is_closed:
+             return self.http_client.post(
+                 url,
+                 json=payload,
+                 headers=self.folio_client.okapi_headers,
+                 params=self.query_params,
+             )
+         else:
+             return httpx.post(
                  url,
+                 headers=self.okapi_headers,
                  json=payload,
                  params=self.query_params,
+                 timeout=None,
              )

      def get_current_record_count_in_folio(self):
          if "query_endpoint" in self.api_info:
-             url = self.api_info["query_endpoint"]
+             url = f"{self.folio_client.gateway_url}{self.api_info['query_endpoint']}"
              query_params = {"query": "cql.allRecords=1", "limit": 0}
-             try:
-                 res = self.folio_client.folio_get(url, query_params=query_params)
-                 return res["totalRecords"]
-             except folioclient.FolioHTTPError as fhe:
-                 logging.error(
-                     "Failed to get current record count. HTTP %s", fhe.response.status_code
+             if self.http_client and not self.http_client.is_closed:
+                 res = self.http_client.get(
+                     url, headers=self.folio_client.okapi_headers, params=query_params
                  )
+             else:
+                 res = httpx.get(url, headers=self.okapi_headers, params=query_params, timeout=None)
+             try:
+                 res.raise_for_status()
+                 return res.json()["totalRecords"]
+             except httpx.HTTPStatusError:
+                 logging.error("Failed to get current record count. HTTP %s", res.status_code)
                  return 0
              except KeyError:
                  logging.error(
                      "Failed to get current record count. "
-                     f"No 'totalRecords' in response: {json.dumps(res, indent=2)}"
+                     f"No 'totalRecords' in response: {res.json()}"
                  )
                  return 0
          else:
@@ -852,6 +850,8 @@ class BatchPoster(MigrationTaskBase):
      def wrap_up(self):
          logging.info("Done. Wrapping up")
          self.extradata_writer.flush()
+         if self.task_configuration.object_type == "SRS":
+             self.commit_snapshot()
          if self.task_configuration.object_type != "Extradata":
              logging.info(
                  (
@@ -938,6 +938,69 @@ class BatchPoster(MigrationTaskBase):
              str(self.folder_structure.failed_recs_path),
          )

+     def create_snapshot(self):
+         snapshot = {
+             "jobExecutionId": self.snapshot_id,
+             "status": "PARSING_IN_PROGRESS",
+             "processingStartedDate": datetime.now(timezone.utc).isoformat(timespec="milliseconds"),
+         }
+         try:
+             url = f"{self.folio_client.gateway_url}/source-storage/snapshots"
+             if self.http_client and not self.http_client.is_closed:
+                 res = self.http_client.post(
+                     url, json=snapshot, headers=self.folio_client.okapi_headers
+                 )
+             else:
+                 res = httpx.post(url, headers=self.okapi_headers, json=snapshot, timeout=None)
+             res.raise_for_status()
+             logging.info("Posted Snapshot to FOLIO: %s", json.dumps(snapshot, indent=4))
+             get_url = (
+                 f"{self.folio_client.gateway_url}/source-storage/snapshots/{self.snapshot_id}"
+             )
+             got = False
+             while not got:
+                 logging.info("Sleeping while waiting for the snapshot to get created")
+                 time.sleep(5)
+                 if self.http_client and not self.http_client.is_closed:
+                     res = self.http_client.get(get_url, headers=self.folio_client.okapi_headers)
+                 else:
+                     res = httpx.get(get_url, headers=self.okapi_headers, timeout=None)
+                 if res.status_code == 200:
+                     got = True
+                 else:
+                     logging.info(res.status_code)
+         except httpx.HTTPStatusError as exc:
+             logging.exception("HTTP error occurred while posting the snapshot: %s", exc)
+             sys.exit(1)
+         except Exception as exc:
+             logging.exception("Could not post the snapshot: %s", exc)
+             sys.exit(1)
+
+     def commit_snapshot(self):
+         snapshot = {"jobExecutionId": self.snapshot_id, "status": "COMMITTED"}
+         try:
+             url = f"{self.folio_client.gateway_url}/source-storage/snapshots/{self.snapshot_id}"
+             if self.http_client and not self.http_client.is_closed:
+                 res = self.http_client.put(
+                     url, json=snapshot, headers=self.folio_client.okapi_headers
+                 )
+             else:
+                 res = httpx.put(url, headers=self.okapi_headers, json=snapshot, timeout=None)
+             res.raise_for_status()
+             logging.info("Posted Committed snapshot to FOLIO: %s", json.dumps(snapshot, indent=4))
+         except httpx.HTTPStatusError as exc:
+             logging.exception("HTTP error occurred while posting the snapshot: %s", exc)
+             sys.exit(1)
+         except Exception as exc:
+             logging.exception(
+                 "Could not commit snapshot with id %s. Post this to /source-storage/snapshots/%s:",
+                 self.snapshot_id,
+                 self.snapshot_id,
+                 exc,
+             )
+             logging.info("%s", json.dumps(snapshot, indent=4))
+             sys.exit(1)
+

  def get_api_info(object_type: str, use_safe: bool = True):
      choices = {
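Note: the snapshot endpoints above take two payload shapes: PARSING_IN_PROGRESS on create and COMMITTED on commit, with jobExecutionId set to the poster's snapshot_id. For reference, the payloads as built in this diff (runnable; snapshot_id here is a freshly generated UUID):

    import json
    from datetime import datetime, timezone
    from uuid import uuid4

    snapshot_id = str(uuid4())
    create_payload = {
        "jobExecutionId": snapshot_id,
        "status": "PARSING_IN_PROGRESS",
        "processingStartedDate": datetime.now(timezone.utc).isoformat(timespec="milliseconds"),
    }
    commit_payload = {"jobExecutionId": snapshot_id, "status": "COMMITTED"}
    print(json.dumps(create_payload, indent=2))
    print(json.dumps(commit_payload, indent=2))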
@@ -999,6 +1062,22 @@ def get_api_info(object_type: str, use_safe: bool = True):
              "addSnapshotId": False,
              "supports_upsert": True,
          },
+         "Authorities": {
+             "object_name": "",
+             "api_endpoint": "/authority-storage/authorities",
+             "is_batch": False,
+             "total_records": False,
+             "addSnapshotId": False,
+             "supports_upsert": False,
+         },
+         "SRS": {
+             "object_name": "records",
+             "api_endpoint": "/source-storage/batch/records",
+             "is_batch": True,
+             "total_records": True,
+             "addSnapshotId": True,
+             "supports_upsert": False,
+         },
          "Users": {
              "object_name": "users",
              "api_endpoint": "/user-import",
@@ -1045,7 +1124,7 @@ def get_human_readable(size, precision=2):
      return "%.*f%s" % (precision, size, suffixes[suffix_index])


- def get_req_size(response: "Response"):
+ def get_req_size(response: httpx.Response):
      size = response.request.method
      size += str(response.request.url)
      size += "\r\n".join(f"{k}{v}" for k, v in response.request.headers.items())