folio-migration-tools 1.9.10__py3-none-any.whl → 1.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. folio_migration_tools/__init__.py +3 -4
  2. folio_migration_tools/__main__.py +53 -31
  3. folio_migration_tools/circulation_helper.py +118 -108
  4. folio_migration_tools/custom_dict.py +2 -2
  5. folio_migration_tools/custom_exceptions.py +4 -5
  6. folio_migration_tools/folder_structure.py +17 -7
  7. folio_migration_tools/helper.py +8 -7
  8. folio_migration_tools/holdings_helper.py +4 -3
  9. folio_migration_tools/i18n_cache.py +79 -0
  10. folio_migration_tools/library_configuration.py +77 -37
  11. folio_migration_tools/mapper_base.py +45 -31
  12. folio_migration_tools/mapping_file_transformation/courses_mapper.py +1 -1
  13. folio_migration_tools/mapping_file_transformation/holdings_mapper.py +7 -3
  14. folio_migration_tools/mapping_file_transformation/item_mapper.py +13 -26
  15. folio_migration_tools/mapping_file_transformation/manual_fee_fines_mapper.py +1 -2
  16. folio_migration_tools/mapping_file_transformation/mapping_file_mapper_base.py +13 -11
  17. folio_migration_tools/mapping_file_transformation/order_mapper.py +6 -5
  18. folio_migration_tools/mapping_file_transformation/organization_mapper.py +3 -3
  19. folio_migration_tools/mapping_file_transformation/user_mapper.py +47 -28
  20. folio_migration_tools/marc_rules_transformation/conditions.py +82 -97
  21. folio_migration_tools/marc_rules_transformation/holdings_statementsparser.py +13 -5
  22. folio_migration_tools/marc_rules_transformation/hrid_handler.py +3 -2
  23. folio_migration_tools/marc_rules_transformation/marc_file_processor.py +26 -24
  24. folio_migration_tools/marc_rules_transformation/rules_mapper_base.py +56 -51
  25. folio_migration_tools/marc_rules_transformation/rules_mapper_bibs.py +28 -17
  26. folio_migration_tools/marc_rules_transformation/rules_mapper_holdings.py +68 -37
  27. folio_migration_tools/migration_report.py +18 -7
  28. folio_migration_tools/migration_tasks/batch_poster.py +285 -354
  29. folio_migration_tools/migration_tasks/bibs_transformer.py +14 -9
  30. folio_migration_tools/migration_tasks/courses_migrator.py +2 -3
  31. folio_migration_tools/migration_tasks/holdings_csv_transformer.py +23 -24
  32. folio_migration_tools/migration_tasks/holdings_marc_transformer.py +14 -24
  33. folio_migration_tools/migration_tasks/items_transformer.py +23 -34
  34. folio_migration_tools/migration_tasks/loans_migrator.py +67 -144
  35. folio_migration_tools/migration_tasks/manual_fee_fines_transformer.py +3 -3
  36. folio_migration_tools/migration_tasks/migration_task_base.py +47 -60
  37. folio_migration_tools/migration_tasks/orders_transformer.py +25 -42
  38. folio_migration_tools/migration_tasks/organization_transformer.py +9 -18
  39. folio_migration_tools/migration_tasks/requests_migrator.py +21 -24
  40. folio_migration_tools/migration_tasks/reserves_migrator.py +6 -5
  41. folio_migration_tools/migration_tasks/user_transformer.py +25 -20
  42. folio_migration_tools/task_configuration.py +6 -7
  43. folio_migration_tools/transaction_migration/legacy_loan.py +15 -27
  44. folio_migration_tools/transaction_migration/legacy_request.py +1 -1
  45. folio_migration_tools/translations/en.json +0 -7
  46. {folio_migration_tools-1.9.10.dist-info → folio_migration_tools-1.10.0.dist-info}/METADATA +19 -28
  47. folio_migration_tools-1.10.0.dist-info/RECORD +63 -0
  48. folio_migration_tools-1.10.0.dist-info/WHEEL +4 -0
  49. folio_migration_tools-1.10.0.dist-info/entry_points.txt +3 -0
  50. folio_migration_tools/marc_rules_transformation/rules_mapper_authorities.py +0 -241
  51. folio_migration_tools/migration_tasks/authority_transformer.py +0 -119
  52. folio_migration_tools/test_infrastructure/__init__.py +0 -0
  53. folio_migration_tools/test_infrastructure/mocked_classes.py +0 -406
  54. folio_migration_tools-1.9.10.dist-info/RECORD +0 -67
  55. folio_migration_tools-1.9.10.dist-info/WHEEL +0 -4
  56. folio_migration_tools-1.9.10.dist-info/entry_points.txt +0 -3
  57. folio_migration_tools-1.9.10.dist-info/licenses/LICENSE +0 -21
@@ -4,14 +4,15 @@ import json
  import logging
  import re
  import sys
- import time
  import traceback
  from datetime import datetime, timezone
- from typing import Annotated, List, Optional
+ from typing import TYPE_CHECKING, Annotated, List, Optional
  from uuid import uuid4

- import httpx
- import i18n
+ import folioclient
+
+ if TYPE_CHECKING:
+ from httpx import Response
  from folio_uuid.folio_namespaces import FOLIONamespaces
  from pydantic import Field

@@ -19,6 +20,7 @@ from folio_migration_tools.custom_exceptions import (
  TransformationProcessError,
  TransformationRecordFailedError,
  )
+ from folio_migration_tools.i18n_cache import i18n_t
  from folio_migration_tools.library_configuration import (
  FileDefinition,
  LibraryConfiguration,
@@ -73,7 +75,7 @@ class BatchPoster(MigrationTaskBase):
  description=(
  "The type of object being migrated"
  "Examples of possible values: "
- "'Extradata', 'SRS', Instances', 'Holdings', 'Items'"
+ "'Extradata', 'Instances', 'Holdings', 'Items'"
  ),
  ),
  ]
@@ -182,19 +184,27 @@ class BatchPoster(MigrationTaskBase):
  ),
  ),
  ] = True
- patch_existing_records: Annotated[bool, Field(
- title="Patch existing records",
- description=(
- "Toggles whether or not to patch existing records "
- "during the upsert process. Defaults to False"
+ patch_existing_records: Annotated[
+ bool,
+ Field(
+ title="Patch existing records",
+ description=(
+ "Toggles whether or not to patch existing records "
+ "during the upsert process. Defaults to False"
+ ),
  ),
- )] = False
- patch_paths: Annotated[List[str], Field(
- title="Patch paths",
- description=(
- "A list of fields in JSON Path notation to patch during the upsert process (leave off the $). If empty, all fields will be patched. Examples: ['statisticalCodeIds', 'administrativeNotes', 'instanceStatusId']"
+ ] = False
+ patch_paths: Annotated[
+ List[str],
+ Field(
+ title="Patch paths",
+ description=(
+ "A list of fields in JSON Path notation to patch during the upsert process "
+ "(leave off the $). If empty, all fields will be patched. Examples: "
+ "['statisticalCodeIds', 'administrativeNotes', 'instanceStatusId']"
+ ),
  ),
- )] = []
+ ] = []

  task_configuration: TaskConfiguration

@@ -223,7 +233,8 @@ class BatchPoster(MigrationTaskBase):
  self.query_params["upsert"] = self.task_configuration.upsert
  elif self.task_configuration.upsert and not self.api_info["supports_upsert"]:
  logging.info(
- "Upsert is not supported for this object type. Query parameter will not be set.")
+ "Upsert is not supported for this object type. Query parameter will not be set."
+ )
  self.snapshot_id = str(uuid4())
  self.failed_objects: list = []
  self.batch_size = self.task_configuration.batch_size
@@ -236,97 +247,87 @@ class BatchPoster(MigrationTaskBase):
  self.failed_fields: set = set()
  self.num_failures = 0
  self.num_posted = 0
- self.okapi_headers = self.folio_client.okapi_headers
- self.http_client = None
  self.starting_record_count_in_folio: Optional[int] = None
  self.finished_record_count_in_folio: Optional[int] = None

- def do_work(self):
- with self.folio_client.get_folio_http_client() as httpx_client:
- self.http_client = httpx_client
- with open(
- self.folder_structure.failed_recs_path, "w", encoding='utf-8'
- ) as failed_recs_file:
- self.get_starting_record_count()
- try:
- batch = []
- if self.task_configuration.object_type == "SRS":
- self.create_snapshot()
- for idx, file_def in enumerate(self.task_configuration.files):
- path = self.folder_structure.results_folder / file_def.file_name
- with open(path) as rows:
- logging.info("Running %s", path)
- last_row = ""
- for self.processed, row in enumerate(rows, start=1):
- last_row = row
- if row.strip():
- try:
- if self.task_configuration.object_type == "Extradata":
- self.post_extra_data(
- row, self.processed, failed_recs_file
- )
- elif not self.api_info["is_batch"]:
- self.post_single_records(
- row, self.processed, failed_recs_file
- )
- else:
- batch = self.post_record_batch(
- batch, failed_recs_file, row
- )
- except UnicodeDecodeError as unicode_error:
- self.handle_unicode_error(unicode_error, last_row)
- except TransformationProcessError as tpe:
- self.handle_generic_exception(
- tpe,
- last_row,
- batch,
- self.processed,
- failed_recs_file,
+ def do_work(self): # noqa: C901
+ with open(
+ self.folder_structure.failed_recs_path, "w", encoding="utf-8"
+ ) as failed_recs_file:
+ self.get_starting_record_count()
+ try:
+ batch = []
+ for idx, file_def in enumerate(self.task_configuration.files): # noqa: B007
+ path = self.folder_structure.results_folder / file_def.file_name
+ with open(path) as rows:
+ logging.info("Running %s", path)
+ last_row = ""
+ for self.processed, row in enumerate(rows, start=1):
+ last_row = row
+ if row.strip():
+ try:
+ if self.task_configuration.object_type == "Extradata":
+ self.post_extra_data(row, self.processed, failed_recs_file)
+ elif not self.api_info["is_batch"]:
+ self.post_single_records(
+ row, self.processed, failed_recs_file
  )
- batch = []
- raise
- except TransformationRecordFailedError as exception:
- self.handle_generic_exception(
- exception,
- last_row,
- batch,
- self.processed,
- failed_recs_file,
+ else:
+ batch = self.post_record_batch(
+ batch, failed_recs_file, row
  )
- batch = []
- except (FileNotFoundError, PermissionError) as ose:
- logging.error("Error reading file: %s", ose)
-
- except Exception as ee:
- if "idx" in locals() and self.task_configuration.files[idx:]:
- for file_def in self.task_configuration.files[idx:]:
- path = self.folder_structure.results_folder / file_def.file_name
- try:
- with open(path, "r") as failed_file:
- failed_file.seek(self.processed)
- failed_recs_file.write(failed_file.read())
- self.processed = 0
- except (FileNotFoundError, PermissionError) as ose:
- logging.error("Error reading file: %s", ose)
- raise ee
- finally:
- if self.task_configuration.object_type != "Extradata" and any(batch):
+ except UnicodeDecodeError as unicode_error:
+ self.handle_unicode_error(unicode_error, last_row)
+ except TransformationProcessError as tpe:
+ self.handle_generic_exception(
+ tpe,
+ last_row,
+ batch,
+ self.processed,
+ failed_recs_file,
+ )
+ batch = []
+ raise
+ except TransformationRecordFailedError as exception:
+ self.handle_generic_exception(
+ exception,
+ last_row,
+ batch,
+ self.processed,
+ failed_recs_file,
+ )
+ batch = []
+ except (FileNotFoundError, PermissionError) as ose:
+ logging.error("Error reading file: %s", ose)
+
+ except Exception as ee:
+ if "idx" in locals() and self.task_configuration.files[idx:]:
+ for file_def in self.task_configuration.files[idx:]:
+ path = self.folder_structure.results_folder / file_def.file_name
  try:
- self.post_batch(batch, failed_recs_file, self.processed)
- except Exception as exception:
- self.handle_generic_exception(
- exception, last_row, batch, self.processed, failed_recs_file
- )
- logging.info("Done posting %s records. ", self.processed)
- if self.task_configuration.object_type == "SRS":
- self.commit_snapshot()
+ with open(path, "r") as failed_file:
+ failed_file.seek(self.processed)
+ failed_recs_file.write(failed_file.read())
+ self.processed = 0
+ except (FileNotFoundError, PermissionError) as ose:
+ logging.error("Error reading file: %s", ose)
+ raise ee
+ finally:
+ if self.task_configuration.object_type != "Extradata" and any(batch):
+ try:
+ self.post_batch(batch, failed_recs_file, self.processed)
+ except Exception as exception:
+ self.handle_generic_exception(
+ exception, last_row, batch, self.processed, failed_recs_file
+ )
+ logging.info("Done posting %s records. ", self.processed)

  @staticmethod
  def set_consortium_source(json_rec):
- if json_rec['source'] == 'MARC':
- json_rec['source'] = 'CONSORTIUM-MARC'
- elif json_rec['source'] == 'FOLIO':
- json_rec['source'] = 'CONSORTIUM-FOLIO'
+ if json_rec["source"] == "MARC":
+ json_rec["source"] = "CONSORTIUM-MARC"
+ elif json_rec["source"] == "FOLIO":
+ json_rec["source"] = "CONSORTIUM-FOLIO"

  def set_version(self, batch, query_api, object_type) -> None:
  """
@@ -357,27 +358,26 @@ class BatchPoster(MigrationTaskBase):
  fetch_batch_size = 90
  fetch_tasks = []
  existing_records = {}
- async with httpx.AsyncClient(base_url=self.folio_client.gateway_url) as client:
- for i in range(0, len(batch), fetch_batch_size):
- batch_slice = batch[i:i + fetch_batch_size]
- fetch_tasks.append(
- self.get_with_retry(
- client,
- query_api,
- params={
- "query": (
- "id==("
- f"{' OR '.join([r['id'] for r in batch_slice if 'id' in r])})"
- ),
- "limit": fetch_batch_size
- },
- )
+
+ for i in range(0, len(batch), fetch_batch_size):
+ batch_slice = batch[i : i + fetch_batch_size]
+ fetch_tasks.append(
+ self.get_with_retry(
+ query_api,
+ params={
+ "query": (
+ f"id==({' OR '.join([r['id'] for r in batch_slice if 'id' in r])})"
+ ),
+ "limit": fetch_batch_size,
+ },
  )
+ )
+
+ responses = await asyncio.gather(*fetch_tasks)

- responses = await asyncio.gather(*fetch_tasks)
+ for response in responses:
+ self.collect_existing_records_for_upsert(object_type, response, existing_records)

- for response in responses:
- self.collect_existing_records_for_upsert(object_type, response, existing_records)
  for record in batch:
  if record["id"] in existing_records:
  self.prepare_record_for_upsert(record, existing_records[record["id"]])
@@ -390,7 +390,7 @@ class BatchPoster(MigrationTaskBase):
  new_record (dict): The new record to be updated.
  existing_record (dict): The existing record to patch from.
  patch_paths (List[str]): List of fields in JSON Path notation (e.g., ['statisticalCodeIds', 'administrativeNotes', 'instanceStatusId']) to patch during the upsert process. If empty, all fields will be patched.
- """
+ """ # noqa: E501
  updates = {}
  updates.update(existing_record)
  keep_existing = {}
@@ -412,17 +412,19 @@ class BatchPoster(MigrationTaskBase):
  new_record.update(updates)

  @staticmethod
- def collect_existing_records_for_upsert(object_type: str, response: httpx.Response, existing_records: dict):
- if response.status_code == 200:
- response_json = response.json()
- for record in response_json[object_type]:
- existing_records[record["id"]] = record
- else:
- logging.error(
- "Failed to fetch current records. HTTP %s\t%s",
- response.status_code,
- response.text,
- )
+ def collect_existing_records_for_upsert(
+ object_type: str, response_json: dict, existing_records: dict
+ ):
+ """
+ Collects existing records from API response into existing_records dict.
+
+ Args:
+ object_type: The key in response containing the records array
+ response_json: Parsed JSON response from API
+ existing_records: Dict to populate with {record_id: record_data}
+ """
+ for record in response_json.get(object_type, []):
+ existing_records[record["id"]] = record

  def handle_upsert_for_statistical_codes(self, updates: dict, keep_existing: dict):
  if not self.task_configuration.preserve_statistical_codes:
@@ -458,12 +460,23 @@ class BatchPoster(MigrationTaskBase):

  def prepare_record_for_upsert(self, new_record: dict, existing_record: dict):
  if "source" in existing_record and "MARC" in existing_record["source"]:
- if self.task_configuration.patch_paths:
+ patch_paths = [
+ x
+ for x in self.task_configuration.patch_paths
+ if ("suppress" in x.lower() or x.lower() == "deleted")
+ ]
+ if patch_paths:
+ logging.debug(
+ "Record %s is a MARC record, only suppression related fields will be patched",
+ existing_record["id"],
+ )
+ else:
  logging.debug(
  "Record %s is a MARC record, patch_paths will be ignored",
  existing_record["id"],
  )
- self.patch_record(new_record, existing_record, ["statisticalCodeIds", "administrativeNotes", "instanceStatusId"])
+ patch_paths.extend(["statisticalCodeIds", "administrativeNotes", "instanceStatusId"])
+ self.patch_record(new_record, existing_record, patch_paths)
  elif self.task_configuration.patch_existing_records:
  self.patch_record(new_record, existing_record, self.task_configuration.patch_paths)
  else:
@@ -471,7 +484,11 @@ class BatchPoster(MigrationTaskBase):
  "_version": existing_record["_version"],
  }
  self.keep_existing_fields(updates, existing_record)
- keep_new = {k: v for k, v in new_record.items() if k in ["statisticalCodeIds", "administrativeNotes"]}
+ keep_new = {
+ k: v
+ for k, v in new_record.items()
+ if k in ["statisticalCodeIds", "administrativeNotes"]
+ }
  keep_existing = {}
  self.handle_upsert_for_statistical_codes(existing_record, keep_existing)
  self.handle_upsert_for_administrative_notes(existing_record, keep_existing)
@@ -485,30 +502,66 @@ class BatchPoster(MigrationTaskBase):
  updates.update(keep_new)
  new_record.update(updates)

- async def get_with_retry(self, client: httpx.AsyncClient, url: str, params=None):
+ async def get_with_retry(self, url: str, params=None):
+ """
+ Wrapper around folio_get_async with selective retry logic.
+
+ Retries on:
+ - Connection errors (FolioConnectionError): Always retry
+ - Server errors (5xx): Transient failures
+ - Rate limiting (429): Too many requests
+
+ Does NOT retry on:
+ - Client errors (4xx except 429): Bad request, won't succeed on retry
+ """
  if params is None:
  params = {}
  retries = 3
+
  for attempt in range(retries):
  try:
- response = await client.get(
- url, params=params, headers=self.folio_client.okapi_headers)
- response.raise_for_status()
- return response
- except httpx.HTTPError as e:
+ return await self.folio_client.folio_get_async(url, query_params=params)
+
+ except folioclient.FolioConnectionError as e:
+ # Network/connection errors - always retry
  if attempt < retries - 1:
- logging.warning(f"Retrying due to {e}")
- await asyncio.sleep(2 ** attempt)
+ wait_time = 2**attempt
+ logging.warning(
+ f"Connection error, retrying in {wait_time}s "
+ f"(attempt {attempt + 1}/{retries}): {e}"
+ )
+ await asyncio.sleep(wait_time)
  else:
- logging.error(f"Failed to connect after {retries} attempts: {e}")
+ logging.error(f"Connection failed after {retries} attempts: {e}")
+ raise
+
+ except folioclient.FolioHTTPError as e:
+ # HTTP errors - selective retry based on status code
+ status_code = e.response.status_code
+ should_retry = status_code >= 500 or status_code == 429
+
+ if should_retry and attempt < retries - 1:
+ # Longer wait for rate limiting
+ wait_time = 5 if status_code == 429 else 2**attempt
+ logging.warning(
+ f"HTTP {status_code} error, retrying in {wait_time}s "
+ f"(attempt {attempt + 1}/{retries}): {e}"
+ )
+ await asyncio.sleep(wait_time)
+ else:
+ # Either not retryable or out of attempts
+ if should_retry:
+ logging.error(
+ f"HTTP {status_code} error persisted after {retries} attempts: {e}"
+ )
+ else:
+ logging.error(f"HTTP {status_code} error (not retryable): {e}")
  raise

  def post_record_batch(self, batch, failed_recs_file, row):
  json_rec = json.loads(row.split("\t")[-1])
  if self.task_configuration.object_type == "ShadowInstances":
  self.set_consortium_source(json_rec)
- if self.task_configuration.object_type == "SRS":
- json_rec["snapshotId"] = self.snapshot_id
  if self.processed == 1:
  logging.info(json.dumps(json_rec, indent=True))
  batch.append(json_rec)
@@ -519,22 +572,29 @@ class BatchPoster(MigrationTaskBase):

  def post_extra_data(self, row: str, num_records: int, failed_recs_file):
  (object_name, data) = row.split("\t")
- endpoint = self.get_extradata_endpoint(self.task_configuration, object_name, data)
- url = f"{self.folio_client.gateway_url}/{endpoint}"
+ url = self.get_extradata_endpoint(self.task_configuration, object_name, data)
  body = data
- response = self.post_objects(url, body)
- if response.status_code == 201:
+ try:
+ _ = self.folio_client.folio_post(url, payload=body)
  self.num_posted += 1
- elif response.status_code == 422:
- self.num_failures += 1
- error_msg = json.loads(response.text)["errors"][0]["message"]
- logging.error("Row %s\tHTTP %s\t %s", num_records, response.status_code, error_msg)
- if "id value already exists" not in json.loads(response.text)["errors"][0]["message"]:
+ except folioclient.FolioHTTPError as fhe:
+ if fhe.response.status_code == 422:
+ self.num_failures += 1
+ error_msg = json.loads(fhe.response.text)["errors"][0]["message"]
+ logging.error(
+ "Row %s\tHTTP %s\t %s", num_records, fhe.response.status_code, error_msg
+ )
+ if (
+ "id value already exists"
+ not in json.loads(fhe.response.text)["errors"][0]["message"]
+ ):
+ failed_recs_file.write(row)
+ else:
+ self.num_failures += 1
+ logging.error(
+ "Row %s\tHTTP %s\t%s", num_records, fhe.response.status_code, fhe.response.text
+ )
  failed_recs_file.write(row)
- else:
- self.num_failures += 1
- logging.error("Row %s\tHTTP %s\t%s", num_records, response.status_code, response.text)
- failed_recs_file.write(row)
  if num_records % 50 == 0:
  logging.info(
  "%s records posted successfully. %s failed",
@@ -563,52 +623,52 @@ class BatchPoster(MigrationTaskBase):
  object_types.update(task_configuration.extradata_endpoints)
  if object_name == "instructor":
  instructor = json.loads(string_object)
- return f'coursereserves/courselistings/{instructor["courseListingId"]}/instructors'
+ return f"coursereserves/courselistings/{instructor['courseListingId']}/instructors"

  if object_name == "interfaceCredential":
  credential = json.loads(string_object)
- return f'organizations-storage/interfaces/{credential["interfaceId"]}/credentials'
+ return f"organizations-storage/interfaces/{credential['interfaceId']}/credentials"

  return object_types[object_name]

  def post_single_records(self, row: str, num_records: int, failed_recs_file):
  if self.api_info["is_batch"]:
  raise TypeError("This record type supports batch processing, use post_batch method")
- api_endpoint = self.api_info.get("api_endpoint")
- url = f"{self.folio_client.gateway_url}{api_endpoint}"
- response = self.post_objects(url, row)
- if response.status_code == 201:
+ url = self.api_info.get("api_endpoint")
+ try:
+ _ = self.folio_client.folio_post(url, payload=row)
  self.num_posted += 1
- elif response.status_code == 422:
- self.num_failures += 1
- error_msg = json.loads(response.text)["errors"][0]["message"]
- logging.error("Row %s\tHTTP %s\t %s", num_records, response.status_code, error_msg)
- if "id value already exists" not in json.loads(response.text)["errors"][0]["message"]:
+ except folioclient.FolioHTTPError as fhe:
+ if fhe.response.status_code == 422:
+ self.num_failures += 1
+ error_msg = json.loads(fhe.response.text)["errors"][0]["message"]
+ logging.error(
+ "Row %s\tHTTP %s\t %s", num_records, fhe.response.status_code, error_msg
+ )
+ if (
+ "id value already exists"
+ not in json.loads(fhe.response.text)["errors"][0]["message"]
+ ):
+ failed_recs_file.write(row)
+ else:
+ self.num_failures += 1
+ logging.error(
+ "Row %s\tHTTP %s\t%s",
+ num_records,
+ fhe.response.status_code,
+ fhe.response.text,
+ )
  failed_recs_file.write(row)
- else:
- self.num_failures += 1
- logging.error("Row %s\tHTTP %s\t%s", num_records, response.status_code, response.text)
- failed_recs_file.write(row)
- if num_records % 50 == 0:
- logging.info(
- "%s records posted successfully. %s failed",
- self.num_posted,
- self.num_failures,
- )
-
- def post_objects(self, url, body):
- if self.http_client and not self.http_client.is_closed:
- return self.http_client.post(
- url, data=body.encode("utf-8"), headers=self.folio_client.okapi_headers
- )
- else:
- return httpx.post(
- url, headers=self.okapi_headers, data=body.encode("utf-8"), timeout=None
- )
+ if num_records % 50 == 0:
+ logging.info(
+ "%s records posted successfully. %s failed",
+ self.num_posted,
+ self.num_failures,
+ )

  def handle_generic_exception(self, exception, last_row, batch, num_records, failed_recs_file):
  logging.error("%s", exception)
- self.migration_report.add("Details", i18n.t("Generic exceptions (see log for details)"))
+ self.migration_report.add("Details", i18n_t("Generic exceptions (see log for details)"))
  # logging.error("Failed row: %s", last_row)
  self.failed_batches += 1
  self.num_failures += len(batch)
@@ -621,7 +681,7 @@ class BatchPoster(MigrationTaskBase):
  sys.exit(1)

  def handle_unicode_error(self, unicode_error, last_row):
- self.migration_report.add("Details", i18n.t("Encoding errors"))
+ self.migration_report.add("Details", i18n_t("Encoding errors"))
  logging.info("=========ERROR==============")
  logging.info(
  "%s Posting failed. Encoding error reading file",
@@ -636,9 +696,9 @@ class BatchPoster(MigrationTaskBase):
  traceback.logging.info_exc() # type: ignore
  logging.info("=======================")

- def post_batch(self, batch, failed_recs_file, num_records, recursion_depth=0):
+ def post_batch(self, batch, failed_recs_file, num_records):
  if self.query_params.get("upsert", False) and self.api_info.get("query_endpoint", ""):
- self.set_version(batch, self.api_info['query_endpoint'], self.api_info['object_name'])
+ self.set_version(batch, self.api_info["query_endpoint"], self.api_info["object_name"])
  response = self.do_post(batch)
  if response.status_code == 401:
  logging.error("Authorization failed (%s). Fetching new auth token...", response.text)
@@ -707,24 +767,6 @@ class BatchPoster(MigrationTaskBase):
  # Likely a json parsing error
  logging.error(response.text)
  raise TransformationProcessError("", "HTTP 400. Something is wrong. Quitting")
- elif self.task_configuration.object_type == "SRS" and response.status_code >= 500:
- logging.info(
- "Post failed. Size: %s Waiting 30s until reposting. Number of tries: %s of 5",
- get_req_size(response),
- recursion_depth,
- )
- logging.info(response.text)
- time.sleep(30)
- if recursion_depth > 4:
- raise TransformationRecordFailedError(
- "",
- f"HTTP {response.status_code}\t"
- f"Request size: {get_req_size(response)}"
- f"{datetime.now(timezone.utc).isoformat()}\n",
- response.text,
- )
- else:
- self.post_batch(batch, failed_recs_file, num_records, recursion_depth + 1)
  elif (
  response.status_code == 413 and "DB_ALLOW_SUPPRESS_OPTIMISTIC_LOCKING" in response.text
  ):
@@ -749,57 +791,42 @@ class BatchPoster(MigrationTaskBase):
  )

  def do_post(self, batch):
- path = self.api_info["api_endpoint"]
- url = self.folio_client.gateway_url + path
- if self.api_info["object_name"] == "users":
- payload = {self.api_info["object_name"]: list(batch), "totalRecords": len(batch)}
- elif self.api_info["total_records"]:
- payload = {"records": list(batch), "totalRecords": len(batch)}
- else:
- payload = {self.api_info["object_name"]: batch}
- if self.http_client and not self.http_client.is_closed:
- return self.http_client.post(
- url,
- json=payload,
- headers=self.folio_client.okapi_headers,
- params=self.query_params
- )
- else:
- return httpx.post(
+ with self.folio_client.get_folio_http_client() as http_client:
+ url = self.api_info["api_endpoint"]
+ if self.api_info["object_name"] == "users":
+ payload = {self.api_info["object_name"]: list(batch), "totalRecords": len(batch)}
+ elif self.api_info["total_records"]:
+ payload = {"records": list(batch), "totalRecords": len(batch)}
+ else:
+ payload = {self.api_info["object_name"]: batch}
+ return http_client.post(
  url,
- headers=self.okapi_headers,
  json=payload,
  params=self.query_params,
- timeout=None)
+ )

  def get_current_record_count_in_folio(self):
  if "query_endpoint" in self.api_info:
- url = f"{self.folio_client.gateway_url}{self.api_info['query_endpoint']}"
+ url = self.api_info["query_endpoint"]
  query_params = {"query": "cql.allRecords=1", "limit": 0}
- if self.http_client and not self.http_client.is_closed:
- res = self.http_client.get(
- url,
- headers=self.folio_client.okapi_headers,
- params=query_params
- )
- else:
- res = httpx.get(url, headers=self.okapi_headers, params=query_params, timeout=None)
  try:
- res.raise_for_status()
- return res.json()["totalRecords"]
- except httpx.HTTPStatusError:
- logging.error("Failed to get current record count. HTTP %s", res.status_code)
+ res = self.folio_client.folio_get(url, query_params=query_params)
+ return res["totalRecords"]
+ except folioclient.FolioHTTPError as fhe:
+ logging.error(
+ "Failed to get current record count. HTTP %s", fhe.response.status_code
+ )
  return 0
  except KeyError:
  logging.error(
  "Failed to get current record count. "
- f"No 'totalRecords' in response: {res.json()}"
+ f"No 'totalRecords' in response: {json.dumps(res, indent=2)}"
  )
  return 0
  else:
  raise ValueError(
  "No 'query_endpoint' available for %s. Cannot get current record count.",
- self.task_configuration.object_type
+ self.task_configuration.object_type,
  )

  def get_starting_record_count(self):
@@ -809,7 +836,7 @@ class BatchPoster(MigrationTaskBase):
  else:
  logging.info(
  "No query_endpoint available for %s. Cannot get starting record count.",
- self.task_configuration.object_type
+ self.task_configuration.object_type,
  )

  def get_finished_record_count(self):
@@ -819,14 +846,12 @@ class BatchPoster(MigrationTaskBase):
  else:
  logging.info(
  "No query_endpoint available for %s. Cannot get ending record count.",
- self.task_configuration.object_type
+ self.task_configuration.object_type,
  )

  def wrap_up(self):
  logging.info("Done. Wrapping up")
  self.extradata_writer.flush()
- if self.task_configuration.object_type == "SRS":
- self.commit_snapshot()
  if self.task_configuration.object_type != "Extradata":
  logging.info(
  (
@@ -842,7 +867,7 @@ class BatchPoster(MigrationTaskBase):
  if self.starting_record_count_in_folio:
  self.get_finished_record_count()
  total_on_server = (
- self.finished_record_count_in_folio - self.starting_record_count_in_folio
+ self.finished_record_count_in_folio - self.starting_record_count_in_folio
  )
  discrepancy = self.processed - self.num_failures - total_on_server
  if discrepancy != 0:
@@ -893,9 +918,8 @@ class BatchPoster(MigrationTaskBase):
  temp_start = self.start_datetime
  self.task_configuration.rerun_failed_records = False
  self.__init__(
- self.task_configuration,
- self.library_configuration,
- self.folio_client)
+ self.task_configuration, self.library_configuration, self.folio_client
+ )
  self.performing_rerun = True
  self.migration_report = temp_report
  self.start_datetime = temp_start
@@ -914,69 +938,6 @@ class BatchPoster(MigrationTaskBase):
  str(self.folder_structure.failed_recs_path),
  )

- def create_snapshot(self):
- snapshot = {
- "jobExecutionId": self.snapshot_id,
- "status": "PARSING_IN_PROGRESS",
- "processingStartedDate": datetime.now(timezone.utc).isoformat(timespec="milliseconds"),
- }
- try:
- url = f"{self.folio_client.gateway_url}/source-storage/snapshots"
- if self.http_client and not self.http_client.is_closed:
- res = self.http_client.post(
- url, json=snapshot, headers=self.folio_client.okapi_headers
- )
- else:
- res = httpx.post(url, headers=self.okapi_headers, json=snapshot, timeout=None)
- res.raise_for_status()
- logging.info("Posted Snapshot to FOLIO: %s", json.dumps(snapshot, indent=4))
- get_url = (
- f"{self.folio_client.gateway_url}/source-storage/snapshots/{self.snapshot_id}"
- )
- got = False
- while not got:
- logging.info("Sleeping while waiting for the snapshot to get created")
- time.sleep(5)
- if self.http_client and not self.http_client.is_closed:
- res = self.http_client.get(get_url, headers=self.folio_client.okapi_headers)
- else:
- res = httpx.get(get_url, headers=self.okapi_headers, timeout=None)
- if res.status_code == 200:
- got = True
- else:
- logging.info(res.status_code)
- except httpx.HTTPStatusError as exc:
- logging.exception("HTTP error occurred while posting the snapshot: %s", exc)
- sys.exit(1)
- except Exception as exc:
- logging.exception("Could not post the snapshot: %s", exc)
- sys.exit(1)
-
- def commit_snapshot(self):
- snapshot = {"jobExecutionId": self.snapshot_id, "status": "COMMITTED"}
- try:
- url = f"{self.folio_client.gateway_url}/source-storage/snapshots/{self.snapshot_id}"
- if self.http_client and not self.http_client.is_closed:
- res = self.http_client.put(
- url, json=snapshot, headers=self.folio_client.okapi_headers
- )
- else:
- res = httpx.put(url, headers=self.okapi_headers, json=snapshot, timeout=None)
- res.raise_for_status()
- logging.info("Posted Committed snapshot to FOLIO: %s", json.dumps(snapshot, indent=4))
- except httpx.HTTPStatusError as exc:
- logging.exception("HTTP error occurred while posting the snapshot: %s", exc)
- sys.exit(1)
- except Exception as exc:
- logging.exception(
- "Could not commit snapshot with id %s. Post this to /source-storage/snapshots/%s:",
- self.snapshot_id,
- self.snapshot_id,
- exc,
- )
- logging.info("%s", json.dumps(snapshot, indent=4))
- sys.exit(1)
-

  def get_api_info(object_type: str, use_safe: bool = True):
  choices = {
@@ -1038,22 +999,6 @@ def get_api_info(object_type: str, use_safe: bool = True):
  "addSnapshotId": False,
  "supports_upsert": True,
  },
- "Authorities": {
- "object_name": "",
- "api_endpoint": "/authority-storage/authorities",
- "is_batch": False,
- "total_records": False,
- "addSnapshotId": False,
- "supports_upsert": False,
- },
- "SRS": {
- "object_name": "records",
- "api_endpoint": "/source-storage/batch/records",
- "is_batch": True,
- "total_records": True,
- "addSnapshotId": True,
- "supports_upsert": False,
- },
  "Users": {
  "object_name": "users",
  "api_endpoint": "/user-import",
@@ -1085,27 +1030,12 @@ def get_api_info(object_type: str, use_safe: bool = True):
  except KeyError:
  key_string = ", ".join(choices.keys())
  logging.error(
- f"Wrong type. Only one of {key_string} are allowed, "
- f"received {object_type=} instead"
+ f"Wrong type. Only one of {key_string} are allowed, received {object_type=} instead"
  )
  logging.error("Halting")
  sys.exit(1)


- def chunks(records, number_of_chunks):
- """Yield successive n-sized chunks from lst.
-
- Args:
- records (_type_): _description_
- number_of_chunks (_type_): _description_
-
- Yields:
- _type_: _description_
- """
- for i in range(0, len(records), number_of_chunks):
- yield records[i: i + number_of_chunks]
-
-
  def get_human_readable(size, precision=2):
  suffixes = ["B", "KB", "MB", "GB", "TB"]
  suffix_index = 0
@@ -1115,22 +1045,23 @@ def get_human_readable(size, precision=2):
  return "%.*f%s" % (precision, size, suffixes[suffix_index])


- def get_req_size(response: httpx.Response):
+ def get_req_size(response: "Response"):
  size = response.request.method
  size += str(response.request.url)
  size += "\r\n".join(f"{k}{v}" for k, v in response.request.headers.items())
  size += response.request.content.decode("utf-8") or ""
  return get_human_readable(len(size.encode("utf-8")))

+
  def parse_path(path):
  """
  Parses a path like 'foo.bar[0].baz' into ['foo', 'bar', 0, 'baz']
  """
  tokens = []
  # Split by dot, then extract indices
- for part in path.split('.'):
+ for part in path.split("."):
  # Find all [index] parts
- matches = re.findall(r'([^\[\]]+)|\[(\d+)\]', part)
+ matches = re.findall(r"([^\[\]]+)|\[(\d+)\]", part)
  for name, idx in matches:
  if name:
  tokens.append(name)
@@ -1138,12 +1069,14 @@ def parse_path(path):
  tokens.append(int(idx))
  return tokens

+
  def get_by_path(data, path):
  keys = parse_path(path)
  for key in keys:
  data = data[key]
  return data

+
  def set_by_path(data, path, value):
  keys = parse_path(path)
  for i, key in enumerate(keys[:-1]):
@@ -1164,6 +1097,7 @@ def set_by_path(data, path, value):
  else:
  data[last_key] = value

+
  def extract_paths(data, paths):
  result = {}
  for path in paths:
@@ -1174,6 +1108,7 @@ def extract_paths(data, paths):
  continue
  return result

+
  def deep_update(target, patch):
  """
  Recursively update target dict/list with values from patch dict/list.
@@ -1181,11 +1116,7 @@ def deep_update(target, patch):
  """
  if isinstance(patch, dict):
  for k, v in patch.items():
- if (
- k in target
- and isinstance(target[k], (dict, list))
- and isinstance(v, (dict, list))
- ):
+ if k in target and isinstance(target[k], (dict, list)) and isinstance(v, (dict, list)):
  deep_update(target[k], v)
  else:
  target[k] = v