folio-migration-tools 1.10.0__py3-none-any.whl → 1.10.0b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- folio_migration_tools/__main__.py +0 -9
- folio_migration_tools/circulation_helper.py +5 -6
- folio_migration_tools/folder_structure.py +6 -16
- folio_migration_tools/helper.py +6 -7
- folio_migration_tools/holdings_helper.py +3 -4
- folio_migration_tools/library_configuration.py +0 -12
- folio_migration_tools/mapper_base.py +6 -7
- folio_migration_tools/mapping_file_transformation/user_mapper.py +0 -4
- folio_migration_tools/marc_rules_transformation/conditions.py +29 -0
- folio_migration_tools/marc_rules_transformation/marc_file_processor.py +9 -19
- folio_migration_tools/marc_rules_transformation/rules_mapper_authorities.py +242 -0
- folio_migration_tools/marc_rules_transformation/rules_mapper_base.py +15 -10
- folio_migration_tools/marc_rules_transformation/rules_mapper_bibs.py +2 -3
- folio_migration_tools/marc_rules_transformation/rules_mapper_holdings.py +5 -6
- folio_migration_tools/migration_report.py +6 -17
- folio_migration_tools/migration_tasks/authority_transformer.py +118 -0
- folio_migration_tools/migration_tasks/batch_poster.py +298 -219
- folio_migration_tools/migration_tasks/bibs_transformer.py +2 -2
- folio_migration_tools/migration_tasks/holdings_csv_transformer.py +9 -9
- folio_migration_tools/migration_tasks/holdings_marc_transformer.py +3 -3
- folio_migration_tools/migration_tasks/items_transformer.py +4 -6
- folio_migration_tools/migration_tasks/loans_migrator.py +18 -19
- folio_migration_tools/migration_tasks/manual_fee_fines_transformer.py +3 -3
- folio_migration_tools/migration_tasks/migration_task_base.py +6 -15
- folio_migration_tools/migration_tasks/orders_transformer.py +3 -4
- folio_migration_tools/migration_tasks/requests_migrator.py +9 -10
- folio_migration_tools/migration_tasks/reserves_migrator.py +4 -5
- folio_migration_tools/migration_tasks/user_transformer.py +5 -15
- folio_migration_tools/translations/en.json +7 -0
- {folio_migration_tools-1.10.0.dist-info → folio_migration_tools-1.10.0b1.dist-info}/METADATA +2 -3
- {folio_migration_tools-1.10.0.dist-info → folio_migration_tools-1.10.0b1.dist-info}/RECORD +33 -32
- folio_migration_tools/i18n_cache.py +0 -79
- {folio_migration_tools-1.10.0.dist-info → folio_migration_tools-1.10.0b1.dist-info}/WHEEL +0 -0
- {folio_migration_tools-1.10.0.dist-info → folio_migration_tools-1.10.0b1.dist-info}/entry_points.txt +0 -0
folio_migration_tools/migration_tasks/batch_poster.py

@@ -4,15 +4,14 @@ import json
 import logging
 import re
 import sys
+import time
 import traceback
 from datetime import datetime, timezone
-from typing import
+from typing import Annotated, List, Optional
 from uuid import uuid4

-import
-
-if TYPE_CHECKING:
-    from httpx import Response
+import httpx
+import i18n
 from folio_uuid.folio_namespaces import FOLIONamespaces
 from pydantic import Field

@@ -20,7 +19,6 @@ from folio_migration_tools.custom_exceptions import (
     TransformationProcessError,
    TransformationRecordFailedError,
 )
-from folio_migration_tools.i18n_cache import i18n_t
 from folio_migration_tools.library_configuration import (
     FileDefinition,
     LibraryConfiguration,
@@ -75,7 +73,7 @@ class BatchPoster(MigrationTaskBase):
             description=(
                 "The type of object being migrated"
                 "Examples of possible values: "
-                "'Extradata', 'Instances', 'Holdings', 'Items'"
+                "'Extradata', 'SRS', Instances', 'Holdings', 'Items'"
             ),
         ),
     ]
@@ -247,80 +245,90 @@ class BatchPoster(MigrationTaskBase):
         self.failed_fields: set = set()
         self.num_failures = 0
         self.num_posted = 0
+        self.okapi_headers = self.folio_client.okapi_headers
+        self.http_client = None
         self.starting_record_count_in_folio: Optional[int] = None
         self.finished_record_count_in_folio: Optional[int] = None

     def do_work(self):  # noqa: C901
-        with
-        self.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        with self.folio_client.get_folio_http_client() as httpx_client:
+            self.http_client = httpx_client
+            with open(
+                self.folder_structure.failed_recs_path, "w", encoding="utf-8"
+            ) as failed_recs_file:
+                self.get_starting_record_count()
+                try:
+                    batch = []
+                    if self.task_configuration.object_type == "SRS":
+                        self.create_snapshot()
+                    for idx, file_def in enumerate(self.task_configuration.files):  # noqa: B007
+                        path = self.folder_structure.results_folder / file_def.file_name
+                        with open(path) as rows:
+                            logging.info("Running %s", path)
+                            last_row = ""
+                            for self.processed, row in enumerate(rows, start=1):
+                                last_row = row
+                                if row.strip():
+                                    try:
+                                        if self.task_configuration.object_type == "Extradata":
+                                            self.post_extra_data(
+                                                row, self.processed, failed_recs_file
+                                            )
+                                        elif not self.api_info["is_batch"]:
+                                            self.post_single_records(
+                                                row, self.processed, failed_recs_file
+                                            )
+                                        else:
+                                            batch = self.post_record_batch(
+                                                batch, failed_recs_file, row
+                                            )
+                                    except UnicodeDecodeError as unicode_error:
+                                        self.handle_unicode_error(unicode_error, last_row)
+                                    except TransformationProcessError as tpe:
+                                        self.handle_generic_exception(
+                                            tpe,
+                                            last_row,
+                                            batch,
+                                            self.processed,
+                                            failed_recs_file,
                                         )
-
-
-
+                                        batch = []
+                                        raise
+                                    except TransformationRecordFailedError as exception:
+                                        self.handle_generic_exception(
+                                            exception,
+                                            last_row,
+                                            batch,
+                                            self.processed,
+                                            failed_recs_file,
                                         )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                    failed_recs_file,
-                )
-                batch = []
-        except (FileNotFoundError, PermissionError) as ose:
-            logging.error("Error reading file: %s", ose)
-
-        except Exception as ee:
-            if "idx" in locals() and self.task_configuration.files[idx:]:
-                for file_def in self.task_configuration.files[idx:]:
-                    path = self.folder_structure.results_folder / file_def.file_name
+                                        batch = []
+                except (FileNotFoundError, PermissionError) as ose:
+                    logging.error("Error reading file: %s", ose)
+
+                except Exception as ee:
+                    if "idx" in locals() and self.task_configuration.files[idx:]:
+                        for file_def in self.task_configuration.files[idx:]:
+                            path = self.folder_structure.results_folder / file_def.file_name
+                            try:
+                                with open(path, "r") as failed_file:
+                                    failed_file.seek(self.processed)
+                                    failed_recs_file.write(failed_file.read())
+                                self.processed = 0
+                            except (FileNotFoundError, PermissionError) as ose:
+                                logging.error("Error reading file: %s", ose)
+                    raise ee
+                finally:
+                    if self.task_configuration.object_type != "Extradata" and any(batch):
                         try:
-
-
-
-            self.processed
-
-
-
-
-        if self.task_configuration.object_type != "Extradata" and any(batch):
-            try:
-                self.post_batch(batch, failed_recs_file, self.processed)
-            except Exception as exception:
-                self.handle_generic_exception(
-                    exception, last_row, batch, self.processed, failed_recs_file
-                )
-        logging.info("Done posting %s records. ", self.processed)
+                            self.post_batch(batch, failed_recs_file, self.processed)
+                        except Exception as exception:
+                            self.handle_generic_exception(
+                                exception, last_row, batch, self.processed, failed_recs_file
+                            )
+                    logging.info("Done posting %s records. ", self.processed)
+                    if self.task_configuration.object_type == "SRS":
+                        self.commit_snapshot()

     @staticmethod
     def set_consortium_source(json_rec):
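Note on the do_work rewrite above: the task now opens one shared httpx client for the whole run (kept on self.http_client so every posting helper reuses the same connection pool), creates an SRS snapshot up front when object_type is "SRS", and routes each non-empty row to one of three paths: extradata, single-record, or batch. A minimal sketch of that dispatch shape, where run() and the three post_* callables are hypothetical stand-ins for the BatchPoster methods, not the package's API:

# Illustrative sketch only; the callables are hypothetical stand-ins.
import httpx

def run(files, object_type, is_batch, post_extra, post_single, post_batch_row):
    batch = []
    with httpx.Client() as client:  # one connection pool shared across all files
        for path in files:
            with open(path) as rows:
                for row in rows:
                    if not row.strip():
                        continue  # skip blank lines, as do_work does
                    if object_type == "Extradata":
                        post_extra(client, row)
                    elif not is_batch:
                        post_single(client, row)
                    else:
                        batch = post_batch_row(client, batch, row)
    return batch  # any unposted remainder is flushed in the finally block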
@@ -358,26 +366,26 @@ class BatchPoster(MigrationTaskBase):
         fetch_batch_size = 90
         fetch_tasks = []
         existing_records = {}
-
-
-
-
-
-
-
-
-
-
-
-
+        async with httpx.AsyncClient(base_url=self.folio_client.gateway_url) as client:
+            for i in range(0, len(batch), fetch_batch_size):
+                batch_slice = batch[i : i + fetch_batch_size]
+                fetch_tasks.append(
+                    self.get_with_retry(
+                        client,
+                        query_api,
+                        params={
+                            "query": (
+                                f"id==({' OR '.join([r['id'] for r in batch_slice if 'id' in r])})"
+                            ),
+                            "limit": fetch_batch_size,
+                        },
+                    )
                 )
-        )
-
-        responses = await asyncio.gather(*fetch_tasks)

-
-        self.collect_existing_records_for_upsert(object_type, response, existing_records)
+            responses = await asyncio.gather(*fetch_tasks)

+            for response in responses:
+                self.collect_existing_records_for_upsert(object_type, response, existing_records)
         for record in batch:
             if record["id"] in existing_records:
                 self.prepare_record_for_upsert(record, existing_records[record["id"]])
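Note: the rewritten upsert path fetches the records' current versions in slices of 90 ids per CQL query and awaits all queries concurrently. A minimal sketch of the chunked asyncio.gather pattern, assuming a query endpoint that accepts an id==(...) CQL filter; fetch_existing is a hypothetical standalone rendering:

# Illustrative sketch; fetch_existing is hypothetical.
import asyncio
import httpx

async def fetch_existing(base_url: str, query_api: str, ids: list, headers: dict, chunk: int = 90):
    async with httpx.AsyncClient(base_url=base_url) as client:
        tasks = [
            client.get(
                query_api,
                params={"query": f"id==({' OR '.join(ids[i:i + chunk])})", "limit": chunk},
                headers=headers,
            )
            for i in range(0, len(ids), chunk)
        ]
        return await asyncio.gather(*tasks)  # one response per slice of ids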
@@ -413,18 +421,18 @@ class BatchPoster(MigrationTaskBase):

     @staticmethod
     def collect_existing_records_for_upsert(
-        object_type: str,
+        object_type: str, response: httpx.Response, existing_records: dict
     ):
-
-
-
-
-
-
-
-
-
-
+        if response.status_code == 200:
+            response_json = response.json()
+            for record in response_json[object_type]:
+                existing_records[record["id"]] = record
+        else:
+            logging.error(
+                "Failed to fetch current records. HTTP %s\t%s",
+                response.status_code,
+                response.text,
+            )

     def handle_upsert_for_statistical_codes(self, updates: dict, keep_existing: dict):
         if not self.task_configuration.preserve_statistical_codes:
@@ -502,66 +510,31 @@ class BatchPoster(MigrationTaskBase):
         updates.update(keep_new)
         new_record.update(updates)

-    async def get_with_retry(self, url: str, params=None):
-        """
-        Wrapper around folio_get_async with selective retry logic.
-
-        Retries on:
-        - Connection errors (FolioConnectionError): Always retry
-        - Server errors (5xx): Transient failures
-        - Rate limiting (429): Too many requests
-
-        Does NOT retry on:
-        - Client errors (4xx except 429): Bad request, won't succeed on retry
-        """
+    async def get_with_retry(self, client: httpx.AsyncClient, url: str, params=None):
         if params is None:
             params = {}
         retries = 3
-
         for attempt in range(retries):
             try:
-
-
-
-
+                response = await client.get(
+                    url, params=params, headers=self.folio_client.okapi_headers
+                )
+                response.raise_for_status()
+                return response
+            except httpx.HTTPError as e:
                 if attempt < retries - 1:
-
-
-                        f"Connection error, retrying in {wait_time}s "
-                        f"(attempt {attempt + 1}/{retries}): {e}"
-                    )
-                    await asyncio.sleep(wait_time)
-                else:
-                    logging.error(f"Connection failed after {retries} attempts: {e}")
-                    raise
-
-            except folioclient.FolioHTTPError as e:
-                # HTTP errors - selective retry based on status code
-                status_code = e.response.status_code
-                should_retry = status_code >= 500 or status_code == 429
-
-                if should_retry and attempt < retries - 1:
-                    # Longer wait for rate limiting
-                    wait_time = 5 if status_code == 429 else 2**attempt
-                    logging.warning(
-                        f"HTTP {status_code} error, retrying in {wait_time}s "
-                        f"(attempt {attempt + 1}/{retries}): {e}"
-                    )
-                    await asyncio.sleep(wait_time)
+                    logging.warning(f"Retrying due to {e}")
+                    await asyncio.sleep(2**attempt)
                 else:
-
-                    if should_retry:
-                        logging.error(
-                            f"HTTP {status_code} error persisted after {retries} attempts: {e}"
-                        )
-                    else:
-                        logging.error(f"HTTP {status_code} error (not retryable): {e}")
+                    logging.error(f"Failed to connect after {retries} attempts: {e}")
                     raise

     def post_record_batch(self, batch, failed_recs_file, row):
         json_rec = json.loads(row.split("\t")[-1])
         if self.task_configuration.object_type == "ShadowInstances":
             self.set_consortium_source(json_rec)
+        if self.task_configuration.object_type == "SRS":
+            json_rec["snapshotId"] = self.snapshot_id
         if self.processed == 1:
             logging.info(json.dumps(json_rec, indent=True))
         batch.append(json_rec)
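Note: the new get_with_retry drops the old selective policy (retry only connection errors, 5xx and 429) in favor of a single httpx.HTTPError handler with exponential backoff, so client errors are now retried too. The backoff shape as a self-contained sketch; 2 ** attempt yields waits of 1s and 2s across the three attempts:

# Illustrative sketch of the retry shape used above.
import asyncio
import httpx

async def get_with_retry(client: httpx.AsyncClient, url: str, retries: int = 3) -> httpx.Response:
    for attempt in range(retries):
        try:
            response = await client.get(url)
            response.raise_for_status()  # 4xx/5xx become httpx.HTTPStatusError
            return response
        except httpx.HTTPError:  # covers transport errors and bad statuses alike
            if attempt == retries - 1:
                raise
            await asyncio.sleep(2 ** attempt)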
@@ -572,29 +545,22 @@ class BatchPoster(MigrationTaskBase):

     def post_extra_data(self, row: str, num_records: int, failed_recs_file):
         (object_name, data) = row.split("\t")
-
+        endpoint = self.get_extradata_endpoint(self.task_configuration, object_name, data)
+        url = f"{self.folio_client.gateway_url}/{endpoint}"
         body = data
-
-
+        response = self.post_objects(url, body)
+        if response.status_code == 201:
             self.num_posted += 1
-
-
-
-
-
-                "Row %s\tHTTP %s\t %s", num_records, fhe.response.status_code, error_msg
-            )
-            if (
-                "id value already exists"
-                not in json.loads(fhe.response.text)["errors"][0]["message"]
-            ):
-                failed_recs_file.write(row)
-        else:
-            self.num_failures += 1
-            logging.error(
-                "Row %s\tHTTP %s\t%s", num_records, fhe.response.status_code, fhe.response.text
-            )
+        elif response.status_code == 422:
+            self.num_failures += 1
+            error_msg = json.loads(response.text)["errors"][0]["message"]
+            logging.error("Row %s\tHTTP %s\t %s", num_records, response.status_code, error_msg)
+            if "id value already exists" not in json.loads(response.text)["errors"][0]["message"]:
                 failed_recs_file.write(row)
+        else:
+            self.num_failures += 1
+            logging.error("Row %s\tHTTP %s\t%s", num_records, response.status_code, response.text)
+            failed_recs_file.write(row)
         if num_records % 50 == 0:
             logging.info(
                 "%s records posted successfully. %s failed",
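Note: post_extra_data (and post_single_records in the hunk below) now triages on the raw status code instead of catching a client-library exception: 201 counts as posted, a 422 whose first error message contains "id value already exists" is tallied as a failure but not replayed, and anything else is written back to the failed-records file for a re-run. The triage rule as a compact sketch; triage_response is a hypothetical helper, not part of the module:

# Illustrative sketch; triage_response is hypothetical.
import json

def triage_response(response, row: str, failed_recs_file) -> str:
    if response.status_code == 201:
        return "posted"
    if response.status_code == 422:
        message = json.loads(response.text)["errors"][0]["message"]
        if "id value already exists" in message:
            return "duplicate"  # counted as a failure, but not queued for replay
    failed_recs_file.write(row)  # everything else is replayed from the failed-recs file
    return "failed"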
@@ -634,41 +600,41 @@ class BatchPoster(MigrationTaskBase):
     def post_single_records(self, row: str, num_records: int, failed_recs_file):
         if self.api_info["is_batch"]:
             raise TypeError("This record type supports batch processing, use post_batch method")
-
-
-
+        api_endpoint = self.api_info.get("api_endpoint")
+        url = f"{self.folio_client.gateway_url}{api_endpoint}"
+        response = self.post_objects(url, row)
+        if response.status_code == 201:
             self.num_posted += 1
-
-
-
-
-
-                "Row %s\tHTTP %s\t %s", num_records, fhe.response.status_code, error_msg
-            )
-            if (
-                "id value already exists"
-                not in json.loads(fhe.response.text)["errors"][0]["message"]
-            ):
-                failed_recs_file.write(row)
-        else:
-            self.num_failures += 1
-            logging.error(
-                "Row %s\tHTTP %s\t%s",
-                num_records,
-                fhe.response.status_code,
-                fhe.response.text,
-            )
+        elif response.status_code == 422:
+            self.num_failures += 1
+            error_msg = json.loads(response.text)["errors"][0]["message"]
+            logging.error("Row %s\tHTTP %s\t %s", num_records, response.status_code, error_msg)
+            if "id value already exists" not in json.loads(response.text)["errors"][0]["message"]:
                 failed_recs_file.write(row)
-
-
-
-
-
-
+        else:
+            self.num_failures += 1
+            logging.error("Row %s\tHTTP %s\t%s", num_records, response.status_code, response.text)
+            failed_recs_file.write(row)
+        if num_records % 50 == 0:
+            logging.info(
+                "%s records posted successfully. %s failed",
+                self.num_posted,
+                self.num_failures,
+            )
+
+    def post_objects(self, url, body):
+        if self.http_client and not self.http_client.is_closed:
+            return self.http_client.post(
+                url, data=body.encode("utf-8"), headers=self.folio_client.okapi_headers
+            )
+        else:
+            return httpx.post(
+                url, headers=self.okapi_headers, data=body.encode("utf-8"), timeout=None
+            )

     def handle_generic_exception(self, exception, last_row, batch, num_records, failed_recs_file):
         logging.error("%s", exception)
-        self.migration_report.add("Details",
+        self.migration_report.add("Details", i18n.t("Generic exceptions (see log for details)"))
         # logging.error("Failed row: %s", last_row)
         self.failed_batches += 1
         self.num_failures += len(batch)
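Note: post_objects above is the single choke point for these writes. It prefers the long-lived client opened in do_work and falls back to a one-shot httpx.post with the timeout disabled when no client is open. A minimal standalone sketch of that fallback:

# Illustrative sketch of the client-reuse fallback.
from typing import Optional

import httpx

def post_objects(http_client: Optional[httpx.Client], url: str, body: str, headers: dict) -> httpx.Response:
    if http_client and not http_client.is_closed:
        return http_client.post(url, data=body.encode("utf-8"), headers=headers)
    return httpx.post(url, data=body.encode("utf-8"), headers=headers, timeout=None)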
@@ -681,7 +647,7 @@ class BatchPoster(MigrationTaskBase):
         sys.exit(1)

     def handle_unicode_error(self, unicode_error, last_row):
-        self.migration_report.add("Details",
+        self.migration_report.add("Details", i18n.t("Encoding errors"))
         logging.info("=========ERROR==============")
         logging.info(
             "%s Posting failed. Encoding error reading file",
@@ -696,7 +662,7 @@ class BatchPoster(MigrationTaskBase):
         traceback.logging.info_exc()  # type: ignore
         logging.info("=======================")

-    def post_batch(self, batch, failed_recs_file, num_records):
+    def post_batch(self, batch, failed_recs_file, num_records, recursion_depth=0):
         if self.query_params.get("upsert", False) and self.api_info.get("query_endpoint", ""):
             self.set_version(batch, self.api_info["query_endpoint"], self.api_info["object_name"])
         response = self.do_post(batch)
@@ -767,6 +733,24 @@ class BatchPoster(MigrationTaskBase):
             # Likely a json parsing error
             logging.error(response.text)
             raise TransformationProcessError("", "HTTP 400. Something is wrong. Quitting")
+        elif self.task_configuration.object_type == "SRS" and response.status_code >= 500:
+            logging.info(
+                "Post failed. Size: %s Waiting 30s until reposting. Number of tries: %s of 5",
+                get_req_size(response),
+                recursion_depth,
+            )
+            logging.info(response.text)
+            time.sleep(30)
+            if recursion_depth > 4:
+                raise TransformationRecordFailedError(
+                    "",
+                    f"HTTP {response.status_code}\t"
+                    f"Request size: {get_req_size(response)}"
+                    f"{datetime.now(timezone.utc).isoformat()}\n",
+                    response.text,
+                )
+            else:
+                self.post_batch(batch, failed_recs_file, num_records, recursion_depth + 1)
         elif (
             response.status_code == 413 and "DB_ALLOW_SUPPRESS_OPTIMISTIC_LOCKING" in response.text
         ):
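Note: post_batch gained a recursion_depth parameter so that an SRS batch answered with a 5xx is reposted: the task logs the request size, sleeps 30 seconds, and calls itself with recursion_depth + 1, raising TransformationRecordFailedError once the depth exceeds 4. The bounded-recursion shape as a standalone sketch; post is a hypothetical callable:

# Illustrative sketch of the bounded repost loop; post is hypothetical.
import time

def post_with_repost(post, batch, recursion_depth: int = 0, max_tries: int = 5):
    response = post(batch)
    if response.status_code >= 500:
        if recursion_depth >= max_tries - 1:
            raise RuntimeError(f"HTTP {response.status_code} persisted after {max_tries} tries")
        time.sleep(30)  # give the backend time to recover before reposting
        return post_with_repost(post, batch, recursion_depth + 1, max_tries)
    return response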
@@ -791,36 +775,50 @@ class BatchPoster(MigrationTaskBase):
         )

     def do_post(self, batch):
-
-
-
-
-
-
-
-
-
+        path = self.api_info["api_endpoint"]
+        url = self.folio_client.gateway_url + path
+        if self.api_info["object_name"] == "users":
+            payload = {self.api_info["object_name"]: list(batch), "totalRecords": len(batch)}
+        elif self.api_info["total_records"]:
+            payload = {"records": list(batch), "totalRecords": len(batch)}
+        else:
+            payload = {self.api_info["object_name"]: batch}
+        if self.http_client and not self.http_client.is_closed:
+            return self.http_client.post(
+                url,
+                json=payload,
+                headers=self.folio_client.okapi_headers,
+                params=self.query_params,
+            )
+        else:
+            return httpx.post(
                 url,
+                headers=self.okapi_headers,
                 json=payload,
                 params=self.query_params,
+                timeout=None,
             )

     def get_current_record_count_in_folio(self):
         if "query_endpoint" in self.api_info:
-            url = self.api_info[
+            url = f"{self.folio_client.gateway_url}{self.api_info['query_endpoint']}"
             query_params = {"query": "cql.allRecords=1", "limit": 0}
-
-            res = self.
-
-            except folioclient.FolioHTTPError as fhe:
-                logging.error(
-                    "Failed to get current record count. HTTP %s", fhe.response.status_code
+            if self.http_client and not self.http_client.is_closed:
+                res = self.http_client.get(
+                    url, headers=self.folio_client.okapi_headers, params=query_params
                 )
+            else:
+                res = httpx.get(url, headers=self.okapi_headers, params=query_params, timeout=None)
+            try:
+                res.raise_for_status()
+                return res.json()["totalRecords"]
+            except httpx.HTTPStatusError:
+                logging.error("Failed to get current record count. HTTP %s", res.status_code)
                 return 0
            except KeyError:
                logging.error(
                    "Failed to get current record count. "
-                    f"No 'totalRecords' in response: {json
+                    f"No 'totalRecords' in response: {res.json()}"
                )
                return 0
         else:
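Note: do_post shapes the request body from the api_info metadata: the user-import endpoint wants the records keyed by object name plus a totalRecords count, endpoints flagged total_records want a generic records array, and everything else gets a bare object-name wrapper. The same rule as a pure function:

# Illustrative sketch of the payload shaping driven by api_info flags.
def build_payload(api_info: dict, batch: list) -> dict:
    if api_info["object_name"] == "users":
        return {api_info["object_name"]: list(batch), "totalRecords": len(batch)}
    if api_info["total_records"]:
        return {"records": list(batch), "totalRecords": len(batch)}
    return {api_info["object_name"]: batch}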
@@ -852,6 +850,8 @@ class BatchPoster(MigrationTaskBase):
     def wrap_up(self):
         logging.info("Done. Wrapping up")
         self.extradata_writer.flush()
+        if self.task_configuration.object_type == "SRS":
+            self.commit_snapshot()
         if self.task_configuration.object_type != "Extradata":
             logging.info(
                 (
@@ -938,6 +938,69 @@ class BatchPoster(MigrationTaskBase):
             str(self.folder_structure.failed_recs_path),
         )

+    def create_snapshot(self):
+        snapshot = {
+            "jobExecutionId": self.snapshot_id,
+            "status": "PARSING_IN_PROGRESS",
+            "processingStartedDate": datetime.now(timezone.utc).isoformat(timespec="milliseconds"),
+        }
+        try:
+            url = f"{self.folio_client.gateway_url}/source-storage/snapshots"
+            if self.http_client and not self.http_client.is_closed:
+                res = self.http_client.post(
+                    url, json=snapshot, headers=self.folio_client.okapi_headers
+                )
+            else:
+                res = httpx.post(url, headers=self.okapi_headers, json=snapshot, timeout=None)
+            res.raise_for_status()
+            logging.info("Posted Snapshot to FOLIO: %s", json.dumps(snapshot, indent=4))
+            get_url = (
+                f"{self.folio_client.gateway_url}/source-storage/snapshots/{self.snapshot_id}"
+            )
+            got = False
+            while not got:
+                logging.info("Sleeping while waiting for the snapshot to get created")
+                time.sleep(5)
+                if self.http_client and not self.http_client.is_closed:
+                    res = self.http_client.get(get_url, headers=self.folio_client.okapi_headers)
+                else:
+                    res = httpx.get(get_url, headers=self.okapi_headers, timeout=None)
+                if res.status_code == 200:
+                    got = True
+                else:
+                    logging.info(res.status_code)
+        except httpx.HTTPStatusError as exc:
+            logging.exception("HTTP error occurred while posting the snapshot: %s", exc)
+            sys.exit(1)
+        except Exception as exc:
+            logging.exception("Could not post the snapshot: %s", exc)
+            sys.exit(1)
+
+    def commit_snapshot(self):
+        snapshot = {"jobExecutionId": self.snapshot_id, "status": "COMMITTED"}
+        try:
+            url = f"{self.folio_client.gateway_url}/source-storage/snapshots/{self.snapshot_id}"
+            if self.http_client and not self.http_client.is_closed:
+                res = self.http_client.put(
+                    url, json=snapshot, headers=self.folio_client.okapi_headers
+                )
+            else:
+                res = httpx.put(url, headers=self.okapi_headers, json=snapshot, timeout=None)
+            res.raise_for_status()
+            logging.info("Posted Committed snapshot to FOLIO: %s", json.dumps(snapshot, indent=4))
+        except httpx.HTTPStatusError as exc:
+            logging.exception("HTTP error occurred while posting the snapshot: %s", exc)
+            sys.exit(1)
+        except Exception as exc:
+            logging.exception(
+                "Could not commit snapshot with id %s. Post this to /source-storage/snapshots/%s:",
+                self.snapshot_id,
+                self.snapshot_id,
+                exc,
+            )
+            logging.info("%s", json.dumps(snapshot, indent=4))
+            sys.exit(1)
+

 def get_api_info(object_type: str, use_safe: bool = True):
     choices = {
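Note: together these two methods give SRS posting a full snapshot lifecycle: a snapshot is opened as PARSING_IN_PROGRESS before any record is posted, each record carries the snapshot's jobExecutionId as its snapshotId (see post_record_batch above), and the snapshot is flipped to COMMITTED at the end. A condensed sketch of the same round trip against the endpoints shown above:

# Illustrative sketch of the SRS snapshot lifecycle.
from datetime import datetime, timezone

import httpx

def open_snapshot(client: httpx.Client, gateway_url: str, snapshot_id: str, headers: dict) -> None:
    snapshot = {
        "jobExecutionId": snapshot_id,
        "status": "PARSING_IN_PROGRESS",
        "processingStartedDate": datetime.now(timezone.utc).isoformat(timespec="milliseconds"),
    }
    res = client.post(f"{gateway_url}/source-storage/snapshots", json=snapshot, headers=headers)
    res.raise_for_status()

def commit_snapshot(client: httpx.Client, gateway_url: str, snapshot_id: str, headers: dict) -> None:
    body = {"jobExecutionId": snapshot_id, "status": "COMMITTED"}
    res = client.put(f"{gateway_url}/source-storage/snapshots/{snapshot_id}", json=body, headers=headers)
    res.raise_for_status()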
@@ -999,6 +1062,22 @@ def get_api_info(object_type: str, use_safe: bool = True):
             "addSnapshotId": False,
             "supports_upsert": True,
         },
+        "Authorities": {
+            "object_name": "",
+            "api_endpoint": "/authority-storage/authorities",
+            "is_batch": False,
+            "total_records": False,
+            "addSnapshotId": False,
+            "supports_upsert": False,
+        },
+        "SRS": {
+            "object_name": "records",
+            "api_endpoint": "/source-storage/batch/records",
+            "is_batch": True,
+            "total_records": True,
+            "addSnapshotId": True,
+            "supports_upsert": False,
+        },
         "Users": {
             "object_name": "users",
             "api_endpoint": "/user-import",
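Note: with these two entries, adding an object type is mostly a matter of metadata: SRS goes through the batch path with a totalRecords count and a snapshotId stamp, while Authorities are posted one record at a time. Hypothetical usage of the module-level lookup, assuming it returns the matching choices entry:

# Illustrative usage; the exact return shape of get_api_info is assumed.
from folio_migration_tools.migration_tasks.batch_poster import get_api_info

srs = get_api_info("SRS")
print(srs["api_endpoint"], srs["is_batch"])    # /source-storage/batch/records True
print(get_api_info("Authorities")["is_batch"])  # False: one record per request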
@@ -1045,7 +1124,7 @@ def get_human_readable(size, precision=2):
     return "%.*f%s" % (precision, size, suffixes[suffix_index])


-def get_req_size(response:
+def get_req_size(response: httpx.Response):
     size = response.request.method
     size += str(response.request.url)
     size += "\r\n".join(f"{k}{v}" for k, v in response.request.headers.items())