folio-migration-tools 1.10.0b3-py3-none-any.whl → 1.10.0b6-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- folio_migration_tools/__main__.py +9 -0
- folio_migration_tools/folder_structure.py +0 -3
- folio_migration_tools/mapping_file_transformation/user_mapper.py +4 -0
- folio_migration_tools/marc_rules_transformation/conditions.py +0 -29
- folio_migration_tools/marc_rules_transformation/rules_mapper_base.py +0 -6
- folio_migration_tools/migration_tasks/batch_poster.py +216 -295
- folio_migration_tools/migration_tasks/migration_task_base.py +13 -5
- folio_migration_tools/migration_tasks/user_transformer.py +10 -0
- folio_migration_tools/translations/en.json +0 -7
- {folio_migration_tools-1.10.0b3.dist-info → folio_migration_tools-1.10.0b6.dist-info}/METADATA +2 -2
- {folio_migration_tools-1.10.0b3.dist-info → folio_migration_tools-1.10.0b6.dist-info}/RECORD +13 -15
- folio_migration_tools/marc_rules_transformation/rules_mapper_authorities.py +0 -242
- folio_migration_tools/migration_tasks/authority_transformer.py +0 -118
- {folio_migration_tools-1.10.0b3.dist-info → folio_migration_tools-1.10.0b6.dist-info}/WHEEL +0 -0
- {folio_migration_tools-1.10.0b3.dist-info → folio_migration_tools-1.10.0b6.dist-info}/entry_points.txt +0 -0
--- folio_migration_tools/migration_tasks/batch_poster.py (1.10.0b3)
+++ folio_migration_tools/migration_tasks/batch_poster.py (1.10.0b6)
@@ -4,14 +4,16 @@ import json
 import logging
 import re
 import sys
-import time
 import traceback
 from datetime import datetime, timezone
-from typing import Annotated, List, Optional
+from typing import TYPE_CHECKING, Annotated, List, Optional
 from uuid import uuid4
 
-import httpx
+import folioclient
 import i18n
+
+if TYPE_CHECKING:
+    from httpx import Response
 from folio_uuid.folio_namespaces import FOLIONamespaces
 from pydantic import Field
 
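The b6 import block drops the runtime `httpx` and `time` imports in favor of `folioclient`, keeping `httpx.Response` available only to type checkers for the `get_req_size` annotation at the end of the file. A minimal standalone sketch of the `TYPE_CHECKING` pattern (the `describe` function is illustrative, not part of the package):

```python
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Only imported while a type checker runs; absent from the runtime import graph.
    from httpx import Response


def describe(response: "Response") -> str:
    # The quoted annotation is resolved lazily, so httpx is only needed
    # when a real Response object is actually passed in.
    return f"{response.status_code} {response.reason_phrase}"
```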

@@ -73,7 +75,7 @@ class BatchPoster(MigrationTaskBase):
             description=(
                 "The type of object being migrated"
                 "Examples of possible values: "
-                "'Extradata', '
+                "'Extradata', 'Instances', 'Holdings', 'Items'"
             ),
         ),
     ]

@@ -245,90 +247,80 @@ class BatchPoster(MigrationTaskBase):
         self.failed_fields: set = set()
         self.num_failures = 0
         self.num_posted = 0
-        self.okapi_headers = self.folio_client.okapi_headers
-        self.http_client = None
         self.starting_record_count_in_folio: Optional[int] = None
         self.finished_record_count_in_folio: Optional[int] = None
 
     def do_work(self):  # noqa: C901
-        with
-        self.
-        ...
-                            self.post_extra_data(
-                                row, self.processed, failed_recs_file
-                            )
-                        elif not self.api_info["is_batch"]:
-                            self.post_single_records(
-                                row, self.processed, failed_recs_file
-                            )
-                        else:
-                            batch = self.post_record_batch(
-                                batch, failed_recs_file, row
-                            )
-                    except UnicodeDecodeError as unicode_error:
-                        self.handle_unicode_error(unicode_error, last_row)
-                    except TransformationProcessError as tpe:
-                        self.handle_generic_exception(
-                            tpe,
-                            last_row,
-                            batch,
-                            self.processed,
-                            failed_recs_file,
-                        )
-        ...
-                        self.handle_generic_exception(
-                            exception,
-                            last_row,
-                            batch,
-                            self.processed,
-                            failed_recs_file,
-                        )
-        ...
+        with open(
+            self.folder_structure.failed_recs_path, "w", encoding="utf-8"
+        ) as failed_recs_file:
+            self.get_starting_record_count()
+            try:
+                batch = []
+                for idx, file_def in enumerate(self.task_configuration.files):  # noqa: B007
+                    path = self.folder_structure.results_folder / file_def.file_name
+                    with open(path) as rows:
+                        logging.info("Running %s", path)
+                        last_row = ""
+                        for self.processed, row in enumerate(rows, start=1):
+                            last_row = row
+                            if row.strip():
+                                try:
+                                    if self.task_configuration.object_type == "Extradata":
+                                        self.post_extra_data(row, self.processed, failed_recs_file)
+                                    elif not self.api_info["is_batch"]:
+                                        self.post_single_records(
+                                            row, self.processed, failed_recs_file
+                                        )
+                                    else:
+                                        batch = self.post_record_batch(
+                                            batch, failed_recs_file, row
+                                        )
+                                except UnicodeDecodeError as unicode_error:
+                                    self.handle_unicode_error(unicode_error, last_row)
+                                except TransformationProcessError as tpe:
+                                    self.handle_generic_exception(
+                                        tpe,
+                                        last_row,
+                                        batch,
+                                        self.processed,
+                                        failed_recs_file,
+                                    )
+                                    batch = []
+                                    raise
+                                except TransformationRecordFailedError as exception:
+                                    self.handle_generic_exception(
+                                        exception,
+                                        last_row,
+                                        batch,
+                                        self.processed,
+                                        failed_recs_file,
+                                    )
+                                    batch = []
+            except (FileNotFoundError, PermissionError) as ose:
+                logging.error("Error reading file: %s", ose)
+
+            except Exception as ee:
+                if "idx" in locals() and self.task_configuration.files[idx:]:
+                    for file_def in self.task_configuration.files[idx:]:
+                        path = self.folder_structure.results_folder / file_def.file_name
+                        try:
+                            with open(path, "r") as failed_file:
+                                failed_file.seek(self.processed)
+                                failed_recs_file.write(failed_file.read())
+                            self.processed = 0
+                        except (FileNotFoundError, PermissionError) as ose:
+                            logging.error("Error reading file: %s", ose)
+                raise ee
+            finally:
+                if self.task_configuration.object_type != "Extradata" and any(batch):
+                    try:
+                        self.post_batch(batch, failed_recs_file, self.processed)
+                    except Exception as exception:
+                        self.handle_generic_exception(
+                            exception, last_row, batch, self.processed, failed_recs_file
+                        )
+                logging.info("Done posting %s records. ", self.processed)
 
     @staticmethod
     def set_consortium_source(json_rec):
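The restructured `do_work` accumulates rows into `batch` and leans on the `finally` block to flush whatever partial batch remains when the loop exits, whether normally or through an exception. A minimal sketch of that batch-and-flush shape, with illustrative names:

```python
def process_rows(rows, post_batch, batch_size=3):
    # Accumulate rows, post full batches, and flush the remainder in
    # `finally` so a partial batch is never silently dropped.
    batch = []
    try:
        for row in rows:
            batch.append(row)
            if len(batch) >= batch_size:
                post_batch(batch)
                batch = []
    finally:
        if any(batch):  # same truthiness check as the diff above
            post_batch(batch)


process_rows(["a", "b", "c", "d"], lambda b: print("posted", b))
# posted ['a', 'b', 'c']
# posted ['d']
```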

@@ -366,26 +358,26 @@ class BatchPoster(MigrationTaskBase):
         fetch_batch_size = 90
         fetch_tasks = []
         existing_records = {}
-        ...
-            },
-        )
-
-        for response in responses:
-            self.collect_existing_records_for_upsert(object_type, response, existing_records)
+
+        for i in range(0, len(batch), fetch_batch_size):
+            batch_slice = batch[i : i + fetch_batch_size]
+            fetch_tasks.append(
+                self.get_with_retry(
+                    query_api,
+                    params={
+                        "query": (
+                            f"id==({' OR '.join([r['id'] for r in batch_slice if 'id' in r])})"
+                        ),
+                        "limit": fetch_batch_size,
+                    },
+                )
+            )
+
+        responses = await asyncio.gather(*fetch_tasks)
+
+        for response in responses:
+            self.collect_existing_records_for_upsert(object_type, response, existing_records)
 
         for record in batch:
             if record["id"] in existing_records:
                 self.prepare_record_for_upsert(record, existing_records[record["id"]])
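For upserts, existing records are now fetched in slices of 90 ids per CQL query, and the slice requests run concurrently under `asyncio.gather`. A standalone sketch of the chunk-query-gather pattern; `fetch_one` stands in for the `get_with_retry` call and is an assumption:

```python
import asyncio

FETCH_BATCH_SIZE = 90  # ids per CQL query, matching the diff above


def build_id_query(records: list) -> str:
    # CQL "id==(a OR b OR c)" matching any record id in the slice.
    ids = [r["id"] for r in records if "id" in r]
    return f"id==({' OR '.join(ids)})"


async def fetch_existing(fetch_one, batch: list) -> list:
    # fetch_one: an async callable taking a CQL query string (assumed).
    tasks = [
        fetch_one(build_id_query(batch[i : i + FETCH_BATCH_SIZE]))
        for i in range(0, len(batch), FETCH_BATCH_SIZE)
    ]
    return await asyncio.gather(*tasks)
```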

@@ -421,18 +413,18 @@ class BatchPoster(MigrationTaskBase):
 
     @staticmethod
     def collect_existing_records_for_upsert(
-        object_type: str,
+        object_type: str, response_json: dict, existing_records: dict
     ):
-        ...
+        """
+        Collects existing records from API response into existing_records dict.
+
+        Args:
+            object_type: The key in response containing the records array
+            response_json: Parsed JSON response from API
+            existing_records: Dict to populate with {record_id: record_data}
+        """
+        for record in response_json.get(object_type, []):
+            existing_records[record["id"]] = record
 
     def handle_upsert_for_statistical_codes(self, updates: dict, keep_existing: dict):
         if not self.task_configuration.preserve_statistical_codes:
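The rewritten helper simply re-keys the records in a response payload by id. For example, assuming a storage response that nests its records under a key named after the object type (`holdingsRecords` and the ids below are illustrative):

```python
existing_records = {}
response_json = {
    "holdingsRecords": [
        {"id": "11111111-1111-1111-1111-111111111111", "hrid": "ho1"},
        {"id": "22222222-2222-2222-2222-222222222222", "hrid": "ho2"},
    ],
    "totalRecords": 2,
}
BatchPoster.collect_existing_records_for_upsert(
    "holdingsRecords", response_json, existing_records
)
# existing_records now maps each id to its full record, ready for the
# prepare_record_for_upsert() lookups shown earlier.
```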

@@ -510,31 +502,66 @@ class BatchPoster(MigrationTaskBase):
         updates.update(keep_new)
         new_record.update(updates)
 
-    async def get_with_retry(self,
+    async def get_with_retry(self, url: str, params=None):
+        """
+        Wrapper around folio_get_async with selective retry logic.
+
+        Retries on:
+        - Connection errors (FolioConnectionError): Always retry
+        - Server errors (5xx): Transient failures
+        - Rate limiting (429): Too many requests
+
+        Does NOT retry on:
+        - Client errors (4xx except 429): Bad request, won't succeed on retry
+        """
         if params is None:
             params = {}
         retries = 3
+
         for attempt in range(retries):
             try:
-                ...
-                return response
-            except httpx.HTTPError as e:
+                return await self.folio_client.folio_get_async(url, query_params=params)
+
+            except folioclient.FolioConnectionError as e:
+                # Network/connection errors - always retry
                 if attempt < retries - 1:
-                    ...
+                    wait_time = 2**attempt
+                    logging.warning(
+                        f"Connection error, retrying in {wait_time}s "
+                        f"(attempt {attempt + 1}/{retries}): {e}"
+                    )
+                    await asyncio.sleep(wait_time)
+                else:
+                    logging.error(f"Connection failed after {retries} attempts: {e}")
+                    raise
+
+            except folioclient.FolioHTTPError as e:
+                # HTTP errors - selective retry based on status code
+                status_code = e.response.status_code
+                should_retry = status_code >= 500 or status_code == 429
+
+                if should_retry and attempt < retries - 1:
+                    # Longer wait for rate limiting
+                    wait_time = 5 if status_code == 429 else 2**attempt
+                    logging.warning(
+                        f"HTTP {status_code} error, retrying in {wait_time}s "
+                        f"(attempt {attempt + 1}/{retries}): {e}"
+                    )
+                    await asyncio.sleep(wait_time)
                 else:
-                    ...
+                    # Either not retryable or out of attempts
+                    if should_retry:
+                        logging.error(
+                            f"HTTP {status_code} error persisted after {retries} attempts: {e}"
+                        )
+                    else:
+                        logging.error(f"HTTP {status_code} error (not retryable): {e}")
                     raise
 
     def post_record_batch(self, batch, failed_recs_file, row):
         json_rec = json.loads(row.split("\t")[-1])
         if self.task_configuration.object_type == "ShadowInstances":
             self.set_consortium_source(json_rec)
-        if self.task_configuration.object_type == "SRS":
-            json_rec["snapshotId"] = self.snapshot_id
         if self.processed == 1:
             logging.info(json.dumps(json_rec, indent=True))
         batch.append(json_rec)
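With `retries = 3` and `wait_time = 2**attempt`, a persistent connection or 5xx failure waits 1 s, then 2 s, and the third attempt raises; a 429 always waits a flat 5 s, and other 4xx responses raise immediately. The retry decision in isolation (a sketch, not the package's API):

```python
def retry_wait(status_code: int, attempt: int, retries: int = 3):
    """Seconds to wait before retrying, or None to raise immediately."""
    should_retry = status_code >= 500 or status_code == 429
    if should_retry and attempt < retries - 1:
        return 5 if status_code == 429 else 2**attempt  # longer wait when rate limited
    return None


assert retry_wait(500, 0) == 1      # first retry after 1 s
assert retry_wait(500, 1) == 2      # second retry after 2 s
assert retry_wait(500, 2) is None   # out of attempts: raise
assert retry_wait(429, 0) == 5      # rate limited: flat 5 s
assert retry_wait(404, 0) is None   # client error: never retried
```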

@@ -545,22 +572,29 @@ class BatchPoster(MigrationTaskBase):
 
     def post_extra_data(self, row: str, num_records: int, failed_recs_file):
         (object_name, data) = row.split("\t")
-        ...
-        url = f"{self.folio_client.gateway_url}/{endpoint}"
+        url = self.get_extradata_endpoint(self.task_configuration, object_name, data)
         body = data
-        ...
+        try:
+            _ = self.folio_client.folio_post(url, payload=body)
             self.num_posted += 1
-        ...
+        except folioclient.FolioHTTPError as fhe:
+            if fhe.response.status_code == 422:
+                self.num_failures += 1
+                error_msg = json.loads(fhe.response.text)["errors"][0]["message"]
+                logging.error(
+                    "Row %s\tHTTP %s\t %s", num_records, fhe.response.status_code, error_msg
+                )
+                if (
+                    "id value already exists"
+                    not in json.loads(fhe.response.text)["errors"][0]["message"]
+                ):
+                    failed_recs_file.write(row)
+            else:
+                self.num_failures += 1
+                logging.error(
+                    "Row %s\tHTTP %s\t%s", num_records, fhe.response.status_code, fhe.response.text
+                )
                 failed_recs_file.write(row)
-        else:
-            self.num_failures += 1
-            logging.error("Row %s\tHTTP %s\t%s", num_records, response.status_code, response.text)
-            failed_recs_file.write(row)
         if num_records % 50 == 0:
             logging.info(
                 "%s records posted successfully. %s failed",
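Both posting paths now read the FOLIO error body from `folioclient.FolioHTTPError` instead of inspecting a bare response: a 422 whose message reports a duplicate id is counted and logged but deliberately not written back to the failed-records file, since re-posting it would fail the same way. A sketch of the message check against an illustrative FOLIO validation payload:

```python
import json

# Shape of a typical FOLIO 422 validation body (values are made up).
response_text = json.dumps(
    {"errors": [{"message": "id value already exists in table holdings_record: 1234"}]}
)

error_msg = json.loads(response_text)["errors"][0]["message"]
should_requeue = "id value already exists" not in error_msg
assert not should_requeue  # duplicate ids are logged, not re-queued
```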

@@ -600,37 +634,37 @@ class BatchPoster(MigrationTaskBase):
     def post_single_records(self, row: str, num_records: int, failed_recs_file):
         if self.api_info["is_batch"]:
             raise TypeError("This record type supports batch processing, use post_batch method")
-        ...
-        if response.status_code == 201:
+        url = self.api_info.get("api_endpoint")
+        try:
+            _ = self.folio_client.folio_post(url, payload=row)
             self.num_posted += 1
-        ...
+        except folioclient.FolioHTTPError as fhe:
+            if fhe.response.status_code == 422:
+                self.num_failures += 1
+                error_msg = json.loads(fhe.response.text)["errors"][0]["message"]
+                logging.error(
+                    "Row %s\tHTTP %s\t %s", num_records, fhe.response.status_code, error_msg
+                )
+                if (
+                    "id value already exists"
+                    not in json.loads(fhe.response.text)["errors"][0]["message"]
+                ):
+                    failed_recs_file.write(row)
+            else:
+                self.num_failures += 1
+                logging.error(
+                    "Row %s\tHTTP %s\t%s",
+                    num_records,
+                    fhe.response.status_code,
+                    fhe.response.text,
+                )
             failed_recs_file.write(row)
-        ...
-            "%s records posted successfully. %s failed",
-            self.num_posted,
-            self.num_failures,
-        )
-
-    def post_objects(self, url, body):
-        if self.http_client and not self.http_client.is_closed:
-            return self.http_client.post(
-                url, data=body.encode("utf-8"), headers=self.folio_client.okapi_headers
-            )
-        else:
-            return httpx.post(
-                url, headers=self.okapi_headers, data=body.encode("utf-8"), timeout=None
-            )
+        if num_records % 50 == 0:
+            logging.info(
+                "%s records posted successfully. %s failed",
+                self.num_posted,
+                self.num_failures,
+            )
 
     def handle_generic_exception(self, exception, last_row, batch, num_records, failed_recs_file):
         logging.error("%s", exception)

@@ -662,7 +696,7 @@ class BatchPoster(MigrationTaskBase):
         traceback.logging.info_exc()  # type: ignore
         logging.info("=======================")
 
-    def post_batch(self, batch, failed_recs_file, num_records
+    def post_batch(self, batch, failed_recs_file, num_records):
         if self.query_params.get("upsert", False) and self.api_info.get("query_endpoint", ""):
             self.set_version(batch, self.api_info["query_endpoint"], self.api_info["object_name"])
         response = self.do_post(batch)

@@ -733,24 +767,6 @@ class BatchPoster(MigrationTaskBase):
             # Likely a json parsing error
             logging.error(response.text)
             raise TransformationProcessError("", "HTTP 400. Something is wrong. Quitting")
-        elif self.task_configuration.object_type == "SRS" and response.status_code >= 500:
-            logging.info(
-                "Post failed. Size: %s Waiting 30s until reposting. Number of tries: %s of 5",
-                get_req_size(response),
-                recursion_depth,
-            )
-            logging.info(response.text)
-            time.sleep(30)
-            if recursion_depth > 4:
-                raise TransformationRecordFailedError(
-                    "",
-                    f"HTTP {response.status_code}\t"
-                    f"Request size: {get_req_size(response)}"
-                    f"{datetime.now(timezone.utc).isoformat()}\n",
-                    response.text,
-                )
-            else:
-                self.post_batch(batch, failed_recs_file, num_records, recursion_depth + 1)
         elif (
             response.status_code == 413 and "DB_ALLOW_SUPPRESS_OPTIMISTIC_LOCKING" in response.text
         ):

@@ -775,50 +791,36 @@ class BatchPoster(MigrationTaskBase):
         )
 
     def do_post(self, batch):
-        ...
-            return self.http_client.post(
-                url,
-                json=payload,
-                headers=self.folio_client.okapi_headers,
-                params=self.query_params,
-            )
-        else:
-            return httpx.post(
+        with self.folio_client.get_folio_http_client() as http_client:
+            url = self.api_info["api_endpoint"]
+            if self.api_info["object_name"] == "users":
+                payload = {self.api_info["object_name"]: list(batch), "totalRecords": len(batch)}
+            elif self.api_info["total_records"]:
+                payload = {"records": list(batch), "totalRecords": len(batch)}
+            else:
+                payload = {self.api_info["object_name"]: batch}
+            return http_client.post(
                 url,
-                headers=self.okapi_headers,
                 json=payload,
                 params=self.query_params,
-                timeout=None,
             )
 
     def get_current_record_count_in_folio(self):
         if "query_endpoint" in self.api_info:
-            url =
+            url = self.api_info["query_endpoint"]
             query_params = {"query": "cql.allRecords=1", "limit": 0}
-            if self.http_client and not self.http_client.is_closed:
-                res = self.http_client.get(
-                    url, headers=self.folio_client.okapi_headers, params=query_params
-                )
-            else:
-                res = httpx.get(url, headers=self.okapi_headers, params=query_params, timeout=None)
             try:
-                res.
-                return res
-            except
-                logging.error(
+                res = self.folio_client.folio_get(url, query_params=query_params)
+                return res["totalRecords"]
+            except folioclient.FolioHTTPError as fhe:
+                logging.error(
+                    "Failed to get current record count. HTTP %s", fhe.response.status_code
+                )
                 return 0
             except KeyError:
                 logging.error(
                     "Failed to get current record count. "
-                    f"No 'totalRecords' in response: {
+                    f"No 'totalRecords' in response: {json.dumps(res, indent=2)}"
                 )
                 return 0
         else:
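`do_post` now borrows a client from `folio_client.get_folio_http_client()` and shapes its payload three ways: `/user-import` expects `{"users": [...], "totalRecords": n}`, batch APIs flagged with `total_records` expect `{"records": [...], "totalRecords": n}`, and everything else wraps the batch under its object name. The branching in isolation (the `items` api_info dict is hypothetical):

```python
def build_payload(api_info: dict, batch: list) -> dict:
    # Mirrors the three branches of do_post above.
    if api_info["object_name"] == "users":
        return {api_info["object_name"]: list(batch), "totalRecords": len(batch)}
    if api_info["total_records"]:
        return {"records": list(batch), "totalRecords": len(batch)}
    return {api_info["object_name"]: batch}


users_api = {"object_name": "users", "total_records": False}
items_api = {"object_name": "items", "total_records": False}

assert build_payload(users_api, [{"username": "u1"}]) == {
    "users": [{"username": "u1"}],
    "totalRecords": 1,
}
assert build_payload(items_api, [{"id": "i1"}]) == {"items": [{"id": "i1"}]}
```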

@@ -850,8 +852,6 @@ class BatchPoster(MigrationTaskBase):
     def wrap_up(self):
         logging.info("Done. Wrapping up")
         self.extradata_writer.flush()
-        if self.task_configuration.object_type == "SRS":
-            self.commit_snapshot()
         if self.task_configuration.object_type != "Extradata":
             logging.info(
                 (

@@ -938,69 +938,6 @@ class BatchPoster(MigrationTaskBase):
             str(self.folder_structure.failed_recs_path),
         )
 
-    def create_snapshot(self):
-        snapshot = {
-            "jobExecutionId": self.snapshot_id,
-            "status": "PARSING_IN_PROGRESS",
-            "processingStartedDate": datetime.now(timezone.utc).isoformat(timespec="milliseconds"),
-        }
-        try:
-            url = f"{self.folio_client.gateway_url}/source-storage/snapshots"
-            if self.http_client and not self.http_client.is_closed:
-                res = self.http_client.post(
-                    url, json=snapshot, headers=self.folio_client.okapi_headers
-                )
-            else:
-                res = httpx.post(url, headers=self.okapi_headers, json=snapshot, timeout=None)
-            res.raise_for_status()
-            logging.info("Posted Snapshot to FOLIO: %s", json.dumps(snapshot, indent=4))
-            get_url = (
-                f"{self.folio_client.gateway_url}/source-storage/snapshots/{self.snapshot_id}"
-            )
-            got = False
-            while not got:
-                logging.info("Sleeping while waiting for the snapshot to get created")
-                time.sleep(5)
-                if self.http_client and not self.http_client.is_closed:
-                    res = self.http_client.get(get_url, headers=self.folio_client.okapi_headers)
-                else:
-                    res = httpx.get(get_url, headers=self.okapi_headers, timeout=None)
-                if res.status_code == 200:
-                    got = True
-                else:
-                    logging.info(res.status_code)
-        except httpx.HTTPStatusError as exc:
-            logging.exception("HTTP error occurred while posting the snapshot: %s", exc)
-            sys.exit(1)
-        except Exception as exc:
-            logging.exception("Could not post the snapshot: %s", exc)
-            sys.exit(1)
-
-    def commit_snapshot(self):
-        snapshot = {"jobExecutionId": self.snapshot_id, "status": "COMMITTED"}
-        try:
-            url = f"{self.folio_client.gateway_url}/source-storage/snapshots/{self.snapshot_id}"
-            if self.http_client and not self.http_client.is_closed:
-                res = self.http_client.put(
-                    url, json=snapshot, headers=self.folio_client.okapi_headers
-                )
-            else:
-                res = httpx.put(url, headers=self.okapi_headers, json=snapshot, timeout=None)
-            res.raise_for_status()
-            logging.info("Posted Committed snapshot to FOLIO: %s", json.dumps(snapshot, indent=4))
-        except httpx.HTTPStatusError as exc:
-            logging.exception("HTTP error occurred while posting the snapshot: %s", exc)
-            sys.exit(1)
-        except Exception as exc:
-            logging.exception(
-                "Could not commit snapshot with id %s. Post this to /source-storage/snapshots/%s:",
-                self.snapshot_id,
-                self.snapshot_id,
-                exc,
-            )
-            logging.info("%s", json.dumps(snapshot, indent=4))
-            sys.exit(1)
-
 
 def get_api_info(object_type: str, use_safe: bool = True):
     choices = {

@@ -1062,22 +999,6 @@ def get_api_info(object_type: str, use_safe: bool = True):
             "addSnapshotId": False,
             "supports_upsert": True,
         },
-        "Authorities": {
-            "object_name": "",
-            "api_endpoint": "/authority-storage/authorities",
-            "is_batch": False,
-            "total_records": False,
-            "addSnapshotId": False,
-            "supports_upsert": False,
-        },
-        "SRS": {
-            "object_name": "records",
-            "api_endpoint": "/source-storage/batch/records",
-            "is_batch": True,
-            "total_records": True,
-            "addSnapshotId": True,
-            "supports_upsert": False,
-        },
         "Users": {
             "object_name": "users",
             "api_endpoint": "/user-import",

@@ -1124,7 +1045,7 @@ def get_human_readable(size, precision=2):
     return "%.*f%s" % (precision, size, suffixes[suffix_index])
 
 
-def get_req_size(response:
+def get_req_size(response: "Response"):
     size = response.request.method
     size += str(response.request.url)
     size += "\r\n".join(f"{k}{v}" for k, v in response.request.headers.items())