folio-data-import 0.2.8rc12__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of folio-data-import might be problematic.
- {folio_data_import-0.2.8rc12 → folio_data_import-0.3.0}/PKG-INFO +2 -3
- {folio_data_import-0.2.8rc12 → folio_data_import-0.3.0}/pyproject.toml +2 -3
- {folio_data_import-0.2.8rc12 → folio_data_import-0.3.0}/src/folio_data_import/MARCDataImport.py +123 -133
- {folio_data_import-0.2.8rc12 → folio_data_import-0.3.0}/src/folio_data_import/UserImport.py +11 -11
- folio_data_import-0.3.0/src/folio_data_import/custom_exceptions.py +17 -0
- folio_data_import-0.3.0/src/folio_data_import/marc_preprocessors/_preprocessors.py +484 -0
- folio_data_import-0.2.8rc12/src/folio_data_import/marc_preprocessors/_preprocessors.py +0 -333
- {folio_data_import-0.2.8rc12 → folio_data_import-0.3.0}/LICENSE +0 -0
- {folio_data_import-0.2.8rc12 → folio_data_import-0.3.0}/README.md +0 -0
- {folio_data_import-0.2.8rc12 → folio_data_import-0.3.0}/src/folio_data_import/__init__.py +0 -0
- {folio_data_import-0.2.8rc12 → folio_data_import-0.3.0}/src/folio_data_import/__main__.py +0 -0
- {folio_data_import-0.2.8rc12 → folio_data_import-0.3.0}/src/folio_data_import/marc_preprocessors/__init__.py +0 -0
{folio_data_import-0.2.8rc12 → folio_data_import-0.3.0}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: folio_data_import
-Version: 0.2.8rc12
+Version: 0.3.0
 Summary: A python module to interact with the data importing capabilities of the open-source FOLIO ILS
 License: MIT
 Author: Brooks Travis
@@ -19,8 +19,7 @@ Requires-Dist: flake8-black (>=0.3.6,<0.4.0)
 Requires-Dist: flake8-bugbear (>=24.8.19,<25.0.0)
 Requires-Dist: flake8-docstrings (>=1.7.0,<2.0.0)
 Requires-Dist: flake8-isort (>=6.1.1,<7.0.0)
-Requires-Dist: folioclient (>=0.
-Requires-Dist: httpx (>=0.27.2,<0.28.0)
+Requires-Dist: folioclient (>=0.70.1,<0.71.0)
 Requires-Dist: inquirer (>=3.4.0,<4.0.0)
 Requires-Dist: pyhumps (>=3.8.0,<4.0.0)
 Requires-Dist: pymarc (>=5.2.2,<6.0.0)
{folio_data_import-0.2.8rc12 → folio_data_import-0.3.0}/pyproject.toml
RENAMED
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "folio_data_import"
-version = "0.2.8rc12"
+version = "0.3.0"
 description = "A python module to interact with the data importing capabilities of the open-source FOLIO ILS"
 authors = ["Brooks Travis <brooks.travis@gmail.com>"]
 license = "MIT"
@@ -14,8 +14,7 @@ folio-user-import = "folio_data_import.UserImport:sync_main"
 
 [tool.poetry.dependencies]
 python = "^3.9"
-folioclient = "^0.
-httpx = "^0.27.2"
+folioclient = "^0.70.1"
 pymarc = "^5.2.2"
 pyhumps = "^3.8.0"
 inquirer = "^3.4.0"
{folio_data_import-0.2.8rc12 → folio_data_import-0.3.0}/src/folio_data_import/MARCDataImport.py
RENAMED
@@ -2,8 +2,8 @@ import argparse
 import asyncio
 import datetime
 import glob
-import importlib
 import io
+import json
 import logging
 import math
 import os
@@ -15,7 +15,7 @@ from functools import cached_property
 from getpass import getpass
 from pathlib import Path
 from time import sleep
-from typing import List, Union
+from typing import Any, BinaryIO, Callable, Dict, List, Union
 
 import folioclient
 import httpx
@@ -25,6 +25,9 @@ import tabulate
 from humps import decamelize
 from tqdm import tqdm
 
+from folio_data_import.custom_exceptions import FolioDataImportBatchError
+from folio_data_import.marc_preprocessors._preprocessors import MARCPreprocessor
+
 try:
     datetime_utc = datetime.UTC
 except AttributeError:
@@ -63,7 +66,6 @@ class MARCImportJob:
         import_profile_name (str): The name of the data import job profile to use.
         batch_size (int): The number of source records to include in a record batch (default=10).
         batch_delay (float): The number of seconds to wait between record batches (default=0).
-        consolidate (bool): Consolidate files into a single job. Default is one job for each file.
         no_progress (bool): Disable progress bars (eg. for running in a CI environment).
     """
 
@@ -75,7 +77,6 @@ class MARCImportJob:
     http_client: httpx.Client
     current_file: List[Path]
    record_batch: List[dict] = []
-    error_records: int = 0
    last_current: int = 0
    total_records_sent: int = 0
    finished: bool = False
@@ -92,18 +93,17 @@ class MARCImportJob:
         import_profile_name: str,
         batch_size=10,
         batch_delay=0,
-        marc_record_preprocessor=
-
+        marc_record_preprocessor: Union[List[Callable], str]=[],
+        preprocessor_args: Dict[str,Dict]={},
         no_progress=False,
         let_summary_fail=False,
         split_files=False,
         split_size=1000,
+        split_offset=0,
     ) -> None:
-        self.consolidate_files = consolidate
         self.split_files = split_files
         self.split_size = split_size
-
-            raise ValueError("Cannot consolidate and split files at the same time.")
+        self.split_offset = split_offset
         self.no_progress = no_progress
         self.let_summary_fail = let_summary_fail
         self.folio_client: folioclient.FolioClient = folio_client
@@ -112,16 +112,14 @@ class MARCImportJob:
         self.batch_size = batch_size
         self.batch_delay = batch_delay
         self.current_retry_timeout = None
-        self.marc_record_preprocessor = marc_record_preprocessor
+        self.marc_record_preprocessor: MARCPreprocessor = MARCPreprocessor(marc_record_preprocessor, **preprocessor_args)
 
     async def do_work(self) -> None:
         """
         Performs the necessary work for data import.
 
         This method initializes an HTTP client, files to store records that fail to send,
-        and calls
-        it imports all the files specified in `import_files` as a single batch. Otherwise,
-        it imports each file as a separate import job.
+        and calls the appropriate method to import MARC files based on the configuration.
 
         Returns:
             None
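
For orientation, here is a minimal sketch of how the reworked constructor might be called with the new 0.3.0 keyword arguments; the positional arguments and the preprocessor name shown are assumptions for illustration, not taken from this diff.

    # Hypothetical usage sketch; positional arguments and the preprocessor name are assumed.
    import asyncio
    from pathlib import Path

    import folioclient

    from folio_data_import.MARCDataImport import MARCImportJob

    folio = folioclient.FolioClient(
        "https://folio-snapshot-okapi.dev.folio.org", "diku", "diku_admin", "admin"
    )
    job = MARCImportJob(
        folio,
        [Path("records.mrc")],                          # files to import (assumed parameter)
        "Default - Create instance and SRS MARC Bib",   # import_profile_name (example value)
        batch_size=10,
        batch_delay=0,
        marc_record_preprocessor="clean_empty_fields",  # illustrative name only
        preprocessor_args={},          # dict of dicts, see --preprocessor-config below
        split_files=True,
        split_size=1000,
        split_offset=0,                # new in 0.3.0: number of already-imported parts to skip
    )
    asyncio.run(job.do_work())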
@@ -146,27 +144,33 @@ class MARCImportJob:
             self.failed_batches_file = failed_batches
             logger.info(f"Writing failed batches to {self.failed_batches_file.name}")
             self.http_client = http_client
-            if self.
-
-                await self.import_marc_file()
-            elif self.split_files:
-                for file in self.import_files:
-                    with open(file, "rb") as f:
-                        file_length = await self.read_total_records([f])
-                    expected_batches = math.ceil(file_length /self.split_size)
-                    logger.info(f"{file.name} contains {file_length} records. Splitting into {expected_batches} {self.split_size} record batches.")
-                    zero_pad_parts = len(str(expected_batches)) if expected_batches > 1 else 2
-                    for idx, batch in enumerate(self.split_marc_file(file, self.split_size), start=1):
-                        batch.name = f"{file.name}_part{idx:0{zero_pad_parts}}"
-                        self.current_file = [batch]
-                        await self.import_marc_file()
-                    self.move_file_to_complete(file)
+            if self.split_files:
+                await self.process_split_files()
             else:
                 for file in self.import_files:
                     self.current_file = [file]
                     await self.import_marc_file()
             await self.wrap_up()
 
+    async def process_split_files(self):
+        """
+        Process the import of files in smaller batches.
+        This method is called when `split_files` is set to True.
+        It splits each file into smaller chunks and processes them one by one.
+        """
+        for file in self.import_files:
+            with open(file, "rb") as f:
+                file_length = await self.read_total_records([f])
+            expected_batches = math.ceil(file_length /self.split_size)
+            logger.info(f"{file.name} contains {file_length} records. Splitting into {expected_batches} {self.split_size} record batches.")
+            zero_pad_parts = len(str(expected_batches)) if expected_batches > 1 else 2
+            for idx, batch in enumerate(self.split_marc_file(file, self.split_size), start=1):
+                if idx > self.split_offset:
+                    batch.name = f"{file.name} (Part {idx:0{zero_pad_parts}})"
+                    self.current_file = [batch]
+                    await self.import_marc_file()
+            self.move_file_to_complete(file)
+
     async def wrap_up(self) -> None:
         """
         Wraps up the data import process.
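
A standalone sketch of the split/offset arithmetic used above: parts are numbered from 1, and a part is imported only when its index is greater than split_offset, which is what allows a split import to be resumed mid-file. Example values only.

    # Standalone illustration of the part numbering and skip logic (example values).
    import math

    file_length = 3500   # records in the file (example)
    split_size = 1000
    split_offset = 2     # parts 1 and 2 were already imported in an earlier run

    expected_batches = math.ceil(file_length / split_size)  # 4 parts
    zero_pad_parts = len(str(expected_batches)) if expected_batches > 1 else 2

    for idx in range(1, expected_batches + 1):
        if idx > split_offset:
            print(f"import part {idx:0{zero_pad_parts}} of {expected_batches}")
        else:
            print(f"skip part {idx:0{zero_pad_parts}} (already imported)")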
@@ -214,7 +218,7 @@ class MARCImportJob:
             )
             self.current_retry_timeout = None
         except (httpx.ConnectTimeout, httpx.ReadTimeout, httpx.HTTPStatusError) as e:
-            if not hasattr(e, "response") or e.response.status_code in [502, 504]:
+            if not hasattr(e, "response") or e.response.status_code in [502, 504, 401]:
                 error_text = e.response.text if hasattr(e, "response") else str(e)
                 logger.warning(f"SERVER ERROR fetching job status: {error_text}. Retrying.")
                 sleep(0.25)
@@ -276,7 +280,7 @@ class MARCImportJob:
         """
         try:
             create_job = self.http_client.post(
-                self.folio_client.
+                self.folio_client.gateway_url + "/change-manager/jobExecutions",
                 headers=self.folio_client.okapi_headers,
                 json={"sourceType": "ONLINE", "userId": self.folio_client.current_user},
             )
@@ -325,7 +329,7 @@ class MARCImportJob:
             The response from the HTTP request to set the job profile.
         """
         set_job_profile = self.http_client.put(
-            self.folio_client.
+            self.folio_client.gateway_url
             + "/change-manager/jobExecutions/"
             + self.job_id
             + "/jobProfile",
@@ -350,7 +354,7 @@ class MARCImportJob:
             raise e
 
     @staticmethod
-    async def read_total_records(files) -> int:
+    async def read_total_records(files: List[BinaryIO]) -> int:
         """
         Reads the total number of records from the given files.
 
@@ -379,17 +383,15 @@ class MARCImportJob:
         """
         try:
             post_batch = self.http_client.post(
-                self.folio_client.
+                self.folio_client.gateway_url
                 + f"/change-manager/jobExecutions/{self.job_id}/records",
                 headers=self.folio_client.okapi_headers,
                 json=batch_payload,
             )
-            # if batch_payload["recordsMetadata"]["last"]:
-            #     logger.log(
-            #         25,
-            #         f"Sending last batch of {batch_payload['recordsMetadata']['total']} records.",
-            #     )
         except (httpx.ConnectTimeout, httpx.ReadTimeout):
+            logger.warning(
+                f"CONNECTION ERROR posting batch {batch_payload['id']}. Retrying..."
+            )
             sleep(0.25)
             return await self.process_record_batch(batch_payload)
         try:
@@ -397,20 +399,21 @@ class MARCImportJob:
             self.total_records_sent += len(self.record_batch)
             self.record_batch = []
             self.pbar_sent.update(len(batch_payload["initialRecords"]))
-        except
+        except httpx.HTTPStatusError as e:
             if (
-
-            ):  # TODO:
+                e.response.status_code in [500, 400, 422]
+            ):  # TODO: Update once we no longer have to support < Sunflower to just be 400
                 self.total_records_sent += len(self.record_batch)
                 self.record_batch = []
                 self.pbar_sent.update(len(batch_payload["initialRecords"]))
             else:
-                logger.error("Error posting batch: " + str(e))
                 for record in self.record_batch:
                     self.failed_batches_file.write(record)
-
-
-
+                raise FolioDataImportBatchError(
+                    batch_payload['id'],
+                    f"{e}\n{e.response.text}",
+                    e
+                )
         await self.get_job_status()
         sleep(self.batch_delay)
 
@@ -439,16 +442,12 @@ class MARCImportJob:
                     await self.create_batch_payload(
                         counter,
                         total_records,
-
-                        == (total_records - self.error_records),
+                        counter == total_records,
                     ),
                 )
                 sleep(0.25)
             if record:
-
-                record = await self.apply_marc_record_preprocessing(
-                    record, self.marc_record_preprocessor
-                )
+                record = self.marc_record_preprocessor.do_work(record)
                 self.record_batch.append(record.as_marc())
                 counter += 1
             else:
@@ -459,19 +458,18 @@ class MARCImportJob:
                     "",
                 )
                 self.bad_records_file.write(reader.current_chunk)
-        if self.record_batch:
-            await self.process_record_batch(
-                await self.create_batch_payload(
-                    counter,
-                    total_records,
-                    (counter - self.error_records)
-                    == (total_records - self.error_records),
-                ),
-            )
         if not self.split_files:
             self.move_file_to_complete(file_path)
+        if self.record_batch or not self.finished:
+            await self.process_record_batch(
+                await self.create_batch_payload(
+                    counter,
+                    total_records,
+                    counter == total_records,
+                ),
+            )
 
-    def move_file_to_complete(self, file_path):
+    def move_file_to_complete(self, file_path: Path):
         import_complete_path = file_path.parent.joinpath("import_complete")
         if not import_complete_path.exists():
             logger.debug(f"Creating import_complete directory: {import_complete_path.absolute()}")
@@ -481,58 +479,6 @@ class MARCImportJob:
             file_path.parent.joinpath("import_complete", file_path.name)
         )
 
-    @staticmethod
-    async def apply_marc_record_preprocessing(
-        record: pymarc.Record, func_or_path
-    ) -> pymarc.Record:
-        """
-        Apply preprocessing to the MARC record before sending it to FOLIO.
-
-        Args:
-            record (pymarc.Record): The MARC record to preprocess.
-            func_or_path (Union[Callable, str]): The preprocessing function or its import path.
-
-        Returns:
-            pymarc.Record: The preprocessed MARC record.
-        """
-        if isinstance(func_or_path, str):
-            func_paths = func_or_path.split(",")
-            for func_path in func_paths:
-                record = await MARCImportJob._apply_single_marc_record_preprocessing_by_path(
-                    record, func_path
-                )
-        elif callable(func_or_path):
-            record = func_or_path(record)
-        else:
-            logger.warning(
-                f"Invalid preprocessing function: {func_or_path}. Skipping preprocessing."
-            )
-        return record
-
-    async def _apply_single_marc_record_preprocessing_by_path(
-        record: pymarc.Record, func_path: str
-    ) -> pymarc.Record:
-        """
-        Apply a single preprocessing function to the MARC record.
-
-        Args:
-            record (pymarc.Record): The MARC record to preprocess.
-            func_path (str): The path to the preprocessing function.
-
-        Returns:
-            pymarc.Record: The preprocessed MARC record.
-        """
-        try:
-            module_path, func_name = func_path.rsplit(".", 1)
-            module = importlib.import_module(module_path)
-            func = getattr(module, func_name)
-            record = func(record)
-        except Exception as e:
-            logger.warning(
-                f"Error applying preprocessing function {func_path}: {e}. Skipping."
-            )
-        return record
-
     async def create_batch_payload(self, counter, total_records, is_last) -> dict:
         """
         Create a batch payload for data import.
@@ -549,9 +495,9 @@ class MARCImportJob:
             "id": str(uuid.uuid4()),
             "recordsMetadata": {
                 "last": is_last,
-                "counter": counter
+                "counter": counter,
                 "contentType": "MARC_RAW",
-                "total": total_records
+                "total": total_records,
             },
             "initialRecords": [{"record": x.decode()} for x in self.record_batch],
         }
@@ -646,17 +592,47 @@ class MARCImportJob:
                 disable=self.no_progress,
             ) as pbar_sent,
         ):
-
-
-
-
-
-
+            try:
+                self.pbar_sent = pbar_sent
+                self.pbar_imported = pbar_imported
+                await self.process_records(files, total_records)
+                while not self.finished:
+                    await self.get_job_status()
+                    sleep(1)
+            except FolioDataImportBatchError as e:
+                logger.error(
+                    f"Unhandled error posting batch {e.batch_id}: {e.message}"
+                )
+                await self.cancel_job()
+                raise e
             if self.finished:
                 await self.log_job_summary()
                 self.last_current = 0
                 self.finished = False
 
+    async def cancel_job(self) -> None:
+        """
+        Cancels the current job execution.
+
+        This method sends a request to cancel the job execution and logs the result.
+
+        Returns:
+            None
+        """
+        try:
+            cancel = self.http_client.delete(
+                self.folio_client.gateway_url
+                + f"/change-manager/jobExecutions/{self.job_id}/records",
+                headers=self.folio_client.okapi_headers,
+            )
+            cancel.raise_for_status()
+            self.finished = True
+            logger.info(f"Cancelled job: {self.job_id}")
+        except (httpx.ConnectTimeout, httpx.ReadTimeout):
+            logger.warning(f"CONNECTION ERROR cancelling job {self.job_id}. Retrying...")
+            sleep(0.25)
+            await self.cancel_job()
+
     async def log_job_summary(self):
         if job_summary := await self.get_job_summary():
             job_id = job_summary.pop("jobExecutionId", None)
@@ -835,17 +811,8 @@ async def main() -> None:
         ),
         default=None,
     )
-
-
-    group.add_argument(
-        "--consolidate",
-        action="store_true",
-        help=(
-            "Consolidate records into a single job. "
-            "Default is to create a new job for each MARC file."
-        ),
-    )
-    group.add_argument(
+
+    parser.add_argument(
         "--split-files",
         action="store_true",
         help="Split files into smaller parts before importing.",
@@ -856,6 +823,12 @@ async def main() -> None:
         help="The number of records to include in each split file.",
         default=1000,
     )
+    parser.add_argument(
+        "--split-offset",
+        type=int,
+        help="The number of record batches of <split-size> to skip before starting import.",
+        default=0,
+    )
 
     parser.add_argument(
         "--no-progress",
@@ -867,6 +840,16 @@ async def main() -> None:
         action="store_true",
         help="Do not retry fetching the final job summary if it fails",
     )
+    parser.add_argument(
+        "--preprocessor-config",
+        type=str,
+        help=(
+            "JSON file containing configuration for preprocessor functions. "
+            "This is passed to MARCPreprocessor class as a dict of dicts."
+        ),
+        default=None,
+    )
+
     args = parser.parse_args()
     if not args.password:
         args.password = getpass("Enter FOLIO password: ")
@@ -891,6 +874,12 @@ async def main() -> None:
     else:
         logger.info(marc_files)
 
+    if args.preprocessor_config:
+        with open(args.preprocessor_config, "r") as f:
+            preprocessor_args = json.load(f)
+    else:
+        preprocessor_args = {}
+
     if not args.import_profile_name:
         import_profiles = folio_client.folio_get(
             "/data-import-profiles/jobProfiles",
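
The file passed via --preprocessor-config is read with json.load and its top-level dict is unpacked into MARCPreprocessor as keyword arguments (the "dict of dicts" mentioned in the help text). A hedged sketch of such a file follows; the key and option names are assumptions, since the MARCPreprocessor API itself is not shown in this diff.

    # Hypothetical preprocessor_config.json content; keys and options are assumptions.
    import json

    config = {
        "strip_fields": {"tags": ["952", "999"]},
        "prepend_prefix": {"field": "035", "prefix": "(OCoLC)"},
    }
    with open("preprocessor_config.json", "w") as f:
        json.dump(config, f, indent=2)

    # It would then be referenced on the command line (entry point assumed), e.g.:
    #   python -m folio_data_import ... --preprocessor-config preprocessor_config.json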
@@ -919,11 +908,12 @@ async def main() -> None:
             batch_size=args.batch_size,
             batch_delay=args.batch_delay,
             marc_record_preprocessor=args.preprocessor,
-
+            preprocessor_args=preprocessor_args,
             no_progress=bool(args.no_progress),
             let_summary_fail=bool(args.let_summary_fail),
             split_files=bool(args.split_files),
             split_size=args.split_size,
+            split_offset=args.split_offset,
         ).do_work()
     except Exception as e:
         logger.error("Error importing files: " + str(e))
{folio_data_import-0.2.8rc12 → folio_data_import-0.3.0}/src/folio_data_import/UserImport.py
RENAMED
@@ -137,7 +137,7 @@ class UserImporter: # noqa: R0902
         match_key = "id" if ("id" in user_obj) else self.match_key
         try:
             existing_user = await self.http_client.get(
-                self.folio_client.
+                self.folio_client.gateway_url + "/users",
                 headers=self.folio_client.okapi_headers,
                 params={"query": f"{match_key}=={user_obj[match_key]}"},
             )
@@ -161,7 +161,7 @@ class UserImporter: # noqa: R0902
         """
         try:
             existing_rp = await self.http_client.get(
-                self.folio_client.
+                self.folio_client.gateway_url
                 + "/request-preference-storage/request-preference",
                 headers=self.folio_client.okapi_headers,
                 params={
@@ -188,7 +188,7 @@ class UserImporter: # noqa: R0902
         """
         try:
             existing_pu = await self.http_client.get(
-                self.folio_client.
+                self.folio_client.gateway_url + "/perms/users",
                 headers=self.folio_client.okapi_headers,
                 params={
                     "query": f"userId=={existing_user.get('id', user_obj.get('id', ''))}"
@@ -369,7 +369,7 @@ class UserImporter: # noqa: R0902
             else:
                 existing_user[key] = value
         create_update_user = await self.http_client.put(
-            self.folio_client.
+            self.folio_client.gateway_url + f"/users/{existing_user['id']}",
             headers=self.folio_client.okapi_headers,
             json=existing_user,
         )
@@ -389,7 +389,7 @@ class UserImporter: # noqa: R0902
             HTTPError: If the HTTP request to create the user fails.
         """
         response = await self.http_client.post(
-            self.folio_client.
+            self.folio_client.gateway_url + "/users",
             headers=self.folio_client.okapi_headers,
             json=user_obj,
         )
@@ -589,7 +589,7 @@ class UserImporter: # noqa: R0902
         rp_obj["userId"] = new_user_obj["id"]
         # print(rp_obj)
         response = await self.http_client.post(
-            self.folio_client.
+            self.folio_client.gateway_url
             + "/request-preference-storage/request-preference",
             headers=self.folio_client.okapi_headers,
             json=rp_obj,
@@ -613,7 +613,7 @@ class UserImporter: # noqa: R0902
         existing_rp.update(rp_obj)
         # print(existing_rp)
         response = await self.http_client.put(
-            self.folio_client.
+            self.folio_client.gateway_url
             + f"/request-preference-storage/request-preference/{existing_rp['id']}",
             headers=self.folio_client.okapi_headers,
             json=existing_rp,
@@ -635,7 +635,7 @@ class UserImporter: # noqa: R0902
         """
         perms_user_obj = {"userId": new_user_obj["id"], "permissions": []}
         response = await self.http_client.post(
-            self.folio_client.
+            self.folio_client.gateway_url + "/perms/users",
             headers=self.folio_client.okapi_headers,
             json=perms_user_obj,
         )
@@ -788,7 +788,7 @@ class UserImporter: # noqa: R0902
         """
         try:
             existing_spu = await self.http_client.get(
-                self.folio_client.
+                self.folio_client.gateway_url + "/service-points-users",
                 headers=self.folio_client.okapi_headers,
                 params={"query": f"userId=={existing_user['id']}"},
             )
@@ -812,7 +812,7 @@ class UserImporter: # noqa: R0902
         """
         spu_obj["userId"] = existing_user["id"]
         response = await self.http_client.post(
-            self.folio_client.
+            self.folio_client.gateway_url + "/service-points-users",
             headers=self.folio_client.okapi_headers,
             json=spu_obj,
         )
@@ -831,7 +831,7 @@ class UserImporter: # noqa: R0902
         """
         existing_spu.update(spu_obj)
         response = await self.http_client.put(
-            self.folio_client.
+            self.folio_client.gateway_url + f"/service-points-users/{existing_spu['id']}",
             headers=self.folio_client.okapi_headers,
             json=existing_spu,
         )
folio_data_import-0.3.0/src/folio_data_import/custom_exceptions.py
@@ -0,0 +1,17 @@
+"""Custom exceptions for the Folio Data Import module."""
+
+class FolioDataImportError(Exception):
+    """Base class for all exceptions in the Folio Data Import module."""
+    pass
+
+class FolioDataImportBatchError(FolioDataImportError):
+    """Exception raised for errors in the Folio Data Import batch process.
+
+    Attributes:
+        batch_id -- ID of the batch that caused the error
+        message -- explanation of the error
+    """
+    def __init__(self, batch_id, message, exception=None):
+        self.batch_id = batch_id
+        self.message = message
+        super().__init__(f"Unhandled error posting batch {batch_id}: {message}")