folio-data-import 0.3.2__py3-none-any.whl → 0.4.1__py3-none-any.whl
Potentially problematic release: this version of folio-data-import might be problematic.
- folio_data_import/MARCDataImport.py +295 -159
- folio_data_import/UserImport.py +386 -255
- folio_data_import/__main__.py +7 -110
- folio_data_import/_progress.py +27 -0
- folio_data_import/marc_preprocessors/_preprocessors.py +12 -8
- {folio_data_import-0.3.2.dist-info → folio_data_import-0.4.1.dist-info}/METADATA +58 -7
- folio_data_import-0.4.1.dist-info/RECORD +13 -0
- {folio_data_import-0.3.2.dist-info → folio_data_import-0.4.1.dist-info}/WHEEL +1 -1
- folio_data_import-0.4.1.dist-info/entry_points.txt +5 -0
- folio_data_import-0.3.2.dist-info/RECORD +0 -12
- folio_data_import-0.3.2.dist-info/entry_points.txt +0 -5
- {folio_data_import-0.3.2.dist-info → folio_data_import-0.4.1.dist-info/licenses}/LICENSE +0 -0
@@ -1,4 +1,4 @@
-import argparse
+import typer
 import asyncio
 import datetime
 import glob

@@ -12,10 +12,10 @@ import uuid
 from contextlib import ExitStack
 from datetime import datetime as dt
 from functools import cached_property
-from getpass import getpass
 from pathlib import Path
 from time import sleep
 from typing import BinaryIO, Callable, Dict, List, Union
+from typing_extensions import Annotated
 
 import folioclient
 import httpx

@@ -23,10 +23,21 @@ import inquirer
 import pymarc
 import tabulate
 from humps import decamelize
-from …
-…
-…
+from rich.progress import (
+    Progress,
+    TimeElapsedColumn,
+    BarColumn,
+    TimeRemainingColumn,
+    SpinnerColumn,
+    MofNCompleteColumn,
+)
+from rich.logging import RichHandler
+from folio_data_import.custom_exceptions import (
+    FolioDataImportBatchError,
+    FolioDataImportJobError,
+)
 from folio_data_import.marc_preprocessors._preprocessors import MARCPreprocessor
+from folio_data_import._progress import ItemsPerSecondColumn
 
 try:
     datetime_utc = datetime.UTC

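The progress handling moves to rich. `ItemsPerSecondColumn` is imported from the new `folio_data_import/_progress.py` module (+27 lines in this release), whose body is not shown in this diff; a custom throughput column for rich would typically look like the following sketch (an assumption about the implementation, not the package's actual code):

    from rich.progress import ProgressColumn, Task
    from rich.text import Text


    class ItemsPerSecondColumn(ProgressColumn):
        """Render a task's current throughput, e.g. '12.3 rec/s'."""

        def render(self, task: Task) -> Text:
            # task.speed is rich's rolling estimate in steps/second; it stays
            # None until enough samples have been collected.
            speed = task.finished_speed or task.speed
            if speed is None:
                return Text("-- rec/s", style="progress.data.speed")
            return Text(f"{speed:.1f} rec/s", style="progress.data.speed")
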
@@ -71,20 +82,32 @@ class MARCImportJob:
         batch_size (int): The number of source records to include in a record batch (default=10).
         batch_delay (float): The number of seconds to wait between record batches (default=0).
         no_progress (bool): Disable progress bars (eg. for running in a CI environment).
+        marc_record_preprocessor (list or str): A list of callables or a string representing
+            the MARC record preprocessor(s) to apply to each record before import.
+        preprocessor_args (dict): A dictionary of arguments to pass to the MARC record preprocessor(s).
+        let_summary_fail (bool): If True, will not retry or fail the import if the final job summary
+            cannot be retrieved (default=False).
+        split_files (bool): If True, will split each file into smaller jobs of size `split_size`
+        split_size (int): The number of records to include in each split file (default=1000).
+        split_offset (int): The number of split files to skip before starting processing (default=0).
+        job_ids_file_path (str): The path to the file where job IDs will be saved (default="marc_import_job_ids.txt").
+        show_file_names_in_data_import_logs (bool): If True, will set the file name for each job in the data import logs.
     """
 
     bad_records_file: io.TextIOWrapper
     failed_batches_file: io.TextIOWrapper
     job_id: str
-    …
-    …
+    progress: Progress
+    pbar_sent: int
+    pbar_imported: int
     http_client: httpx.Client
     current_file: List[Path]
-    record_batch: List[dict]
+    record_batch: List[dict]
     last_current: int = 0
     total_records_sent: int = 0
     finished: bool = False
     job_id: str = ""
+    job_ids: List[str]
     job_hrid: int = 0
     current_file: Union[List[Path], List[io.BytesIO]] = []
     _max_summary_retries: int = 2

@@ -99,13 +122,15 @@ class MARCImportJob:
         import_profile_name: str,
         batch_size=10,
         batch_delay=0,
-        marc_record_preprocessor: Union[List[Callable], str] = …,
-        preprocessor_args: Dict[str, Dict] = …,
+        marc_record_preprocessor: Union[List[Callable], str] = None,
+        preprocessor_args: Dict[str, Dict] = None,
         no_progress=False,
         let_summary_fail=False,
         split_files=False,
         split_size=1000,
         split_offset=0,
+        job_ids_file_path: str = "",
+        show_file_names_in_data_import_logs: bool = False,
     ) -> None:
         self.split_files = split_files
         self.split_size = split_size

@@ -121,6 +146,10 @@ class MARCImportJob:
         self.marc_record_preprocessor: MARCPreprocessor = MARCPreprocessor(
             marc_record_preprocessor, **preprocessor_args
         )
+        self.job_ids_file_path = job_ids_file_path or self.import_files[
+            0
+        ].parent.joinpath("marc_import_job_ids.txt")
+        self.show_file_names_in_data_import_logs = show_file_names_in_data_import_logs
 
     async def do_work(self) -> None:
         """

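Note the new default: when no `job_ids_file_path` is given, the job-IDs file is created next to the first input file. For example, assuming an input of `/data/loads/batch1.mrc` (an illustrative path):

    from pathlib import Path

    Path("/data/loads/batch1.mrc").parent.joinpath("marc_import_job_ids.txt")
    # -> PosixPath('/data/loads/marc_import_job_ids.txt') on POSIX systems
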
@@ -132,6 +161,8 @@ class MARCImportJob:
         Returns:
             None
         """
+        self.record_batch = []
+        self.job_ids = []
         with (
             httpx.Client() as http_client,
             open(

@@ -158,7 +189,6 @@ class MARCImportJob:
             for file in self.import_files:
                 self.current_file = [file]
                 await self.import_marc_file()
-        await self.wrap_up()
 
     async def process_split_files(self):
         """

@@ -201,6 +231,10 @@ class MARCImportJob:
             if not failed_batches.read(1):
                 os.remove(failed_batches.name)
                 logger.info("No failed batches. Removing failed batches file.")
+        with open(self.job_ids_file_path, "a+") as job_ids_file:
+            logger.info(f"Writing job IDs to {self.job_ids_file_path}")
+            for job_id in self.job_ids:
+                job_ids_file.write(f"{job_id}\n")
         logger.info("Import complete.")
         logger.info(f"Total records imported: {self.total_records_sent}")
 

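Since every job execution created during the run is collected in `self.job_ids`, wrap-up now appends them to the job-IDs file, one per line (mode `a+` preserves IDs from earlier runs). Reading them back for later processing is then trivial (illustrative):

    from pathlib import Path

    job_ids = Path("marc_import_job_ids.txt").read_text().splitlines()
    # e.g. poll GET /change-manager/jobExecutions/{job_id} for each saved ID
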
@@ -256,7 +290,10 @@ class MARCImportJob:
             status = [
                 job for job in job_status["jobExecutions"] if job["id"] == self.job_id
             ][0]
-            self.…
+            self.progress.update(
+                self.pbar_imported,
+                advance=status["progress"]["current"] - self.last_current,
+            )
             self.last_current = status["progress"]["current"]
         except (IndexError, ValueError, KeyError):
             logger.debug(

@@ -272,8 +309,9 @@ class MARCImportJob:
                 for job in job_status["jobExecutions"]
                 if job["id"] == self.job_id
             ][0]
-            self.…
-            …
+            self.progress.update(
+                self.pbar_imported,
+                advance=status["progress"]["current"] - self.last_current,
             )
             self.last_current = status["progress"]["current"]
             self.finished = True

@@ -297,6 +335,40 @@ class MARCImportJob:
             else:
                 raise e
 
+    async def set_job_file_name(self) -> None:
+        """
+        Sets the file name for the current job execution.
+
+        Returns:
+            None
+        """
+        try:
+            job_object = self.http_client.get(
+                self.folio_client.gateway_url
+                + "/change-manager/jobExecutions/"
+                + self.job_id,
+                headers=self.folio_client.okapi_headers,
+            )
+            job_object.raise_for_status()
+            job_object_json = job_object.json()
+            job_object_json.update({"fileName": self.current_file[0].name})
+            set_file_name = self.http_client.put(
+                self.folio_client.gateway_url
+                + "/change-manager/jobExecutions/"
+                + self.job_id,
+                headers=self.folio_client.okapi_headers,
+                json=job_object_json,
+            )
+            set_file_name.raise_for_status()
+        except httpx.HTTPError as e:
+            logger.error(
+                "Error setting job file name: "
+                + str(e)
+                + "\n"
+                + getattr(getattr(e, "response", ""), "text", "")
+            )
+            raise e
+
     async def create_folio_import_job(self) -> None:
         """
         Creates a job execution for importing data into FOLIO.

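The new `set_job_file_name()` is a read-modify-write round-trip against the change-manager API: GET the job execution, set `fileName`, PUT the whole object back. Extracted from the class, the same pattern looks like this (a sketch; `gateway_url`, `headers`, and `job_id` stand in for the values the class takes from its FolioClient):

    import httpx

    def set_file_name(gateway_url: str, headers: dict, job_id: str, file_name: str) -> None:
        url = f"{gateway_url}/change-manager/jobExecutions/{job_id}"
        with httpx.Client() as client:
            job = client.get(url, headers=headers)
            job.raise_for_status()
            body = job.json()
            body.update({"fileName": file_name})  # change only the one key
            client.put(url, headers=headers, json=body).raise_for_status()
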
@@ -328,6 +400,9 @@ class MARCImportJob:
             )
             raise e
         self.job_id = create_job.json()["parentJobExecutionId"]
+        if self.show_file_names_in_data_import_logs:
+            await self.set_job_file_name()
+        self.job_ids.append(self.job_id)
         logger.info(f"Created job: {self.job_id}")
 
     @cached_property

@@ -357,6 +432,9 @@ class MARCImportJob:
         Returns:
             The response from the HTTP request to set the job profile.
         """
+        logger.info(
+            f"Setting job profile: {self.import_profile['name']} ({self.import_profile['id']}) for job {self.job_id}"
+        )
         set_job_profile = self.http_client.put(
             self.folio_client.gateway_url
             + "/change-manager/jobExecutions/"

@@ -427,14 +505,18 @@ class MARCImportJob:
             post_batch.raise_for_status()
             self.total_records_sent += len(self.record_batch)
             self.record_batch = []
-            self.…
+            self.progress.update(
+                self.pbar_sent, advance=len(batch_payload["initialRecords"])
+            )
         except httpx.HTTPStatusError as e:
             if (
                 e.response.status_code in [500, 400, 422]
             ):  # TODO: Update once we no longer have to support < Sunflower to just be 400
                 self.total_records_sent += len(self.record_batch)
                 self.record_batch = []
-                self.…
+                self.progress.update(
+                    self.pbar_sent, advance=len(batch_payload["initialRecords"])
+                )
             else:
                 for record in self.record_batch:
                     self.failed_batches_file.write(record)

@@ -459,8 +541,9 @@ class MARCImportJob:
         counter = 0
         for import_file in files:
             file_path = Path(import_file.name)
-            self.…
-            …
+            self.progress.update(
+                self.pbar_sent,
+                description=f"Sent ({os.path.basename(import_file.name)}): ",
             )
             reader = pymarc.MARCReader(import_file, hide_utf8_warnings=True)
             for idx, record in enumerate(reader, start=1):

@@ -609,22 +692,30 @@ class MARCImportJob:
                 raise e
         total_records = await self.read_total_records(files)
         with (
-            … (12 lines)
+            Progress(
+                "{task.description}",
+                SpinnerColumn(),
+                BarColumn(),
+                MofNCompleteColumn(),
+                "[",
+                TimeElapsedColumn(),
+                "<",
+                TimeRemainingColumn(),
+                "/",
+                ItemsPerSecondColumn(),
+                "]",
+            ) as import_progress,
         ):
+            self.progress = import_progress
             try:
-                self.pbar_sent = …
-                …
+                self.pbar_sent = self.progress.add_task(
+                    "Sent: ", total=total_records, visible=not self.no_progress
+                )
+                self.pbar_imported = self.progress.add_task(
+                    f"Imported: ({self.job_hrid})",
+                    total=total_records,
+                    visible=not self.no_progress,
+                )
                 await self.process_records(files, total_records)
                 while not self.finished:
                     await self.get_job_status()

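This hunk is the heart of the rich migration: one `Progress` live display owns two tasks, `pbar_sent` advanced as record batches are posted and `pbar_imported` advanced by the delta reported in the job-execution status. Stripped of the FOLIO specifics, the two-task pattern works like this (illustrative totals and timing):

    import time

    from rich.progress import BarColumn, MofNCompleteColumn, Progress, SpinnerColumn

    with Progress(
        "{task.description}", SpinnerColumn(), BarColumn(), MofNCompleteColumn()
    ) as progress:
        sent = progress.add_task("Sent: ", total=100)  # add_task returns a TaskID
        imported = progress.add_task("Imported: ", total=100)
        for _ in range(10):
            progress.update(sent, advance=10)      # records posted to the server
            time.sleep(0.05)                       # simulated server-side processing
            progress.update(imported, advance=10)  # delta confirmed by status polling
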
@@ -785,7 +876,12 @@ def set_up_cli_logging():
         isinstance(h, logging.StreamHandler) and h.stream == sys.stderr
         for h in logger.handlers
     ):
-        stream_handler = …
+        stream_handler = RichHandler(
+            show_level=False,
+            show_time=False,
+            omit_repeated_times=False,
+            show_path=False,
+        )
         stream_handler.setLevel(logging.INFO)
         stream_handler.addFilter(ExcludeLevelFilter(DATA_ISSUE_LVL_NUM))
         # stream_handler.addFilter(ExcludeLevelFilter(25))

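Console logging switches from a plain `StreamHandler` to rich's `RichHandler`, which renders log lines through rich's console machinery. A minimal stand-alone version of this wiring (using the same handler options as the hunk above):

    import logging

    from rich.logging import RichHandler

    handler = RichHandler(show_level=False, show_time=False, show_path=False)
    handler.setLevel(logging.INFO)
    logging.basicConfig(level=logging.INFO, format="%(message)s", handlers=[handler])
    logging.getLogger(__name__).info("Rendered by rich instead of a bare StreamHandler")
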
@@ -807,165 +903,208 @@ def set_up_cli_logging():
     logging.getLogger("httpx").setLevel(logging.WARNING)
 
 
-…
-    """
-    Main function to run the MARC import job.
+app = typer.Typer()
 
-    … (29 lines)
+
+@app.command()
+def main(
+    gateway_url: Annotated[
+        str,
+        typer.Option(
+            prompt="Please enter the FOLIO API Gateway URL",
+            help="The FOLIO API Gateway URL",
+            envvar="FOLIO_GATEWAY_URL",
+        ),
+    ],
+    tenant_id: Annotated[
+        str,
+        typer.Option(
+            prompt="Please enter the FOLIO tenant id",
+            help="The tenant id",
+            envvar="FOLIO_TENANT_ID",
+        ),
+    ],
+    username: Annotated[
+        str,
+        typer.Option(
+            prompt="Please enter your FOLIO username",
+            help="The FOLIO username",
+            envvar="FOLIO_USERNAME",
+        ),
+    ],
+    password: Annotated[
+        str,
+        typer.Option(
+            prompt="Please enter your FOLIO Password",
+            hide_input=True,
+            help="The FOLIO password",
+            envvar="FOLIO_PASSWORD",
+        ),
+    ],
+    marc_file_path: str = typer.Option(
+        ..., help="The MARC file (or file glob, using shell globbing syntax) to import"
+    ),
+    member_tenant_id: Annotated[
+        str,
+        typer.Option(
+            help="The FOLIO ECS member tenant id (if applicable)",
+            envvar="FOLIO_MEMBER_TENANT_ID",
+        ),
+    ] = "",
+    import_profile_name: str = typer.Option(
+        "", help="The name of the data import job profile to use"
+    ),
+    batch_size: int = typer.Option(
+        10,
         help="The number of source records to include in a record batch sent to FOLIO.",
-    … (6 lines)
-        default=0.0,
-    )
-    parser.add_argument(
-        "--preprocessor",
-        type=str,
+    ),
+    batch_delay: float = typer.Option(
+        0.0, help="The number of seconds to wait between record batches."
+    ),
+    preprocessor: str = typer.Option(
+        "",
         help=(
             "Comma-separated python import paths to Python function(s) "
             "to apply to each MARC record before sending to FOLIO. Function should take "
             "a pymarc.Record object as input and return a pymarc.Record object."
         ),
-    … (14 lines)
-    parser.add_argument(
-        "--split-offset",
-        type=int,
+    ),
+    file_names_in_di_logs: bool = typer.Option(
+        False,
+        "--file-names-in-di-logs",
+        help="Show file names in FOLIO Data Import logs",
+    ),
+    split_files: bool = typer.Option(
+        False, "--split-files", help="Split files into smaller parts before importing."
+    ),
+    split_size: int = typer.Option(
+        1000, help="The number of records to include in each split file."
+    ),
+    split_offset: int = typer.Option(
+        0,
         help="The number of record batches of <split-size> to skip before starting import.",
-    … (3 lines)
-    parser.add_argument(
+    ),
+    no_progress: bool = typer.Option(
+        False,
         "--no-progress",
-        action="store_true",
         help="Disable progress bars (eg. for running in a CI environment)",
-    … (2 lines)
+        envvar="FOLIO_MARC_NO_PROGRESS",
+    ),
+    let_summary_fail: bool = typer.Option(
+        False,
         "--let-summary-fail",
-        action="store_true",
         help="Do not retry fetching the final job summary if it fails",
-    … (4 lines)
+        envvar="FOLIO_MARC_LET_SUMMARY_FAIL",
+    ),
+    preprocessor_config: str = typer.Option(
+        None,
         help=(
             "JSON file containing configuration for preprocessor functions. "
             "This is passed to MARCPreprocessor class as a dict of dicts."
         ),
-    … (8 lines)
-    )
+    ),
+    job_ids_file_path: str = typer.Option(
+        None, help="Path to a file to write job IDs to for later processing."
+    ),
+):
+    """
+    Command-line interface to batch import MARC records into FOLIO using FOLIO Data Import
+    """
+    set_up_cli_logging()
+    if not password:
+        password = typer.prompt("Enter FOLIO password: ", hide_input=True)
+    folio_client = folioclient.FolioClient(gateway_url, tenant_id, username, password)
 
-    …
-    …
-        folio_client.okapi_headers["x-okapi-tenant"] = args.member_tenant_id
+    if member_tenant_id:
+        folio_client.okapi_headers["x-okapi-tenant"] = member_tenant_id
 
-    if os.path.isabs(…
-        marc_files = [Path(x) for x in glob.glob(…
+    if os.path.isabs(marc_file_path):
+        marc_files = [Path(x) for x in glob.glob(marc_file_path)]
     else:
-        marc_files = list(Path("./").glob(…
+        marc_files = list(Path("./").glob(marc_file_path))
 
     marc_files.sort()
 
     if len(marc_files) == 0:
-        logger.critical(f"No files found matching {…
+        logger.critical(f"No files found matching {marc_file_path}. Exiting.")
         sys.exit(1)
     else:
         logger.info(marc_files)
 
-    if …
-        with open(…
+    if preprocessor_config:
+        with open(preprocessor_config, "r") as f:
             preprocessor_args = json.load(f)
     else:
         preprocessor_args = {}
 
-    if not …
-    … (15 lines)
+    if not import_profile_name:
+        try:
+            import_profiles = folio_client.folio_get(
+                "/data-import-profiles/jobProfiles",
+                "jobProfiles",
+                query_params={"limit": "1000"},
+            )
+            import_profile_names = [
+                profile["name"]
+                for profile in import_profiles
+                if "marc" in profile["dataType"].lower()
+            ]
+            questions = [
+                inquirer.List(
+                    "import_profile_name",
+                    message="Select an import profile",
+                    choices=import_profile_names,
+                )
+            ]
+            answers = inquirer.prompt(questions, raise_keyboard_interrupt=True)
+            import_profile_name = answers["import_profile_name"]
+        except httpx.HTTPStatusError as e:
+            logger.error(
+                f"HTTP Error fetching import profiles: {e}\n{getattr(getattr(e, 'response', ''), 'text', '')}\nExiting."
             )
-    … (3 lines)
+            sys.exit(1)
+        except KeyboardInterrupt:
+            logger.info("Keyboard interrupt received. Exiting.")
+            sys.exit(0)
+
+    job = None
     try:
-        …
+        job = MARCImportJob(
             folio_client,
             marc_files,
-            …
-            batch_size=…
-            batch_delay=…
-            marc_record_preprocessor=…
+            import_profile_name,
+            batch_size=batch_size,
+            batch_delay=batch_delay,
+            marc_record_preprocessor=preprocessor,
             preprocessor_args=preprocessor_args,
-            no_progress=…
-            let_summary_fail=…
-            split_files=…
-            split_size=…
-            split_offset=…
-        …
+            no_progress=no_progress,
+            let_summary_fail=let_summary_fail,
+            split_files=split_files,
+            split_size=split_size,
+            split_offset=split_offset,
+            job_ids_file_path=job_ids_file_path,
+            show_file_names_in_data_import_logs=file_names_in_di_logs,
+        )
+        asyncio.run(run_job(job))
+    except Exception as e:
+        logger.error("Could not initialize MARCImportJob: " + str(e))
+        raise typer.Exit(1)
+
+
+async def run_job(job):
+    try:
+        await job.do_work()
+    except httpx.HTTPStatusError as e:
+        logger.error(
+            f"HTTP Error importing files: {e}\n{getattr(getattr(e, 'response', ''), 'text', '')}\nExiting."
+        )
+        typer.Exit(1)
     except Exception as e:
         logger.error("Error importing files: " + str(e))
         raise
+    finally:
+        if job:
+            await job.wrap_up()
 
 
 class ExcludeLevelFilter(logging.Filter):

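The argparse CLI is rebuilt on typer: each `Annotated[str, typer.Option(...)]` parameter folds a command-line flag, an interactive prompt, and an environment-variable fallback into a single declaration, replacing the removed `parser.add_argument(...)` calls. The same pattern in miniature (names here are illustrative, not part of the package):

    import typer
    from typing_extensions import Annotated

    app = typer.Typer()


    @app.command()
    def greet(
        name: Annotated[
            str,
            typer.Option(prompt="Please enter a name", help="Who to greet", envvar="GREET_NAME"),
        ],
        shout: bool = typer.Option(False, "--shout", help="Uppercase the greeting"),
    ):
        message = f"Hello, {name}!"
        typer.echo(message.upper() if shout else message)


    if __name__ == "__main__":
        app()  # resolution order per option: CLI flag, then env var, then prompt
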
@@ -986,12 +1125,9 @@ class IncludeLevelFilter(logging.Filter):
         return record.levelno == self.level
 
 
-def …
-    """
-    Synchronous main function to run the MARC import job.
-    """
-    asyncio.run(main())
+def _main():
+    typer.run(main)
 
 
 if __name__ == "__main__":
-    …
+    app()