folio-data-import 0.2.7__py3-none-any.whl → 0.2.8.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of folio-data-import was flagged as possibly problematic by the registry.
- folio_data_import/MARCDataImport.py +510 -166
- folio_data_import/UserImport.py +79 -21
- folio_data_import/marc_preprocessors/__init__.py +1 -1
- folio_data_import/marc_preprocessors/_preprocessors.py +306 -4
- {folio_data_import-0.2.7.dist-info → folio_data_import-0.2.8.post1.dist-info}/METADATA +5 -6
- folio_data_import-0.2.8.post1.dist-info/RECORD +11 -0
- {folio_data_import-0.2.7.dist-info → folio_data_import-0.2.8.post1.dist-info}/WHEEL +1 -1
- folio_data_import-0.2.7.dist-info/RECORD +0 -11
- {folio_data_import-0.2.7.dist-info → folio_data_import-0.2.8.post1.dist-info}/LICENSE +0 -0
- {folio_data_import-0.2.7.dist-info → folio_data_import-0.2.8.post1.dist-info}/entry_points.txt +0 -0
@@ -1,18 +1,21 @@
 import argparse
 import asyncio
+import datetime
 import glob
 import importlib
 import io
+import logging
+import math
 import os
 import sys
-from typing import List
 import uuid
 from contextlib import ExitStack
-import datetime
 from datetime import datetime as dt
+from functools import cached_property
 from getpass import getpass
 from pathlib import Path
 from time import sleep
+from typing import List, Union
 
 import folioclient
 import httpx
@@ -22,7 +25,6 @@ import tabulate
 from humps import decamelize
 from tqdm import tqdm
 
-
 try:
     datetime_utc = datetime.UTC
 except AttributeError:
@@ -36,6 +38,18 @@ REPORT_SUMMARY_ORDERING = {"created": 0, "updated": 1, "discarded": 2, "error":
 RETRY_TIMEOUT_START = 1
 RETRY_TIMEOUT_RETRY_FACTOR = 2
 
+# Custom log level for data issues, set to 26
+DATA_ISSUE_LVL_NUM = 26
+logging.addLevelName(DATA_ISSUE_LVL_NUM, "DATA_ISSUES")
+
+def data_issues(self, msg, *args, **kws):
+    if self.isEnabledFor(DATA_ISSUE_LVL_NUM):
+        self._log(DATA_ISSUE_LVL_NUM, msg, args, **kws)
+
+logging.Logger.data_issues = data_issues
+
+logger = logging.getLogger(__name__)
+
 class MARCImportJob:
     """
     Class to manage importing MARC data (Bib, Authority) into FOLIO using the Change Manager
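Aside: the hunk above registers a custom `DATA_ISSUES` level (26, between `INFO` and `WARNING`) and monkey-patches a `data_issues()` helper onto `logging.Logger`. A minimal standalone sketch of that pattern, for illustration only (the level names mirror the diff, but the demo logger and handler below are not part of the package):

```python
import logging

DATA_ISSUE_LVL_NUM = 26  # sits between INFO (20) and WARNING (30)
logging.addLevelName(DATA_ISSUE_LVL_NUM, "DATA_ISSUES")

def data_issues(self, msg, *args, **kws):
    # Mirror the stock logging helpers: only emit when the level is enabled.
    if self.isEnabledFor(DATA_ISSUE_LVL_NUM):
        self._log(DATA_ISSUE_LVL_NUM, msg, args, **kws)

logging.Logger.data_issues = data_issues

demo_logger = logging.getLogger("demo")
demo_logger.setLevel(DATA_ISSUE_LVL_NUM)
demo_logger.addHandler(logging.StreamHandler())
demo_logger.data_issues("RECORD FAILED\t%s\t%s\t%s", "records.mrc:12", "could not parse record", "")
```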
@@ -56,7 +70,6 @@ class MARCImportJob:
     bad_records_file: io.TextIOWrapper
     failed_batches_file: io.TextIOWrapper
     job_id: str
-    job_import_profile: dict
     pbar_sent: tqdm
     pbar_imported: tqdm
     http_client: httpx.Client
@@ -66,6 +79,11 @@ class MARCImportJob:
     last_current: int = 0
     total_records_sent: int = 0
     finished: bool = False
+    job_id: str = ""
+    job_hrid: int = 0
+    current_file: Union[List[Path],List[io.BytesIO]] = []
+    _max_summary_retries: int = 2
+    _summary_retries: int = 0
 
     def __init__(
         self,
@@ -77,9 +95,19 @@ class MARCImportJob:
         marc_record_preprocessor=None,
         consolidate=False,
         no_progress=False,
+        let_summary_fail=False,
+        split_files=False,
+        split_size=1000,
+        split_offset=0,
     ) -> None:
         self.consolidate_files = consolidate
+        self.split_files = split_files
+        self.split_size = split_size
+        self.split_offset = split_offset
+        if self.split_files and self.consolidate_files:
+            raise ValueError("Cannot consolidate and split files at the same time.")
         self.no_progress = no_progress
+        self.let_summary_fail = let_summary_fail
         self.folio_client: folioclient.FolioClient = folio_client
         self.import_files = marc_files
         self.import_profile_name = import_profile_name
@@ -93,38 +121,69 @@ class MARCImportJob:
         Performs the necessary work for data import.
 
         This method initializes an HTTP client, files to store records that fail to send,
-        and calls
-        it imports all the files specified in `import_files` as a single batch. Otherwise,
-        it imports each file as a separate import job.
+        and calls the appropriate method to import MARC files based on the configuration.
 
         Returns:
             None
         """
-        with
-
-
-
-
-
-
-
-
-
-
+        with (
+            httpx.Client() as http_client,
+            open(
+                self.import_files[0].parent.joinpath(
+                    f"bad_marc_records_{dt.now(tz=datetime_utc).strftime('%Y%m%d%H%M%S')}.mrc"
+                ),
+                "wb+",
+            ) as bad_marc_file,
+            open(
+                self.import_files[0].parent.joinpath(
+                    f"failed_batches_{dt.now(tz=datetime_utc).strftime('%Y%m%d%H%M%S')}.mrc"
+                ),
+                "wb+",
+            ) as failed_batches,
+        ):
             self.bad_records_file = bad_marc_file
-
+            logger.info(f"Writing bad records to {self.bad_records_file.name}")
             self.failed_batches_file = failed_batches
-
+            logger.info(f"Writing failed batches to {self.failed_batches_file.name}")
             self.http_client = http_client
             if self.consolidate_files:
-
-
+                await self.process_consolidated_import()
+            elif self.split_files:
+                await self.process_split_files()
             else:
                 for file in self.import_files:
                     self.current_file = [file]
                     await self.import_marc_file()
            await self.wrap_up()
 
+    async def process_split_files(self):
+        """
+        Process the import of files in smaller batches.
+        This method is called when `split_files` is set to True.
+        It splits each file into smaller chunks and processes them one by one.
+        """
+        for file in self.import_files:
+            with open(file, "rb") as f:
+                file_length = await self.read_total_records([f])
+            expected_batches = math.ceil(file_length /self.split_size)
+            logger.info(f"{file.name} contains {file_length} records. Splitting into {expected_batches} {self.split_size} record batches.")
+            zero_pad_parts = len(str(expected_batches)) if expected_batches > 1 else 2
+            for idx, batch in enumerate(self.split_marc_file(file, self.split_size), start=1):
+                if idx > self.split_offset:
+                    batch.name = f"{file.name} (Part {idx:0{zero_pad_parts}})"
+                    self.current_file = [batch]
+                    await self.import_marc_file()
+            self.move_file_to_complete(file)
+
+    async def process_consolidated_import(self):
+        """
+        Process the import of files as a single batch.
+        This method is called when `consolidate_files` is set to True.
+        It creates a single job for all files and processes them together.
+        """
+        self.current_file = self.import_files
+        await self.import_marc_file()
+
     async def wrap_up(self) -> None:
         """
         Wraps up the data import process.
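Aside: with the new `process_split_files()` / `process_consolidated_import()` paths, `do_work()` dispatches on the constructor flags. A hedged sketch of driving the split mode directly; the positional arguments and the `FolioClient` constructor call are inferred from this diff and may not match the exact 0.2.8 signature, so treat them as assumptions:

```python
import asyncio
from pathlib import Path

import folioclient
from folio_data_import.MARCDataImport import MARCImportJob

async def run() -> None:
    # Assumed FolioClient signature: gateway URL, tenant, username, password.
    client = folioclient.FolioClient(
        "https://folio-gateway.example.org", "tenant_id", "data_import_user", "s3cret"
    )
    job = MARCImportJob(
        client,
        sorted(Path("./marc").glob("*.mrc")),
        "Default - Create instance and SRS MARC Bib",  # example job profile name
        split_files=True,    # new in 0.2.8: send each file in smaller chunks
        split_size=1000,     # records per generated chunk
        split_offset=0,      # number of leading chunks to skip
    )
    await job.do_work()

if __name__ == "__main__":
    asyncio.run(run())
```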
@@ -135,16 +194,16 @@ class MARCImportJob:
         Returns:
             None
         """
-        self.bad_records_file.
-
-
-
-        self.failed_batches_file.
-
-
-
-
-
+        with open(self.bad_records_file.name, "rb") as bad_records:
+            if not bad_records.read(1):
+                os.remove(bad_records.name)
+                logger.info("No bad records found. Removing bad records file.")
+        with open(self.failed_batches_file.name, "rb") as failed_batches:
+            if not failed_batches.read(1):
+                os.remove(failed_batches.name)
+                logger.info("No failed batches. Removing failed batches file.")
+        logger.info("Import complete.")
+        logger.info(f"Total records imported: {self.total_records_sent}")
 
     async def get_job_status(self) -> None:
         """
@@ -158,38 +217,69 @@ class MARCImportJob:
         """
         try:
             self.current_retry_timeout = (
-                self.current_retry_timeout * RETRY_TIMEOUT_RETRY_FACTOR
-
-
-                "/metadata-provider/jobExecutions?statusNot=DISCARDED&uiStatusAny"
-                "=PREPARING_FOR_PREVIEW&uiStatusAny=READY_FOR_PREVIEW&uiStatusAny=RUNNING&limit=50"
+                (self.current_retry_timeout * RETRY_TIMEOUT_RETRY_FACTOR)
+                if self.current_retry_timeout
+                else RETRY_TIMEOUT_START
             )
-            self.current_retry_timeout = None
-        except httpx.ConnectTimeout:
-            sleep(.25)
             with httpx.Client(
                 timeout=self.current_retry_timeout,
-                verify=self.folio_client.ssl_verify
+                verify=self.folio_client.ssl_verify,
             ) as temp_client:
-                self.folio_client.
-
+                job_status = self.folio_client.folio_get(
+                    "/metadata-provider/jobExecutions?statusNot=DISCARDED&uiStatusAny"
+                    "=PREPARING_FOR_PREVIEW&uiStatusAny=READY_FOR_PREVIEW&uiStatusAny=RUNNING&limit=50"
+                )
+            self.current_retry_timeout = None
+        except (httpx.ConnectTimeout, httpx.ReadTimeout, httpx.HTTPStatusError) as e:
+            if not hasattr(e, "response") or e.response.status_code in [502, 504]:
+                error_text = e.response.text if hasattr(e, "response") else str(e)
+                logger.warning(f"SERVER ERROR fetching job status: {error_text}. Retrying.")
+                sleep(0.25)
+                with httpx.Client(
+                    timeout=self.current_retry_timeout,
+                    verify=self.folio_client.ssl_verify,
+                ) as temp_client:
+                    self.folio_client.httpx_client = temp_client
+                    return await self.get_job_status()
+            else:
+                raise e
+        except Exception as e:
+            logger.error(f"Error fetching job status. {e}")
+
         try:
             status = [
                 job for job in job_status["jobExecutions"] if job["id"] == self.job_id
             ][0]
             self.pbar_imported.update(status["progress"]["current"] - self.last_current)
             self.last_current = status["progress"]["current"]
-        except IndexError:
-
-
-
-
-
-
-
-
-
-
+        except (IndexError, ValueError, KeyError):
+            logger.debug(f"No active job found with ID {self.job_id}. Checking for finished job.")
+            try:
+                job_status = self.folio_client.folio_get(
+                    "/metadata-provider/jobExecutions?limit=100&sortBy=completed_date%2Cdesc&statusAny"
+                    "=COMMITTED&statusAny=ERROR&statusAny=CANCELLED"
+                )
+                status = [
+                    job for job in job_status["jobExecutions"] if job["id"] == self.job_id
+                ][0]
+                self.pbar_imported.update(status["progress"]["current"] - self.last_current)
+                self.last_current = status["progress"]["current"]
+                self.finished = True
+            except (httpx.ConnectTimeout, httpx.ReadTimeout, httpx.HTTPStatusError) as e:
+                if not hasattr(e, "response") or e.response.status_code in [502, 504]:
+                    error_text = e.response.text if hasattr(e, "response") else str(e)
+                    logger.warning(
+                        f"SERVER ERROR fetching job status: {error_text}. Retrying."
+                    )
+                    sleep(0.25)
+                    with httpx.Client(
+                        timeout=self.current_retry_timeout,
+                        verify=self.folio_client.ssl_verify,
+                    ) as temp_client:
+                        self.folio_client.httpx_client = temp_client
+                        return await self.get_job_status()
+                else:
+                    raise e
 
     async def create_folio_import_job(self) -> None:
         """
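Aside: the retry branches above reuse the module-level backoff constants; the temporary client's timeout starts at `RETRY_TIMEOUT_START` and doubles via `RETRY_TIMEOUT_RETRY_FACTOR` on each pass. A small standalone illustration of that progression (not package code):

```python
RETRY_TIMEOUT_START = 1
RETRY_TIMEOUT_RETRY_FACTOR = 2

def next_timeout(current):
    # Matches the conditional expression used in get_job_status()/get_job_summary().
    return current * RETRY_TIMEOUT_RETRY_FACTOR if current else RETRY_TIMEOUT_START

timeout = None
for attempt in range(1, 6):
    timeout = next_timeout(timeout)
    print(f"attempt {attempt}: timeout={timeout}s")  # 1, 2, 4, 8, 16
```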
@@ -201,26 +291,36 @@ class MARCImportJob:
         Raises:
             HTTPError: If there is an error creating the job.
         """
-        create_job = self.http_client.post(
-            self.folio_client.okapi_url + "/change-manager/jobExecutions",
-            headers=self.folio_client.okapi_headers,
-            json={"sourceType": "ONLINE", "userId": self.folio_client.current_user},
-        )
         try:
-            create_job.
-
-
-                "
-                + str(e)
-                + "\n"
-                + getattr(getattr(e, "response", ""), "text", "")
+            create_job = self.http_client.post(
+                self.folio_client.gateway_url + "/change-manager/jobExecutions",
+                headers=self.folio_client.okapi_headers,
+                json={"sourceType": "ONLINE", "userId": self.folio_client.current_user},
             )
-
+            create_job.raise_for_status()
+        except (httpx.ConnectTimeout, httpx.ReadTimeout, httpx.HTTPStatusError) as e:
+            if not hasattr(e, "response") or e.response.status_code in [502, 504]:
+                logger.warning(f"SERVER ERROR creating job: {e}. Retrying.")
+                sleep(0.25)
+                return await self.create_folio_import_job()
+            else:
+                logger.error(
+                    "Error creating job: "
+                    + str(e)
+                    + "\n"
+                    + getattr(getattr(e, "response", ""), "text", "")
+                )
+                raise e
         self.job_id = create_job.json()["parentJobExecutionId"]
+        logger.info(f"Created job: {self.job_id}")
 
-
+    @cached_property
+    def import_profile(self) -> dict:
         """
-
+        Returns the import profile for the current job execution.
+
+        Returns:
+            dict: The import profile for the current job execution.
         """
         import_profiles = self.folio_client.folio_get(
             "/data-import-profiles/jobProfiles",
@@ -232,7 +332,7 @@ class MARCImportJob:
             for profile in import_profiles
             if profile["name"] == self.import_profile_name
         ][0]
-
+        return profile
 
     async def set_job_profile(self) -> None:
         """
@@ -242,21 +342,23 @@ class MARCImportJob:
         The response from the HTTP request to set the job profile.
         """
         set_job_profile = self.http_client.put(
-            self.folio_client.
+            self.folio_client.gateway_url
             + "/change-manager/jobExecutions/"
            + self.job_id
            + "/jobProfile",
             headers=self.folio_client.okapi_headers,
             json={
-                "id": self.
-                "name": self.
+                "id": self.import_profile["id"],
+                "name": self.import_profile["name"],
                 "dataType": "MARC",
             },
         )
         try:
             set_job_profile.raise_for_status()
+            self.job_hrid = set_job_profile.json()['hrId']
+            logger.info(f"Job HRID: {self.job_hrid}")
         except httpx.HTTPError as e:
-
+            logger.error(
                 "Error creating job: "
                 + str(e)
                 + "\n"
@@ -264,7 +366,8 @@ class MARCImportJob:
             )
             raise e
 
-
+    @staticmethod
+    async def read_total_records(files) -> int:
         """
         Reads the total number of records from the given files.
 
@@ -277,7 +380,7 @@ class MARCImportJob:
         total_records = 0
         for import_file in files:
             while True:
-                chunk = import_file.read(
+                chunk = import_file.read(104857600)
                 if not chunk:
                     break
                 total_records += chunk.count(b"\x1d")
@@ -291,24 +394,41 @@ class MARCImportJob:
         Args:
             batch_payload (dict): A records payload containing the current batch of MARC records.
         """
-
-        self.
-
-
-
-
+        try:
+            post_batch = self.http_client.post(
+                self.folio_client.gateway_url
+                + f"/change-manager/jobExecutions/{self.job_id}/records",
+                headers=self.folio_client.okapi_headers,
+                json=batch_payload,
+            )
+            # if batch_payload["recordsMetadata"]["last"]:
+            #     logger.log(
+            #         25,
+            #         f"Sending last batch of {batch_payload['recordsMetadata']['total']} records.",
+            #     )
+        except (httpx.ConnectTimeout, httpx.ReadTimeout):
+            sleep(0.25)
+            return await self.process_record_batch(batch_payload)
         try:
             post_batch.raise_for_status()
             self.total_records_sent += len(self.record_batch)
             self.record_batch = []
             self.pbar_sent.update(len(batch_payload["initialRecords"]))
         except Exception as e:
-
-
-
-            self.
-            self.
-
+            if (
+                hasattr(e, "response") and e.response.status_code in [500, 422]
+            ): # TODO: #26 Check for specific error code once https://folio-org.atlassian.net/browse/MODSOURMAN-1281 is resolved
+                self.total_records_sent += len(self.record_batch)
+                self.record_batch = []
+                self.pbar_sent.update(len(batch_payload["initialRecords"]))
+            else:
+                logger.error("Error posting batch: " + str(e))
+                for record in self.record_batch:
+                    self.failed_batches_file.write(record)
+                self.error_records += len(self.record_batch)
+                self.pbar_sent.total = self.pbar_sent.total - len(self.record_batch)
+                self.record_batch = []
+        await self.get_job_status()
         sleep(self.batch_delay)
 
     async def process_records(self, files, total_records) -> None:
@@ -325,16 +445,21 @@ class MARCImportJob:
         """
         counter = 0
         for import_file in files:
+            file_path = Path(import_file.name)
             self.pbar_sent.set_description(
                 f"Sent ({os.path.basename(import_file.name)}): "
             )
             reader = pymarc.MARCReader(import_file, hide_utf8_warnings=True)
-            for record in reader:
+            for idx, record in enumerate(reader, start=1):
                 if len(self.record_batch) == self.batch_size:
                     await self.process_record_batch(
-                        await self.create_batch_payload(
+                        await self.create_batch_payload(
+                            counter,
+                            total_records,
+                            (counter - self.error_records)
+                            == (total_records - self.error_records),
+                        ),
                     )
-                    await self.get_job_status()
                     sleep(0.25)
                 if record:
                     if self.marc_record_preprocessor:
@@ -344,14 +469,39 @@ class MARCImportJob:
                    self.record_batch.append(record.as_marc())
                    counter += 1
                 else:
+                    logger.data_issues(
+                        "RECORD FAILED\t%s\t%s\t%s",
+                        f"{file_path.name}:{idx}",
+                        f"Error reading {idx} record from {file_path}. Skipping. Writing current chunk to {self.bad_records_file.name}.",
+                        "",
+                    )
                     self.bad_records_file.write(reader.current_chunk)
             if self.record_batch:
                 await self.process_record_batch(
-                    await self.create_batch_payload(
+                    await self.create_batch_payload(
+                        counter,
+                        total_records,
+                        (counter - self.error_records)
+                        == (total_records - self.error_records),
+                    ),
                 )
+            if not self.split_files:
+                self.move_file_to_complete(file_path)
+
+    def move_file_to_complete(self, file_path):
+        import_complete_path = file_path.parent.joinpath("import_complete")
+        if not import_complete_path.exists():
+            logger.debug(f"Creating import_complete directory: {import_complete_path.absolute()}")
+            import_complete_path.mkdir(exist_ok=True)
+        logger.debug(f"Moving {file_path} to {import_complete_path.absolute()}")
+        file_path.rename(
+            file_path.parent.joinpath("import_complete", file_path.name)
+        )
 
     @staticmethod
-    async def apply_marc_record_preprocessing(
+    async def apply_marc_record_preprocessing(
+        record: pymarc.Record, func_or_path
+    ) -> pymarc.Record:
         """
         Apply preprocessing to the MARC record before sending it to FOLIO.
 
@@ -363,25 +513,42 @@ class MARCImportJob:
             pymarc.Record: The preprocessed MARC record.
         """
         if isinstance(func_or_path, str):
-
-
-
-
-
-        except (ImportError, AttributeError) as e:
-            print(f"Error importing preprocessing function {func_or_path}: {e}. Skipping preprocessing.")
-            return record
+            func_paths = func_or_path.split(",")
+            for func_path in func_paths:
+                record = await MARCImportJob._apply_single_marc_record_preprocessing_by_path(
+                    record, func_path
+                )
         elif callable(func_or_path):
-
+            record = func_or_path(record)
         else:
-
-
+            logger.warning(
+                f"Invalid preprocessing function: {func_or_path}. Skipping preprocessing."
+            )
+        return record
+
+    async def _apply_single_marc_record_preprocessing_by_path(
+        record: pymarc.Record, func_path: str
+    ) -> pymarc.Record:
+        """
+        Apply a single preprocessing function to the MARC record.
+
+        Args:
+            record (pymarc.Record): The MARC record to preprocess.
+            func_path (str): The path to the preprocessing function.
 
+        Returns:
+            pymarc.Record: The preprocessed MARC record.
+        """
         try:
-
+            module_path, func_name = func_path.rsplit(".", 1)
+            module = importlib.import_module(module_path)
+            func = getattr(module, func_name)
+            record = func(record)
         except Exception as e:
-
-
+            logger.warning(
+                f"Error applying preprocessing function {func_path}: {e}. Skipping."
+            )
+        return record
 
     async def create_batch_payload(self, counter, total_records, is_last) -> dict:
         """
@@ -406,6 +573,46 @@ class MARCImportJob:
             "initialRecords": [{"record": x.decode()} for x in self.record_batch],
         }
 
+    @staticmethod
+    def split_marc_file(file_path, batch_size):
+        """Generator to iterate over MARC records in batches, yielding BytesIO objects."""
+        with open(file_path, "rb") as f:
+            batch = io.BytesIO()
+            count = 0
+
+            while True:
+                leader = f.read(24)
+                if not leader:
+                    break  # End of file
+
+                try:
+                    record_length = int(leader[:5])  # Extract record length from leader
+                except ValueError:
+                    raise ValueError("Invalid MARC record length encountered.")
+
+                record_body = f.read(record_length - 24)
+                if len(record_body) != record_length - 24:
+                    raise ValueError("Unexpected end of file while reading MARC record.")
+
+                # Verify record terminator
+                if record_body[-1:] != b'\x1D':
+                    raise ValueError("MARC record does not end with the expected terminator (0x1D).")
+
+                # Write the full record to the batch buffer
+                batch.write(leader + record_body)
+                count += 1
+
+                if count >= batch_size:
+                    batch.seek(0)
+                    yield batch
+                    batch = io.BytesIO()  # Reset buffer
+                    count = 0
+
+            # Yield any remaining records
+            if count > 0:
+                batch.seek(0)
+                yield batch
+
     async def import_marc_file(self) -> None:
         """
         Imports MARC file into the system.
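Aside: `split_marc_file()` is a static generator that yields each chunk as a rewound `io.BytesIO`. A hedged usage sketch (standalone; `records.mrc` is a placeholder path):

```python
import pymarc

from folio_data_import.MARCDataImport import MARCImportJob

# Iterate the BytesIO chunks the generator yields and count records per chunk.
for part_number, chunk in enumerate(
    MARCImportJob.split_marc_file("records.mrc", 1000), start=1
):
    reader = pymarc.MARCReader(chunk, hide_utf8_warnings=True)
    record_count = sum(1 for record in reader if record)
    print(f"part {part_number}: {record_count} records")
```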
@@ -425,24 +632,37 @@ class MARCImportJob:
             None
         """
         await self.create_folio_import_job()
-        await self.get_import_profile()
         await self.set_job_profile()
         with ExitStack() as stack:
-
-
-
+            try:
+                if isinstance(self.current_file[0], Path):
+                    files = [
+                        stack.enter_context(open(file, "rb")) for file in self.current_file
+                    ]
+                elif isinstance(self.current_file[0], io.BytesIO):
+                    files = [
+                        stack.enter_context(file) for file in self.current_file
+                    ]
+                else:
+                    raise ValueError("Invalid file type. Must be Path or BytesIO.")
+            except IndexError as e:
+                logger.error(f"Error opening file: {e}")
+                raise e
             total_records = await self.read_total_records(files)
-            with
-
-
-
-
-
-
-
-
-
-
+            with (
+                tqdm(
+                    desc=f"Imported ({self.job_hrid}): ",
+                    total=total_records,
+                    position=1,
+                    disable=self.no_progress,
+                ) as pbar_imported,
+                tqdm(
+                    desc="Sent: ()",
+                    total=total_records,
+                    position=0,
+                    disable=self.no_progress,
+                ) as pbar_sent,
+            ):
                 self.pbar_sent = pbar_sent
                 self.pbar_imported = pbar_imported
                 await self.process_records(files, total_records)
@@ -450,37 +670,45 @@ class MARCImportJob:
                 await self.get_job_status()
                 sleep(1)
                 if self.finished:
-
-                    job_summary.pop("jobExecutionId")
-                    job_summary.pop("totalErrors")
-                    columns = ["Summary"] + list(job_summary.keys())
-                    rows = set()
-                    for key in columns[1:]:
-                        rows.update(job_summary[key].keys())
-
-                    table_data = []
-                    for row in rows:
-                        metric_name = decamelize(row).split("_")[1]
-                        table_row = [metric_name]
-                        for col in columns[1:]:
-                            table_row.append(job_summary[col].get(row, "N/A"))
-                        table_data.append(table_row)
-                    table_data.sort(key=lambda x: REPORT_SUMMARY_ORDERING.get(x[0], 99))
-                    columns = columns[:1] + [
-                        " ".join(decamelize(x).split("_")[:-1]) for x in columns[1:]
-                    ]
-                    print(
-                        f"Results for {'file' if len(self.current_file) == 1 else 'files'}: "
-                        f"{', '.join([os.path.basename(x.name) for x in self.current_file])}"
-                    )
-                    print(
-                        tabulate.tabulate(
-                            table_data, headers=columns, tablefmt="fancy_grid"
-                        ),
-                    )
+                    await self.log_job_summary()
                     self.last_current = 0
                     self.finished = False
 
+    async def log_job_summary(self):
+        if job_summary := await self.get_job_summary():
+            job_id = job_summary.pop("jobExecutionId", None)
+            total_errors = job_summary.pop("totalErrors", 0)
+            columns = ["Summary"] + list(job_summary.keys())
+            rows = set()
+            for key in columns[1:]:
+                rows.update(job_summary[key].keys())
+
+            table_data = []
+            for row in rows:
+                metric_name = decamelize(row).split("_")[1]
+                table_row = [metric_name]
+                for col in columns[1:]:
+                    table_row.append(job_summary[col].get(row, "N/A"))
+                table_data.append(table_row)
+            table_data.sort(key=lambda x: REPORT_SUMMARY_ORDERING.get(x[0], 99))
+            columns = columns[:1] + [
+                " ".join(decamelize(x).split("_")[:-1]) for x in columns[1:]
+            ]
+            logger.info(
+                f"Results for {'file' if len(self.current_file) == 1 else 'files'}: "
+                f"{', '.join([os.path.basename(x.name) for x in self.current_file])}"
+            )
+            logger.info(
+                "\n"
+                + tabulate.tabulate(
+                    table_data, headers=columns, tablefmt="fancy_grid"
+                ),
+            )
+            if total_errors:
+                logger.info(f"Total errors: {total_errors}. Job ID: {job_id}.")
+        else:
+            logger.error(f"No job summary available for job #{self.job_hrid}({self.job_id}).")
+
     async def get_job_summary(self) -> dict:
         """
         Retrieves the job summary for the current job execution.
@@ -490,23 +718,88 @@ class MARCImportJob:
         """
         try:
             self.current_retry_timeout = (
-                self.current_retry_timeout * RETRY_TIMEOUT_RETRY_FACTOR
-
-
-                f"/metadata-provider/jobSummary/{self.job_id}"
+                (self.current_retry_timeout * RETRY_TIMEOUT_RETRY_FACTOR)
+                if self.current_retry_timeout
+                else RETRY_TIMEOUT_START
             )
-            self.current_retry_timeout = None
-        except httpx.ReadTimeout: #
-            sleep(.25)
             with httpx.Client(
-                timeout=self.current_retry_timeout,
-                verify=self.folio_client.ssl_verify
+                timeout=self.current_retry_timeout, verify=self.folio_client.ssl_verify
             ) as temp_client:
                 self.folio_client.httpx_client = temp_client
-
+                job_summary = self.folio_client.folio_get(
+                    f"/metadata-provider/jobSummary/{self.job_id}"
+                )
+            self.current_retry_timeout = None
+        except (httpx.ConnectTimeout, httpx.ReadTimeout, httpx.HTTPStatusError) as e:
+            error_text = e.response.text if hasattr(e, "response") else str(e)
+            if (self._max_summary_retries > self._summary_retries) and (not hasattr(e, "response") or (
+                hasattr(e, "response") and e.response.status_code in [502, 504]) and not self.let_summary_fail
+            ):
+                logger.warning(f"SERVER ERROR fetching job summary: {e}. Retrying.")
+                sleep(0.25)
+                with httpx.Client(
+                    timeout=self.current_retry_timeout,
+                    verify=self.folio_client.ssl_verify,
+                ) as temp_client:
+                    self.folio_client.httpx_client = temp_client
+                    self._summary_retries += 1
+                    return await self.get_job_summary()
+            elif (self._summary_retries >= self._max_summary_retries) or (hasattr(e, "response") and (
+                e.response.status_code in [502, 504] and self.let_summary_fail)
+            ):
+                logger.warning(
+                    f"SERVER ERROR fetching job summary: {error_text}. Skipping final summary check."
+                )
+                job_summary = {}
+            else:
+                raise e
         return job_summary
 
 
+def set_up_cli_logging():
+    """
+    This function sets up logging for the CLI.
+    """
+    logger.setLevel(logging.INFO)
+    logger.propagate = False
+
+    # Set up file and stream handlers
+    file_handler = logging.FileHandler(
+        "folio_data_import_{}.log".format(dt.now().strftime("%Y%m%d%H%M%S"))
+    )
+    file_handler.setLevel(logging.INFO)
+    file_handler.addFilter(ExcludeLevelFilter(DATA_ISSUE_LVL_NUM))
+    # file_handler.addFilter(IncludeLevelFilter(25))
+    file_formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
+    file_handler.setFormatter(file_formatter)
+    logger.addHandler(file_handler)
+
+    if not any(
+        isinstance(h, logging.StreamHandler) and h.stream == sys.stderr
+        for h in logger.handlers
+    ):
+        stream_handler = logging.StreamHandler(sys.stdout)
+        stream_handler.setLevel(logging.INFO)
+        stream_handler.addFilter(ExcludeLevelFilter(DATA_ISSUE_LVL_NUM))
+        # stream_handler.addFilter(ExcludeLevelFilter(25))
+        stream_formatter = logging.Formatter("%(message)s")
+        stream_handler.setFormatter(stream_formatter)
+        logger.addHandler(stream_handler)
+
+    # Set up data issues logging
+    data_issues_handler = logging.FileHandler(
+        "marc_import_data_issues_{}.log".format(dt.now().strftime("%Y%m%d%H%M%S"))
+    )
+    data_issues_handler.setLevel(26)
+    data_issues_handler.addFilter(IncludeLevelFilter(DATA_ISSUE_LVL_NUM))
+    data_issues_formatter = logging.Formatter("%(message)s")
+    data_issues_handler.setFormatter(data_issues_formatter)
+    logger.addHandler(data_issues_handler)
+
+    # Stop httpx from logging info messages to the console
+    logging.getLogger("httpx").setLevel(logging.WARNING)
+
+
 async def main() -> None:
     """
     Main function to run the MARC import job.
@@ -514,6 +807,7 @@ async def main() -> None:
     This function parses command line arguments, initializes the FolioClient,
     and runs the MARCImportJob.
     """
+    set_up_cli_logging()
     parser = argparse.ArgumentParser()
     parser.add_argument("--gateway_url", type=str, help="The FOLIO API Gateway URL")
     parser.add_argument("--tenant_id", type=str, help="The FOLIO tenant ID")
@@ -552,12 +846,15 @@ async def main() -> None:
         "--preprocessor",
         type=str,
         help=(
-            "
-            "to apply to each MARC record before sending to FOLIO."
+            "Comma-separated python import paths to Python function(s) "
+            "to apply to each MARC record before sending to FOLIO. Function should take "
+            "a pymarc.Record object as input and return a pymarc.Record object."
         ),
         default=None,
     )
-
+    # Add mutually exclusive group for consolidate and split-files options
+    group = parser.add_mutually_exclusive_group()
+    group.add_argument(
         "--consolidate",
         action="store_true",
         help=(
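Aside: per the help text above, a `--preprocessor` target is a dotted import path to a function that accepts and returns a `pymarc.Record`. A hedged example of such a function in a hypothetical `my_preprocessors.py` module (invoked as `--preprocessor my_preprocessors.strip_local_9xx`):

```python
# my_preprocessors.py -- hypothetical module, not shipped with the package.
import pymarc

def strip_local_9xx(record: pymarc.Record) -> pymarc.Record:
    """Drop locally defined 9xx fields before the record is sent to FOLIO."""
    for field in list(record.get_fields()):
        if field.tag.startswith("9"):
            record.remove_field(field)
    return record
```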
@@ -565,11 +862,34 @@ async def main() -> None:
             "Default is to create a new job for each MARC file."
         ),
     )
+    group.add_argument(
+        "--split-files",
+        action="store_true",
+        help="Split files into smaller parts before importing.",
+    )
+    parser.add_argument(
+        "--split-size",
+        type=int,
+        help="The number of records to include in each split file.",
+        default=1000,
+    )
+    parser.add_argument(
+        "--split-offset",
+        type=int,
+        help="The number of record batches of <split-size> to skip before starting import.",
+        default=0,
+    )
+
     parser.add_argument(
         "--no-progress",
         action="store_true",
         help="Disable progress bars (eg. for running in a CI environment)",
     )
+    parser.add_argument(
+        "--let-summary-fail",
+        action="store_true",
+        help="Do not retry fetching the final job summary if it fails",
+    )
     args = parser.parse_args()
     if not args.password:
         args.password = getpass("Enter FOLIO password: ")
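Aside: because `--consolidate` and `--split-files` are registered on a mutually exclusive group, argparse itself rejects the combination before `MARCImportJob.__init__` ever raises its `ValueError`. A standalone illustration of that behavior (demo parser only, not the package's):

```python
import argparse

parser = argparse.ArgumentParser(prog="demo")
group = parser.add_mutually_exclusive_group()
group.add_argument("--consolidate", action="store_true")
group.add_argument("--split-files", action="store_true")

print(parser.parse_args(["--split-files"]))  # Namespace(consolidate=False, split_files=True)
try:
    parser.parse_args(["--split-files", "--consolidate"])
except SystemExit:
    print("argparse exits with a 'not allowed with argument' error when both flags are given")
```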
@@ -586,11 +906,13 @@ async def main() -> None:
     else:
         marc_files = list(Path("./").glob(args.marc_file_path))
 
+    marc_files.sort()
+
     if len(marc_files) == 0:
-
+        logger.critical(f"No files found matching {args.marc_file_path}. Exiting.")
         sys.exit(1)
     else:
-
+        logger.info(marc_files)
 
     if not args.import_profile_name:
         import_profiles = folio_client.folio_get(
@@ -622,12 +944,34 @@ async def main() -> None:
             marc_record_preprocessor=args.preprocessor,
             consolidate=bool(args.consolidate),
             no_progress=bool(args.no_progress),
+            let_summary_fail=bool(args.let_summary_fail),
+            split_files=bool(args.split_files),
+            split_size=args.split_size,
+            split_offset=args.split_offset,
         ).do_work()
     except Exception as e:
-
+        logger.error("Error importing files: " + str(e))
         raise
 
 
+class ExcludeLevelFilter(logging.Filter):
+    def __init__(self, level):
+        super().__init__()
+        self.level = level
+
+    def filter(self, record):
+        return record.levelno != self.level
+
+
+class IncludeLevelFilter(logging.Filter):
+    def __init__(self, level):
+        super().__init__()
+        self.level = level
+
+    def filter(self, record):
+        return record.levelno == self.level
+
+
 def sync_main() -> None:
     """
     Synchronous main function to run the MARC import job.