folio-data-import 0.2.8rc8__py3-none-any.whl → 0.2.8rc9__py3-none-any.whl

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of folio-data-import might be problematic. Click here for more details.

@@ -1,7 +1,6 @@
1
1
  import argparse
2
2
  import asyncio
3
3
  import datetime
4
- from email import message
5
4
  import glob
6
5
  import importlib
7
6
  import io
@@ -104,6 +103,8 @@ class MARCImportJob:
104
103
  self.marc_record_preprocessor = marc_record_preprocessor
105
104
  self.pbar_sent: tqdm
106
105
  self.pbar_imported: tqdm
106
+ self._max_summary_retries: int = 2
107
+ self._summary_retries: int = 0
107
108
 
108
109
  async def do_work(self) -> None:
109
110
  """
@@ -183,11 +184,15 @@ class MARCImportJob:
183
184
  if self.current_retry_timeout
184
185
  else RETRY_TIMEOUT_START
185
186
  )
186
- job_status = self.folio_client.folio_get(
187
- "/metadata-provider/jobExecutions?statusNot=DISCARDED&uiStatusAny"
188
- "=PREPARING_FOR_PREVIEW&uiStatusAny=READY_FOR_PREVIEW&uiStatusAny=RUNNING&limit=50"
189
- )
190
- self.current_retry_timeout = None
187
+ with httpx.Client(
188
+ timeout=self.current_retry_timeout,
189
+ verify=self.folio_client.ssl_verify,
190
+ ) as temp_client:
191
+ job_status = self.folio_client.folio_get(
192
+ "/metadata-provider/jobExecutions?statusNot=DISCARDED&uiStatusAny"
193
+ "=PREPARING_FOR_PREVIEW&uiStatusAny=READY_FOR_PREVIEW&uiStatusAny=RUNNING&limit=50"
194
+ )
195
+ self.current_retry_timeout = None
191
196
  except (httpx.ConnectTimeout, httpx.ReadTimeout, httpx.HTTPStatusError) as e:
192
197
  if not hasattr(e, "response") or e.response.status_code in [502, 504]:
193
198
  error_text = e.response.text if hasattr(e, "response") else str(e)
@@ -201,13 +206,17 @@ class MARCImportJob:
201
206
  return await self.get_job_status()
202
207
  else:
203
208
  raise e
209
+ except Exception as e:
210
+ logger.error(f"Error fetching job status. {e}")
211
+
204
212
  try:
205
213
  status = [
206
214
  job for job in job_status["jobExecutions"] if job["id"] == self.job_id
207
215
  ][0]
208
216
  self.pbar_imported.update(status["progress"]["current"] - self.last_current)
209
217
  self.last_current = status["progress"]["current"]
210
- except IndexError:
218
+ except (IndexError, ValueError, KeyError):
219
+ logger.debug(f"No active job found with ID {self.job_id}. Checking for finished job.")
211
220
  try:
212
221
  job_status = self.folio_client.folio_get(
213
222
  "/metadata-provider/jobExecutions?limit=100&sortBy=completed_date%2Cdesc&statusAny"
@@ -245,21 +254,26 @@ class MARCImportJob:
245
254
  Raises:
246
255
  HTTPError: If there is an error creating the job.
247
256
  """
248
- create_job = self.http_client.post(
249
- self.folio_client.okapi_url + "/change-manager/jobExecutions",
250
- headers=self.folio_client.okapi_headers,
251
- json={"sourceType": "ONLINE", "userId": self.folio_client.current_user},
252
- )
253
257
  try:
254
- create_job.raise_for_status()
255
- except httpx.HTTPError as e:
256
- logger.error(
257
- "Error creating job: "
258
- + str(e)
259
- + "\n"
260
- + getattr(getattr(e, "response", ""), "text", "")
258
+ create_job = self.http_client.post(
259
+ self.folio_client.okapi_url + "/change-manager/jobExecutions",
260
+ headers=self.folio_client.okapi_headers,
261
+ json={"sourceType": "ONLINE", "userId": self.folio_client.current_user},
261
262
  )
262
- raise e
263
+ create_job.raise_for_status()
264
+ except (httpx.ConnectTimeout, httpx.ReadTimeout, httpx.HTTPStatusError) as e:
265
+ if not hasattr(e, "response") or e.response.status_code in [502, 504]:
266
+ logger.warning(f"SERVER ERROR creating job: {e}. Retrying.")
267
+ sleep(0.25)
268
+ return await self.create_folio_import_job()
269
+ else:
270
+ logger.error(
271
+ "Error creating job: "
272
+ + str(e)
273
+ + "\n"
274
+ + getattr(getattr(e, "response", ""), "text", "")
275
+ )
276
+ raise e
263
277
  self.job_id = create_job.json()["parentJobExecutionId"]
264
278
  logger.info("Created job: " + self.job_id)
265
279
 
@@ -432,7 +446,7 @@ class MARCImportJob:
432
446
  ),
433
447
  )
434
448
  import_complete_path = file_path.parent.joinpath("import_complete")
435
- if import_complete_path.exists():
449
+ if not import_complete_path.exists():
436
450
  logger.debug(f"Creating import_complete directory: {import_complete_path.absolute()}")
437
451
  import_complete_path.mkdir(exist_ok=True)
438
452
  logger.debug(f"Moving {file_path} to {import_complete_path.absolute()}")
@@ -455,31 +469,42 @@ class MARCImportJob:
455
469
  pymarc.Record: The preprocessed MARC record.
456
470
  """
457
471
  if isinstance(func_or_path, str):
458
- try:
459
- path_parts = func_or_path.rsplit(".")
460
- module_path, func_name = ".".join(path_parts[:-1]), path_parts[-1]
461
- module = importlib.import_module(module_path)
462
- func = getattr(module, func_name)
463
- except (ImportError, AttributeError) as e:
464
- logger.error(
465
- f"Error importing preprocessing function {func_or_path}: {e}. Skipping preprocessing."
472
+ func_paths = func_or_path.split(",")
473
+ for func_path in func_paths:
474
+ record = await MARCImportJob._apply_single_marc_record_preprocessing_by_path(
475
+ record, func_path
466
476
  )
467
- return record
468
477
  elif callable(func_or_path):
469
- func = func_or_path
478
+ record = func_or_path(record)
470
479
  else:
471
480
  logger.warning(
472
481
  f"Invalid preprocessing function: {func_or_path}. Skipping preprocessing."
473
482
  )
474
- return record
483
+ return record
484
+
485
+ async def _apply_single_marc_record_preprocessing_by_path(
486
+ record: pymarc.Record, func_path: str
487
+ ) -> pymarc.Record:
488
+ """
489
+ Apply a single preprocessing function to the MARC record.
490
+
491
+ Args:
492
+ record (pymarc.Record): The MARC record to preprocess.
493
+ func_path (str): The path to the preprocessing function.
475
494
 
495
+ Returns:
496
+ pymarc.Record: The preprocessed MARC record.
497
+ """
476
498
  try:
477
- return func(record)
499
+ module_path, func_name = func_path.rsplit(".", 1)
500
+ module = importlib.import_module(module_path)
501
+ func = getattr(module, func_name)
502
+ record = func(record)
478
503
  except Exception as e:
479
- logger.error(
480
- f"Error applying preprocessing function: {e}. Skipping preprocessing."
504
+ logger.warning(
505
+ f"Error applying preprocessing function {func_path}: {e}. Skipping."
481
506
  )
482
- return record
507
+ return record
483
508
 
484
509
  async def create_batch_payload(self, counter, total_records, is_last) -> dict:
485
510
  """
@@ -609,7 +634,7 @@ class MARCImportJob:
609
634
  self.current_retry_timeout = None
610
635
  except (httpx.ConnectTimeout, httpx.ReadTimeout, httpx.HTTPStatusError) as e:
611
636
  error_text = e.response.text if hasattr(e, "response") else str(e)
612
- if not hasattr(e, "response") or (
637
+ if (self._max_summary_retries > self._summary_retries and not hasattr(e, "response")) or (
613
638
  e.response.status_code in [502, 504] and not self.let_summary_fail
614
639
  ):
615
640
  logger.warning(f"SERVER ERROR fetching job summary: {e}. Retrying.")
@@ -619,9 +644,10 @@ class MARCImportJob:
619
644
  verify=self.folio_client.ssl_verify,
620
645
  ) as temp_client:
621
646
  self.folio_client.httpx_client = temp_client
647
+ self._summary_retries += 1
622
648
  return await self.get_job_summary()
623
- elif hasattr(e, "response") and (
624
- e.response.status_code in [502, 504] and self.let_summary_fail
649
+ elif (self._summary_retries >= self._max_summary_retries) or (hasattr(e, "response") and (
650
+ e.response.status_code in [502, 504] and self.let_summary_fail)
625
651
  ):
626
652
  logger.warning(
627
653
  f"SERVER ERROR fetching job summary: {error_text}. Skipping final summary check."
@@ -667,6 +693,7 @@ def set_up_cli_logging():
667
693
  "marc_import_data_issues_{}.log".format(dt.now().strftime("%Y%m%d%H%M%S"))
668
694
  )
669
695
  data_issues_handler.setLevel(26)
696
+ data_issues_handler.addFilter(IncludeLevelFilter(DATA_ISSUE_LVL_NUM))
670
697
  data_issues_formatter = logging.Formatter("%(message)s")
671
698
  data_issues_handler.setFormatter(data_issues_formatter)
672
699
  logger.addHandler(data_issues_handler)
@@ -63,6 +63,29 @@ def strip_999_ff_fields(record: pymarc.Record) -> pymarc.Record:
63
63
  record.remove_field(field)
64
64
  return record
65
65
 
66
+ def clean_999_fields(record: pymarc.Record) -> pymarc.Record:
67
+ """
68
+ The presence of 999 fields, with or without ff indicators, can cause
69
+ issues with data import mapping in FOLIO. This function calls strip_999_ff_fields
70
+ to remove 999 fields with ff indicators and then copies the remaining 999 fields
71
+ to 945 fields.
72
+
73
+ Args:
74
+ record (pymarc.Record): The MARC record to preprocess.
75
+
76
+ Returns:
77
+ pymarc.Record: The preprocessed MARC record.
78
+ """
79
+ record = strip_999_ff_fields(record)
80
+ for field in record.get_fields("999"):
81
+ _945 = pymarc.Field(
82
+ tag="945",
83
+ indicators=field.indicators,
84
+ subfields=field.subfields,
85
+ )
86
+ record.add_ordered_field(_945)
87
+ record.remove_field(field)
88
+ return record
66
89
 
67
90
  def sudoc_supercede_prep(record: pymarc.Record) -> pymarc.Record:
68
91
  """
@@ -219,7 +242,7 @@ def clean_empty_fields(record: pymarc.Record) -> pymarc.Record:
219
242
  26,
220
243
  "DATA ISSUE\t%s\t%s\t%s",
221
244
  record["001"].value(),
222
- f"{field.tag} is empty",
245
+ f"{field.tag} is empty, removing field",
223
246
  field,
224
247
  )
225
248
  record.remove_field(field)
@@ -228,7 +251,7 @@ def clean_empty_fields(record: pymarc.Record) -> pymarc.Record:
228
251
  26,
229
252
  "DATA ISSUE\t%s\t%s\t%s",
230
253
  record["001"].value(),
231
- f"{field.tag}${field.subfields[0].code} is empty, removing field",
254
+ f"{field.tag}${field.subfields[0].code} is empty, no other subfields present, removing field",
232
255
  field,
233
256
  )
234
257
  record.remove_field(field)
@@ -238,12 +261,15 @@ def clean_empty_fields(record: pymarc.Record) -> pymarc.Record:
238
261
  26,
239
262
  "DATA ISSUE\t%s\t%s\t%s",
240
263
  record["001"].value(),
241
- f"{field.tag}$a is empty, removing field",
264
+ f"{field.tag}$a is empty, removing subfield",
242
265
  field,
243
266
  )
244
267
  field.delete_subfield("a")
245
268
  for idx, subfield in enumerate(list(field.subfields), start=1):
246
- if subfield.code in MAPPED_FIELDS.get(field.tag, []) and not subfield.value:
269
+ if (
270
+ subfield.code in MAPPED_FIELDS.get(field.tag, [])
271
+ and not subfield.value
272
+ ):
247
273
  logger.log(
248
274
  26,
249
275
  "DATA ISSUE\t%s\t%s\t%s",
@@ -264,6 +290,40 @@ def clean_empty_fields(record: pymarc.Record) -> pymarc.Record:
264
290
  return record
265
291
 
266
292
 
293
+ def fix_leader(record: pymarc.Record) -> pymarc.Record:
294
+ """
295
+ Fixes the leader of the record by setting the record status to 'c' (modified
296
+ record) and the type of record to 'a' (language material).
297
+
298
+ Args:
299
+ record (pymarc.Record): The MARC record to preprocess.
300
+
301
+ Returns:
302
+ pymarc.Record: The preprocessed MARC record.
303
+ """
304
+ VALID_STATUSES = ["a", "c", "d", "n", "p"]
305
+ VALID_TYPES = ["a", "c", "d", "e", "f", "g", "i", "j", "k", "m", "o", "p", "r", "t"]
306
+ if record.leader[5] not in VALID_STATUSES:
307
+ logger.log(
308
+ 26,
309
+ "DATA ISSUE\t%s\t%s\t%s",
310
+ record["001"].value(),
311
+ f"Invalid record status: {record.leader[5]}, setting to 'c'",
312
+ record,
313
+ )
314
+ record.leader = pymarc.Leader(record.leader[:5] + "c" + record.leader[6:])
315
+ if record.leader[6] not in VALID_TYPES:
316
+ logger.log(
317
+ 26,
318
+ "DATA ISSUE\t%s\t%s\t%s",
319
+ record["001"].value(),
320
+ f"Invalid record type: {record.leader[6]}, setting to 'a'",
321
+ record,
322
+ )
323
+ record.leader = pymarc.Leader(record.leader[:6] + "a" + record.leader[7:])
324
+ return record
325
+
326
+
267
327
  def ordinal(n):
268
328
  s = ("th", "st", "nd", "rd") + ("th",) * 10
269
329
  v = n % 100
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: folio_data_import
3
- Version: 0.2.8rc8
3
+ Version: 0.2.8rc9
4
4
  Summary: A python module to interact with the data importing capabilities of the open-source FOLIO ILS
5
5
  License: MIT
6
6
  Author: Brooks Travis
@@ -0,0 +1,11 @@
1
+ folio_data_import/MARCDataImport.py,sha256=ImbuGw1ADt4nCmq0lLaqugP2wv5kBrgMGAr0jbKSgFc,33135
2
+ folio_data_import/UserImport.py,sha256=Y9ZjYoUP_vNJVftx_xUcbBqvC5CwWeuzlmCcSVQfzgo,40976
3
+ folio_data_import/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ folio_data_import/__main__.py,sha256=kav_uUsnrIjGjVxQkk3exLKrc1mah9t2x3G6bGS-5I0,3710
5
+ folio_data_import/marc_preprocessors/__init__.py,sha256=urExfNTQoZsDCtDPcUY9EEC5OFcUihxhYEQkQFVzbMY,30
6
+ folio_data_import/marc_preprocessors/_preprocessors.py,sha256=4i1_lEnptzZDx3DojX9sfvJ_hmehwFJUC3aZsUADcwA,10851
7
+ folio_data_import-0.2.8rc9.dist-info/LICENSE,sha256=qJX7wxMC7ky9Kq4v3zij8MjGEiC5wsB7pYeOhLj5TDk,1083
8
+ folio_data_import-0.2.8rc9.dist-info/METADATA,sha256=Q80K34yk3xcZPfCf50FBtAYY7Hrxb3ukbAAGAv4uCEs,6112
9
+ folio_data_import-0.2.8rc9.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
10
+ folio_data_import-0.2.8rc9.dist-info/entry_points.txt,sha256=498SxWVXeEMRNw3PUf-eoReZvKewmYwPBtZhIUPr_Jg,192
11
+ folio_data_import-0.2.8rc9.dist-info/RECORD,,
@@ -1,11 +0,0 @@
1
- folio_data_import/MARCDataImport.py,sha256=wLh8raMOIXCQiNZd_MLyUPwGlxZU2G-qOrnySKR9XU4,31723
2
- folio_data_import/UserImport.py,sha256=Y9ZjYoUP_vNJVftx_xUcbBqvC5CwWeuzlmCcSVQfzgo,40976
3
- folio_data_import/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- folio_data_import/__main__.py,sha256=kav_uUsnrIjGjVxQkk3exLKrc1mah9t2x3G6bGS-5I0,3710
5
- folio_data_import/marc_preprocessors/__init__.py,sha256=urExfNTQoZsDCtDPcUY9EEC5OFcUihxhYEQkQFVzbMY,30
6
- folio_data_import/marc_preprocessors/_preprocessors.py,sha256=LJdmwW_0oPbcaWb85F0PAcXJWbbBp9HHrFyZQZER5gs,8738
7
- folio_data_import-0.2.8rc8.dist-info/LICENSE,sha256=qJX7wxMC7ky9Kq4v3zij8MjGEiC5wsB7pYeOhLj5TDk,1083
8
- folio_data_import-0.2.8rc8.dist-info/METADATA,sha256=Nl8YmVukI1et0xX3ObggSQTbcgjNuYbY3E8GrvG_X8M,6112
9
- folio_data_import-0.2.8rc8.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
10
- folio_data_import-0.2.8rc8.dist-info/entry_points.txt,sha256=498SxWVXeEMRNw3PUf-eoReZvKewmYwPBtZhIUPr_Jg,192
11
- folio_data_import-0.2.8rc8.dist-info/RECORD,,