folio-data-import 0.2.8rc12__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff shows the content changes between two publicly released versions of this package, as published to one of the supported registries. The information is provided for informational purposes only and reflects the packages exactly as they appear in their respective public registries.

Potentially problematic release.


This version of folio-data-import has been flagged as potentially problematic. See the registry's advisory page for this release for more details.

@@ -8,7 +8,7 @@ import time
8
8
  import uuid
9
9
  from datetime import datetime as dt
10
10
  from pathlib import Path
11
- from typing import Tuple
11
+ from typing import Tuple, List
12
12
 
13
13
  import aiofiles
14
14
  import folioclient
@@ -51,6 +51,7 @@ class UserImporter: # noqa: R0902
51
51
  user_match_key: str = "externalSystemId",
52
52
  only_update_present_fields: bool = False,
53
53
  default_preferred_contact_type: str = "002",
54
+ fields_to_protect: List[str] =[],
54
55
  ) -> None:
55
56
  self.limit_simultaneous_requests = limit_simultaneous_requests
56
57
  self.batch_size = batch_size
@@ -77,6 +78,7 @@ class UserImporter: # noqa: R0902
77
78
  self.match_key = user_match_key
78
79
  self.lock: asyncio.Lock = asyncio.Lock()
79
80
  self.logs: dict = {"created": 0, "updated": 0, "failed": 0}
81
+ self.fields_to_protect = set(fields_to_protect)
80
82
 
81
83
  @staticmethod
82
84
  def build_ref_data_id_map(
@@ -137,7 +139,7 @@ class UserImporter: # noqa: R0902
137
139
  match_key = "id" if ("id" in user_obj) else self.match_key
138
140
  try:
139
141
  existing_user = await self.http_client.get(
140
- self.folio_client.okapi_url + "/users",
142
+ self.folio_client.gateway_url + "/users",
141
143
  headers=self.folio_client.okapi_headers,
142
144
  params={"query": f"{match_key}=={user_obj[match_key]}"},
143
145
  )
@@ -161,7 +163,7 @@ class UserImporter: # noqa: R0902
161
163
  """
162
164
  try:
163
165
  existing_rp = await self.http_client.get(
164
- self.folio_client.okapi_url
166
+ self.folio_client.gateway_url
165
167
  + "/request-preference-storage/request-preference",
166
168
  headers=self.folio_client.okapi_headers,
167
169
  params={
@@ -188,7 +190,7 @@ class UserImporter: # noqa: R0902
188
190
  """
189
191
  try:
190
192
  existing_pu = await self.http_client.get(
191
- self.folio_client.okapi_url + "/perms/users",
193
+ self.folio_client.gateway_url + "/perms/users",
192
194
  headers=self.folio_client.okapi_headers,
193
195
  params={
194
196
  "query": f"userId=={existing_user.get('id', user_obj.get('id', ''))}"
@@ -334,6 +336,7 @@ class UserImporter: # noqa: R0902
334
336
  None
335
337
 
336
338
  """
339
+
337
340
  await self.set_preferred_contact_type(user_obj, existing_user)
338
341
  preferred_contact_type = {"preferredContactTypeId": existing_user.get("personal", {}).pop("preferredContactTypeId")}
339
342
  if self.only_update_present_fields:
@@ -369,7 +372,7 @@ class UserImporter: # noqa: R0902
369
372
  else:
370
373
  existing_user[key] = value
371
374
  create_update_user = await self.http_client.put(
372
- self.folio_client.okapi_url + f"/users/{existing_user['id']}",
375
+ self.folio_client.gateway_url + f"/users/{existing_user['id']}",
373
376
  headers=self.folio_client.okapi_headers,
374
377
  json=existing_user,
375
378
  )
@@ -389,7 +392,7 @@ class UserImporter: # noqa: R0902
389
392
  HTTPError: If the HTTP request to create the user fails.
390
393
  """
391
394
  response = await self.http_client.post(
392
- self.folio_client.okapi_url + "/users",
395
+ self.folio_client.gateway_url + "/users",
393
396
  headers=self.folio_client.okapi_headers,
394
397
  json=user_obj,
395
398
  )
@@ -502,7 +505,9 @@ class UserImporter: # noqa: R0902
502
505
 
503
506
  async def get_protected_fields(self, existing_user) -> dict:
504
507
  """
505
- Retrieves the protected fields from the existing user object.
508
+ Retrieves the protected fields from the existing user object,
509
+ combining both the customFields.protectedFields list *and*
510
+ any fields_to_protect passed on the CLI.
506
511
 
507
512
  Args:
508
513
  existing_user (dict): The existing user object.
@@ -512,18 +517,19 @@ class UserImporter: # noqa: R0902
512
517
  """
513
518
  protected_fields = {}
514
519
  protected_fields_list = existing_user.get("customFields", {}).get("protectedFields", "").split(",")
515
- for field in protected_fields_list:
516
- if len(field.split(".")) > 1:
517
- field, subfield = field.split(".")
518
- if field not in protected_fields:
519
- protected_fields[field] = {}
520
- protected_fields[field][subfield] = existing_user.get(field, {}).pop(subfield, None)
521
- if protected_fields[field][subfield] is None:
522
- protected_fields[field].pop(subfield)
520
+ cli_fields = list(self.fields_to_protect)
521
+ # combine and dedupe:
522
+ all_fields = list(dict.fromkeys(protected_fields_list + cli_fields))
523
+ for field in all_fields:
524
+ if "." in field:
525
+ fld, subfld = field.split(".", 1)
526
+ val = existing_user.get(fld, {}).pop(subfld, None)
527
+ if val is not None:
528
+ protected_fields.setdefault(fld, {})[subfld] = val
523
529
  else:
524
- protected_fields[field] = existing_user.pop(field, None)
525
- if protected_fields[field] is None:
526
- protected_fields.pop(field)
530
+ val = existing_user.pop(field, None)
531
+ if val is not None:
532
+ protected_fields[field] = val
527
533
  return protected_fields
528
534
 
529
535
  async def process_existing_user(self, user_obj) -> Tuple[dict, dict, dict, dict]:
@@ -589,7 +595,7 @@ class UserImporter: # noqa: R0902
589
595
  rp_obj["userId"] = new_user_obj["id"]
590
596
  # print(rp_obj)
591
597
  response = await self.http_client.post(
592
- self.folio_client.okapi_url
598
+ self.folio_client.gateway_url
593
599
  + "/request-preference-storage/request-preference",
594
600
  headers=self.folio_client.okapi_headers,
595
601
  json=rp_obj,
@@ -613,7 +619,7 @@ class UserImporter: # noqa: R0902
613
619
  existing_rp.update(rp_obj)
614
620
  # print(existing_rp)
615
621
  response = await self.http_client.put(
616
- self.folio_client.okapi_url
622
+ self.folio_client.gateway_url
617
623
  + f"/request-preference-storage/request-preference/{existing_rp['id']}",
618
624
  headers=self.folio_client.okapi_headers,
619
625
  json=existing_rp,
@@ -635,7 +641,7 @@ class UserImporter: # noqa: R0902
635
641
  """
636
642
  perms_user_obj = {"userId": new_user_obj["id"], "permissions": []}
637
643
  response = await self.http_client.post(
638
- self.folio_client.okapi_url + "/perms/users",
644
+ self.folio_client.gateway_url + "/perms/users",
639
645
  headers=self.folio_client.okapi_headers,
640
646
  json=perms_user_obj,
641
647
  )
@@ -788,7 +794,7 @@ class UserImporter: # noqa: R0902
788
794
  """
789
795
  try:
790
796
  existing_spu = await self.http_client.get(
791
- self.folio_client.okapi_url + "/service-points-users",
797
+ self.folio_client.gateway_url + "/service-points-users",
792
798
  headers=self.folio_client.okapi_headers,
793
799
  params={"query": f"userId=={existing_user['id']}"},
794
800
  )
@@ -812,7 +818,7 @@ class UserImporter: # noqa: R0902
812
818
  """
813
819
  spu_obj["userId"] = existing_user["id"]
814
820
  response = await self.http_client.post(
815
- self.folio_client.okapi_url + "/service-points-users",
821
+ self.folio_client.gateway_url + "/service-points-users",
816
822
  headers=self.folio_client.okapi_headers,
817
823
  json=spu_obj,
818
824
  )
@@ -831,7 +837,7 @@ class UserImporter: # noqa: R0902
831
837
  """
832
838
  existing_spu.update(spu_obj)
833
839
  response = await self.http_client.put(
834
- self.folio_client.okapi_url + f"/service-points-users/{existing_spu['id']}",
840
+ self.folio_client.gateway_url + f"/service-points-users/{existing_spu['id']}",
835
841
  headers=self.folio_client.okapi_headers,
836
842
  json=existing_spu,
837
843
  )
@@ -896,6 +902,7 @@ async def main() -> None:
896
902
  --update_only_present_fields (bool): Only update fields that are present in the new user object.
897
903
  --default_preferred_contact_type (str): The default preferred contact type to use if the provided \
898
904
  value is not valid or not present. Default "002".
905
+ --fields_to_protect (str): Comma-separated list of top-level or nested (dot-notation) fields to protect.
899
906
 
900
907
  Raises:
901
908
  Exception: If an unknown error occurs during the import process.
@@ -953,7 +960,20 @@ async def main() -> None:
953
960
  choices=list(PREFERRED_CONTACT_TYPES_MAP.keys()) + list(PREFERRED_CONTACT_TYPES_MAP.values()),
954
961
  default="002",
955
962
  )
963
+ parser.add_argument(
964
+ "--fields-to-protect", # new flag name
965
+ dest="fields_to_protect", # sets args.fields_to_protect
966
+ help=(
967
+ "Comma-separated list of top-level user fields to protect "
968
+ "(e.g. type,expirationDate)"
969
+ ),
970
+ default="",
971
+ )
956
972
  args = parser.parse_args()
973
+ protect_fields = [
974
+ f.strip() for f in args.fields_to_protect.split(",")
975
+ if f.strip()
976
+ ]
957
977
 
958
978
  library_name = args.library_name
959
979
 
@@ -1005,6 +1025,7 @@ async def main() -> None:
1005
1025
  args.user_match_key,
1006
1026
  args.update_only_present_fields,
1007
1027
  args.default_preferred_contact_type,
1028
+ fields_to_protect=protect_fields,
1008
1029
  )
1009
1030
  await importer.do_import()
1010
1031
  except Exception as ee:
@@ -0,0 +1,29 @@
1
+ """Custom exceptions for the Folio Data Import module."""
2
+
3
+ class FolioDataImportError(Exception):
4
+ """Base class for all exceptions in the Folio Data Import module."""
5
+ pass
6
+
7
+ class FolioDataImportBatchError(FolioDataImportError):
8
+ """Exception raised for errors in the Folio Data Import batch process.
9
+
10
+ Attributes:
11
+ batch_id -- ID of the batch that caused the error
12
+ message -- explanation of the error
13
+ """
14
+ def __init__(self, batch_id, message, exception=None):
15
+ self.batch_id = batch_id
16
+ self.message = message
17
+ super().__init__(f"Unhandled error posting batch {batch_id}: {message}")
18
+
19
+ class FolioDataImportJobError(FolioDataImportError):
20
+ """Exception raised for errors in the Folio Data Import job process.
21
+
22
+ Attributes:
23
+ job_id -- ID of the job that caused the error
24
+ message -- explanation of the error
25
+ """
26
+ def __init__(self, job_id, message, exception=None):
27
+ self.job_id = job_id
28
+ self.message = message
29
+ super().__init__(f"Unhandled error processing job {job_id}: {message}")
@@ -1,69 +1,171 @@
1
+ import importlib
2
+ import sys
3
+ from typing import Callable, Dict, List, Tuple, Union
1
4
  import pymarc
2
5
  import logging
3
6
 
7
+ from pymarc.record import Record
8
+
4
9
  logger = logging.getLogger("folio_data_import.MARCDataImport")
5
10
 
11
+ class MARCPreprocessor:
12
+ """
13
+ A class to preprocess MARC records for data import into FOLIO.
14
+ """
15
+
16
+ def __init__(self, preprocessors: Union[str,List[Callable]], **kwargs):
17
+ """
18
+ Initialize the MARCPreprocessor with a list of preprocessors.
19
+
20
+ Args:
21
+ preprocessors (Union[str, List[Callable]]): A string of comma-separated function names or a list of callable preprocessor functions to apply.
22
+ """
23
+ self.preprocessor_args: Dict[str, Dict] = kwargs
24
+ self.preprocessors: List[Tuple[Callable, Dict]] = self._get_preprocessor_functions(
25
+ preprocessors
26
+ )
27
+ self.proc_kwargs = kwargs
28
+ self.record = None
29
+
30
+ def _get_preprocessor_args(self, func: Callable) -> Dict:
31
+ """
32
+ Get the arguments for the preprocessor function.
33
+
34
+ Args:
35
+ func (Callable): The preprocessor function.
36
+
37
+ Returns:
38
+ Dict: A dictionary of arguments for the preprocessor function.
39
+ """
40
+ func_path = f"{func.__module__}.{func.__name__}"
41
+ path_args: Dict = self.preprocessor_args.get("default", {})
42
+ path_args.update(self.preprocessor_args.get(func.__name__, {}))
43
+ path_args.update(self.preprocessor_args.get(func_path, {}))
44
+ return path_args
45
+
46
+ def _get_preprocessor_functions(self, func_list: Union[str, List[Callable]]) -> List[Callable]:
47
+ """
48
+ Get the preprocessor functions based on the provided names.
49
+
50
+ Args:
51
+ func_list (Union[str, List[Callable]]): A string of comma-separated function names or a list of callable preprocessor functions.
52
+
53
+ Returns:
54
+ List[callable]: A list of preprocessor functions.
55
+ """
56
+ preprocessors = []
57
+ if isinstance(func_list, str):
58
+ func_list = func_list.split(",")
59
+ else:
60
+ for f in func_list:
61
+ if not callable(f):
62
+ logger.warning(
63
+ f"Preprocessing function {f} is not callable. Skipping."
64
+ )
65
+ else:
66
+ preprocessors.append((f, self._get_preprocessor_args(f)))
67
+ return preprocessors
68
+ for f_path in func_list:
69
+ f_import = f_path.rsplit(".", 1)
70
+ if len(f_import) == 1:
71
+ # If the function is not a full path, assume it's in the current module
72
+ if func := getattr(sys.modules[__name__], f_import[0], None):
73
+ if callable(func):
74
+ preprocessors.append((func, self._get_preprocessor_args(func)))
75
+ else:
76
+ logger.warning(
77
+ f"Preprocessing function {f_path} is not callable. Skipping."
78
+ )
79
+ else:
80
+ logger.warning(
81
+ f"Preprocessing function {f_path} not found in current module. Skipping."
82
+ )
83
+ elif len(f_import) == 2:
84
+ # If the function is a full path, import it
85
+ module_path, func_name = f_import
86
+ try:
87
+ module = importlib.import_module(module_path)
88
+ func = getattr(module, func_name)
89
+ preprocessors.append((func, self._get_preprocessor_args(func)))
90
+ except ImportError as e:
91
+ logger.warning(
92
+ f"Error importing preprocessing function {f_path}: {e}. Skipping."
93
+ )
94
+ return preprocessors
95
+
96
+ def do_work(self, record: Record) -> Record:
97
+ """
98
+ Preprocess the MARC record.
99
+ """
100
+ for proc, kwargs in self.preprocessors:
101
+ record = proc(record, **kwargs)
102
+ return record
6
103
 
7
- def prepend_prefix_001(record: pymarc.Record, prefix: str) -> pymarc.Record:
104
+
105
+
106
+ def prepend_prefix_001(record: Record, prefix: str) -> Record:
8
107
  """
9
108
  Prepend a prefix to the record's 001 field.
10
109
 
11
110
  Args:
12
- record (pymarc.Record): The MARC record to preprocess.
111
+ record (Record): The MARC record to preprocess.
13
112
  prefix (str): The prefix to prepend to the 001 field.
14
113
 
15
114
  Returns:
16
- pymarc.Record: The preprocessed MARC record.
115
+ Record: The preprocessed MARC record.
17
116
  """
18
- record["001"].data = f"({prefix})" + record["001"].data
117
+ if "001" in record:
118
+ record["001"].data = f"({prefix})" + record["001"].data
119
+ else:
120
+ logger.warning("Field '001' not found in record. Skipping prefix prepend.")
19
121
  return record
20
122
 
21
123
 
22
- def prepend_ppn_prefix_001(record: pymarc.Record) -> pymarc.Record:
124
+ def prepend_ppn_prefix_001(record: Record, **kwargs) -> Record:
23
125
  """
24
126
  Prepend the PPN prefix to the record's 001 field. Useful when
25
127
  importing records from the ABES SUDOC catalog
26
128
 
27
129
  Args:
28
- record (pymarc.Record): The MARC record to preprocess.
130
+ record (Record): The MARC record to preprocess.
29
131
 
30
132
  Returns:
31
- pymarc.Record: The preprocessed MARC record.
133
+ Record: The preprocessed MARC record.
32
134
  """
33
135
  return prepend_prefix_001(record, "PPN")
34
136
 
35
137
 
36
- def prepend_abes_prefix_001(record: pymarc.Record) -> pymarc.Record:
138
+ def prepend_abes_prefix_001(record: Record, **kwargs) -> Record:
37
139
  """
38
140
  Prepend the ABES prefix to the record's 001 field. Useful when
39
141
  importing records from the ABES SUDOC catalog
40
142
 
41
143
  Args:
42
- record (pymarc.Record): The MARC record to preprocess.
144
+ record (Record): The MARC record to preprocess.
43
145
 
44
146
  Returns:
45
- pymarc.Record: The preprocessed MARC record.
147
+ Record: The preprocessed MARC record.
46
148
  """
47
149
  return prepend_prefix_001(record, "ABES")
48
150
 
49
151
 
50
- def strip_999_ff_fields(record: pymarc.Record) -> pymarc.Record:
152
+ def strip_999_ff_fields(record: Record, **kwargs) -> Record:
51
153
  """
52
154
  Strip all 999 fields with ff indicators from the record.
53
155
  Useful when importing records exported from another FOLIO system
54
156
 
55
157
  Args:
56
- record (pymarc.Record): The MARC record to preprocess.
158
+ record (Record): The MARC record to preprocess.
57
159
 
58
160
  Returns:
59
- pymarc.Record: The preprocessed MARC record.
161
+ Record: The preprocessed MARC record.
60
162
  """
61
163
  for field in record.get_fields("999"):
62
164
  if field.indicators == pymarc.Indicators(*["f", "f"]):
63
165
  record.remove_field(field)
64
166
  return record
65
167
 
66
- def clean_999_fields(record: pymarc.Record) -> pymarc.Record:
168
+ def clean_999_fields(record: Record, **kwargs) -> Record:
67
169
  """
68
170
  The presence of 999 fields, with or without ff indicators, can cause
69
171
  issues with data import mapping in FOLIO. This function calls strip_999_ff_fields
@@ -71,10 +173,10 @@ def clean_999_fields(record: pymarc.Record) -> pymarc.Record:
71
173
  to 945 fields.
72
174
 
73
175
  Args:
74
- record (pymarc.Record): The MARC record to preprocess.
176
+ record (Record): The MARC record to preprocess.
75
177
 
76
178
  Returns:
77
- pymarc.Record: The preprocessed MARC record.
179
+ Record: The preprocessed MARC record.
78
180
  """
79
181
  record = strip_999_ff_fields(record)
80
182
  for field in record.get_fields("999"):
@@ -87,7 +189,31 @@ def clean_999_fields(record: pymarc.Record) -> pymarc.Record:
87
189
  record.remove_field(field)
88
190
  return record
89
191
 
90
- def sudoc_supercede_prep(record: pymarc.Record) -> pymarc.Record:
192
+ def clean_non_ff_999_fields(record: Record, **kwargs) -> Record:
193
+ """
194
+ When loading migrated MARC records from folio_migration_tools, the presence of other 999 fields
195
+ than those set by the migration process can cause the record to fail to load properly. This preprocessor
196
+ function moves all 999 fields with non-ff indicators to 945 fields with 99 indicators.
197
+ """
198
+ for field in record.get_fields("999"):
199
+ if field.indicators != pymarc.Indicators(*["f", "f"]):
200
+ logger.log(
201
+ 26,
202
+ "DATA ISSUE\t%s\t%s\t%s",
203
+ record["001"].value(),
204
+ "Record contains a 999 field with non-ff indicators: Moving field to a 945 with indicators \"99\"",
205
+ field,
206
+ )
207
+ _945 = pymarc.Field(
208
+ tag="945",
209
+ indicators=pymarc.Indicators("9","9"),
210
+ subfields=field.subfields,
211
+ )
212
+ record.add_ordered_field(_945)
213
+ record.remove_field(field)
214
+ return record
215
+
216
+ def sudoc_supercede_prep(record: Record, **kwargs) -> Record:
91
217
  """
92
218
  Preprocesses a record from the ABES SUDOC catalog to copy 035 fields
93
219
  with a $9 subfield value of 'sudoc' to 935 fields with a $a subfield
@@ -96,10 +222,10 @@ def sudoc_supercede_prep(record: pymarc.Record) -> pymarc.Record:
96
222
  in FOLIO. This also applyes the prepend_ppn_prefix_001 function to the record.
97
223
 
98
224
  Args:
99
- record (pymarc.Record): The MARC record to preprocess.
225
+ record (Record): The MARC record to preprocess.
100
226
 
101
227
  Returns:
102
- pymarc.Record: The preprocessed MARC record.
228
+ Record: The preprocessed MARC record.
103
229
  """
104
230
  record = prepend_abes_prefix_001(record)
105
231
  for field in record.get_fields("035"):
@@ -113,7 +239,7 @@ def sudoc_supercede_prep(record: pymarc.Record) -> pymarc.Record:
113
239
  return record
114
240
 
115
241
 
116
- def clean_empty_fields(record: pymarc.Record) -> pymarc.Record:
242
+ def clean_empty_fields(record: Record, **kwargs) -> Record:
117
243
  """
118
244
  Remove empty fields and subfields from the record. These can cause
119
245
  data import mapping issues in FOLIO. Removals are logged at custom
@@ -121,10 +247,10 @@ def clean_empty_fields(record: pymarc.Record) -> pymarc.Record:
121
247
  data issues report.
122
248
 
123
249
  Args:
124
- record (pymarc.Record): The MARC record to preprocess.
250
+ record (Record): The MARC record to preprocess.
125
251
 
126
252
  Returns:
127
- pymarc.Record: The preprocessed MARC record.
253
+ Record: The preprocessed MARC record.
128
254
  """
129
255
  MAPPED_FIELDS = {
130
256
  "010": ["a", "z"],
@@ -233,73 +359,72 @@ def clean_empty_fields(record: pymarc.Record) -> pymarc.Record:
233
359
  "856": ["u", "y", "z"],
234
360
  }
235
361
 
236
- for field in list(record.get_fields()):
362
+ for field in record.get_fields(*MAPPED_FIELDS.keys()):
237
363
  len_subs = len(field.subfields)
238
- subfield_value = bool(field.subfields[0].value) if len_subs > 0 else False
239
- if not int(field.tag) >= 900 and field.tag in MAPPED_FIELDS:
240
- if int(field.tag) > 9 and len_subs == 0:
364
+ subfield_value = bool(field.subfields[0].value) if len_subs else False
365
+ if int(field.tag) > 9 and len_subs == 0:
366
+ logger.log(
367
+ 26,
368
+ "DATA ISSUE\t%s\t%s\t%s",
369
+ record["001"].value(),
370
+ f"{field.tag} is empty, removing field",
371
+ field,
372
+ )
373
+ record.remove_field(field)
374
+ elif len_subs == 1 and not subfield_value:
375
+ logger.log(
376
+ 26,
377
+ "DATA ISSUE\t%s\t%s\t%s",
378
+ record["001"].value(),
379
+ f"{field.tag}${field.subfields[0].code} is empty, no other subfields present, removing field",
380
+ field,
381
+ )
382
+ record.remove_field(field)
383
+ else:
384
+ if len_subs > 1 and "a" in field and not field["a"].strip():
241
385
  logger.log(
242
386
  26,
243
387
  "DATA ISSUE\t%s\t%s\t%s",
244
388
  record["001"].value(),
245
- f"{field.tag} is empty, removing field",
389
+ f"{field.tag}$a is empty, removing subfield",
246
390
  field,
247
391
  )
248
- record.remove_field(field)
249
- elif len_subs == 1 and not subfield_value:
392
+ field.delete_subfield("a")
393
+ for idx, subfield in enumerate(list(field.subfields), start=1):
394
+ if (
395
+ subfield.code in MAPPED_FIELDS.get(field.tag, [])
396
+ and not subfield.value
397
+ ):
398
+ logger.log(
399
+ 26,
400
+ "DATA ISSUE\t%s\t%s\t%s",
401
+ record["001"].value(),
402
+ f"{field.tag}${subfield.code} ({ordinal(idx)} subfield) is empty, but other subfields have values, removing subfield",
403
+ field,
404
+ )
405
+ field.delete_subfield(subfield.code)
406
+ if len(field.subfields) == 0:
250
407
  logger.log(
251
408
  26,
252
409
  "DATA ISSUE\t%s\t%s\t%s",
253
410
  record["001"].value(),
254
- f"{field.tag}${field.subfields[0].code} is empty, no other subfields present, removing field",
411
+ f"{field.tag} has no non-empty subfields after cleaning, removing field",
255
412
  field,
256
413
  )
257
414
  record.remove_field(field)
258
- else:
259
- if len_subs > 1 and "a" in field and not field["a"].strip():
260
- logger.log(
261
- 26,
262
- "DATA ISSUE\t%s\t%s\t%s",
263
- record["001"].value(),
264
- f"{field.tag}$a is empty, removing subfield",
265
- field,
266
- )
267
- field.delete_subfield("a")
268
- for idx, subfield in enumerate(list(field.subfields), start=1):
269
- if (
270
- subfield.code in MAPPED_FIELDS.get(field.tag, [])
271
- and not subfield.value
272
- ):
273
- logger.log(
274
- 26,
275
- "DATA ISSUE\t%s\t%s\t%s",
276
- record["001"].value(),
277
- f"{field.tag}${subfield.code} ({ordinal(idx)} subfield) is empty, but other subfields have values, removing subfield",
278
- field,
279
- )
280
- field.delete_subfield(subfield.code)
281
- if len(field.subfields) == 0:
282
- logger.log(
283
- 26,
284
- "DATA ISSUE\t%s\t%s\t%s",
285
- record["001"].value(),
286
- f"{field.tag} has no non-empty subfields after cleaning, removing field",
287
- field,
288
- )
289
- record.remove_field(field)
290
415
  return record
291
416
 
292
417
 
293
- def fix_leader(record: pymarc.Record) -> pymarc.Record:
418
+ def fix_leader(record: Record, **kwargs) -> Record:
294
419
  """
295
420
  Fixes the leader of the record by setting the record status to 'c' (modified
296
421
  record) and the type of record to 'a' (language material).
297
422
 
298
423
  Args:
299
- record (pymarc.Record): The MARC record to preprocess.
424
+ record (Record): The MARC record to preprocess.
300
425
 
301
426
  Returns:
302
- pymarc.Record: The preprocessed MARC record.
427
+ Record: The preprocessed MARC record.
303
428
  """
304
429
  VALID_STATUSES = ["a", "c", "d", "n", "p"]
305
430
  VALID_TYPES = ["a", "c", "d", "e", "f", "g", "i", "j", "k", "m", "o", "p", "r", "t"]
@@ -309,7 +434,7 @@ def fix_leader(record: pymarc.Record) -> pymarc.Record:
309
434
  "DATA ISSUE\t%s\t%s\t%s",
310
435
  record["001"].value(),
311
436
  f"Invalid record status: {record.leader[5]}, setting to 'c'",
312
- record,
437
+ record.leader,
313
438
  )
314
439
  record.leader = pymarc.Leader(record.leader[:5] + "c" + record.leader[6:])
315
440
  if record.leader[6] not in VALID_TYPES:
@@ -318,11 +443,40 @@ def fix_leader(record: pymarc.Record) -> pymarc.Record:
318
443
  "DATA ISSUE\t%s\t%s\t%s",
319
444
  record["001"].value(),
320
445
  f"Invalid record type: {record.leader[6]}, setting to 'a'",
321
- record,
446
+ record.leader,
322
447
  )
323
448
  record.leader = pymarc.Leader(record.leader[:6] + "a" + record.leader[7:])
324
449
  return record
325
450
 
451
+ def move_authority_subfield_9_to_0_all_controllable_fields(record: Record, **kwargs) -> Record:
452
+ """
453
+ Move subfield 9 from authority fields to subfield 0. This is useful when
454
+ importing records from the ABES SUDOC catalog.
455
+
456
+ Args:
457
+ record (Record): The MARC record to preprocess.
458
+
459
+ Returns:
460
+ Record: The preprocessed MARC record.
461
+ """
462
+ controlled_fields = [
463
+ "100", "110", "111", "130",
464
+ "600", "610", "611", "630", "650", "651", "655",
465
+ "700", "710", "711", "730",
466
+ "800", "810", "811", "830"
467
+ ]
468
+ for field in record.get_fields(*controlled_fields):
469
+ for subfield in list(field.get_subfields("9")):
470
+ field.add_subfield("0", subfield)
471
+ field.delete_subfield("9", subfield)
472
+ logger.log(
473
+ 26,
474
+ "DATA ISSUE\t%s\t%s\t%s",
475
+ record["001"].value(),
476
+ f"Subfield 9 moved to subfield 0 in {field.tag}",
477
+ field,
478
+ )
479
+ return record
326
480
 
327
481
  def ordinal(n):
328
482
  s = ("th", "st", "nd", "rd") + ("th",) * 10