folio-data-import 0.2.8rc12__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of folio-data-import might be problematic.
- folio_data_import/MARCDataImport.py +215 -177
- folio_data_import/UserImport.py +45 -24
- folio_data_import/custom_exceptions.py +29 -0
- folio_data_import/marc_preprocessors/_preprocessors.py +221 -67
- {folio_data_import-0.2.8rc12.dist-info → folio_data_import-0.3.1.dist-info}/METADATA +2 -3
- folio_data_import-0.3.1.dist-info/RECORD +12 -0
- {folio_data_import-0.2.8rc12.dist-info → folio_data_import-0.3.1.dist-info}/WHEEL +1 -1
- folio_data_import-0.2.8rc12.dist-info/RECORD +0 -11
- {folio_data_import-0.2.8rc12.dist-info → folio_data_import-0.3.1.dist-info}/LICENSE +0 -0
- {folio_data_import-0.2.8rc12.dist-info → folio_data_import-0.3.1.dist-info}/entry_points.txt +0 -0
folio_data_import/UserImport.py
CHANGED
@@ -8,7 +8,7 @@ import time
 import uuid
 from datetime import datetime as dt
 from pathlib import Path
-from typing import Tuple
+from typing import Tuple, List
 
 import aiofiles
 import folioclient
@@ -51,6 +51,7 @@ class UserImporter: # noqa: R0902
         user_match_key: str = "externalSystemId",
         only_update_present_fields: bool = False,
         default_preferred_contact_type: str = "002",
+        fields_to_protect: List[str] =[],
     ) -> None:
         self.limit_simultaneous_requests = limit_simultaneous_requests
         self.batch_size = batch_size
@@ -77,6 +78,7 @@ class UserImporter: # noqa: R0902
         self.match_key = user_match_key
         self.lock: asyncio.Lock = asyncio.Lock()
         self.logs: dict = {"created": 0, "updated": 0, "failed": 0}
+        self.fields_to_protect = set(fields_to_protect)
 
     @staticmethod
     def build_ref_data_id_map(
@@ -137,7 +139,7 @@ class UserImporter: # noqa: R0902
         match_key = "id" if ("id" in user_obj) else self.match_key
         try:
             existing_user = await self.http_client.get(
-                self.folio_client.
+                self.folio_client.gateway_url + "/users",
                 headers=self.folio_client.okapi_headers,
                 params={"query": f"{match_key}=={user_obj[match_key]}"},
             )
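Every request-building call in this file now concatenates self.folio_client.gateway_url with a REST path (the removed lines are truncated in this view, so the old attribute name is not shown). A minimal standalone sketch of the same pattern using httpx directly; the gateway URL, tenant, and token values are placeholders, not values from the package:

import httpx

gateway_url = "https://folio.example.org"  # placeholder FOLIO gateway
okapi_headers = {"x-okapi-tenant": "diku", "x-okapi-token": "<token>"}  # placeholder auth

# mirrors the existing-user lookup above (sync client used here for brevity)
response = httpx.get(
    gateway_url + "/users",
    headers=okapi_headers,
    params={"query": "externalSystemId==ex-1234"},
)
response.raise_for_status()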
@@ -161,7 +163,7 @@ class UserImporter: # noqa: R0902
         """
         try:
             existing_rp = await self.http_client.get(
-                self.folio_client.
+                self.folio_client.gateway_url
                 + "/request-preference-storage/request-preference",
                 headers=self.folio_client.okapi_headers,
                 params={
@@ -188,7 +190,7 @@ class UserImporter: # noqa: R0902
         """
         try:
             existing_pu = await self.http_client.get(
-                self.folio_client.
+                self.folio_client.gateway_url + "/perms/users",
                 headers=self.folio_client.okapi_headers,
                 params={
                     "query": f"userId=={existing_user.get('id', user_obj.get('id', ''))}"
@@ -334,6 +336,7 @@ class UserImporter: # noqa: R0902
         None
 
         """
+
         await self.set_preferred_contact_type(user_obj, existing_user)
         preferred_contact_type = {"preferredContactTypeId": existing_user.get("personal", {}).pop("preferredContactTypeId")}
         if self.only_update_present_fields:
@@ -369,7 +372,7 @@ class UserImporter: # noqa: R0902
             else:
                 existing_user[key] = value
         create_update_user = await self.http_client.put(
-            self.folio_client.
+            self.folio_client.gateway_url + f"/users/{existing_user['id']}",
             headers=self.folio_client.okapi_headers,
             json=existing_user,
         )
@@ -389,7 +392,7 @@ class UserImporter: # noqa: R0902
         HTTPError: If the HTTP request to create the user fails.
         """
         response = await self.http_client.post(
-            self.folio_client.
+            self.folio_client.gateway_url + "/users",
             headers=self.folio_client.okapi_headers,
             json=user_obj,
         )
@@ -502,7 +505,9 @@ class UserImporter: # noqa: R0902
 
     async def get_protected_fields(self, existing_user) -> dict:
         """
-        Retrieves the protected fields from the existing user object
+        Retrieves the protected fields from the existing user object,
+        combining both the customFields.protectedFields list *and*
+        any fields_to_protect passed on the CLI.
 
         Args:
             existing_user (dict): The existing user object.
@@ -512,18 +517,19 @@ class UserImporter: # noqa: R0902
         """
         protected_fields = {}
         protected_fields_list = existing_user.get("customFields", {}).get("protectedFields", "").split(",")
-
-
-
-
-
-
-
-
+        cli_fields = list(self.fields_to_protect)
+        # combine and dedupe:
+        all_fields = list(dict.fromkeys(protected_fields_list + cli_fields))
+        for field in all_fields:
+            if "." in field:
+                fld, subfld = field.split(".", 1)
+                val = existing_user.get(fld, {}).pop(subfld, None)
+                if val is not None:
+                    protected_fields.setdefault(fld, {})[subfld] = val
             else:
-
-            if
-            protected_fields
+                val = existing_user.pop(field, None)
+                if val is not None:
+                    protected_fields[field] = val
         return protected_fields
 
     async def process_existing_user(self, user_obj) -> Tuple[dict, dict, dict, dict]:
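The merge-and-pop logic added above can be exercised on its own; this sketch reproduces it outside the class, with an illustrative user object and field names:

existing_user = {
    "type": "patron",
    "personal": {"email": "a@example.org", "lastName": "Doe"},
    "customFields": {"protectedFields": "type"},
}
fields_to_protect = {"personal.email"}  # as supplied via --fields-to-protect

protected = {}
from_custom = existing_user.get("customFields", {}).get("protectedFields", "").split(",")
all_fields = list(dict.fromkeys(from_custom + list(fields_to_protect)))  # ordered dedupe
for field in all_fields:
    if "." in field:
        fld, subfld = field.split(".", 1)          # dot notation -> nested field
        val = existing_user.get(fld, {}).pop(subfld, None)
        if val is not None:
            protected.setdefault(fld, {})[subfld] = val
    else:
        val = existing_user.pop(field, None)       # top-level field
        if val is not None:
            protected[field] = val

print(protected)  # {'type': 'patron', 'personal': {'email': 'a@example.org'}}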
@@ -589,7 +595,7 @@ class UserImporter: # noqa: R0902
         rp_obj["userId"] = new_user_obj["id"]
         # print(rp_obj)
         response = await self.http_client.post(
-            self.folio_client.
+            self.folio_client.gateway_url
             + "/request-preference-storage/request-preference",
             headers=self.folio_client.okapi_headers,
             json=rp_obj,
@@ -613,7 +619,7 @@ class UserImporter: # noqa: R0902
         existing_rp.update(rp_obj)
         # print(existing_rp)
         response = await self.http_client.put(
-            self.folio_client.
+            self.folio_client.gateway_url
             + f"/request-preference-storage/request-preference/{existing_rp['id']}",
             headers=self.folio_client.okapi_headers,
             json=existing_rp,
@@ -635,7 +641,7 @@ class UserImporter: # noqa: R0902
         """
         perms_user_obj = {"userId": new_user_obj["id"], "permissions": []}
         response = await self.http_client.post(
-            self.folio_client.
+            self.folio_client.gateway_url + "/perms/users",
             headers=self.folio_client.okapi_headers,
             json=perms_user_obj,
         )
@@ -788,7 +794,7 @@ class UserImporter: # noqa: R0902
         """
         try:
             existing_spu = await self.http_client.get(
-                self.folio_client.
+                self.folio_client.gateway_url + "/service-points-users",
                 headers=self.folio_client.okapi_headers,
                 params={"query": f"userId=={existing_user['id']}"},
             )
@@ -812,7 +818,7 @@ class UserImporter: # noqa: R0902
         """
         spu_obj["userId"] = existing_user["id"]
         response = await self.http_client.post(
-            self.folio_client.
+            self.folio_client.gateway_url + "/service-points-users",
             headers=self.folio_client.okapi_headers,
             json=spu_obj,
         )
@@ -831,7 +837,7 @@ class UserImporter: # noqa: R0902
         """
         existing_spu.update(spu_obj)
         response = await self.http_client.put(
-            self.folio_client.
+            self.folio_client.gateway_url + f"/service-points-users/{existing_spu['id']}",
             headers=self.folio_client.okapi_headers,
             json=existing_spu,
         )
@@ -896,6 +902,7 @@ async def main() -> None:
     --update_only_present_fields (bool): Only update fields that are present in the new user object.
     --default_preferred_contact_type (str): The default preferred contact type to use if the provided \
         value is not valid or not present. Default "002".
+    --fields_to_protect (str): Comma-separated list of top-level or nested (dot-notation) fields to protect.
 
     Raises:
         Exception: If an unknown error occurs during the import process.
@@ -953,7 +960,20 @@ async def main() -> None:
         choices=list(PREFERRED_CONTACT_TYPES_MAP.keys()) + list(PREFERRED_CONTACT_TYPES_MAP.values()),
         default="002",
     )
+    parser.add_argument(
+        "--fields-to-protect",  # new flag name
+        dest="fields_to_protect",  # sets args.fields_to_protect
+        help=(
+            "Comma-separated list of top-level user fields to protect "
+            "(e.g. type,expirationDate)"
+        ),
+        default="",
+    )
    args = parser.parse_args()
+    protect_fields = [
+        f.strip() for f in args.fields_to_protect.split(",")
+        if f.strip()
+    ]
 
     library_name = args.library_name
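The new flag takes a single comma-separated string; stray whitespace and empty segments are discarded before the list reaches UserImporter. A quick illustration of the comprehension above, with a made-up flag value:

raw = " type , expirationDate ,personal.email,,"
protect_fields = [f.strip() for f in raw.split(",") if f.strip()]
print(protect_fields)  # ['type', 'expirationDate', 'personal.email']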
@@ -1005,6 +1025,7 @@ async def main() -> None:
             args.user_match_key,
             args.update_only_present_fields,
             args.default_preferred_contact_type,
+            fields_to_protect=protect_fields,
         )
         await importer.do_import()
     except Exception as ee:
folio_data_import/custom_exceptions.py
ADDED
@@ -0,0 +1,29 @@
+"""Custom exceptions for the Folio Data Import module."""
+
+class FolioDataImportError(Exception):
+    """Base class for all exceptions in the Folio Data Import module."""
+    pass
+
+class FolioDataImportBatchError(FolioDataImportError):
+    """Exception raised for errors in the Folio Data Import batch process.
+
+    Attributes:
+        batch_id -- ID of the batch that caused the error
+        message -- explanation of the error
+    """
+    def __init__(self, batch_id, message, exception=None):
+        self.batch_id = batch_id
+        self.message = message
+        super().__init__(f"Unhandled error posting batch {batch_id}: {message}")
+
+class FolioDataImportJobError(FolioDataImportError):
+    """Exception raised for errors in the Folio Data Import job process.
+
+    Attributes:
+        job_id -- ID of the job that caused the error
+        message -- explanation of the error
+    """
+    def __init__(self, job_id, message, exception=None):
+        self.job_id = job_id
+        self.message = message
+        super().__init__(f"Unhandled error processing job {job_id}: {message}")
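A usage sketch for the new exception classes; the batch ID and the failing call are illustrative, not taken from the package:

from folio_data_import.custom_exceptions import FolioDataImportBatchError

def post_batch(batch_id, records):
    try:
        raise RuntimeError("connection reset")  # stand-in for a failed HTTP POST
    except Exception as exc:
        # wrap the low-level failure with the batch context, per the class above
        raise FolioDataImportBatchError(batch_id, str(exc), exception=exc) from exc

try:
    post_batch("batch-0001", [])
except FolioDataImportBatchError as err:
    print(err)  # Unhandled error posting batch batch-0001: connection reset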
folio_data_import/marc_preprocessors/_preprocessors.py
CHANGED
@@ -1,69 +1,171 @@
+import importlib
+import sys
+from typing import Callable, Dict, List, Tuple, Union
 import pymarc
 import logging
 
+from pymarc.record import Record
+
 logger = logging.getLogger("folio_data_import.MARCDataImport")
 
+class MARCPreprocessor:
+    """
+    A class to preprocess MARC records for data import into FOLIO.
+    """
+
+    def __init__(self, preprocessors: Union[str,List[Callable]], **kwargs):
+        """
+        Initialize the MARCPreprocessor with a list of preprocessors.
+
+        Args:
+            preprocessors (Union[str, List[Callable]]): A string of comma-separated function names or a list of callable preprocessor functions to apply.
+        """
+        self.preprocessor_args: Dict[str, Dict] = kwargs
+        self.preprocessors: List[Tuple[Callable, Dict]] = self._get_preprocessor_functions(
+            preprocessors
+        )
+        self.proc_kwargs = kwargs
+        self.record = None
+
+    def _get_preprocessor_args(self, func: Callable) -> Dict:
+        """
+        Get the arguments for the preprocessor function.
+
+        Args:
+            func (Callable): The preprocessor function.
+
+        Returns:
+            Dict: A dictionary of arguments for the preprocessor function.
+        """
+        func_path = f"{func.__module__}.{func.__name__}"
+        path_args: Dict = self.preprocessor_args.get("default", {})
+        path_args.update(self.preprocessor_args.get(func.__name__, {}))
+        path_args.update(self.preprocessor_args.get(func_path, {}))
+        return path_args
+
+    def _get_preprocessor_functions(self, func_list: Union[str, List[Callable]]) -> List[Callable]:
+        """
+        Get the preprocessor functions based on the provided names.
+
+        Args:
+            func_list (Union[str, List[Callable]]): A string of comma-separated function names or a list of callable preprocessor functions.
+
+        Returns:
+            List[callable]: A list of preprocessor functions.
+        """
+        preprocessors = []
+        if isinstance(func_list, str):
+            func_list = func_list.split(",")
+        else:
+            for f in func_list:
+                if not callable(f):
+                    logger.warning(
+                        f"Preprocessing function {f} is not callable. Skipping."
+                    )
+                else:
+                    preprocessors.append((f, self._get_preprocessor_args(f)))
+            return preprocessors
+        for f_path in func_list:
+            f_import = f_path.rsplit(".", 1)
+            if len(f_import) == 1:
+                # If the function is not a full path, assume it's in the current module
+                if func := getattr(sys.modules[__name__], f_import[0], None):
+                    if callable(func):
+                        preprocessors.append((func, self._get_preprocessor_args(func)))
+                    else:
+                        logger.warning(
+                            f"Preprocessing function {f_path} is not callable. Skipping."
+                        )
+                else:
+                    logger.warning(
+                        f"Preprocessing function {f_path} not found in current module. Skipping."
+                    )
+            elif len(f_import) == 2:
+                # If the function is a full path, import it
+                module_path, func_name = f_import
+                try:
+                    module = importlib.import_module(module_path)
+                    func = getattr(module, func_name)
+                    preprocessors.append((func, self._get_preprocessor_args(func)))
+                except ImportError as e:
+                    logger.warning(
+                        f"Error importing preprocessing function {f_path}: {e}. Skipping."
+                    )
+        return preprocessors
+
+    def do_work(self, record: Record) -> Record:
+        """
+        Preprocess the MARC record.
+        """
+        for proc, kwargs in self.preprocessors:
+            record = proc(record, **kwargs)
+        return record
 
-
+
+
+def prepend_prefix_001(record: Record, prefix: str) -> Record:
     """
     Prepend a prefix to the record's 001 field.
 
     Args:
-        record (
+        record (Record): The MARC record to preprocess.
         prefix (str): The prefix to prepend to the 001 field.
 
     Returns:
-
+        Record: The preprocessed MARC record.
     """
-
+    if "001" in record:
+        record["001"].data = f"({prefix})" + record["001"].data
+    else:
+        logger.warning("Field '001' not found in record. Skipping prefix prepend.")
     return record
 
 
-def prepend_ppn_prefix_001(record:
+def prepend_ppn_prefix_001(record: Record, **kwargs) -> Record:
     """
     Prepend the PPN prefix to the record's 001 field. Useful when
     importing records from the ABES SUDOC catalog
 
     Args:
-        record (
+        record (Record): The MARC record to preprocess.
 
     Returns:
-
+        Record: The preprocessed MARC record.
     """
     return prepend_prefix_001(record, "PPN")
 
 
-def prepend_abes_prefix_001(record:
+def prepend_abes_prefix_001(record: Record, **kwargs) -> Record:
     """
     Prepend the ABES prefix to the record's 001 field. Useful when
     importing records from the ABES SUDOC catalog
 
     Args:
-        record (
+        record (Record): The MARC record to preprocess.
 
     Returns:
-
+        Record: The preprocessed MARC record.
     """
     return prepend_prefix_001(record, "ABES")
 
 
-def strip_999_ff_fields(record:
+def strip_999_ff_fields(record: Record, **kwargs) -> Record:
     """
     Strip all 999 fields with ff indicators from the record.
     Useful when importing records exported from another FOLIO system
 
     Args:
-        record (
+        record (Record): The MARC record to preprocess.
 
     Returns:
-
+        Record: The preprocessed MARC record.
     """
     for field in record.get_fields("999"):
         if field.indicators == pymarc.Indicators(*["f", "f"]):
             record.remove_field(field)
     return record
 
-def clean_999_fields(record:
+def clean_999_fields(record: Record, **kwargs) -> Record:
     """
     The presence of 999 fields, with or without ff indicators, can cause
     issues with data import mapping in FOLIO. This function calls strip_999_ff_fields
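Chaining preprocessors by name is the core of the new class: a comma-separated string is resolved to functions in this module (or to dotted import paths elsewhere), each paired with any kwargs keyed by function name. A minimal sketch, assuming the module path shown in the file listing above:

import pymarc
from folio_data_import.marc_preprocessors._preprocessors import MARCPreprocessor

prep = MARCPreprocessor("prepend_ppn_prefix_001,strip_999_ff_fields")
record = pymarc.Record()
record.add_field(pymarc.Field(tag="001", data="123456789"))
record = prep.do_work(record)  # applies each preprocessor in order
print(record["001"].data)  # (PPN)123456789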
@@ -71,10 +173,10 @@ def clean_999_fields(record: pymarc.Record) -> pymarc.Record:
     to 945 fields.
 
     Args:
-        record (
+        record (Record): The MARC record to preprocess.
 
     Returns:
-
+        Record: The preprocessed MARC record.
     """
     record = strip_999_ff_fields(record)
     for field in record.get_fields("999"):
@@ -87,7 +189,31 @@ def clean_999_fields(record: pymarc.Record) -> pymarc.Record:
             record.remove_field(field)
     return record
 
-def
+def clean_non_ff_999_fields(record: Record, **kwargs) -> Record:
+    """
+    When loading migrated MARC records from folio_migration_tools, the presence of other 999 fields
+    than those set by the migration process can cause the record to fail to load properly. This preprocessor
+    function moves all 999 fields with non-ff indicators to 945 fields with 99 indicators.
+    """
+    for field in record.get_fields("999"):
+        if field.indicators != pymarc.Indicators(*["f", "f"]):
+            logger.log(
+                26,
+                "DATA ISSUE\t%s\t%s\t%s",
+                record["001"].value(),
+                "Record contains a 999 field with non-ff indicators: Moving field to a 945 with indicators \"99\"",
+                field,
+            )
+            _945 = pymarc.Field(
+                tag="945",
+                indicators=pymarc.Indicators("9","9"),
+                subfields=field.subfields,
+            )
+            record.add_ordered_field(_945)
+            record.remove_field(field)
+    return record
+
+def sudoc_supercede_prep(record: Record, **kwargs) -> Record:
     """
     Preprocesses a record from the ABES SUDOC catalog to copy 035 fields
     with a $9 subfield value of 'sudoc' to 935 fields with a $a subfield
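The effect of clean_non_ff_999_fields on a record carrying a locally defined 999 can be seen with plain pymarc objects; the subfield content below is illustrative:

import pymarc
from folio_data_import.marc_preprocessors._preprocessors import clean_non_ff_999_fields

rec = pymarc.Record()
rec.add_field(pymarc.Field(tag="001", data="rec1"))
rec.add_field(pymarc.Field(
    tag="999",
    indicators=pymarc.Indicators(" ", " "),  # non-ff indicators
    subfields=[pymarc.Subfield(code="a", value="local note")],
))
rec = clean_non_ff_999_fields(rec)
print([f.tag for f in rec.get_fields()])  # ['001', '945'] -- the 999 was moved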
@@ -96,10 +222,10 @@ def sudoc_supercede_prep(record: pymarc.Record) -> pymarc.Record:
     in FOLIO. This also applyes the prepend_ppn_prefix_001 function to the record.
 
     Args:
-        record (
+        record (Record): The MARC record to preprocess.
 
     Returns:
-
+        Record: The preprocessed MARC record.
     """
     record = prepend_abes_prefix_001(record)
     for field in record.get_fields("035"):
@@ -113,7 +239,7 @@ def sudoc_supercede_prep(record: pymarc.Record) -> pymarc.Record:
     return record
 
 
-def clean_empty_fields(record:
+def clean_empty_fields(record: Record, **kwargs) -> Record:
     """
     Remove empty fields and subfields from the record. These can cause
     data import mapping issues in FOLIO. Removals are logged at custom
@@ -121,10 +247,10 @@ def clean_empty_fields(record: pymarc.Record) -> pymarc.Record:
     data issues report.
 
     Args:
-        record (
+        record (Record): The MARC record to preprocess.
 
     Returns:
-
+        Record: The preprocessed MARC record.
     """
     MAPPED_FIELDS = {
         "010": ["a", "z"],
@@ -233,73 +359,72 @@ def clean_empty_fields(record: pymarc.Record) -> pymarc.Record:
         "856": ["u", "y", "z"],
     }
 
-    for field in
+    for field in record.get_fields(*MAPPED_FIELDS.keys()):
         len_subs = len(field.subfields)
-        subfield_value = bool(field.subfields[0].value) if len_subs
-        if
-
+        subfield_value = bool(field.subfields[0].value) if len_subs else False
+        if int(field.tag) > 9 and len_subs == 0:
+            logger.log(
+                26,
+                "DATA ISSUE\t%s\t%s\t%s",
+                record["001"].value(),
+                f"{field.tag} is empty, removing field",
+                field,
+            )
+            record.remove_field(field)
+        elif len_subs == 1 and not subfield_value:
+            logger.log(
+                26,
+                "DATA ISSUE\t%s\t%s\t%s",
+                record["001"].value(),
+                f"{field.tag}${field.subfields[0].code} is empty, no other subfields present, removing field",
+                field,
+            )
+            record.remove_field(field)
+        else:
+            if len_subs > 1 and "a" in field and not field["a"].strip():
                 logger.log(
                     26,
                     "DATA ISSUE\t%s\t%s\t%s",
                     record["001"].value(),
-                    f"{field.tag} is empty, removing
+                    f"{field.tag}$a is empty, removing subfield",
                     field,
                 )
-
-
+                field.delete_subfield("a")
+            for idx, subfield in enumerate(list(field.subfields), start=1):
+                if (
+                    subfield.code in MAPPED_FIELDS.get(field.tag, [])
+                    and not subfield.value
+                ):
+                    logger.log(
+                        26,
+                        "DATA ISSUE\t%s\t%s\t%s",
+                        record["001"].value(),
+                        f"{field.tag}${subfield.code} ({ordinal(idx)} subfield) is empty, but other subfields have values, removing subfield",
+                        field,
+                    )
+                    field.delete_subfield(subfield.code)
+            if len(field.subfields) == 0:
                 logger.log(
                     26,
                     "DATA ISSUE\t%s\t%s\t%s",
                     record["001"].value(),
-                    f"{field.tag}
+                    f"{field.tag} has no non-empty subfields after cleaning, removing field",
                     field,
                 )
                 record.remove_field(field)
-        else:
-            if len_subs > 1 and "a" in field and not field["a"].strip():
-                logger.log(
-                    26,
-                    "DATA ISSUE\t%s\t%s\t%s",
-                    record["001"].value(),
-                    f"{field.tag}$a is empty, removing subfield",
-                    field,
-                )
-                field.delete_subfield("a")
-            for idx, subfield in enumerate(list(field.subfields), start=1):
-                if (
-                    subfield.code in MAPPED_FIELDS.get(field.tag, [])
-                    and not subfield.value
-                ):
-                    logger.log(
-                        26,
-                        "DATA ISSUE\t%s\t%s\t%s",
-                        record["001"].value(),
-                        f"{field.tag}${subfield.code} ({ordinal(idx)} subfield) is empty, but other subfields have values, removing subfield",
-                        field,
-                    )
-                    field.delete_subfield(subfield.code)
-            if len(field.subfields) == 0:
-                logger.log(
-                    26,
-                    "DATA ISSUE\t%s\t%s\t%s",
-                    record["001"].value(),
-                    f"{field.tag} has no non-empty subfields after cleaning, removing field",
-                    field,
-                )
-                record.remove_field(field)
     return record
 
 
-def fix_leader(record:
+def fix_leader(record: Record, **kwargs) -> Record:
     """
     Fixes the leader of the record by setting the record status to 'c' (modified
     record) and the type of record to 'a' (language material).
 
     Args:
-        record (
+        record (Record): The MARC record to preprocess.
 
     Returns:
-
+        Record: The preprocessed MARC record.
     """
     VALID_STATUSES = ["a", "c", "d", "n", "p"]
     VALID_TYPES = ["a", "c", "d", "e", "f", "g", "i", "j", "k", "m", "o", "p", "r", "t"]
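To see the restructured clean_empty_fields in action on one of the mapped fields (856 $u/$y/$z appears in the excerpt above), a short pymarc session; the URL is a placeholder:

import pymarc
from folio_data_import.marc_preprocessors._preprocessors import clean_empty_fields

rec = pymarc.Record()
rec.add_field(pymarc.Field(tag="001", data="rec1"))
rec.add_field(pymarc.Field(
    tag="856",
    indicators=pymarc.Indicators("4", "0"),
    subfields=[
        pymarc.Subfield(code="u", value="https://example.org/item"),
        pymarc.Subfield(code="z", value=""),  # empty mapped subfield
    ],
))
rec = clean_empty_fields(rec)
print(rec["856"].subfields)  # only the non-empty $u survives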
@@ -309,7 +434,7 @@ def fix_leader(record: pymarc.Record) -> pymarc.Record:
             "DATA ISSUE\t%s\t%s\t%s",
             record["001"].value(),
             f"Invalid record status: {record.leader[5]}, setting to 'c'",
-            record,
+            record.leader,
         )
         record.leader = pymarc.Leader(record.leader[:5] + "c" + record.leader[6:])
     if record.leader[6] not in VALID_TYPES:
@@ -318,11 +443,40 @@ def fix_leader(record: pymarc.Record) -> pymarc.Record:
             "DATA ISSUE\t%s\t%s\t%s",
             record["001"].value(),
             f"Invalid record type: {record.leader[6]}, setting to 'a'",
-            record,
+            record.leader,
         )
         record.leader = pymarc.Leader(record.leader[:6] + "a" + record.leader[7:])
     return record
 
+def move_authority_subfield_9_to_0_all_controllable_fields(record: Record, **kwargs) -> Record:
+    """
+    Move subfield 9 from authority fields to subfield 0. This is useful when
+    importing records from the ABES SUDOC catalog.
+
+    Args:
+        record (Record): The MARC record to preprocess.
+
+    Returns:
+        Record: The preprocessed MARC record.
+    """
+    controlled_fields = [
+        "100", "110", "111", "130",
+        "600", "610", "611", "630", "650", "651", "655",
+        "700", "710", "711", "730",
+        "800", "810", "811", "830"
+    ]
+    for field in record.get_fields(*controlled_fields):
+        for subfield in list(field.get_subfields("9")):
+            field.add_subfield("0", subfield)
+            field.delete_subfield("9", subfield)
+            logger.log(
+                26,
+                "DATA ISSUE\t%s\t%s\t%s",
+                record["001"].value(),
+                f"Subfield 9 moved to subfield 0 in {field.tag}",
+                field,
+            )
+    return record
 
 def ordinal(n):
     s = ("th", "st", "nd", "rd") + ("th",) * 10
|