folio-data-import 0.2.6__py3-none-any.whl → 0.2.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
This version of folio-data-import has been flagged as a potentially problematic release.
- folio_data_import/MARCDataImport.py +510 -166
- folio_data_import/UserImport.py +87 -26
- folio_data_import/marc_preprocessors/__init__.py +1 -1
- folio_data_import/marc_preprocessors/_preprocessors.py +306 -4
- {folio_data_import-0.2.6.dist-info → folio_data_import-0.2.8.dist-info}/METADATA +25 -7
- folio_data_import-0.2.8.dist-info/RECORD +11 -0
- {folio_data_import-0.2.6.dist-info → folio_data_import-0.2.8.dist-info}/WHEEL +1 -1
- folio_data_import-0.2.6.dist-info/RECORD +0 -11
- {folio_data_import-0.2.6.dist-info → folio_data_import-0.2.8.dist-info}/LICENSE +0 -0
- {folio_data_import-0.2.6.dist-info → folio_data_import-0.2.8.dist-info}/entry_points.txt +0 -0
folio_data_import/UserImport.py
CHANGED
@@ -5,6 +5,7 @@ import getpass
 import json
 import os
 import time
+import uuid
 from datetime import datetime as dt
 from pathlib import Path
 from typing import Tuple

@@ -41,12 +42,12 @@ class UserImporter: # noqa: R0902
         self,
         folio_client: folioclient.FolioClient,
         library_name: str,
-        user_file_path: Path,
         batch_size: int,
         limit_simultaneous_requests: asyncio.Semaphore,
         logfile: AsyncTextIOWrapper,
         errorfile: AsyncTextIOWrapper,
         http_client: httpx.AsyncClient,
+        user_file_path: Path = None,
         user_match_key: str = "externalSystemId",
         only_update_present_fields: bool = False,
         default_preferred_contact_type: str = "002",

@@ -94,14 +95,34 @@ class UserImporter: # noqa: R0902
         """
         return {x[name]: x["id"] for x in folio_client.folio_get_all(endpoint, key)}

+    @staticmethod
+    def validate_uuid(uuid_string: str) -> bool:
+        """
+        Validate a UUID string.
+
+        Args:
+            uuid_string (str): The UUID string to validate.
+
+        Returns:
+            bool: True if the UUID is valid, otherwise False.
+        """
+        try:
+            uuid.UUID(uuid_string)
+            return True
+        except ValueError:
+            return False
+
     async def do_import(self) -> None:
         """
         Main method to import users.

         This method triggers the process of importing users by calling the `process_file` method.
         """
-
-
+        if self.user_file_path:
+            with open(self.user_file_path, "r", encoding="utf-8") as openfile:
+                await self.process_file(openfile)
+        else:
+            raise FileNotFoundError("No user objects file provided")

     async def get_existing_user(self, user_obj) -> dict:
         """

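The new `validate_uuid` helper is what lets 0.2.8 accept reference values that are already FOLIO UUIDs instead of human-readable names. A minimal sketch of the check-then-map pattern the importer now applies to patron groups, departments, address types, and service points (the `patron_group_map` contents below are invented for illustration):

```python
import uuid

def validate_uuid(uuid_string: str) -> bool:
    """Return True if the string parses as a UUID, mirroring UserImporter.validate_uuid."""
    try:
        uuid.UUID(uuid_string)
        return True
    except ValueError:
        return False

# Hypothetical name -> id reference map, like UserImporter.patron_group_map
patron_group_map = {"staff": "3684a786-6671-4268-8ed0-9db82ebca60b"}

def map_patron_group(value: str) -> str:
    if validate_uuid(value) and value in patron_group_map.values():
        return value  # already a UUID known to the tenant: skip mapping
    return patron_group_map[value]  # map by name; KeyError is handled by the caller

assert map_patron_group("staff") == map_patron_group("3684a786-6671-4268-8ed0-9db82ebca60b")
```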
@@ -116,7 +137,7 @@ class UserImporter: # noqa: R0902
         match_key = "id" if ("id" in user_obj) else self.match_key
         try:
             existing_user = await self.http_client.get(
-                self.folio_client.
+                self.folio_client.gateway_url + "/users",
                 headers=self.folio_client.okapi_headers,
                 params={"query": f"{match_key}=={user_obj[match_key]}"},
             )

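Every endpoint call in this file now builds its URL from `folio_client.gateway_url`. A rough standalone sketch of the lookup above — assuming a client object exposing `gateway_url` and `okapi_headers` the way folioclient's FolioClient does, and FOLIO's `/users` endpoint returning its results in a `users` array:

```python
import httpx

async def get_existing_user(
    http_client: httpx.AsyncClient,
    folio_client,
    user_obj: dict,
    match_key: str = "externalSystemId",
) -> dict | None:
    # Prefer the record's own id as the match point, as the importer does
    match_key = "id" if "id" in user_obj else match_key
    response = await http_client.get(
        folio_client.gateway_url + "/users",
        headers=folio_client.okapi_headers,
        params={"query": f"{match_key}=={user_obj[match_key]}"},
    )
    response.raise_for_status()
    users = response.json().get("users", [])
    return users[0] if users else None
```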
@@ -140,7 +161,7 @@ class UserImporter: # noqa: R0902
         """
         try:
             existing_rp = await self.http_client.get(
-                self.folio_client.
+                self.folio_client.gateway_url
                 + "/request-preference-storage/request-preference",
                 headers=self.folio_client.okapi_headers,
                 params={

@@ -167,7 +188,7 @@ class UserImporter: # noqa: R0902
         """
         try:
             existing_pu = await self.http_client.get(
-                self.folio_client.
+                self.folio_client.gateway_url + "/perms/users",
                 headers=self.folio_client.okapi_headers,
                 params={
                     "query": f"userId=={existing_user.get('id', user_obj.get('id', ''))}"

@@ -200,10 +221,20 @@ class UserImporter: # noqa: R0902
         mapped_addresses = []
         for address in addresses:
             try:
-
-                address["addressTypeId"]
-
-
+                if (
+                    self.validate_uuid(address["addressTypeId"])
+                    and address["addressTypeId"] in self.address_type_map.values()
+                ):
+                    await self.logfile.write(
+                        f"Row {line_number}: Address type {address['addressTypeId']} is a UUID, "
+                        f"skipping mapping\n"
+                    )
+                    mapped_addresses.append(address)
+                else:
+                    address["addressTypeId"] = self.address_type_map[
+                        address["addressTypeId"]
+                    ]
+                    mapped_addresses.append(address)
             except KeyError:
                 if address["addressTypeId"] not in self.address_type_map.values():
                     print(

@@ -229,7 +260,16 @@ class UserImporter: # noqa: R0902
         None
         """
         try:
-
+            if (
+                self.validate_uuid(user_obj["patronGroup"])
+                and user_obj["patronGroup"] in self.patron_group_map.values()
+            ):
+                await self.logfile.write(
+                    f"Row {line_number}: Patron group {user_obj['patronGroup']} is a UUID, "
+                    f"skipping mapping\n"
+                )
+            else:
+                user_obj["patronGroup"] = self.patron_group_map[user_obj["patronGroup"]]
         except KeyError:
             if user_obj["patronGroup"] not in self.patron_group_map.values():
                 print(

@@ -256,7 +296,16 @@ class UserImporter: # noqa: R0902
         mapped_departments = []
         for department in user_obj.pop("departments", []):
             try:
-
+                if (
+                    self.validate_uuid(department)
+                    and department in self.department_map.values()
+                ):
+                    await self.logfile.write(
+                        f"Row {line_number}: Department {department} is a UUID, skipping mapping\n"
+                    )
+                    mapped_departments.append(department)
+                else:
+                    mapped_departments.append(self.department_map[department])
             except KeyError:
                 print(
                     f'Row {line_number}: Department "{department}" not found, '  # noqa: B907

@@ -320,7 +369,7 @@ class UserImporter: # noqa: R0902
             else:
                 existing_user[key] = value
         create_update_user = await self.http_client.put(
-            self.folio_client.
+            self.folio_client.gateway_url + f"/users/{existing_user['id']}",
             headers=self.folio_client.okapi_headers,
             json=existing_user,
         )

@@ -340,7 +389,7 @@ class UserImporter: # noqa: R0902
         HTTPError: If the HTTP request to create the user fails.
         """
         response = await self.http_client.post(
-            self.folio_client.
+            self.folio_client.gateway_url + "/users",
            headers=self.folio_client.okapi_headers,
            json=user_obj,
        )

@@ -490,7 +539,7 @@ class UserImporter: # noqa: R0902
         and the existing PU object (existing_pu).
         """
         rp_obj = user_obj.pop("requestPreference", {})
-        spu_obj = user_obj.pop("servicePointsUser")
+        spu_obj = user_obj.pop("servicePointsUser", {})
         existing_user = await self.get_existing_user(user_obj)
         if existing_user:
             existing_rp = await self.get_existing_rp(user_obj, existing_user)

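Popping `servicePointsUser` with a `{}` default means user objects without that block no longer raise `KeyError`; the empty dict is falsy, so the `if spu_obj:` guard fixed later in this diff simply skips service-point handling. For example:

```python
user_obj = {"username": "jdoe"}  # no servicePointsUser block

spu_obj = user_obj.pop("servicePointsUser", {})  # {} instead of a KeyError
if spu_obj:
    ...  # map service points and create/update the service-points-users record
else:
    pass  # nothing to do for this user
```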
@@ -540,7 +589,7 @@ class UserImporter: # noqa: R0902
         rp_obj["userId"] = new_user_obj["id"]
         # print(rp_obj)
         response = await self.http_client.post(
-            self.folio_client.
+            self.folio_client.gateway_url
             + "/request-preference-storage/request-preference",
             headers=self.folio_client.okapi_headers,
             json=rp_obj,

@@ -564,7 +613,7 @@ class UserImporter: # noqa: R0902
         existing_rp.update(rp_obj)
         # print(existing_rp)
         response = await self.http_client.put(
-            self.folio_client.
+            self.folio_client.gateway_url
             + f"/request-preference-storage/request-preference/{existing_rp['id']}",
             headers=self.folio_client.okapi_headers,
             json=existing_rp,

@@ -586,7 +635,7 @@ class UserImporter: # noqa: R0902
         """
         perms_user_obj = {"userId": new_user_obj["id"], "permissions": []}
         response = await self.http_client.post(
-            self.folio_client.
+            self.folio_client.gateway_url + "/perms/users",
             headers=self.folio_client.okapi_headers,
             json=perms_user_obj,
         )

@@ -674,7 +723,13 @@ class UserImporter: # noqa: R0902
         mapped_service_points = []
         for sp in spu_obj.pop("servicePointsIds", []):
             try:
-
+                if self.validate_uuid(sp) and sp in self.service_point_map.values():
+                    await self.logfile.write(
+                        f"Service point {sp} is a UUID, skipping mapping\n"
+                    )
+                    mapped_service_points.append(sp)
+                else:
+                    mapped_service_points.append(self.service_point_map[sp])
             except KeyError:
                 print(
                     f'Service point "{sp}" not found, excluding service point from user: '

@@ -685,7 +740,13 @@ class UserImporter: # noqa: R0902
         if "defaultServicePointId" in spu_obj:
             sp_code = spu_obj.pop('defaultServicePointId', '')
             try:
-
+                if self.validate_uuid(sp_code) and sp_code in self.service_point_map.values():
+                    await self.logfile.write(
+                        f"Default service point {sp_code} is a UUID, skipping mapping\n"
+                    )
+                    mapped_sp_id = sp_code
+                else:
+                    mapped_sp_id = self.service_point_map[sp_code]
                 if mapped_sp_id not in spu_obj.get('servicePointsIds', []):
                     print(
                         f'Default service point "{sp_code}" not found in assigned service points, '

@@ -708,7 +769,7 @@ class UserImporter: # noqa: R0902
         existing_spu (dict): The existing service-points-user object, if it exists.
         existing_user (dict): The existing user object associated with the spu_obj.
         """
-        if spu_obj
+        if spu_obj:
             await self.map_service_points(spu_obj, existing_user)
             if existing_spu:
                 await self.update_existing_spu(spu_obj, existing_spu)

@@ -727,7 +788,7 @@ class UserImporter: # noqa: R0902
         """
         try:
             existing_spu = await self.http_client.get(
-                self.folio_client.
+                self.folio_client.gateway_url + "/service-points-users",
                 headers=self.folio_client.okapi_headers,
                 params={"query": f"userId=={existing_user['id']}"},
             )

@@ -751,7 +812,7 @@ class UserImporter: # noqa: R0902
         """
         spu_obj["userId"] = existing_user["id"]
         response = await self.http_client.post(
-            self.folio_client.
+            self.folio_client.gateway_url + "/service-points-users",
             headers=self.folio_client.okapi_headers,
             json=spu_obj,
         )

@@ -770,7 +831,7 @@ class UserImporter: # noqa: R0902
         """
         existing_spu.update(spu_obj)
         response = await self.http_client.put(
-            self.folio_client.
+            self.folio_client.gateway_url + f"/service-points-users/{existing_spu['id']}",
             headers=self.folio_client.okapi_headers,
             json=existing_spu,
         )

@@ -781,7 +842,7 @@ class UserImporter: # noqa: R0902
         Process the user object file.

         Args:
-            openfile: The file object to process.
+            openfile: The file or file-like object to process.
         """
         tasks = []
         for line_number, user in enumerate(openfile):

@@ -935,12 +996,12 @@ async def main() -> None:
     importer = UserImporter(
         folio_client,
         library_name,
-        user_file_path,
         batch_size,
         limit_async_requests,
         logfile,
         errorfile,
         http_client,
+        user_file_path,
         args.user_match_key,
         args.update_only_present_fields,
         args.default_preferred_contact_type,

folio_data_import/marc_preprocessors/__init__.py
CHANGED

@@ -1 +1 @@
-from ._preprocessors import
+from ._preprocessors import *

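The wildcard import re-exports every preprocessor at the package level. One plausible use — an assumption, since the matching MARCDataImport.py changes are not shown in this excerpt — is resolving a preprocessor by name:

```python
import folio_data_import.marc_preprocessors as preprocessors

# Hypothetical: resolve a preprocessor named on the command line
func_name = "prepend_ppn_prefix_001"
preprocess = getattr(preprocessors, func_name)
```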
folio_data_import/marc_preprocessors/_preprocessors.py
CHANGED

@@ -1,4 +1,23 @@
 import pymarc
+import logging
+
+logger = logging.getLogger("folio_data_import.MARCDataImport")
+
+
+def prepend_prefix_001(record: pymarc.Record, prefix: str) -> pymarc.Record:
+    """
+    Prepend a prefix to the record's 001 field.
+
+    Args:
+        record (pymarc.Record): The MARC record to preprocess.
+        prefix (str): The prefix to prepend to the 001 field.
+
+    Returns:
+        pymarc.Record: The preprocessed MARC record.
+    """
+    record["001"].data = f"({prefix})" + record["001"].data
+    return record
+

 def prepend_ppn_prefix_001(record: pymarc.Record) -> pymarc.Record:
     """

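The new generic `prepend_prefix_001` turns the existing PPN helper into a one-liner and backs the ABES helper added below. A quick pymarc sketch of its effect:

```python
import pymarc

from folio_data_import.marc_preprocessors import prepend_prefix_001

record = pymarc.Record()
record.add_field(pymarc.Field(tag="001", data="123456789"))

record = prepend_prefix_001(record, "PPN")
print(record["001"].data)  # (PPN)123456789
```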
@@ -11,8 +30,22 @@ def prepend_ppn_prefix_001(record: pymarc.Record) -> pymarc.Record:
     Returns:
         pymarc.Record: The preprocessed MARC record.
     """
-    record
-
+    return prepend_prefix_001(record, "PPN")
+
+
+def prepend_abes_prefix_001(record: pymarc.Record) -> pymarc.Record:
+    """
+    Prepend the ABES prefix to the record's 001 field. Useful when
+    importing records from the ABES SUDOC catalog
+
+    Args:
+        record (pymarc.Record): The MARC record to preprocess.
+
+    Returns:
+        pymarc.Record: The preprocessed MARC record.
+    """
+    return prepend_prefix_001(record, "ABES")
+

 def strip_999_ff_fields(record: pymarc.Record) -> pymarc.Record:
     """

@@ -25,7 +58,276 @@ def strip_999_ff_fields(record: pymarc.Record) -> pymarc.Record:
     Returns:
         pymarc.Record: The preprocessed MARC record.
     """
-    for field in record.get_fields(
-        if field.indicators == pymarc.Indicators(*[
+    for field in record.get_fields("999"):
+        if field.indicators == pymarc.Indicators(*["f", "f"]):
             record.remove_field(field)
     return record
+
+def clean_999_fields(record: pymarc.Record) -> pymarc.Record:
+    """
+    The presence of 999 fields, with or without ff indicators, can cause
+    issues with data import mapping in FOLIO. This function calls strip_999_ff_fields
+    to remove 999 fields with ff indicators and then copies the remaining 999 fields
+    to 945 fields.
+
+    Args:
+        record (pymarc.Record): The MARC record to preprocess.
+
+    Returns:
+        pymarc.Record: The preprocessed MARC record.
+    """
+    record = strip_999_ff_fields(record)
+    for field in record.get_fields("999"):
+        _945 = pymarc.Field(
+            tag="945",
+            indicators=field.indicators,
+            subfields=field.subfields,
+        )
+        record.add_ordered_field(_945)
+        record.remove_field(field)
+    return record
+
+def sudoc_supercede_prep(record: pymarc.Record) -> pymarc.Record:
+    """
+    Preprocesses a record from the ABES SUDOC catalog to copy 035 fields
+    with a $9 subfield value of 'sudoc' to 935 fields with a $a subfield
+    prefixed with "(ABES)". This is useful when importing newly-merged records
+    from the SUDOC catalog when you want the new record to replace the old one
+    in FOLIO. This also applyes the prepend_ppn_prefix_001 function to the record.
+
+    Args:
+        record (pymarc.Record): The MARC record to preprocess.
+
+    Returns:
+        pymarc.Record: The preprocessed MARC record.
+    """
+    record = prepend_abes_prefix_001(record)
+    for field in record.get_fields("035"):
+        if "a" in field and "9" in field and field["9"] == "sudoc":
+            _935 = pymarc.Field(
+                tag="935",
+                indicators=["f", "f"],
+                subfields=[pymarc.field.Subfield("a", "(ABES)" + field["a"])],
+            )
+            record.add_ordered_field(_935)
+    return record
+
+
+def clean_empty_fields(record: pymarc.Record) -> pymarc.Record:
+    """
+    Remove empty fields and subfields from the record. These can cause
+    data import mapping issues in FOLIO. Removals are logged at custom
+    log level 26, which is used by folio_migration_tools to populate the
+    data issues report.
+
+    Args:
+        record (pymarc.Record): The MARC record to preprocess.
+
+    Returns:
+        pymarc.Record: The preprocessed MARC record.
+    """
+    MAPPED_FIELDS = {
+        "010": ["a", "z"],
+        "020": ["a", "y", "z"],
+        "035": ["a", "z"],
+        "040": ["a", "b", "c", "d", "e", "f", "g", "h", "k", "m", "n", "p", "r", "s"],
+        "050": ["a", "b"],
+        "082": ["a", "b"],
+        "100": ["a", "b", "c", "d", "q"],
+        "110": ["a", "b", "c"],
+        "111": ["a", "c", "d"],
+        "130": [
+            "a",
+            "d",
+            "f",
+            "k",
+            "l",
+            "m",
+            "n",
+            "o",
+            "p",
+            "r",
+            "s",
+            "t",
+            "x",
+            "y",
+            "z",
+        ],
+        "180": ["x", "y", "z"],
+        "210": ["a", "c"],
+        "240": ["a", "f", "k", "l", "m", "n", "o", "p", "r", "s", "t", "x", "y", "z"],
+        "245": ["a", "b", "c", "f", "g", "h", "k", "n", "p", "s"],
+        "246": ["a", "f", "g", "n", "p", "s"],
+        "250": ["a", "b"],
+        "260": ["a", "b", "c", "e", "f", "g"],
+        "300": ["a", "b", "c", "e", "f", "g"],
+        "440": ["a", "n", "p", "v", "x", "y", "z"],
+        "490": ["a", "v", "x", "y", "z"],
+        "500": ["a", "c", "d", "n", "p", "v", "x", "y", "z"],
+        "505": ["a", "g", "r", "t", "u"],
+        "520": ["a", "b", "c", "u"],
+        "600": ["a", "b", "c", "d", "q", "t", "v", "x", "y", "z"],
+        "610": ["a", "b", "c", "d", "t", "v", "x", "y", "z"],
+        "611": ["a", "c", "d", "t", "v", "x", "y", "z"],
+        "630": [
+            "a",
+            "d",
+            "f",
+            "k",
+            "l",
+            "m",
+            "n",
+            "o",
+            "p",
+            "r",
+            "s",
+            "t",
+            "x",
+            "y",
+            "z",
+        ],
+        "650": ["a", "d", "v", "x", "y", "z"],
+        "651": ["a", "v", "x", "y", "z"],
+        "655": ["a", "v", "x", "y", "z"],
+        "700": ["a", "b", "c", "d", "q", "t", "v", "x", "y", "z"],
+        "710": ["a", "b", "c", "d", "t", "v", "x", "y", "z"],
+        "711": ["a", "c", "d", "t", "v", "x", "y", "z"],
+        "730": [
+            "a",
+            "d",
+            "f",
+            "k",
+            "l",
+            "m",
+            "n",
+            "o",
+            "p",
+            "r",
+            "s",
+            "t",
+            "x",
+            "y",
+            "z",
+        ],
+        "740": ["a", "n", "p", "v", "x", "y", "z"],
+        "800": ["a", "b", "c", "d", "q", "t", "v", "x", "y", "z"],
+        "810": ["a", "b", "c", "d", "t", "v", "x", "y", "z"],
+        "811": ["a", "c", "d", "t", "v", "x", "y", "z"],
+        "830": [
+            "a",
+            "d",
+            "f",
+            "k",
+            "l",
+            "m",
+            "n",
+            "o",
+            "p",
+            "r",
+            "s",
+            "t",
+            "x",
+            "y",
+            "z",
+        ],
+        "856": ["u", "y", "z"],
+    }
+
+    for field in list(record.get_fields()):
+        len_subs = len(field.subfields)
+        subfield_value = bool(field.subfields[0].value) if len_subs > 0 else False
+        if not int(field.tag) >= 900 and field.tag in MAPPED_FIELDS:
+            if int(field.tag) > 9 and len_subs == 0:
+                logger.log(
+                    26,
+                    "DATA ISSUE\t%s\t%s\t%s",
+                    record["001"].value(),
+                    f"{field.tag} is empty, removing field",
+                    field,
+                )
+                record.remove_field(field)
+            elif len_subs == 1 and not subfield_value:
+                logger.log(
+                    26,
+                    "DATA ISSUE\t%s\t%s\t%s",
+                    record["001"].value(),
+                    f"{field.tag}${field.subfields[0].code} is empty, no other subfields present, removing field",
+                    field,
+                )
+                record.remove_field(field)
+            else:
+                if len_subs > 1 and "a" in field and not field["a"].strip():
+                    logger.log(
+                        26,
+                        "DATA ISSUE\t%s\t%s\t%s",
+                        record["001"].value(),
+                        f"{field.tag}$a is empty, removing subfield",
+                        field,
+                    )
+                    field.delete_subfield("a")
+                for idx, subfield in enumerate(list(field.subfields), start=1):
+                    if (
+                        subfield.code in MAPPED_FIELDS.get(field.tag, [])
+                        and not subfield.value
+                    ):
+                        logger.log(
+                            26,
+                            "DATA ISSUE\t%s\t%s\t%s",
+                            record["001"].value(),
+                            f"{field.tag}${subfield.code} ({ordinal(idx)} subfield) is empty, but other subfields have values, removing subfield",
+                            field,
+                        )
+                        field.delete_subfield(subfield.code)
+                if len(field.subfields) == 0:
+                    logger.log(
+                        26,
+                        "DATA ISSUE\t%s\t%s\t%s",
+                        record["001"].value(),
+                        f"{field.tag} has no non-empty subfields after cleaning, removing field",
+                        field,
+                    )
+                    record.remove_field(field)
+    return record
+
+
+def fix_leader(record: pymarc.Record) -> pymarc.Record:
+    """
+    Fixes the leader of the record by setting the record status to 'c' (modified
+    record) and the type of record to 'a' (language material).
+
+    Args:
+        record (pymarc.Record): The MARC record to preprocess.
+
+    Returns:
+        pymarc.Record: The preprocessed MARC record.
+    """
+    VALID_STATUSES = ["a", "c", "d", "n", "p"]
+    VALID_TYPES = ["a", "c", "d", "e", "f", "g", "i", "j", "k", "m", "o", "p", "r", "t"]
+    if record.leader[5] not in VALID_STATUSES:
+        logger.log(
+            26,
+            "DATA ISSUE\t%s\t%s\t%s",
+            record["001"].value(),
+            f"Invalid record status: {record.leader[5]}, setting to 'c'",
+            record,
+        )
+        record.leader = pymarc.Leader(record.leader[:5] + "c" + record.leader[6:])
+    if record.leader[6] not in VALID_TYPES:
+        logger.log(
+            26,
+            "DATA ISSUE\t%s\t%s\t%s",
+            record["001"].value(),
+            f"Invalid record type: {record.leader[6]}, setting to 'a'",
+            record,
+        )
+        record.leader = pymarc.Leader(record.leader[:6] + "a" + record.leader[7:])
+    return record
+
+
+def ordinal(n):
+    s = ("th", "st", "nd", "rd") + ("th",) * 10
+    v = n % 100
+    if v > 13:
+        return f"{n}{s[v % 10]}"
+    else:
+        return f"{n}{s[v]}"

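Each of these preprocessors is a plain `Record -> Record` function, so they chain cleanly. A small sketch exercising the new `fix_leader` on a synthetic record (a fresh pymarc leader has blank status and type bytes, so both corrections fire):

```python
import pymarc

from folio_data_import.marc_preprocessors import clean_empty_fields, fix_leader

record = pymarc.Record()
record.add_field(pymarc.Field(tag="001", data="123456789"))

for preprocess in (fix_leader, clean_empty_fields):
    record = preprocess(record)

print(record.leader[5], record.leader[6])  # c a
```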
{folio_data_import-0.2.6.dist-info → folio_data_import-0.2.8.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
-Metadata-Version: 2.
+Metadata-Version: 2.3
 Name: folio_data_import
-Version: 0.2.
+Version: 0.2.8
 Summary: A python module to interact with the data importing capabilities of the open-source FOLIO ILS
 License: MIT
 Author: Brooks Travis

@@ -19,8 +19,7 @@ Requires-Dist: flake8-black (>=0.3.6,<0.4.0)
 Requires-Dist: flake8-bugbear (>=24.8.19,<25.0.0)
 Requires-Dist: flake8-docstrings (>=1.7.0,<2.0.0)
 Requires-Dist: flake8-isort (>=6.1.1,<7.0.0)
-Requires-Dist: folioclient (
-Requires-Dist: httpx (>=0.27.2,<0.28.0)
+Requires-Dist: folioclient (==0.70.1)
 Requires-Dist: inquirer (>=3.4.0,<4.0.0)
 Requires-Dist: pyhumps (>=3.8.0,<4.0.0)
 Requires-Dist: pymarc (>=5.2.2,<6.0.0)

@@ -102,11 +101,30 @@ When this package is installed via PyPI or using `poetry install` from this repo
 }
 }
 ```
-
+#### Matching Existing Users
 
-
+Unlike mod-user-import, this importer does not require `externalSystemId` as the match point for your objects. If the user objects have `id` values, that will be used, falling back to `externalSystemId`. However, you can also specify `username` or `barcode` as the match point if desired, using the `--user_match_key` argument.
 
-
+#### Preferred Contact Type Mapping
+
+Another point of departure from the behavior of `mod-user-import` is the handling of `preferredContactTypeId`. This importer will accept either the `"001", "002", "003"...` values stored by FOLIO, or the human-friendly strings used by `mod-user-import` (`"mail", "email", "text", "phone", "mobile"`). It will also __*set a customizable default for all users that do not otherwise have a valid value specified*__ (using `--default_preferred_contact_type`), unless a (valid) value is already present in the user record being updated.
+
+#### Field Protection (*experimental*)
+
+This script offers a rudimentary field protection implementation using custom fields. To enable this functionality, create a text custom field that has the field name `protectedFields`. In this field, you can specify a comma-separated list of User schema field names, using dot-notation for nested fields. This protection should support all standard fields except addresses within `personal.addresses`. If you include `personal.addresses` in a user record, any existing addresses will be replaced by the new values.
+
+##### Example
+
+```
+{
+    "protectedFields": "customFields.protectedFields,personal.preferredFirstName,barcode,personal.telephone,personal.addresses"
+}
+```
+
+Would result in `preferredFirstName`, `barcode`, and `telephone` remaining unchanged, regardless of the contents of the incoming records.
+
+
+#### How to use:
 1. Generate a JSON lines (one JSON object per line) file of FOLIO user objects in the style of [mod-user-import](https://github.com/folio-org/mod-user-import)
 2. Run the script and specify the required arguments (and any desired optional arguments), including the path to your file of user objects
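For step 1, each line of the input file is one complete user object in the mod-user-import style. An illustrative record (values invented; as of 0.2.8, `patronGroup` may be a group name, as here, or a UUID already known to the tenant):

```
{"username": "jdoe", "externalSystemId": "jdoe-001", "active": true, "patronGroup": "staff", "personal": {"lastName": "Doe", "firstName": "Jane", "email": "jdoe@example.org", "preferredContactTypeId": "email"}}
```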