folio-data-import 0.2.4__tar.gz → 0.2.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of folio-data-import might be problematic. Click here for more details.
- folio_data_import-0.2.6/PKG-INFO +121 -0
- folio_data_import-0.2.6/README.md +90 -0
- {folio_data_import-0.2.4 → folio_data_import-0.2.6}/pyproject.toml +3 -3
- {folio_data_import-0.2.4 → folio_data_import-0.2.6}/src/folio_data_import/MARCDataImport.py +109 -9
- {folio_data_import-0.2.4 → folio_data_import-0.2.6}/src/folio_data_import/UserImport.py +267 -42
- folio_data_import-0.2.6/src/folio_data_import/marc_preprocessors/__init__.py +1 -0
- folio_data_import-0.2.6/src/folio_data_import/marc_preprocessors/_preprocessors.py +31 -0
- folio_data_import-0.2.4/PKG-INFO +0 -68
- folio_data_import-0.2.4/README.md +0 -38
- {folio_data_import-0.2.4 → folio_data_import-0.2.6}/LICENSE +0 -0
- {folio_data_import-0.2.4 → folio_data_import-0.2.6}/src/folio_data_import/__init__.py +0 -0
- {folio_data_import-0.2.4 → folio_data_import-0.2.6}/src/folio_data_import/__main__.py +0 -0
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: folio_data_import
|
|
3
|
+
Version: 0.2.6
|
|
4
|
+
Summary: A python module to interact with the data importing capabilities of the open-source FOLIO ILS
|
|
5
|
+
License: MIT
|
|
6
|
+
Author: Brooks Travis
|
|
7
|
+
Author-email: brooks.travis@gmail.com
|
|
8
|
+
Requires-Python: >=3.9,<4.0
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
+
Requires-Dist: aiofiles (>=24.1.0,<25.0.0)
|
|
17
|
+
Requires-Dist: flake8-bandit (>=4.1.1,<5.0.0)
|
|
18
|
+
Requires-Dist: flake8-black (>=0.3.6,<0.4.0)
|
|
19
|
+
Requires-Dist: flake8-bugbear (>=24.8.19,<25.0.0)
|
|
20
|
+
Requires-Dist: flake8-docstrings (>=1.7.0,<2.0.0)
|
|
21
|
+
Requires-Dist: flake8-isort (>=6.1.1,<7.0.0)
|
|
22
|
+
Requires-Dist: folioclient (>=0.61.0,<0.62.0)
|
|
23
|
+
Requires-Dist: httpx (>=0.27.2,<0.28.0)
|
|
24
|
+
Requires-Dist: inquirer (>=3.4.0,<4.0.0)
|
|
25
|
+
Requires-Dist: pyhumps (>=3.8.0,<4.0.0)
|
|
26
|
+
Requires-Dist: pymarc (>=5.2.2,<6.0.0)
|
|
27
|
+
Requires-Dist: tabulate (>=0.9.0,<0.10.0)
|
|
28
|
+
Requires-Dist: tqdm (>=4.66.5,<5.0.0)
|
|
29
|
+
Description-Content-Type: text/markdown
|
|
30
|
+
|
|
31
|
+
# folio_data_import
|
|
32
|
+
|
|
33
|
+
## Description
|
|
34
|
+
|
|
35
|
+
This project is designed to import data into the FOLIO LSP. It provides a simple and efficient way to import data from various sources using FOLIO's REST APIs.
|
|
36
|
+
|
|
37
|
+
## Features
|
|
38
|
+
|
|
39
|
+
- Import MARC records using FOLIO's Data Import system
|
|
40
|
+
- Import User records using FOLIO's User APIs
|
|
41
|
+
|
|
42
|
+
## Installation
|
|
43
|
+
|
|
44
|
+
## Installation
|
|
45
|
+
|
|
46
|
+
To install the project using Poetry, follow these steps:
|
|
47
|
+
|
|
48
|
+
1. Clone the repository.
|
|
49
|
+
2. Navigate to the project directory: `$ cd /path/to/folio_data_import`.
|
|
50
|
+
3. Install Poetry if you haven't already: `$ pip install poetry`.
|
|
51
|
+
4. Install the project and its dependencies: `$ poetry install`.
|
|
52
|
+
5. Run the application using Poetry: `$ poetry run python -m folio_data_import --help`.
|
|
53
|
+
|
|
54
|
+
Make sure to activate the virtual environment created by Poetry before running the application.
|
|
55
|
+
|
|
56
|
+
## Usage
|
|
57
|
+
|
|
58
|
+
1. Prepare the data to be imported in the specified format.
|
|
59
|
+
2. Run the application and follow the prompts to import the data.
|
|
60
|
+
3. Monitor the import progress and handle any errors or conflicts that may arise.
|
|
61
|
+
|
|
62
|
+
### folio-user-import
|
|
63
|
+
When this package is installed via PyPI or using `poetry install` from this repository, it installs a convenience script in your `$PATH` called `folio-user-import`. To view all command line options for this script, run `folio-user-import -h`. In addition to supporting `mod-user-import`-style JSON objects, this script also allows you to manage service point assignments for users by specifying a `servicePointsUser` object in the JSON object, using service point codes in place of UUIDs in the `defaultServicePointId` and `servicePointIds` fields:
|
|
64
|
+
```
|
|
65
|
+
{
|
|
66
|
+
"username": "checkin-all",
|
|
67
|
+
"barcode": "1728439497039848103",
|
|
68
|
+
"active": true,
|
|
69
|
+
"type": "patron",
|
|
70
|
+
"patronGroup": "staff",
|
|
71
|
+
"departments": [],
|
|
72
|
+
"personal": {
|
|
73
|
+
"lastName": "Admin",
|
|
74
|
+
"firstName": "checkin-all",
|
|
75
|
+
"addresses": [
|
|
76
|
+
{
|
|
77
|
+
"countryId": "HU",
|
|
78
|
+
"addressLine1": "Andrássy Street 1.",
|
|
79
|
+
"addressLine2": "",
|
|
80
|
+
"city": "Budapest",
|
|
81
|
+
"region": "Pest",
|
|
82
|
+
"postalCode": "1061",
|
|
83
|
+
"addressTypeId": "Home",
|
|
84
|
+
"primaryAddress": true
|
|
85
|
+
}
|
|
86
|
+
],
|
|
87
|
+
"preferredContactTypeId": "email"
|
|
88
|
+
},
|
|
89
|
+
"requestPreference": {
|
|
90
|
+
"holdShelf": true,
|
|
91
|
+
"delivery": false,
|
|
92
|
+
"fulfillment": "Hold Shelf"
|
|
93
|
+
}
|
|
94
|
+
"servicePointsUser": {
|
|
95
|
+
"defaultServicePointId": "cd1",
|
|
96
|
+
"servicePointsIds": [
|
|
97
|
+
"cd1",
|
|
98
|
+
"Online",
|
|
99
|
+
"000",
|
|
100
|
+
"cd2"
|
|
101
|
+
]
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
```
|
|
105
|
+
One thing to note here is that this importer does not require `externalSystemId` as the match point for your objects. While it is the default, if the user objects have `id` present, that will be used, falling back to `externalSystemId`. However, you can also specify `username` or `barcode` as the match point if desired, using the `--user_match_key` argument.
|
|
106
|
+
|
|
107
|
+
Another point of departure from the behavior of `mod-user-import` is the handling of `preferredContactTypeId`. This importer will accept either the `"001", "002", "003"...` values stored by the FOLIO, or the human-friendly strings used by `mod-user-import` (`"mail", "email", "text", "phone", "mobile"`). It will also __*set a customizable default for all users that do not otherwise have a valid value specified*__, unless a value is already present in the user record being updated.
|
|
108
|
+
|
|
109
|
+
How to use:
|
|
110
|
+
1. Generate a JSON lines (one JSON object per line) file of FOLIO user objects in the style of [mod-user-import](https://github.com/folio-org/mod-user-import)
|
|
111
|
+
2. Run the script and specify the required arguments (and any desired optional arguments), including the path to your file of user objects
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
## Contributing
|
|
115
|
+
|
|
116
|
+
Contributions are welcome! If you have any ideas, suggestions, or bug reports, please open an issue or submit a pull request.
|
|
117
|
+
|
|
118
|
+
## License
|
|
119
|
+
|
|
120
|
+
This project is licensed under the [MIT License](LICENSE).
|
|
121
|
+
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
# folio_data_import
|
|
2
|
+
|
|
3
|
+
## Description
|
|
4
|
+
|
|
5
|
+
This project is designed to import data into the FOLIO LSP. It provides a simple and efficient way to import data from various sources using FOLIO's REST APIs.
|
|
6
|
+
|
|
7
|
+
## Features
|
|
8
|
+
|
|
9
|
+
- Import MARC records using FOLIO's Data Import system
|
|
10
|
+
- Import User records using FOLIO's User APIs
|
|
11
|
+
|
|
12
|
+
## Installation
|
|
13
|
+
|
|
14
|
+
## Installation
|
|
15
|
+
|
|
16
|
+
To install the project using Poetry, follow these steps:
|
|
17
|
+
|
|
18
|
+
1. Clone the repository.
|
|
19
|
+
2. Navigate to the project directory: `$ cd /path/to/folio_data_import`.
|
|
20
|
+
3. Install Poetry if you haven't already: `$ pip install poetry`.
|
|
21
|
+
4. Install the project and its dependencies: `$ poetry install`.
|
|
22
|
+
5. Run the application using Poetry: `$ poetry run python -m folio_data_import --help`.
|
|
23
|
+
|
|
24
|
+
Make sure to activate the virtual environment created by Poetry before running the application.
|
|
25
|
+
|
|
26
|
+
## Usage
|
|
27
|
+
|
|
28
|
+
1. Prepare the data to be imported in the specified format.
|
|
29
|
+
2. Run the application and follow the prompts to import the data.
|
|
30
|
+
3. Monitor the import progress and handle any errors or conflicts that may arise.
|
|
31
|
+
|
|
32
|
+
### folio-user-import
|
|
33
|
+
When this package is installed via PyPI or using `poetry install` from this repository, it installs a convenience script in your `$PATH` called `folio-user-import`. To view all command line options for this script, run `folio-user-import -h`. In addition to supporting `mod-user-import`-style JSON objects, this script also allows you to manage service point assignments for users by specifying a `servicePointsUser` object in the JSON object, using service point codes in place of UUIDs in the `defaultServicePointId` and `servicePointIds` fields:
|
|
34
|
+
```
|
|
35
|
+
{
|
|
36
|
+
"username": "checkin-all",
|
|
37
|
+
"barcode": "1728439497039848103",
|
|
38
|
+
"active": true,
|
|
39
|
+
"type": "patron",
|
|
40
|
+
"patronGroup": "staff",
|
|
41
|
+
"departments": [],
|
|
42
|
+
"personal": {
|
|
43
|
+
"lastName": "Admin",
|
|
44
|
+
"firstName": "checkin-all",
|
|
45
|
+
"addresses": [
|
|
46
|
+
{
|
|
47
|
+
"countryId": "HU",
|
|
48
|
+
"addressLine1": "Andrássy Street 1.",
|
|
49
|
+
"addressLine2": "",
|
|
50
|
+
"city": "Budapest",
|
|
51
|
+
"region": "Pest",
|
|
52
|
+
"postalCode": "1061",
|
|
53
|
+
"addressTypeId": "Home",
|
|
54
|
+
"primaryAddress": true
|
|
55
|
+
}
|
|
56
|
+
],
|
|
57
|
+
"preferredContactTypeId": "email"
|
|
58
|
+
},
|
|
59
|
+
"requestPreference": {
|
|
60
|
+
"holdShelf": true,
|
|
61
|
+
"delivery": false,
|
|
62
|
+
"fulfillment": "Hold Shelf"
|
|
63
|
+
}
|
|
64
|
+
"servicePointsUser": {
|
|
65
|
+
"defaultServicePointId": "cd1",
|
|
66
|
+
"servicePointsIds": [
|
|
67
|
+
"cd1",
|
|
68
|
+
"Online",
|
|
69
|
+
"000",
|
|
70
|
+
"cd2"
|
|
71
|
+
]
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
```
|
|
75
|
+
One thing to note here is that this importer does not require `externalSystemId` as the match point for your objects. While it is the default, if the user objects have `id` present, that will be used, falling back to `externalSystemId`. However, you can also specify `username` or `barcode` as the match point if desired, using the `--user_match_key` argument.
|
|
76
|
+
|
|
77
|
+
Another point of departure from the behavior of `mod-user-import` is the handling of `preferredContactTypeId`. This importer will accept either the `"001", "002", "003"...` values stored by the FOLIO, or the human-friendly strings used by `mod-user-import` (`"mail", "email", "text", "phone", "mobile"`). It will also __*set a customizable default for all users that do not otherwise have a valid value specified*__, unless a value is already present in the user record being updated.
|
|
78
|
+
|
|
79
|
+
How to use:
|
|
80
|
+
1. Generate a JSON lines (one JSON object per line) file of FOLIO user objects in the style of [mod-user-import](https://github.com/folio-org/mod-user-import)
|
|
81
|
+
2. Run the script and specify the required arguments (and any desired optional arguments), including the path to your file of user objects
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
## Contributing
|
|
85
|
+
|
|
86
|
+
Contributions are welcome! If you have any ideas, suggestions, or bug reports, please open an issue or submit a pull request.
|
|
87
|
+
|
|
88
|
+
## License
|
|
89
|
+
|
|
90
|
+
This project is licensed under the [MIT License](LICENSE).
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "folio_data_import"
|
|
3
|
-
version = "0.2.
|
|
3
|
+
version = "0.2.6"
|
|
4
4
|
description = "A python module to interact with the data importing capabilities of the open-source FOLIO ILS"
|
|
5
5
|
authors = ["Brooks Travis <brooks.travis@gmail.com>"]
|
|
6
6
|
license = "MIT"
|
|
@@ -14,8 +14,8 @@ folio-user-import = "folio_data_import.UserImport:sync_main"
|
|
|
14
14
|
|
|
15
15
|
[tool.poetry.dependencies]
|
|
16
16
|
python = "^3.9"
|
|
17
|
-
folioclient = "^0.
|
|
18
|
-
httpx = "^0.
|
|
17
|
+
folioclient = "^0.61.0"
|
|
18
|
+
httpx = "^0.27.2"
|
|
19
19
|
pymarc = "^5.2.2"
|
|
20
20
|
pyhumps = "^3.8.0"
|
|
21
21
|
inquirer = "^3.4.0"
|
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
import argparse
|
|
2
2
|
import asyncio
|
|
3
3
|
import glob
|
|
4
|
+
import importlib
|
|
4
5
|
import io
|
|
5
6
|
import os
|
|
7
|
+
import sys
|
|
6
8
|
from typing import List
|
|
7
9
|
import uuid
|
|
8
10
|
from contextlib import ExitStack
|
|
@@ -30,6 +32,9 @@ except AttributeError:
|
|
|
30
32
|
# The order in which the report summary should be displayed
|
|
31
33
|
REPORT_SUMMARY_ORDERING = {"created": 0, "updated": 1, "discarded": 2, "error": 3}
|
|
32
34
|
|
|
35
|
+
# Set default timeout and backoff values for HTTP requests when retrying job status and final summary checks
|
|
36
|
+
RETRY_TIMEOUT_START = 1
|
|
37
|
+
RETRY_TIMEOUT_RETRY_FACTOR = 2
|
|
33
38
|
|
|
34
39
|
class MARCImportJob:
|
|
35
40
|
"""
|
|
@@ -69,6 +74,7 @@ class MARCImportJob:
|
|
|
69
74
|
import_profile_name: str,
|
|
70
75
|
batch_size=10,
|
|
71
76
|
batch_delay=0,
|
|
77
|
+
marc_record_preprocessor=None,
|
|
72
78
|
consolidate=False,
|
|
73
79
|
no_progress=False,
|
|
74
80
|
) -> None:
|
|
@@ -79,6 +85,8 @@ class MARCImportJob:
|
|
|
79
85
|
self.import_profile_name = import_profile_name
|
|
80
86
|
self.batch_size = batch_size
|
|
81
87
|
self.batch_delay = batch_delay
|
|
88
|
+
self.current_retry_timeout = None
|
|
89
|
+
self.marc_record_preprocessor = marc_record_preprocessor
|
|
82
90
|
|
|
83
91
|
async def do_work(self) -> None:
|
|
84
92
|
"""
|
|
@@ -148,10 +156,23 @@ class MARCImportJob:
|
|
|
148
156
|
Raises:
|
|
149
157
|
IndexError: If the job execution with the specified ID is not found.
|
|
150
158
|
"""
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
159
|
+
try:
|
|
160
|
+
self.current_retry_timeout = (
|
|
161
|
+
self.current_retry_timeout * RETRY_TIMEOUT_RETRY_FACTOR
|
|
162
|
+
) if self.current_retry_timeout else RETRY_TIMEOUT_START
|
|
163
|
+
job_status = self.folio_client.folio_get(
|
|
164
|
+
"/metadata-provider/jobExecutions?statusNot=DISCARDED&uiStatusAny"
|
|
165
|
+
"=PREPARING_FOR_PREVIEW&uiStatusAny=READY_FOR_PREVIEW&uiStatusAny=RUNNING&limit=50"
|
|
166
|
+
)
|
|
167
|
+
self.current_retry_timeout = None
|
|
168
|
+
except httpx.ConnectTimeout:
|
|
169
|
+
sleep(.25)
|
|
170
|
+
with httpx.Client(
|
|
171
|
+
timeout=self.current_retry_timeout,
|
|
172
|
+
verify=self.folio_client.ssl_verify
|
|
173
|
+
) as temp_client:
|
|
174
|
+
self.folio_client.httpx_client = temp_client
|
|
175
|
+
return await self.get_job_status()
|
|
155
176
|
try:
|
|
156
177
|
status = [
|
|
157
178
|
job for job in job_status["jobExecutions"] if job["id"] == self.job_id
|
|
@@ -316,6 +337,10 @@ class MARCImportJob:
|
|
|
316
337
|
await self.get_job_status()
|
|
317
338
|
sleep(0.25)
|
|
318
339
|
if record:
|
|
340
|
+
if self.marc_record_preprocessor:
|
|
341
|
+
record = await self.apply_marc_record_preprocessing(
|
|
342
|
+
record, self.marc_record_preprocessor
|
|
343
|
+
)
|
|
319
344
|
self.record_batch.append(record.as_marc())
|
|
320
345
|
counter += 1
|
|
321
346
|
else:
|
|
@@ -325,6 +350,39 @@ class MARCImportJob:
|
|
|
325
350
|
await self.create_batch_payload(counter, total_records, True),
|
|
326
351
|
)
|
|
327
352
|
|
|
353
|
+
@staticmethod
|
|
354
|
+
async def apply_marc_record_preprocessing(record: pymarc.Record, func_or_path) -> pymarc.Record:
|
|
355
|
+
"""
|
|
356
|
+
Apply preprocessing to the MARC record before sending it to FOLIO.
|
|
357
|
+
|
|
358
|
+
Args:
|
|
359
|
+
record (pymarc.Record): The MARC record to preprocess.
|
|
360
|
+
func_or_path (Union[Callable, str]): The preprocessing function or its import path.
|
|
361
|
+
|
|
362
|
+
Returns:
|
|
363
|
+
pymarc.Record: The preprocessed MARC record.
|
|
364
|
+
"""
|
|
365
|
+
if isinstance(func_or_path, str):
|
|
366
|
+
try:
|
|
367
|
+
path_parts = func_or_path.rsplit('.')
|
|
368
|
+
module_path, func_name = ".".join(path_parts[:-1]), path_parts[-1]
|
|
369
|
+
module = importlib.import_module(module_path)
|
|
370
|
+
func = getattr(module, func_name)
|
|
371
|
+
except (ImportError, AttributeError) as e:
|
|
372
|
+
print(f"Error importing preprocessing function {func_or_path}: {e}. Skipping preprocessing.")
|
|
373
|
+
return record
|
|
374
|
+
elif callable(func_or_path):
|
|
375
|
+
func = func_or_path
|
|
376
|
+
else:
|
|
377
|
+
print(f"Invalid preprocessing function: {func_or_path}. Skipping preprocessing.")
|
|
378
|
+
return record
|
|
379
|
+
|
|
380
|
+
try:
|
|
381
|
+
return func(record)
|
|
382
|
+
except Exception as e:
|
|
383
|
+
print(f"Error applying preprocessing function: {e}. Skipping preprocessing.")
|
|
384
|
+
return record
|
|
385
|
+
|
|
328
386
|
async def create_batch_payload(self, counter, total_records, is_last) -> dict:
|
|
329
387
|
"""
|
|
330
388
|
Create a batch payload for data import.
|
|
@@ -392,9 +450,7 @@ class MARCImportJob:
|
|
|
392
450
|
await self.get_job_status()
|
|
393
451
|
sleep(1)
|
|
394
452
|
if self.finished:
|
|
395
|
-
job_summary = self.
|
|
396
|
-
f"/metadata-provider/jobSummary/{self.job_id}"
|
|
397
|
-
)
|
|
453
|
+
job_summary = await self.get_job_summary()
|
|
398
454
|
job_summary.pop("jobExecutionId")
|
|
399
455
|
job_summary.pop("totalErrors")
|
|
400
456
|
columns = ["Summary"] + list(job_summary.keys())
|
|
@@ -425,6 +481,31 @@ class MARCImportJob:
|
|
|
425
481
|
self.last_current = 0
|
|
426
482
|
self.finished = False
|
|
427
483
|
|
|
484
|
+
async def get_job_summary(self) -> dict:
|
|
485
|
+
"""
|
|
486
|
+
Retrieves the job summary for the current job execution.
|
|
487
|
+
|
|
488
|
+
Returns:
|
|
489
|
+
dict: The job summary for the current job execution.
|
|
490
|
+
"""
|
|
491
|
+
try:
|
|
492
|
+
self.current_retry_timeout = (
|
|
493
|
+
self.current_retry_timeout * RETRY_TIMEOUT_RETRY_FACTOR
|
|
494
|
+
) if self.current_retry_timeout else RETRY_TIMEOUT_START
|
|
495
|
+
job_summary = self.folio_client.folio_get(
|
|
496
|
+
f"/metadata-provider/jobSummary/{self.job_id}"
|
|
497
|
+
)
|
|
498
|
+
self.current_retry_timeout = None
|
|
499
|
+
except httpx.ReadTimeout: #
|
|
500
|
+
sleep(.25)
|
|
501
|
+
with httpx.Client(
|
|
502
|
+
timeout=self.current_retry_timeout,
|
|
503
|
+
verify=self.folio_client.ssl_verify
|
|
504
|
+
) as temp_client:
|
|
505
|
+
self.folio_client.httpx_client = temp_client
|
|
506
|
+
return await self.get_job_summary()
|
|
507
|
+
return job_summary
|
|
508
|
+
|
|
428
509
|
|
|
429
510
|
async def main() -> None:
|
|
430
511
|
"""
|
|
@@ -467,6 +548,15 @@ async def main() -> None:
|
|
|
467
548
|
help="The number of seconds to wait between record batches.",
|
|
468
549
|
default=0.0,
|
|
469
550
|
)
|
|
551
|
+
parser.add_argument(
|
|
552
|
+
"--preprocessor",
|
|
553
|
+
type=str,
|
|
554
|
+
help=(
|
|
555
|
+
"The path to a Python module containing a preprocessing function "
|
|
556
|
+
"to apply to each MARC record before sending to FOLIO."
|
|
557
|
+
),
|
|
558
|
+
default=None,
|
|
559
|
+
)
|
|
470
560
|
parser.add_argument(
|
|
471
561
|
"--consolidate",
|
|
472
562
|
action="store_true",
|
|
@@ -491,6 +581,17 @@ async def main() -> None:
|
|
|
491
581
|
if args.member_tenant_id:
|
|
492
582
|
folio_client.okapi_headers["x-okapi-tenant"] = args.member_tenant_id
|
|
493
583
|
|
|
584
|
+
if os.path.isabs(args.marc_file_path):
|
|
585
|
+
marc_files = [Path(x) for x in glob.glob(args.marc_file_path)]
|
|
586
|
+
else:
|
|
587
|
+
marc_files = list(Path("./").glob(args.marc_file_path))
|
|
588
|
+
|
|
589
|
+
if len(marc_files) == 0:
|
|
590
|
+
print(f"No files found matching {args.marc_file_path}. Exiting.")
|
|
591
|
+
sys.exit(1)
|
|
592
|
+
else:
|
|
593
|
+
print(marc_files)
|
|
594
|
+
|
|
494
595
|
if not args.import_profile_name:
|
|
495
596
|
import_profiles = folio_client.folio_get(
|
|
496
597
|
"/data-import-profiles/jobProfiles",
|
|
@@ -511,8 +612,6 @@ async def main() -> None:
|
|
|
511
612
|
]
|
|
512
613
|
answers = inquirer.prompt(questions)
|
|
513
614
|
args.import_profile_name = answers["import_profile_name"]
|
|
514
|
-
marc_files = [Path(x) for x in glob.glob(args.marc_file_path, root_dir="./")]
|
|
515
|
-
print(marc_files)
|
|
516
615
|
try:
|
|
517
616
|
await MARCImportJob(
|
|
518
617
|
folio_client,
|
|
@@ -520,6 +619,7 @@ async def main() -> None:
|
|
|
520
619
|
args.import_profile_name,
|
|
521
620
|
batch_size=args.batch_size,
|
|
522
621
|
batch_delay=args.batch_delay,
|
|
622
|
+
marc_record_preprocessor=args.preprocessor,
|
|
523
623
|
consolidate=bool(args.consolidate),
|
|
524
624
|
no_progress=bool(args.no_progress),
|
|
525
625
|
).do_work()
|
|
@@ -21,6 +21,14 @@ except AttributeError:
|
|
|
21
21
|
|
|
22
22
|
utc = zoneinfo.ZoneInfo("UTC")
|
|
23
23
|
|
|
24
|
+
# Mapping of preferred contact type IDs to their corresponding values
|
|
25
|
+
PREFERRED_CONTACT_TYPES_MAP = {
|
|
26
|
+
"001": "mail",
|
|
27
|
+
"002": "email",
|
|
28
|
+
"003": "text",
|
|
29
|
+
"004": "phone",
|
|
30
|
+
"005": "mobile",
|
|
31
|
+
}
|
|
24
32
|
|
|
25
33
|
class UserImporter: # noqa: R0902
|
|
26
34
|
"""
|
|
@@ -41,6 +49,7 @@ class UserImporter: # noqa: R0902
|
|
|
41
49
|
http_client: httpx.AsyncClient,
|
|
42
50
|
user_match_key: str = "externalSystemId",
|
|
43
51
|
only_update_present_fields: bool = False,
|
|
52
|
+
default_preferred_contact_type: str = "002",
|
|
44
53
|
) -> None:
|
|
45
54
|
self.limit_simultaneous_requests = limit_simultaneous_requests
|
|
46
55
|
self.batch_size = batch_size
|
|
@@ -56,10 +65,14 @@ class UserImporter: # noqa: R0902
|
|
|
56
65
|
self.department_map: dict = self.build_ref_data_id_map(
|
|
57
66
|
self.folio_client, "/departments", "departments", "name"
|
|
58
67
|
)
|
|
68
|
+
self.service_point_map: dict = self.build_ref_data_id_map(
|
|
69
|
+
self.folio_client, "/service-points", "servicepoints", "code"
|
|
70
|
+
)
|
|
59
71
|
self.logfile: AsyncTextIOWrapper = logfile
|
|
60
72
|
self.errorfile: AsyncTextIOWrapper = errorfile
|
|
61
73
|
self.http_client: httpx.AsyncClient = http_client
|
|
62
74
|
self.only_update_present_fields: bool = only_update_present_fields
|
|
75
|
+
self.default_preferred_contact_type: str = default_preferred_contact_type
|
|
63
76
|
self.match_key = user_match_key
|
|
64
77
|
self.lock: asyncio.Lock = asyncio.Lock()
|
|
65
78
|
self.logs: dict = {"created": 0, "updated": 0, "failed": 0}
|
|
@@ -87,7 +100,8 @@ class UserImporter: # noqa: R0902
|
|
|
87
100
|
|
|
88
101
|
This method triggers the process of importing users by calling the `process_file` method.
|
|
89
102
|
"""
|
|
90
|
-
|
|
103
|
+
with open(self.user_file_path, "r", encoding="utf-8") as openfile:
|
|
104
|
+
await self.process_file(openfile)
|
|
91
105
|
|
|
92
106
|
async def get_existing_user(self, user_obj) -> dict:
|
|
93
107
|
"""
|
|
@@ -255,13 +269,14 @@ class UserImporter: # noqa: R0902
|
|
|
255
269
|
if mapped_departments:
|
|
256
270
|
user_obj["departments"] = mapped_departments
|
|
257
271
|
|
|
258
|
-
async def update_existing_user(self, user_obj, existing_user) -> Tuple[dict, dict]:
|
|
272
|
+
async def update_existing_user(self, user_obj, existing_user, protected_fields) -> Tuple[dict, dict]:
|
|
259
273
|
"""
|
|
260
274
|
Updates an existing user with the provided user object.
|
|
261
275
|
|
|
262
276
|
Args:
|
|
263
277
|
user_obj (dict): The user object containing the updated user information.
|
|
264
278
|
existing_user (dict): The existing user object to be updated.
|
|
279
|
+
protected_fields (dict): A dictionary containing the protected fields and their values.
|
|
265
280
|
|
|
266
281
|
Returns:
|
|
267
282
|
tuple: A tuple containing the updated existing user object and the API response.
|
|
@@ -270,6 +285,8 @@ class UserImporter: # noqa: R0902
|
|
|
270
285
|
None
|
|
271
286
|
|
|
272
287
|
"""
|
|
288
|
+
await self.set_preferred_contact_type(user_obj, existing_user)
|
|
289
|
+
preferred_contact_type = {"preferredContactTypeId": existing_user.get("personal", {}).pop("preferredContactTypeId")}
|
|
273
290
|
if self.only_update_present_fields:
|
|
274
291
|
new_personal = user_obj.pop("personal", {})
|
|
275
292
|
existing_personal = existing_user.pop("personal", {})
|
|
@@ -290,6 +307,18 @@ class UserImporter: # noqa: R0902
|
|
|
290
307
|
existing_user["personal"] = existing_personal
|
|
291
308
|
else:
|
|
292
309
|
existing_user.update(user_obj)
|
|
310
|
+
if "personal" in existing_user:
|
|
311
|
+
existing_user["personal"].update(preferred_contact_type)
|
|
312
|
+
else:
|
|
313
|
+
existing_user["personal"] = preferred_contact_type
|
|
314
|
+
for key, value in protected_fields.items():
|
|
315
|
+
if type(value) is dict:
|
|
316
|
+
try:
|
|
317
|
+
existing_user[key].update(value)
|
|
318
|
+
except KeyError:
|
|
319
|
+
existing_user[key] = value
|
|
320
|
+
else:
|
|
321
|
+
existing_user[key] = value
|
|
293
322
|
create_update_user = await self.http_client.put(
|
|
294
323
|
self.folio_client.okapi_url + f"/users/{existing_user['id']}",
|
|
295
324
|
headers=self.folio_client.okapi_headers,
|
|
@@ -320,7 +349,41 @@ class UserImporter: # noqa: R0902
|
|
|
320
349
|
self.logs["created"] += 1
|
|
321
350
|
return response.json()
|
|
322
351
|
|
|
323
|
-
async def
|
|
352
|
+
async def set_preferred_contact_type(self, user_obj, existing_user) -> None:
|
|
353
|
+
"""
|
|
354
|
+
Sets the preferred contact type for a user object. If the provided preferred contact type
|
|
355
|
+
is not valid, the default preferred contact type is used, unless the previously existing
|
|
356
|
+
user object has a valid preferred contact type set. In that case, the existing preferred
|
|
357
|
+
contact type is used.
|
|
358
|
+
"""
|
|
359
|
+
if "personal" in user_obj and "preferredContactTypeId" in user_obj["personal"]:
|
|
360
|
+
current_pref_contact = user_obj["personal"].get(
|
|
361
|
+
"preferredContactTypeId", ""
|
|
362
|
+
)
|
|
363
|
+
if mapped_contact_type := dict([(v, k) for k, v in PREFERRED_CONTACT_TYPES_MAP.items()]).get(
|
|
364
|
+
current_pref_contact,
|
|
365
|
+
"",
|
|
366
|
+
):
|
|
367
|
+
existing_user["personal"]["preferredContactTypeId"] = mapped_contact_type
|
|
368
|
+
else:
|
|
369
|
+
existing_user["personal"]["preferredContactTypeId"] = current_pref_contact if current_pref_contact in PREFERRED_CONTACT_TYPES_MAP else self.default_preferred_contact_type
|
|
370
|
+
else:
|
|
371
|
+
print(
|
|
372
|
+
f"Preferred contact type not provided or is not a valid option: {PREFERRED_CONTACT_TYPES_MAP}\n"
|
|
373
|
+
f"Setting preferred contact type to {self.default_preferred_contact_type} or using existing value"
|
|
374
|
+
)
|
|
375
|
+
await self.logfile.write(
|
|
376
|
+
f"Preferred contact type not provided or is not a valid option: {PREFERRED_CONTACT_TYPES_MAP}\n"
|
|
377
|
+
f"Setting preferred contact type to {self.default_preferred_contact_type} or using existing value\n"
|
|
378
|
+
)
|
|
379
|
+
mapped_contact_type = existing_user.get("personal", {}).get(
|
|
380
|
+
"preferredContactTypeId", ""
|
|
381
|
+
) or self.default_preferred_contact_type
|
|
382
|
+
if "personal" not in existing_user:
|
|
383
|
+
existing_user["personal"] = {}
|
|
384
|
+
existing_user["personal"]["preferredContactTypeId"] = mapped_contact_type or self.default_preferred_contact_type
|
|
385
|
+
|
|
386
|
+
async def create_or_update_user(self, user_obj, existing_user, protected_fields, line_number) -> dict:
|
|
324
387
|
"""
|
|
325
388
|
Creates or updates a user based on the given user object and existing user.
|
|
326
389
|
|
|
@@ -334,7 +397,7 @@ class UserImporter: # noqa: R0902
|
|
|
334
397
|
"""
|
|
335
398
|
if existing_user:
|
|
336
399
|
existing_user, update_user = await self.update_existing_user(
|
|
337
|
-
user_obj, existing_user
|
|
400
|
+
user_obj, existing_user, protected_fields
|
|
338
401
|
)
|
|
339
402
|
try:
|
|
340
403
|
update_user.raise_for_status()
|
|
@@ -375,7 +438,7 @@ class UserImporter: # noqa: R0902
|
|
|
375
438
|
|
|
376
439
|
async def process_user_obj(self, user: str) -> dict:
|
|
377
440
|
"""
|
|
378
|
-
Process a user object.
|
|
441
|
+
Process a user object. If not type is found in the source object, type is set to "patron".
|
|
379
442
|
|
|
380
443
|
Args:
|
|
381
444
|
user (str): The user data to be processed, as a json string.
|
|
@@ -386,17 +449,34 @@ class UserImporter: # noqa: R0902
|
|
|
386
449
|
"""
|
|
387
450
|
user_obj = json.loads(user)
|
|
388
451
|
user_obj["type"] = user_obj.get("type", "patron")
|
|
389
|
-
if "personal" in user_obj:
|
|
390
|
-
current_pref_contact = user_obj["personal"].get(
|
|
391
|
-
"preferredContactTypeId", ""
|
|
392
|
-
)
|
|
393
|
-
user_obj["personal"]["preferredContactTypeId"] = (
|
|
394
|
-
current_pref_contact
|
|
395
|
-
if current_pref_contact in ["001", "002", "003"]
|
|
396
|
-
else "002"
|
|
397
|
-
)
|
|
398
452
|
return user_obj
|
|
399
453
|
|
|
454
|
+
async def get_protected_fields(self, existing_user) -> dict:
|
|
455
|
+
"""
|
|
456
|
+
Retrieves the protected fields from the existing user object.
|
|
457
|
+
|
|
458
|
+
Args:
|
|
459
|
+
existing_user (dict): The existing user object.
|
|
460
|
+
|
|
461
|
+
Returns:
|
|
462
|
+
dict: A dictionary containing the protected fields and their values.
|
|
463
|
+
"""
|
|
464
|
+
protected_fields = {}
|
|
465
|
+
protected_fields_list = existing_user.get("customFields", {}).get("protectedFields", "").split(",")
|
|
466
|
+
for field in protected_fields_list:
|
|
467
|
+
if len(field.split(".")) > 1:
|
|
468
|
+
field, subfield = field.split(".")
|
|
469
|
+
if field not in protected_fields:
|
|
470
|
+
protected_fields[field] = {}
|
|
471
|
+
protected_fields[field][subfield] = existing_user.get(field, {}).pop(subfield, None)
|
|
472
|
+
if protected_fields[field][subfield] is None:
|
|
473
|
+
protected_fields[field].pop(subfield)
|
|
474
|
+
else:
|
|
475
|
+
protected_fields[field] = existing_user.pop(field, None)
|
|
476
|
+
if protected_fields[field] is None:
|
|
477
|
+
protected_fields.pop(field)
|
|
478
|
+
return protected_fields
|
|
479
|
+
|
|
400
480
|
async def process_existing_user(self, user_obj) -> Tuple[dict, dict, dict, dict]:
|
|
401
481
|
"""
|
|
402
482
|
Process an existing user.
|
|
@@ -410,14 +490,19 @@ class UserImporter: # noqa: R0902
|
|
|
410
490
|
and the existing PU object (existing_pu).
|
|
411
491
|
"""
|
|
412
492
|
rp_obj = user_obj.pop("requestPreference", {})
|
|
493
|
+
spu_obj = user_obj.pop("servicePointsUser")
|
|
413
494
|
existing_user = await self.get_existing_user(user_obj)
|
|
414
495
|
if existing_user:
|
|
415
496
|
existing_rp = await self.get_existing_rp(user_obj, existing_user)
|
|
416
497
|
existing_pu = await self.get_existing_pu(user_obj, existing_user)
|
|
498
|
+
existing_spu = await self.get_existing_spu(existing_user)
|
|
499
|
+
protected_fields = await self.get_protected_fields(existing_user)
|
|
417
500
|
else:
|
|
418
501
|
existing_rp = {}
|
|
419
502
|
existing_pu = {}
|
|
420
|
-
|
|
503
|
+
existing_spu = {}
|
|
504
|
+
protected_fields = {}
|
|
505
|
+
return rp_obj, spu_obj, existing_user, protected_fields, existing_rp, existing_pu, existing_spu
|
|
421
506
|
|
|
422
507
|
async def create_or_update_rp(self, rp_obj, existing_rp, new_user_obj):
|
|
423
508
|
"""
|
|
@@ -528,14 +613,14 @@ class UserImporter: # noqa: R0902
|
|
|
528
613
|
"""
|
|
529
614
|
async with self.limit_simultaneous_requests:
|
|
530
615
|
user_obj = await self.process_user_obj(user)
|
|
531
|
-
rp_obj, existing_user, existing_rp, existing_pu = (
|
|
616
|
+
rp_obj, spu_obj, existing_user, protected_fields, existing_rp, existing_pu, existing_spu = (
|
|
532
617
|
await self.process_existing_user(user_obj)
|
|
533
618
|
)
|
|
534
619
|
await self.map_address_types(user_obj, line_number)
|
|
535
620
|
await self.map_patron_groups(user_obj, line_number)
|
|
536
621
|
await self.map_departments(user_obj, line_number)
|
|
537
622
|
new_user_obj = await self.create_or_update_user(
|
|
538
|
-
user_obj, existing_user, line_number
|
|
623
|
+
user_obj, existing_user, protected_fields, line_number
|
|
539
624
|
)
|
|
540
625
|
if new_user_obj:
|
|
541
626
|
try:
|
|
@@ -572,42 +657,162 @@ class UserImporter: # noqa: R0902
|
|
|
572
657
|
)
|
|
573
658
|
print(pu_error_message)
|
|
574
659
|
await self.logfile.write(pu_error_message + "\n")
|
|
660
|
+
await self.handle_service_points_user(spu_obj, existing_spu, new_user_obj)
|
|
661
|
+
|
|
662
|
+
async def map_service_points(self, spu_obj, existing_user):
|
|
663
|
+
"""
|
|
664
|
+
Maps the service points of a user object using the provided service point map.
|
|
665
|
+
|
|
666
|
+
Args:
|
|
667
|
+
spu_obj (dict): The service-points-user object to update.
|
|
668
|
+
existing_user (dict): The existing user object associated with the spu_obj.
|
|
669
|
+
|
|
670
|
+
Returns:
|
|
671
|
+
None
|
|
672
|
+
"""
|
|
673
|
+
if "servicePointsIds" in spu_obj:
|
|
674
|
+
mapped_service_points = []
|
|
675
|
+
for sp in spu_obj.pop("servicePointsIds", []):
|
|
676
|
+
try:
|
|
677
|
+
mapped_service_points.append(self.service_point_map[sp])
|
|
678
|
+
except KeyError:
|
|
679
|
+
print(
|
|
680
|
+
f'Service point "{sp}" not found, excluding service point from user: '
|
|
681
|
+
f'{self.service_point_map}'
|
|
682
|
+
)
|
|
683
|
+
if mapped_service_points:
|
|
684
|
+
spu_obj["servicePointsIds"] = mapped_service_points
|
|
685
|
+
if "defaultServicePointId" in spu_obj:
|
|
686
|
+
sp_code = spu_obj.pop('defaultServicePointId', '')
|
|
687
|
+
try:
|
|
688
|
+
mapped_sp_id = self.service_point_map[sp_code]
|
|
689
|
+
if mapped_sp_id not in spu_obj.get('servicePointsIds', []):
|
|
690
|
+
print(
|
|
691
|
+
f'Default service point "{sp_code}" not found in assigned service points, '
|
|
692
|
+
'excluding default service point from user'
|
|
693
|
+
)
|
|
694
|
+
else:
|
|
695
|
+
spu_obj['defaultServicePointId'] = mapped_sp_id
|
|
696
|
+
except KeyError:
|
|
697
|
+
print(
|
|
698
|
+
f'Default service point "{sp_code}" not found, excluding default service '
|
|
699
|
+
f'point from user: {existing_user["id"]}'
|
|
700
|
+
)
|
|
701
|
+
|
|
702
|
+
async def handle_service_points_user(self, spu_obj, existing_spu, existing_user):
|
|
703
|
+
"""
|
|
704
|
+
Handles processing a service-points-user object for a user.
|
|
705
|
+
|
|
706
|
+
Args:
|
|
707
|
+
spu_obj (dict): The service-points-user object to process.
|
|
708
|
+
existing_spu (dict): The existing service-points-user object, if it exists.
|
|
709
|
+
existing_user (dict): The existing user object associated with the spu_obj.
|
|
710
|
+
"""
|
|
711
|
+
if spu_obj is not None:
|
|
712
|
+
await self.map_service_points(spu_obj, existing_user)
|
|
713
|
+
if existing_spu:
|
|
714
|
+
await self.update_existing_spu(spu_obj, existing_spu)
|
|
715
|
+
else:
|
|
716
|
+
await self.create_new_spu(spu_obj, existing_user)
|
|
717
|
+
|
|
718
|
+
async def get_existing_spu(self, existing_user):
|
|
719
|
+
"""
|
|
720
|
+
Retrieves the existing service-points-user object for a given user.
|
|
721
|
+
|
|
722
|
+
Args:
|
|
723
|
+
existing_user (dict): The existing user object.
|
|
724
|
+
|
|
725
|
+
Returns:
|
|
726
|
+
dict: The existing service-points-user object.
|
|
727
|
+
"""
|
|
728
|
+
try:
|
|
729
|
+
existing_spu = await self.http_client.get(
|
|
730
|
+
self.folio_client.okapi_url + "/service-points-users",
|
|
731
|
+
headers=self.folio_client.okapi_headers,
|
|
732
|
+
params={"query": f"userId=={existing_user['id']}"},
|
|
733
|
+
)
|
|
734
|
+
existing_spu.raise_for_status()
|
|
735
|
+
existing_spu = existing_spu.json().get("servicePointsUsers", [])
|
|
736
|
+
existing_spu = existing_spu[0] if existing_spu else {}
|
|
737
|
+
except httpx.HTTPError:
|
|
738
|
+
existing_spu = {}
|
|
739
|
+
return existing_spu
|
|
740
|
+
|
|
741
|
+
async def create_new_spu(self, spu_obj, existing_user):
|
|
742
|
+
"""
|
|
743
|
+
Creates a new service-points-user object for a given user.
|
|
744
|
+
|
|
745
|
+
Args:
|
|
746
|
+
spu_obj (dict): The service-points-user object to create.
|
|
747
|
+
existing_user (dict): The existing user object.
|
|
748
|
+
|
|
749
|
+
Returns:
|
|
750
|
+
None
|
|
751
|
+
"""
|
|
752
|
+
spu_obj["userId"] = existing_user["id"]
|
|
753
|
+
response = await self.http_client.post(
|
|
754
|
+
self.folio_client.okapi_url + "/service-points-users",
|
|
755
|
+
headers=self.folio_client.okapi_headers,
|
|
756
|
+
json=spu_obj,
|
|
757
|
+
)
|
|
758
|
+
response.raise_for_status()
|
|
759
|
+
|
|
760
|
+
async def update_existing_spu(self, spu_obj, existing_spu):
|
|
761
|
+
"""
|
|
762
|
+
Updates an existing service-points-user object with the provided service-points-user object.
|
|
763
|
+
|
|
764
|
+
Args:
|
|
765
|
+
spu_obj (dict): The service-points-user object containing the updated values.
|
|
766
|
+
existing_spu (dict): The existing service-points-user object to be updated.
|
|
767
|
+
|
|
768
|
+
Returns:
|
|
769
|
+
None
|
|
770
|
+
"""
|
|
771
|
+
existing_spu.update(spu_obj)
|
|
772
|
+
response = await self.http_client.put(
|
|
773
|
+
self.folio_client.okapi_url + f"/service-points-users/{existing_spu['id']}",
|
|
774
|
+
headers=self.folio_client.okapi_headers,
|
|
775
|
+
json=existing_spu,
|
|
776
|
+
)
|
|
777
|
+
response.raise_for_status()
|
|
575
778
|
|
|
576
|
-
async def process_file(self) -> None:
|
|
779
|
+
async def process_file(self, openfile) -> None:
|
|
577
780
|
"""
|
|
578
781
|
Process the user object file.
|
|
782
|
+
|
|
783
|
+
Args:
|
|
784
|
+
openfile: The file object to process.
|
|
579
785
|
"""
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
if len(tasks) == self.batch_size:
|
|
585
|
-
start = time.time()
|
|
586
|
-
await asyncio.gather(*tasks)
|
|
587
|
-
duration = time.time() - start
|
|
588
|
-
async with self.lock:
|
|
589
|
-
message = (
|
|
590
|
-
f"{dt.now().isoformat(sep=' ', timespec='milliseconds')}: "
|
|
591
|
-
f"Batch of {self.batch_size} users processed in {duration:.2f} "
|
|
592
|
-
f"seconds. - Users created: {self.logs['created']} - Users updated: "
|
|
593
|
-
f"{self.logs['updated']} - Users failed: {self.logs['failed']}"
|
|
594
|
-
)
|
|
595
|
-
print(message)
|
|
596
|
-
await self.logfile.write(message + "\n")
|
|
597
|
-
tasks = []
|
|
598
|
-
if tasks:
|
|
786
|
+
tasks = []
|
|
787
|
+
for line_number, user in enumerate(openfile):
|
|
788
|
+
tasks.append(self.process_line(user, line_number))
|
|
789
|
+
if len(tasks) == self.batch_size:
|
|
599
790
|
start = time.time()
|
|
600
791
|
await asyncio.gather(*tasks)
|
|
601
792
|
duration = time.time() - start
|
|
602
793
|
async with self.lock:
|
|
603
794
|
message = (
|
|
604
795
|
f"{dt.now().isoformat(sep=' ', timespec='milliseconds')}: "
|
|
605
|
-
f"Batch of {self.batch_size} users processed in {duration:.2f}
|
|
606
|
-
f"Users created: {self.logs['created']} - Users updated: "
|
|
796
|
+
f"Batch of {self.batch_size} users processed in {duration:.2f} "
|
|
797
|
+
f"seconds. - Users created: {self.logs['created']} - Users updated: "
|
|
607
798
|
f"{self.logs['updated']} - Users failed: {self.logs['failed']}"
|
|
608
799
|
)
|
|
609
800
|
print(message)
|
|
610
801
|
await self.logfile.write(message + "\n")
|
|
802
|
+
tasks = []
|
|
803
|
+
if tasks:
|
|
804
|
+
start = time.time()
|
|
805
|
+
await asyncio.gather(*tasks)
|
|
806
|
+
duration = time.time() - start
|
|
807
|
+
async with self.lock:
|
|
808
|
+
message = (
|
|
809
|
+
f"{dt.now().isoformat(sep=' ', timespec='milliseconds')}: "
|
|
810
|
+
f"Batch of {len(tasks)} users processed in {duration:.2f} seconds. - "
|
|
811
|
+
f"Users created: {self.logs['created']} - Users updated: "
|
|
812
|
+
f"{self.logs['updated']} - Users failed: {self.logs['failed']}"
|
|
813
|
+
)
|
|
814
|
+
print(message)
|
|
815
|
+
await self.logfile.write(message + "\n")
|
|
611
816
|
|
|
612
817
|
|
|
613
818
|
async def main() -> None:
|
|
@@ -626,6 +831,10 @@ async def main() -> None:
|
|
|
626
831
|
--batch_size (int): How many records to process before logging statistics. Default 250.
|
|
627
832
|
--folio_password (str): The FOLIO password.
|
|
628
833
|
--user_match_key (str): The key to use to match users. Default "externalSystemId".
|
|
834
|
+
--report_file_base_path (str): The base path for the log and error files. Default "./".
|
|
835
|
+
--update_only_present_fields (bool): Only update fields that are present in the new user object.
|
|
836
|
+
--default_preferred_contact_type (str): The default preferred contact type to use if the provided \
|
|
837
|
+
value is not valid or not present. Default "002".
|
|
629
838
|
|
|
630
839
|
Raises:
|
|
631
840
|
Exception: If an unknown error occurs during the import process.
|
|
@@ -663,11 +872,26 @@ async def main() -> None:
|
|
|
663
872
|
choices=["externalSystemId", "barcode", "username"],
|
|
664
873
|
default="externalSystemId",
|
|
665
874
|
)
|
|
875
|
+
parser.add_argument(
|
|
876
|
+
"--report_file_base_path",
|
|
877
|
+
help="The base path for the log and error files",
|
|
878
|
+
default="./",
|
|
879
|
+
)
|
|
666
880
|
parser.add_argument(
|
|
667
881
|
"--update_only_present_fields",
|
|
668
882
|
help="Only update fields that are present in the user object",
|
|
669
883
|
action="store_true",
|
|
670
884
|
)
|
|
885
|
+
parser.add_argument(
|
|
886
|
+
"--default_preferred_contact_type",
|
|
887
|
+
help=(
|
|
888
|
+
"The default preferred contact type to use if the provided value is not present or not valid. "
|
|
889
|
+
"Note: '002' is the default, and will be used if the provided value is not valid or not present, "
|
|
890
|
+
"unless the existing user object being updated has a valid preferred contact type set."
|
|
891
|
+
),
|
|
892
|
+
choices=list(PREFERRED_CONTACT_TYPES_MAP.keys()) + list(PREFERRED_CONTACT_TYPES_MAP.values()),
|
|
893
|
+
default="002",
|
|
894
|
+
)
|
|
671
895
|
args = parser.parse_args()
|
|
672
896
|
|
|
673
897
|
library_name = args.library_name
|
|
@@ -692,13 +916,13 @@ async def main() -> None:
|
|
|
692
916
|
folio_client.okapi_headers["x-okapi-tenant"] = args.member_tenant_id
|
|
693
917
|
|
|
694
918
|
user_file_path = Path(args.user_file_path)
|
|
919
|
+
report_file_base_path = Path(args.report_file_base_path)
|
|
695
920
|
log_file_path = (
|
|
696
|
-
|
|
697
|
-
/ "reports"
|
|
921
|
+
report_file_base_path
|
|
698
922
|
/ f"log_user_import_{dt.now(utc).strftime('%Y%m%d_%H%M%S')}.log"
|
|
699
923
|
)
|
|
700
924
|
error_file_path = (
|
|
701
|
-
|
|
925
|
+
report_file_base_path
|
|
702
926
|
/ f"failed_user_import_{dt.now(utc).strftime('%Y%m%d_%H%M%S')}.txt"
|
|
703
927
|
)
|
|
704
928
|
async with aiofiles.open(
|
|
@@ -719,6 +943,7 @@ async def main() -> None:
|
|
|
719
943
|
http_client,
|
|
720
944
|
args.user_match_key,
|
|
721
945
|
args.update_only_present_fields,
|
|
946
|
+
args.default_preferred_contact_type,
|
|
722
947
|
)
|
|
723
948
|
await importer.do_import()
|
|
724
949
|
except Exception as ee:
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from ._preprocessors import prepend_ppn_prefix_001, strip_999_ff_fields
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import pymarc
|
|
2
|
+
|
|
3
|
+
def prepend_ppn_prefix_001(record: pymarc.Record) -> pymarc.Record:
|
|
4
|
+
"""
|
|
5
|
+
Prepend the PPN prefix to the record's 001 field. Useful when
|
|
6
|
+
importing records from the ABES SUDOC catalog
|
|
7
|
+
|
|
8
|
+
Args:
|
|
9
|
+
record (pymarc.Record): The MARC record to preprocess.
|
|
10
|
+
|
|
11
|
+
Returns:
|
|
12
|
+
pymarc.Record: The preprocessed MARC record.
|
|
13
|
+
"""
|
|
14
|
+
record['001'].data = '(PPN)' + record['001'].data
|
|
15
|
+
return record
|
|
16
|
+
|
|
17
|
+
def strip_999_ff_fields(record: pymarc.Record) -> pymarc.Record:
|
|
18
|
+
"""
|
|
19
|
+
Strip all 999 fields with ff indicators from the record.
|
|
20
|
+
Useful when importing records exported from another FOLIO system
|
|
21
|
+
|
|
22
|
+
Args:
|
|
23
|
+
record (pymarc.Record): The MARC record to preprocess.
|
|
24
|
+
|
|
25
|
+
Returns:
|
|
26
|
+
pymarc.Record: The preprocessed MARC record.
|
|
27
|
+
"""
|
|
28
|
+
for field in record.get_fields('999'):
|
|
29
|
+
if field.indicators == pymarc.Indicators(*['f', 'f']):
|
|
30
|
+
record.remove_field(field)
|
|
31
|
+
return record
|
folio_data_import-0.2.4/PKG-INFO
DELETED
|
@@ -1,68 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.1
|
|
2
|
-
Name: folio_data_import
|
|
3
|
-
Version: 0.2.4
|
|
4
|
-
Summary: A python module to interact with the data importing capabilities of the open-source FOLIO ILS
|
|
5
|
-
License: MIT
|
|
6
|
-
Author: Brooks Travis
|
|
7
|
-
Author-email: brooks.travis@gmail.com
|
|
8
|
-
Requires-Python: >=3.9,<4.0
|
|
9
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
-
Classifier: Programming Language :: Python :: 3
|
|
11
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
12
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
-
Requires-Dist: aiofiles (>=24.1.0,<25.0.0)
|
|
16
|
-
Requires-Dist: flake8-bandit (>=4.1.1,<5.0.0)
|
|
17
|
-
Requires-Dist: flake8-black (>=0.3.6,<0.4.0)
|
|
18
|
-
Requires-Dist: flake8-bugbear (>=24.8.19,<25.0.0)
|
|
19
|
-
Requires-Dist: flake8-docstrings (>=1.7.0,<2.0.0)
|
|
20
|
-
Requires-Dist: flake8-isort (>=6.1.1,<7.0.0)
|
|
21
|
-
Requires-Dist: folioclient (>=0.60.5,<0.61.0)
|
|
22
|
-
Requires-Dist: httpx (>=0.23.0,<0.24.0)
|
|
23
|
-
Requires-Dist: inquirer (>=3.4.0,<4.0.0)
|
|
24
|
-
Requires-Dist: pyhumps (>=3.8.0,<4.0.0)
|
|
25
|
-
Requires-Dist: pymarc (>=5.2.2,<6.0.0)
|
|
26
|
-
Requires-Dist: tabulate (>=0.9.0,<0.10.0)
|
|
27
|
-
Requires-Dist: tqdm (>=4.66.5,<5.0.0)
|
|
28
|
-
Description-Content-Type: text/markdown
|
|
29
|
-
|
|
30
|
-
# folio_data_import
|
|
31
|
-
|
|
32
|
-
## Description
|
|
33
|
-
|
|
34
|
-
This project is designed to import data into the FOLIO LSP. It provides a simple and efficient way to import data from various sources using FOLIO's REST APIs.
|
|
35
|
-
|
|
36
|
-
## Features
|
|
37
|
-
|
|
38
|
-
- Import MARC records using FOLIO's Data Import system
|
|
39
|
-
- Import User records using FOLIO's User APIs
|
|
40
|
-
|
|
41
|
-
## Installation
|
|
42
|
-
|
|
43
|
-
## Installation
|
|
44
|
-
|
|
45
|
-
To install the project using Poetry, follow these steps:
|
|
46
|
-
|
|
47
|
-
1. Clone the repository.
|
|
48
|
-
2. Navigate to the project directory: `$ cd /path/to/folio_data_import`.
|
|
49
|
-
3. Install Poetry if you haven't already: `$ pip install poetry`.
|
|
50
|
-
4. Install the project dependencies: `$ poetry install`.
|
|
51
|
-
6. Run the application using Poetry: `$ poetry run python -m folio_data_import --help`.
|
|
52
|
-
|
|
53
|
-
Make sure to activate the virtual environment created by Poetry before running the application.
|
|
54
|
-
|
|
55
|
-
## Usage
|
|
56
|
-
|
|
57
|
-
1. Prepare the data to be imported in the specified format.
|
|
58
|
-
2. Run the application and follow the prompts to import the data.
|
|
59
|
-
3. Monitor the import progress and handle any errors or conflicts that may arise.
|
|
60
|
-
|
|
61
|
-
## Contributing
|
|
62
|
-
|
|
63
|
-
Contributions are welcome! If you have any ideas, suggestions, or bug reports, please open an issue or submit a pull request.
|
|
64
|
-
|
|
65
|
-
## License
|
|
66
|
-
|
|
67
|
-
This project is licensed under the [MIT License](LICENSE).
|
|
68
|
-
|
|
@@ -1,38 +0,0 @@
|
|
|
1
|
-
# folio_data_import
|
|
2
|
-
|
|
3
|
-
## Description
|
|
4
|
-
|
|
5
|
-
This project is designed to import data into the FOLIO LSP. It provides a simple and efficient way to import data from various sources using FOLIO's REST APIs.
|
|
6
|
-
|
|
7
|
-
## Features
|
|
8
|
-
|
|
9
|
-
- Import MARC records using FOLIO's Data Import system
|
|
10
|
-
- Import User records using FOLIO's User APIs
|
|
11
|
-
|
|
12
|
-
## Installation
|
|
13
|
-
|
|
14
|
-
## Installation
|
|
15
|
-
|
|
16
|
-
To install the project using Poetry, follow these steps:
|
|
17
|
-
|
|
18
|
-
1. Clone the repository.
|
|
19
|
-
2. Navigate to the project directory: `$ cd /path/to/folio_data_import`.
|
|
20
|
-
3. Install Poetry if you haven't already: `$ pip install poetry`.
|
|
21
|
-
4. Install the project dependencies: `$ poetry install`.
|
|
22
|
-
6. Run the application using Poetry: `$ poetry run python -m folio_data_import --help`.
|
|
23
|
-
|
|
24
|
-
Make sure to activate the virtual environment created by Poetry before running the application.
|
|
25
|
-
|
|
26
|
-
## Usage
|
|
27
|
-
|
|
28
|
-
1. Prepare the data to be imported in the specified format.
|
|
29
|
-
2. Run the application and follow the prompts to import the data.
|
|
30
|
-
3. Monitor the import progress and handle any errors or conflicts that may arise.
|
|
31
|
-
|
|
32
|
-
## Contributing
|
|
33
|
-
|
|
34
|
-
Contributions are welcome! If you have any ideas, suggestions, or bug reports, please open an issue or submit a pull request.
|
|
35
|
-
|
|
36
|
-
## License
|
|
37
|
-
|
|
38
|
-
This project is licensed under the [MIT License](LICENSE).
|
|
File without changes
|
|
File without changes
|
|
File without changes
|