folio-data-import 0.3.1__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of folio-data-import might be problematic. Click here for more details.

@@ -1,115 +1,12 @@
1
- import argparse
2
- import asyncio
3
- import glob
4
- from getpass import getpass
5
- from pathlib import Path
1
+ import typer
6
2
 
7
- import folioclient
8
- import inquirer
3
+ from folio_data_import.MARCDataImport import main as marc_app
4
+ from folio_data_import.UserImport import main as users_main
9
5
 
10
- from folio_data_import.MARCDataImport import MARCImportJob
11
-
12
-
13
- async def main():
14
- parser = argparse.ArgumentParser()
15
- parser.add_argument(
16
- "--record-type", type=str, help="The record type to import", default="MARC21"
17
- )
18
- parser.add_argument("--gateway_url", type=str, help="The FOLIO API Gateway URL")
19
- parser.add_argument("--tenant_id", type=str, help="The FOLIO tenant ID")
20
- parser.add_argument("--username", type=str, help="The FOLIO username")
21
- parser.add_argument("--password", type=str, help="The FOLIO password", default="")
22
- parser.add_argument(
23
- "--marc_file_path",
24
- type=str,
25
- help="The MARC file (or file glob, using shell globbing syntax) to import",
26
- )
27
- parser.add_argument(
28
- "--import_profile_name",
29
- type=str,
30
- help="The name of the data import job profile to use",
31
- default="",
32
- )
33
- parser.add_argument(
34
- "--batch_size",
35
- type=int,
36
- help="The number of source records to include in a record batch sent to FOLIO.",
37
- default=10,
38
- )
39
- parser.add_argument(
40
- "--batch_delay",
41
- type=float,
42
- help="The number of seconds to wait between record batches.",
43
- default=0.0,
44
- )
45
- parser.add_argument(
46
- "--consolidate",
47
- action="store_true",
48
- help=(
49
- "Consolidate records into a single job. "
50
- "Default is to create a new job for each MARC file."
51
- ),
52
- )
53
- parser.add_argument(
54
- "--no-progress",
55
- action="store_true",
56
- help="Disable progress bars (eg. for running in a CI environment)",
57
- )
58
- args = parser.parse_args()
59
- if not args.password:
60
- args.password = getpass("Enter FOLIO password: ")
61
- folio_client = folioclient.FolioClient(
62
- args.gateway_url, args.tenant_id, args.username, args.password
63
- )
64
- if not args.import_profile_name:
65
- import_profiles = folio_client.folio_get(
66
- "/data-import-profiles/jobProfiles",
67
- "jobProfiles",
68
- query_params={"limit": "1000"},
69
- )
70
- import_profile_names = [
71
- profile["name"]
72
- for profile in import_profiles
73
- if args.record_type.lower() in profile["dataType"].lower()
74
- ]
75
- questions = [
76
- inquirer.List(
77
- "import_profile_name",
78
- message="Select an import profile",
79
- choices=import_profile_names,
80
- )
81
- ]
82
- answers = inquirer.prompt(questions)
83
- args.import_profile_name = answers["import_profile_name"]
84
-
85
- if args.record_type.lower() == "marc21":
86
- marc_files = [Path(x) for x in glob.glob(args.marc_file_path, root_dir="./")]
87
- print(marc_files)
88
- try:
89
- await MARCImportJob(
90
- folio_client,
91
- marc_files,
92
- args.import_profile_name,
93
- batch_size=args.batch_size,
94
- batch_delay=args.batch_delay,
95
- consolidate=bool(args.consolidate),
96
- no_progress=bool(args.no_progress),
97
- ).do_work()
98
- except Exception as e:
99
- print("Error importing files: " + str(e))
100
- raise
101
- elif args.record_type.lower() == "users":
102
- print(
103
- "User import not yet implemented. Run UserImport.py directly "
104
- "or use folio-user-import CLI."
105
- )
106
- else:
107
- print("Record type not supported. Supported types are: MARC21")
108
-
109
-
110
- def sync_main():
111
- asyncio.run(main())
6
+ app = typer.Typer()
112
7
 
8
+ app.command(name="marc")(marc_app)
9
+ app.command(name="users")(users_main)
113
10
 
114
11
  if __name__ == "__main__":
115
- asyncio.run(main())
12
+ app()
@@ -0,0 +1,27 @@
1
+ from rich.progress import ProgressColumn, Task
2
+ from rich.text import Text
3
+
4
+ class ItemsPerSecondColumn(ProgressColumn):
5
+ """Renders the speed in items per second."""
6
+
7
+ def render(self, task: Task) -> Text:
8
+ if task.speed is None:
9
+ return Text("?", style="progress.data.speed")
10
+ return Text(f"{task.speed:.0f}rec/s", style="progress.data.speed")
11
+
12
+ class UserStatsColumn(ProgressColumn):
13
+ def render(self, task: Task) -> Text:
14
+ created = task.fields.get("created", 0)
15
+ updated = task.fields.get("updated", 0)
16
+ failed = task.fields.get("failed", 0)
17
+ created_string = f"Created: {created}"
18
+ updated_string = f"Updated: {updated}"
19
+ failed_string = f"Failed: {failed}"
20
+ text = Text("(")
21
+ text.append(created_string, style="green")
22
+ text.append(" | ")
23
+ text.append(updated_string, style="cyan")
24
+ text.append(" | ")
25
+ text.append(failed_string, style="red")
26
+ text.append(")")
27
+ return text
@@ -3,17 +3,19 @@ import sys
3
3
  from typing import Callable, Dict, List, Tuple, Union
4
4
  import pymarc
5
5
  import logging
6
+ import re
6
7
 
7
8
  from pymarc.record import Record
8
9
 
9
10
  logger = logging.getLogger("folio_data_import.MARCDataImport")
10
11
 
12
+
11
13
  class MARCPreprocessor:
12
14
  """
13
15
  A class to preprocess MARC records for data import into FOLIO.
14
16
  """
15
17
 
16
- def __init__(self, preprocessors: Union[str,List[Callable]], **kwargs):
18
+ def __init__(self, preprocessors: Union[str, List[Callable]], **kwargs):
17
19
  """
18
20
  Initialize the MARCPreprocessor with a list of preprocessors.
19
21
 
@@ -361,7 +363,7 @@ def clean_empty_fields(record: Record, **kwargs) -> Record:
361
363
 
362
364
  for field in record.get_fields(*MAPPED_FIELDS.keys()):
363
365
  len_subs = len(field.subfields)
364
- subfield_value = bool(field.subfields[0].value) if len_subs else False
366
+ subfield_value = bool(re.sub(r"[.,-]", "", field.subfields[0].value).strip()) if len_subs else False
365
367
  if int(field.tag) > 9 and len_subs == 0:
366
368
  logger.log(
367
369
  26,
@@ -415,7 +417,7 @@ def clean_empty_fields(record: Record, **kwargs) -> Record:
415
417
  return record
416
418
 
417
419
 
418
- def fix_leader(record: Record, **kwargs) -> Record:
420
+ def fix_bib_leader(record: Record, **kwargs) -> Record:
419
421
  """
420
422
  Fixes the leader of the record by setting the record status to 'c' (modified
421
423
  record) and the type of record to 'a' (language material).
@@ -448,6 +450,7 @@ def fix_leader(record: Record, **kwargs) -> Record:
448
450
  record.leader = pymarc.Leader(record.leader[:6] + "a" + record.leader[7:])
449
451
  return record
450
452
 
453
+
451
454
  def move_authority_subfield_9_to_0_all_controllable_fields(record: Record, **kwargs) -> Record:
452
455
  """
453
456
  Move subfield 9 from authority fields to subfield 0. This is useful when
@@ -460,15 +463,16 @@ def move_authority_subfield_9_to_0_all_controllable_fields(record: Record, **kwa
460
463
  Record: The preprocessed MARC record.
461
464
  """
462
465
  controlled_fields = [
463
- "100", "110", "111", "130",
464
- "600", "610", "611", "630", "650", "651", "655",
465
- "700", "710", "711", "730",
466
- "800", "810", "811", "830"
467
- ]
466
+ "100", "110", "111", "130",
467
+ "600", "610", "611", "630", "650", "651", "655",
468
+ "700", "710", "711", "730",
469
+ "800", "810", "811", "830", "880"
470
+ ]
468
471
  for field in record.get_fields(*controlled_fields):
469
- for subfield in list(field.get_subfields("9")):
472
+ _subfields = field.get_subfields("9")
473
+ for subfield in _subfields:
470
474
  field.add_subfield("0", subfield)
471
- field.delete_subfield("9", subfield)
475
+ field.delete_subfield("9")
472
476
  logger.log(
473
477
  26,
474
478
  "DATA ISSUE\t%s\t%s\t%s",
@@ -478,6 +482,7 @@ def move_authority_subfield_9_to_0_all_controllable_fields(record: Record, **kwa
478
482
  )
479
483
  return record
480
484
 
485
+
481
486
  def ordinal(n):
482
487
  s = ("th", "st", "nd", "rd") + ("th",) * 10
483
488
  v = n % 100
@@ -1,8 +1,9 @@
1
- Metadata-Version: 2.3
1
+ Metadata-Version: 2.4
2
2
  Name: folio_data_import
3
- Version: 0.3.1
3
+ Version: 0.4.0
4
4
  Summary: A python module to interact with the data importing capabilities of the open-source FOLIO ILS
5
5
  License: MIT
6
+ License-File: LICENSE
6
7
  Author: Brooks Travis
7
8
  Author-email: brooks.travis@gmail.com
8
9
  Requires-Python: >=3.9,<4.0
@@ -13,6 +14,7 @@ Classifier: Programming Language :: Python :: 3.10
13
14
  Classifier: Programming Language :: Python :: 3.11
14
15
  Classifier: Programming Language :: Python :: 3.12
15
16
  Classifier: Programming Language :: Python :: 3.13
17
+ Classifier: Programming Language :: Python :: 3.14
16
18
  Requires-Dist: aiofiles (>=24.1.0,<25.0.0)
17
19
  Requires-Dist: flake8-bandit (>=4.1.1,<5.0.0)
18
20
  Requires-Dist: flake8-black (>=0.3.6,<0.4.0)
@@ -24,7 +26,7 @@ Requires-Dist: inquirer (>=3.4.0,<4.0.0)
24
26
  Requires-Dist: pyhumps (>=3.8.0,<4.0.0)
25
27
  Requires-Dist: pymarc (>=5.2.2,<6.0.0)
26
28
  Requires-Dist: tabulate (>=0.9.0,<0.10.0)
27
- Requires-Dist: tqdm (>=4.66.5,<5.0.0)
29
+ Requires-Dist: typer (>=0.17.4,<0.18.0)
28
30
  Description-Content-Type: text/markdown
29
31
 
30
32
  # folio_data_import
@@ -42,7 +44,16 @@ This project is designed to import data into the FOLIO LSP. It provides a simple
42
44
 
43
45
  ## Installation
44
46
 
45
- To install the project using Poetry, follow these steps:
47
+ Using `pip`
48
+ ```shell
49
+ pip install folio_data_import
50
+ ```
51
+ or `uv pip`
52
+ ```shell
53
+ uv pip install folio_data_import
54
+ ```
55
+
56
+ To install the project from the git repo using Poetry, follow these steps:
46
57
 
47
58
  1. Clone the repository.
48
59
  2. Navigate to the project directory: `$ cd /path/to/folio_data_import`.
@@ -58,8 +69,27 @@ Make sure to activate the virtual environment created by Poetry before running t
58
69
  2. Run the application and follow the prompts to import the data.
59
70
  3. Monitor the import progress and handle any errors or conflicts that may arise.
60
71
 
72
+ ### folio-data-import
73
+ This command provides access to subcommands for importing user and marc data. To import users:
74
+ ```shell
75
+ folio-data-import users --help
76
+ ```
77
+ (for more information, see [folio-user-import](#folio-user-import), below)
78
+
79
+ For MARC data:
80
+ ```shell
81
+ folio-data-import marc --help
82
+ ```
83
+ (for more information, see [folio-marc-import](#folio-marc-import), below)
84
+
85
+ As an added convenience, this script can also install tab-completions for itself in your shell:
86
+ ```shell
87
+ folio-data-import --install-completion
88
+ ```
89
+ Once installed, you can `[tab][tab]` after typing `--` and be presented with a list of available command options.
90
+
61
91
  ### folio-user-import
62
- When this package is installed via PyPI or using `poetry install` from this repository, it installs a convenience script in your `$PATH` called `folio-user-import`. To view all command line options for this script, run `folio-user-import -h`. In addition to supporting `mod-user-import`-style JSON objects, this script also allows you to manage service point assignments for users by specifying a `servicePointsUser` object in the JSON object, using service point codes in place of UUIDs in the `defaultServicePointId` and `servicePointIds` fields:
92
+ When this package is installed via PyPI or using `poetry install` from this repository, it installs a convenience script in your `$PATH` called `folio-user-import`. To view all command line options for this script, run `folio-user-import --help`. In addition to supporting `mod-user-import`-style JSON objects, this script also allows you to manage service point assignments for users by specifying a `servicePointsUser` object in the JSON object, using service point codes in place of UUIDs in the `defaultServicePointId` and `servicePointIds` fields:
63
93
  ```
64
94
  {
65
95
  "username": "checkin-all",
@@ -109,7 +139,7 @@ Unlike mod-user-import, this importer does not require `externalSystemId` as the
109
139
 
110
140
  Another point of departure from the behavior of `mod-user-import` is the handling of `preferredContactTypeId`. This importer will accept either the `"001", "002", "003"...` values stored by FOLIO, or the human-friendly strings used by `mod-user-import` (`"mail", "email", "text", "phone", "mobile"`). It will also __*set a customizable default for all users that do not otherwise have a valid value specified*__ (using `--default_preferred_contact_type`), unless a (valid) value is already present in the user record being updated.
111
141
 
112
- #### Field Protection (*experimental*)
142
+ #### Per-record Field Protection (*experimental*)
113
143
 
114
144
  This script offers a rudimentary field protection implementation using custom fields. To enable this functionality, create a text custom field that has the field name `protectedFields`. In this field, you can specify a comma-separated list of User schema field names, using dot-notation for nested fields. This protection should support all standard fields except addresses within `personal.addresses`. If you include `personal.addresses` in a user record, any existing addresses will be replaced by the new values.
115
145
 
@@ -123,11 +153,32 @@ This script offers a rudimentary field protection implementation using custom fi
123
153
 
124
154
  Would result in `preferredFirstName`, `barcode`, and `telephone` remaining unchanged, regardless of the contents of the incoming records.
125
155
 
156
+ #### Job-level field protection
157
+
158
+ To protect fields for all records in a particular import job, you can pass a list of field paths with the `--fields-to-protect` flag. These protections will be applied in combination with any record-level protections specified.
159
+
160
+ ##### Example
161
+ ```Shell
162
+ folio-user-import ... --fields-to-protect "personal.preferredFirstName,customFields.exampleCustomField"
163
+ ```
126
164
 
127
165
  #### How to use:
128
166
  1. Generate a JSON lines (one JSON object per line) file of FOLIO user objects in the style of [mod-user-import](https://github.com/folio-org/mod-user-import)
129
167
  2. Run the script and specify the required arguments (and any desired optional arguments), including the path to your file of user objects
168
+ 3. Watch the pretty progress bars...
169
+
170
+ ### folio-marc-import
171
+ `folio-marc-import` provides direct access to the MARC import functionality of FOLIO. It can be used to import any file (or files) of binary MARC records via FOLIO's Data Import system using the [change-manager](https://github.com/folio-org/mod-source-record-manager?tab=readme-ov-file#data-import-workflow) APIs directly.
130
172
 
173
+ #### How to use:
174
+ 1. Have a binary MARC21 file (or directory of files)
175
+ 2. Have a [Data Import Job Profile](https://docs.folio.org/docs/metadata/additional-topics/jobprofiles/) that you want to use to import your records already set up in FOLIO
176
+ 3. Run the script and specify the required arguments. (`folio-marc-import --help` for more details)
177
+ 4. Select the job profile you want to use
178
+ 5. Watch the pretty progress bars...
179
+
180
+ #### A note on logging
181
+ The import logs and job summaries provided by FOLIO can be unreliable in certain circumstances. The scripts have been written to balance the need to retrieve job summary information at the end of each job with the need to move on to the next import job. If you don't see a job summary when your job completes, check Data Import in FOLIO (Data Import > Actions > View all logs...).
131
182
 
132
183
  ## Contributing
133
184
 
@@ -0,0 +1,13 @@
1
+ folio_data_import/MARCDataImport.py,sha256=ubTd7ROtqLavSmPNma2pI8ixjFZs7FEp5_oIuD0ONqo,42945
2
+ folio_data_import/UserImport.py,sha256=ic2msCmHsU3cFWPd1edBbC7ltTCYxV8OFCvGd0cBcDQ,43048
3
+ folio_data_import/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ folio_data_import/__main__.py,sha256=asGlYSuLpvNW5cwxcAH03iXtMcP_DpCdwhpYZy01org,269
5
+ folio_data_import/_progress.py,sha256=QsZNHLjjNW3Q_NHIAuLgsigGXnV-COAvxIM6vO2ggnM,1010
6
+ folio_data_import/custom_exceptions.py,sha256=1xw1BI5fW7WDd37zUIOcw0DAvrFKtklnqmbRhZXSAiE,1093
7
+ folio_data_import/marc_preprocessors/__init__.py,sha256=urExfNTQoZsDCtDPcUY9EEC5OFcUihxhYEQkQFVzbMY,30
8
+ folio_data_import/marc_preprocessors/_preprocessors.py,sha256=PuXWWzDLr0AJVS0rFcK5MiP_apSm0ewwTNRd_n8hMgc,16584
9
+ folio_data_import-0.4.0.dist-info/METADATA,sha256=Ecy3voKtb7f647G1MHwvgWOH5LBJYaAHjB7yXGoFlMI,8530
10
+ folio_data_import-0.4.0.dist-info/WHEEL,sha256=M5asmiAlL6HEcOq52Yi5mmk9KmTVjY2RDPtO4p9DMrc,88
11
+ folio_data_import-0.4.0.dist-info/entry_points.txt,sha256=jRCg5w4I8s4qYRexIJJeVLSNZ142WVTn25tM84MetxM,174
12
+ folio_data_import-0.4.0.dist-info/licenses/LICENSE,sha256=qJX7wxMC7ky9Kq4v3zij8MjGEiC5wsB7pYeOhLj5TDk,1083
13
+ folio_data_import-0.4.0.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 2.1.3
2
+ Generator: poetry-core 2.2.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
@@ -0,0 +1,5 @@
1
+ [console_scripts]
2
+ folio-data-import=folio_data_import.__main__:app
3
+ folio-marc-import=folio_data_import.MARCDataImport:app
4
+ folio-user-import=folio_data_import.UserImport:app
5
+
@@ -1,12 +0,0 @@
1
- folio_data_import/MARCDataImport.py,sha256=Qfpc3NtlQe4VUobXsFhmSnScBhiNRGEzBN7FZrdsBYc,37633
2
- folio_data_import/UserImport.py,sha256=4Bn_Z1xX5DvnodscW9NgGeBZgvFeShVoIbxDGrt6BMo,41748
3
- folio_data_import/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- folio_data_import/__main__.py,sha256=kav_uUsnrIjGjVxQkk3exLKrc1mah9t2x3G6bGS-5I0,3710
5
- folio_data_import/custom_exceptions.py,sha256=1xw1BI5fW7WDd37zUIOcw0DAvrFKtklnqmbRhZXSAiE,1093
6
- folio_data_import/marc_preprocessors/__init__.py,sha256=urExfNTQoZsDCtDPcUY9EEC5OFcUihxhYEQkQFVzbMY,30
7
- folio_data_import/marc_preprocessors/_preprocessors.py,sha256=5JnxCoLDsZKEwrAazTXrI1b0CNthvhcTEKgQZILIjVk,16533
8
- folio_data_import-0.3.1.dist-info/LICENSE,sha256=qJX7wxMC7ky9Kq4v3zij8MjGEiC5wsB7pYeOhLj5TDk,1083
9
- folio_data_import-0.3.1.dist-info/METADATA,sha256=VsbdMezeVPB5jyYAB6o-9k6fhfgnEa5i_T2GphBOtzg,6069
10
- folio_data_import-0.3.1.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
11
- folio_data_import-0.3.1.dist-info/entry_points.txt,sha256=498SxWVXeEMRNw3PUf-eoReZvKewmYwPBtZhIUPr_Jg,192
12
- folio_data_import-0.3.1.dist-info/RECORD,,
@@ -1,5 +0,0 @@
1
- [console_scripts]
2
- folio-data-import=folio_data_import.__main__:sync_main
3
- folio-marc-import=folio_data_import.MARCDataImport:sync_main
4
- folio-user-import=folio_data_import.UserImport:sync_main
5
-