folio_data_import-0.5.0b3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- folio_data_import/BatchPoster.py +1265 -0
- folio_data_import/MARCDataImport.py +1252 -0
- folio_data_import/UserImport.py +1270 -0
- folio_data_import/__init__.py +31 -0
- folio_data_import/__main__.py +14 -0
- folio_data_import/_progress.py +737 -0
- folio_data_import/custom_exceptions.py +35 -0
- folio_data_import/marc_preprocessors/__init__.py +29 -0
- folio_data_import/marc_preprocessors/_preprocessors.py +517 -0
- folio_data_import-0.5.0b3.dist-info/METADATA +467 -0
- folio_data_import-0.5.0b3.dist-info/RECORD +13 -0
- folio_data_import-0.5.0b3.dist-info/WHEEL +4 -0
- folio_data_import-0.5.0b3.dist-info/entry_points.txt +6 -0
@@ -0,0 +1,1265 @@
+"""
+BatchPoster module for FOLIO inventory batch operations.
+
+This module provides functionality for batch posting of Instances, Holdings, and Items
+to FOLIO's inventory storage endpoints with support for upsert operations.
+"""
+
+import asyncio
+import glob as glob_module
+import json
+import logging
+import sys
+from datetime import datetime as dt
+from io import TextIOWrapper
+from pathlib import Path
+from typing import Annotated, Any, Dict, Generator, List, Literal, Union
+
+import cyclopts
+import folioclient
+from folioclient import FolioClient
+import httpx
+from pydantic import BaseModel, Field
+from rich.logging import RichHandler
+
+from folio_data_import import get_folio_connection_parameters
+from folio_data_import._progress import (
+    RichProgressReporter,
+    ProgressReporter,
+    NoOpProgressReporter,
+)
+
+logger = logging.getLogger(__name__)
+
+
+class BatchPosterStats(BaseModel):
+    """Statistics for batch posting operations."""
+
+    records_processed: int = 0
+    records_posted: int = 0
+    records_created: int = 0
+    records_updated: int = 0
+    records_failed: int = 0
+    batches_posted: int = 0
+    batches_failed: int = 0
+
+
+def get_api_info(object_type: str) -> Dict[str, Any]:
+    """
+    Get API endpoint information for a given object type.
+
+    Args:
+        object_type: The type of object (Instances, Holdings, Items)
+
+    Returns:
+        Dictionary containing API endpoint information
+
+    Raises:
+        ValueError: If object_type is not supported
+    """
+    api_info = {
+        "Items": {
+            "object_name": "items",
+            "api_endpoint": "/item-storage/batch/synchronous",
+            "query_endpoint": "/item-storage/items",
+            "is_batch": True,
+            "supports_upsert": True,
+        },
+        "Holdings": {
+            "object_name": "holdingsRecords",
+            "api_endpoint": "/holdings-storage/batch/synchronous",
+            "query_endpoint": "/holdings-storage/holdings",
+            "is_batch": True,
+            "supports_upsert": True,
+        },
+        "Instances": {
+            "object_name": "instances",
+            "api_endpoint": "/instance-storage/batch/synchronous",
+            "query_endpoint": "/instance-storage/instances",
+            "is_batch": True,
+            "supports_upsert": True,
+        },
+    }
+
+    if object_type not in api_info:
+        raise ValueError(
+            f"Unsupported object type: {object_type}. "
+            f"Supported types: {', '.join(api_info.keys())}"
+        )
+
+    return api_info[object_type]
+
+
+def deep_update(target: dict, source: dict) -> None:
+    """
+    Recursively update target dictionary with values from source dictionary.
+
+    Args:
+        target: The dictionary to update
+        source: The dictionary to merge into target
+    """
+    for key, value in source.items():
+        if isinstance(value, dict) and key in target and isinstance(target[key], dict):
+            deep_update(target[key], value)
+        else:
+            target[key] = value
+
+
+def extract_paths(record: dict, paths: List[str]) -> dict:
+    """
+    Extract specified paths from a record.
+
+    Args:
+        record: The record to extract from
+        paths: List of JSON paths to extract (e.g., ['statisticalCodeIds', 'status'])
+
+    Returns:
+        Dictionary containing only the specified paths
+    """
+    result = {}
+    for path in paths:
+        if path in record:
+            result[path] = record[path]
+    return result
+
+
+class BatchPoster:
+    """
+    Handles batch posting of inventory records to FOLIO.
+
+    This class provides functionality for posting Instances, Holdings, and Items
+    to FOLIO's batch inventory endpoints with support for upsert operations.
+    """
+
+    class Config(BaseModel):
+        """Configuration for BatchPoster operations."""
+
+        object_type: Annotated[
+            Literal["Instances", "Holdings", "Items"],
+            Field(
+                title="Object type",
+                description="The type of inventory object to post: Instances, Holdings, or Items",
+            ),
+        ]
+        batch_size: Annotated[
+            int,
+            Field(
+                title="Batch size",
+                description="Number of records to include in each batch (1-1000)",
+            ),
+        ] = 1
+        upsert: Annotated[
+            bool,
+            Field(
+                title="Upsert",
+                description=(
+                    "Enable upsert mode to create new records or update existing ones. "
+                    "When enabled, records with matching IDs will be updated instead "
+                    "of causing errors."
+                ),
+            ),
+        ] = False
+        preserve_statistical_codes: Annotated[
+            bool,
+            Field(
+                title="Preserve statistical codes",
+                description=(
+                    "Preserve existing statistical codes during upsert. "
+                    "When enabled, statistical codes from existing records will be retained "
+                    "and merged with new codes."
+                ),
+            ),
+        ] = False
+        preserve_administrative_notes: Annotated[
+            bool,
+            Field(
+                title="Preserve administrative notes",
+                description=(
+                    "Preserve existing administrative notes during upsert. "
+                    "When enabled, administrative notes from existing records will be retained "
+                    "and merged with new notes."
+                ),
+            ),
+        ] = False
+        preserve_temporary_locations: Annotated[
+            bool,
+            Field(
+                title="Preserve temporary locations",
+                description=(
+                    "Preserve temporary location assignments on items during upsert. "
+                    "Only applicable when object_type is 'Items'."
+                ),
+            ),
+        ] = False
+        preserve_temporary_loan_types: Annotated[
+            bool,
+            Field(
+                title="Preserve temporary loan types",
+                description=(
+                    "Preserve temporary loan type assignments on items during upsert. "
+                    "Only applicable when object_type is 'Items'."
+                ),
+            ),
+        ] = False
+        preserve_item_status: Annotated[
+            bool,
+            Field(
+                title="Preserve item status",
+                description=(
+                    "Preserve item status during upsert. When enabled, the status "
+                    "field from existing records will be retained. Only applicable "
+                    "when object_type is 'Items'."
+                ),
+            ),
+        ] = True
+        patch_existing_records: Annotated[
+            bool,
+            Field(
+                title="Patch existing records",
+                description=(
+                    "Enable selective field patching during upsert. When enabled, only fields "
+                    "specified in patch_paths will be updated, preserving all other fields."
+                ),
+            ),
+        ] = False
+        patch_paths: Annotated[
+            List[str] | None,
+            Field(
+                title="Patch paths",
+                description=(
+                    "List of field paths to patch during upsert "
+                    "(e.g., ['barcode', 'status']). "
+                    "If empty and patch_existing_records is True, all fields "
+                    "will be patched. Use this to selectively update only "
+                    "specific fields while preserving others."
+                ),
+            ),
+        ] = None
+        no_progress: Annotated[
+            bool,
+            Field(
+                title="No progress bar",
+                description="Disable the progress bar display (e.g., for CI environments)",
+            ),
+        ] = False
+
+    def __init__(
+        self,
+        folio_client: FolioClient,
+        config: "BatchPoster.Config",
+        failed_records_file=None,
+        reporter: ProgressReporter | None = None,
+    ):
+        """
+        Initialize BatchPoster.
+
+        Args:
+            folio_client: Authenticated FOLIO client
+            config: Configuration for batch posting
+            failed_records_file: Optional file handle or path for writing failed records.
+                Can be an open file handle (managed by caller) or a string/Path
+                (will be opened/closed by BatchPoster).
+            reporter: Optional progress reporter. If None, uses NoOpProgressReporter.
+        """
+        self.folio_client = folio_client
+        self.config = config
+        self.reporter = reporter or NoOpProgressReporter()
+        self.api_info = get_api_info(config.object_type)
+        self.stats = BatchPosterStats()
+
+        # Handle failed records file
+        self._failed_records_file_handle: TextIOWrapper | None = None
+        self._failed_records_path: Path | None = None
+        self._owns_file_handle = False
+
+        if failed_records_file:
+            if hasattr(failed_records_file, "write"):
+                # It's a file handle - use it but don't close it
+                self._failed_records_file_handle = failed_records_file
+                self._owns_file_handle = False
+            else:
+                # It's a path - we'll open and manage it
+                self._failed_records_path = Path(failed_records_file)
+                self._owns_file_handle = True
+
+        # Validate upsert configuration
+        if config.upsert and not self.api_info["supports_upsert"]:
+            raise ValueError(f"Upsert is not supported for {config.object_type}")
+
+    async def __aenter__(self):
+        """Async context manager entry."""
+        # Open the file if we own it and it's not already open
+        if (
+            self._owns_file_handle
+            and self._failed_records_path
+            and not self._failed_records_file_handle
+        ):
+            self._failed_records_file_handle = open(
+                self._failed_records_path, "w", encoding="utf-8"
+            )
+            logger.info(f"Opened failed records file: {self._failed_records_path}")
+        return self
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        """Async context manager exit."""
+        # Only close the file if we opened it
+        if self._owns_file_handle and self._failed_records_file_handle:
+            self._failed_records_file_handle.close()
+            if self._failed_records_path:
+                logger.info(
+                    f"Wrote {self.stats.records_failed} failed records "
+                    f"to {self._failed_records_path}"
+                )
+            self._failed_records_file_handle = None
+
+    def _write_failed_record(self, record: dict) -> None:
+        """
+        Write a single failed record to the file immediately.
+
+        Args:
+            record: The record that failed to post
+        """
+        if self._failed_records_file_handle:
+            self._failed_records_file_handle.write(json.dumps(record) + "\n")
+            self._failed_records_file_handle.flush()  # Ensure it's written immediately
+
+    def _write_failed_batch(self, batch: List[dict]) -> None:
+        """
+        Write a batch of failed records to the file immediately.
+
+        Args:
+            batch: List of records that failed to post
+        """
+        if self._failed_records_file_handle:
+            for record in batch:
+                self._failed_records_file_handle.write(json.dumps(record) + "\n")
+            self._failed_records_file_handle.flush()  # Ensure they're written immediately
+
+    def handle_upsert_for_statistical_codes(self, updates: dict, keep_existing: dict) -> None:
+        """
+        Handle statistical codes during upsert based on configuration.
+
+        Args:
+            updates: Dictionary being prepared for update
+            keep_existing: Dictionary of fields to preserve from existing record
+        """
+        if not self.config.preserve_statistical_codes:
+            updates["statisticalCodeIds"] = []
+            keep_existing["statisticalCodeIds"] = []
+        else:
+            keep_existing["statisticalCodeIds"] = updates.pop("statisticalCodeIds", [])
+            updates["statisticalCodeIds"] = []
+
+    def handle_upsert_for_administrative_notes(self, updates: dict, keep_existing: dict) -> None:
+        """
+        Handle administrative notes during upsert based on configuration.
+
+        Args:
+            updates: Dictionary being prepared for update
+            keep_existing: Dictionary of fields to preserve from existing record
+        """
+        if not self.config.preserve_administrative_notes:
+            updates["administrativeNotes"] = []
+            keep_existing["administrativeNotes"] = []
+        else:
+            keep_existing["administrativeNotes"] = updates.pop("administrativeNotes", [])
+            updates["administrativeNotes"] = []
+
+    def handle_upsert_for_temporary_locations(self, updates: dict, keep_existing: dict) -> None:
+        """
+        Handle temporary locations during upsert based on configuration.
+
+        Args:
+            updates: Dictionary being prepared for update
+            keep_existing: Dictionary of fields to preserve from existing record
+        """
+        if self.config.preserve_temporary_locations:
+            keep_existing["temporaryLocationId"] = updates.pop("temporaryLocationId", None)
+
+    def handle_upsert_for_temporary_loan_types(self, updates: dict, keep_existing: dict) -> None:
+        """
+        Handle temporary loan types during upsert based on configuration.
+
+        Args:
+            updates: Dictionary being prepared for update
+            keep_existing: Dictionary of fields to preserve from existing record
+        """
+        if self.config.preserve_temporary_loan_types:
+            keep_existing["temporaryLoanTypeId"] = updates.pop("temporaryLoanTypeId", None)
+
+    def keep_existing_fields(self, updates: dict, existing_record: dict) -> None:
+        """
+        Preserve specific fields from existing record during upsert.
+
+        Args:
+            updates: Dictionary being prepared for update
+            existing_record: The existing record in FOLIO
+        """
+        if self.config.preserve_item_status and "status" in existing_record:
+            updates["status"] = existing_record["status"]
+
+    def patch_record(
+        self, new_record: dict, existing_record: dict, patch_paths: List[str]
+    ) -> None:
+        """
+        Update new_record with values from existing_record according to patch_paths.
+
+        Args:
+            new_record: The new record to be updated
+            existing_record: The existing record to patch from
+            patch_paths: List of fields in JSON Path notation to patch during upsert
+        """
+        updates = {}
+        updates.update(existing_record)
+        keep_existing: Dict[str, Any] = {}
+
+        # Handle special field preservation rules
+        self.handle_upsert_for_administrative_notes(updates, keep_existing)
+        self.handle_upsert_for_statistical_codes(updates, keep_existing)
+
+        if self.config.object_type == "Items":
+            self.handle_upsert_for_temporary_locations(updates, keep_existing)
+            self.handle_upsert_for_temporary_loan_types(updates, keep_existing)
+
+        # Determine which fields to keep from new record
+        if not patch_paths:
+            keep_new = new_record
+        else:
+            keep_new = extract_paths(new_record, patch_paths)
+
+        # Special handling for instance status
+        if "instanceStatusId" in new_record:
+            updates["instanceStatusId"] = new_record["instanceStatusId"]
+
+        # Merge the updates
+        deep_update(updates, keep_new)
+
+        # Merge arrays from keep_existing, avoiding duplicates
+        for key, value in keep_existing.items():
+            if isinstance(value, list) and key in keep_new:
+                # Combine arrays and remove duplicates
+                updates[key] = list(dict.fromkeys(updates.get(key, []) + value))
+            elif key not in keep_new:
+                updates[key] = value
+
+        # Apply item-specific preservation
+        if self.config.object_type == "Items":
+            self.keep_existing_fields(updates, existing_record)
+
+        # Update the new_record in place
+        new_record.clear()
+        new_record.update(updates)
+
+    def prepare_record_for_upsert(self, new_record: dict, existing_record: dict) -> None:
+        """
+        Prepare a record for upsert by adding version and patching fields.
+
+        Args:
+            new_record: The new record to prepare
+            existing_record: The existing record in FOLIO
+        """
+        # Set the version for optimistic locking
+        new_record["_version"] = existing_record.get("_version", 1)
+
+        # Apply patching if configured
+        if self.config.patch_existing_records:
+            self.patch_record(new_record, existing_record, self.config.patch_paths or [])
+
+    async def fetch_existing_records(self, record_ids: List[str]) -> Dict[str, dict]:
+        """
+        Fetch existing records from FOLIO by their IDs.
+
+        Args:
+            record_ids: List of record IDs to fetch
+
+        Returns:
+            Dictionary mapping record IDs to their full records
+        """
+        existing_records: Dict[str, dict] = {}
+        query_endpoint = self.api_info["query_endpoint"]
+        object_name = self.api_info["object_name"]
+
+        # Fetch in batches of 90 (FOLIO CQL limit for OR queries)
+        fetch_batch_size = 90
+
+        async def fetch_batch(batch_ids: List[str]) -> dict:
+            query = f"id==({' OR '.join(batch_ids)})"
+            params = {"query": query, "limit": fetch_batch_size}
+            try:
+                return await self.folio_client.folio_get_async(
+                    query_endpoint, key=object_name, query_params=params
+                )
+            except folioclient.FolioClientError as e:
+                logger.error(f"FOLIO client error fetching existing records: {e}")
+                raise
+            except folioclient.FolioConnectionError as e:
+                logger.error(f"FOLIO connection error fetching existing records: {e}")
+                raise
+            except Exception as e:
+                logger.error(f"Failed to fetch existing records: {e}")
+                raise
+
+        # Create tasks for all batches
+        tasks = []
+        for i in range(0, len(record_ids), fetch_batch_size):
+            batch_slice = record_ids[i : i + fetch_batch_size]
+            tasks.append(fetch_batch(batch_slice))
+
+        # Fetch all batches concurrently
+        results = await asyncio.gather(*tasks)
+
+        # Process results
+        for result in results:
+            if isinstance(result, list):
+                for record in result:
+                    existing_records[record["id"]] = record
+
+        return existing_records
+
+    async def set_versions_for_upsert(self, batch: List[dict]) -> None:
+        """
+        Fetch existing record versions and prepare batch for upsert.
+
+        Only records that already exist in FOLIO will have their _version set
+        and be prepared for update. New records will not have _version set.
+
+        Args:
+            batch: List of records to prepare for upsert
+        """
+        # Extract record IDs
+        record_ids = [record["id"] for record in batch if "id" in record]
+
+        if not record_ids:
+            return
+
+        # Fetch existing records
+        existing_records = await self.fetch_existing_records(record_ids)
+
+        # Only prepare records that already exist
+        for record in batch:
+            if "id" in record and record["id"] in existing_records:
+                self.prepare_record_for_upsert(record, existing_records[record["id"]])
+
+    async def post_batch(self, batch: List[dict]) -> tuple[httpx.Response, int, int]:
+        """
+        Post a batch of records to FOLIO.
+
+        Args:
+            batch: List of records to post
+
+        Returns:
+            Tuple of (response data dict, number of creates, number of updates)
+
+        Raises:
+            folioclient.FolioClientError: If FOLIO API returns an error
+            folioclient.FolioConnectionError: If connection to FOLIO fails
+        """
+        # Track creates vs updates before posting
+        num_creates = 0
+        num_updates = 0
+
+        # If upsert mode, set versions and track which are updates
+        if self.config.upsert:
+            await self.set_versions_for_upsert(batch)
+            # Count records with _version as updates, others as creates
+            for record in batch:
+                if "_version" in record:
+                    num_updates += 1
+                else:
+                    num_creates += 1
+        else:
+            # In create-only mode, all are creates
+            num_creates = len(batch)
+
+        # Prepare payload
+        object_name = self.api_info["object_name"]
+        payload = {object_name: batch}
+
+        # Prepare query parameters
+        query_params = {}
+        if self.config.upsert:
+            query_params["upsert"] = "true"
+
+        # Make the request
+        api_endpoint = self.api_info["api_endpoint"]
+
+        response_data = await self.folio_client.async_httpx_client.post(
+            api_endpoint, json=payload, params=query_params
+        )
+        response_data.raise_for_status()
+        logger.info(
+            (
+                "Posting successful! Total rows: %s Total failed: %s "
+                "in %ss "
+                "Batch Size: %s Request size: %s "
+            ),
+            self.stats.records_processed,
+            self.stats.records_failed,
+            response_data.elapsed.total_seconds(),
+            len(batch),
+            get_req_size(response_data),
+        )
+        self.stats.records_posted += len(batch)
+        self.stats.batches_posted += 1
+
+        return response_data, num_creates, num_updates
+
+    async def post_records(self, records) -> None:
+        """
+        Post records in batches.
+
+        Failed records will be written to the file handle provided during initialization.
+
+        Args:
+            records: Records to post. Can be:
+                - List of dict records
+                - File-like object containing JSON lines (one record per line)
+                - String/Path to a file containing JSON lines
+        """
+        # Normalize input to an iterator
+        if isinstance(records, (str, Path)):
+            # It's a file path
+            record_iterator = self._read_records_from_path(records)
+        elif hasattr(records, "read"):
+            # It's a file-like object
+            record_iterator = self._read_records_from_file_handle(records)
+        elif isinstance(records, list):
+            # It's already a list - wrap in a generator
+            record_iterator = iter(records)
+        else:
+            raise TypeError(
+                f"records must be a list, file path, or file-like object, got {type(records)}"
+            )
+
+        # Process records in batches
+        batch = []
+        for record in record_iterator:
+            batch.append(record)
+
+            # Post when batch is full
+            if len(batch) >= self.config.batch_size:
+                await self._post_single_batch(batch)
+                batch = []
+
+        # Post any remaining records
+        if batch:
+            await self._post_single_batch(batch)
+
+    def _read_records_from_path(self, file_path: Union[str, Path]) -> Generator[dict, None, None]:
+        """
+        Generator that yields records from a file path.
+
+        Args:
+            file_path: Path to file containing JSON lines
+
+        Yields:
+            Parsed record dictionaries
+        """
+        file_path = Path(file_path)
+        if not file_path.exists():
+            raise FileNotFoundError(f"Input file not found: {file_path}")
+
+        logger.info(f"Reading records from {file_path}")
+
+        with open(file_path, "r", encoding="utf-8") as f:
+            yield from self._read_records_from_file_handle(f)
+
+    def _read_records_from_file_handle(self, file_handle) -> Generator[dict, None, None]:
+        """
+        Generator that yields records from a file handle.
+
+        If a line cannot be parsed as JSON, writes the problematic line and all
+        remaining lines to the failed records file (if configured) before raising
+        an exception.
+
+        Args:
+            file_handle: File-like object containing JSON lines
+
+        Yields:
+            Parsed record dictionaries
+
+        Raises:
+            ValueError: If a line cannot be parsed as JSON
+        """
+        for line_number, original_line in enumerate(file_handle, start=1):
+            line = original_line.strip()
+            if not line:
+                continue
+
+            try:
+                record = self._parse_json_line(line, line_number)
+                yield record
+            except ValueError:
+                # Write the failed line to failed records file
+                if self._failed_records_file_handle:
+                    self._failed_records_file_handle.write(original_line)
+                    # Write all remaining lines as-is
+                    for remaining_line in file_handle:
+                        self._failed_records_file_handle.write(remaining_line)
+
+                    self._failed_records_file_handle.flush()
+
+                # Re-raise the exception
+                raise
+
+    async def _post_single_batch(self, batch: List[dict]) -> None:
+        """
+        Post a single batch with error handling.
+
+        Args:
+            batch: List of records to post
+        """
+        self.stats.records_processed += len(batch)
+
+        try:
+            _, num_creates, num_updates = await self.post_batch(batch)
+
+            # Success - update stats
+            self.stats.records_created += num_creates
+            self.stats.records_updated += num_updates
+            # Update progress bar if available
+            if hasattr(self, "reporter") and hasattr(self, "task_id"):
+                self.reporter.update_task(
+                    self.task_id,
+                    advance=len(batch),
+                    posted=self.stats.records_posted,
+                    created=self.stats.records_created,
+                    updated=self.stats.records_updated,
+                    failed=self.stats.records_failed,
+                )
+
+        except folioclient.FolioClientError as e:
+            logger.error(f"Batch failed: {e} - {e.response.text}")
+            self.stats.records_failed += len(batch)
+            self._write_failed_batch(batch)
+
+            # Update progress bar if available
+            if hasattr(self, "reporter") and hasattr(self, "task_id"):
+                self.reporter.update_task(
+                    self.task_id,
+                    advance=len(batch),
+                    posted=self.stats.records_posted,
+                    created=self.stats.records_created,
+                    updated=self.stats.records_updated,
+                    failed=self.stats.records_failed,
+                )
+
+        except folioclient.FolioConnectionError as e:
+            logger.error(f"Batch failed due to connection error: {e}")
+            self.stats.records_failed += len(batch)
+            self._write_failed_batch(batch)
+
+            # Update progress bar if available
+            if hasattr(self, "reporter") and hasattr(self, "task_id"):
+                self.reporter.update_task(
+                    self.task_id,
+                    advance=len(batch),
+                    posted=self.stats.records_posted,
+                    created=self.stats.records_created,
+                    updated=self.stats.records_updated,
+                    failed=self.stats.records_failed,
+                )
+
+        except Exception as e:
+            logger.error(f"Unexpected error during batch post: {e}")
+            if hasattr(e, "request"):
+                logger.debug(f"DEBUG: {e.request}, {e.request.content}")
+            self.stats.records_failed += len(batch)
+            self._write_failed_batch(batch)
+
+            # Update progress bar if available
+            if hasattr(self, "reporter") and hasattr(self, "task_id"):
+                self.reporter.update_task(
+                    self.task_id,
+                    advance=len(batch),
+                    posted=self.stats.records_posted,
+                    created=self.stats.records_created,
+                    updated=self.stats.records_updated,
+                    failed=self.stats.records_failed,
+                )
+
+    def _parse_json_line(self, line: str, line_number: int) -> dict:
+        """
+        Parse a JSON line, handling both plain and tab-delimited formats.
+
+        Args:
+            line: Line to parse
+            line_number: Line number for error reporting
+
+        Returns:
+            Parsed record dictionary
+
+        Raises:
+            ValueError: If the line cannot be parsed as JSON
+        """
+        try:
+            # Handle both plain JSON and tab-delimited format
+            # (tab-delimited: last field is the JSON)
+            json_str = line.split("\t")[-1] if "\t" in line else line
+            return json.loads(json_str)
+        except json.JSONDecodeError as e:
+            raise ValueError(
+                f"Invalid JSON at line {line_number}: {e}. Line content: {line[:100]}"
+            ) from e
+        except Exception as e:
+            raise ValueError(f"Error processing line {line_number}: {e}") from e
+
+    async def do_work(
+        self,
+        file_paths: Union[str, Path, List[Union[str, Path]]],
+    ) -> BatchPosterStats:
+        """
+        Main orchestration method for processing files.
+
+        This is the primary entry point for batch posting from files. It handles:
+        - Single or multiple file processing
+        - Progress tracking and logging
+        - Failed record collection
+        - Statistics reporting
+
+        Mimics the folio_migration_tools BatchPoster.do_work() workflow.
+
+        Note: To write failed records, pass a file handle or path to the
+        BatchPoster constructor's `failed_records_file` parameter.
+
+        Args:
+            file_paths: Path(s) to JSONL file(s) to process
+
+        Returns:
+            Final statistics from the posting operation
+
+        Example:
+            ```python
+            config = BatchPosterConfig(
+                object_type="Items",
+                batch_size=100,
+                upsert=True
+            )
+
+            reporter = RichProgressReporter(enabled=True)
+
+            # With failed records file
+            with open("failed_items.jsonl", "w") as failed_file:
+                poster = BatchPoster(folio_client, config, failed_records_file=failed_file, reporter=reporter)
+                async with poster:
+                    stats = await poster.do_work(["items1.jsonl", "items2.jsonl"])
+
+            # Or let BatchPoster manage the file
+            poster = BatchPoster(folio_client, config, failed_records_file="failed_items.jsonl", reporter=reporter)
+            async with poster:
+                stats = await poster.do_work("items.jsonl")
+
+            print(f"Posted: {stats.records_posted}, Failed: {stats.records_failed}")
+            ```
+        """  # noqa: E501
+        # Reset statistics
+        self.stats = BatchPosterStats()
+
+        # Normalize file_paths to list
+        if isinstance(file_paths, (str, Path)):
+            files_to_process = [Path(file_paths)]
+        else:
+            files_to_process = [Path(p) for p in file_paths]
+
+        # Log start
+        logger.info(
+            "Starting batch posting of %d file(s) with batch_size=%d",
+            len(files_to_process),
+            self.config.batch_size,
+        )
+        logger.info("Object type: %s", self.config.object_type)
+        logger.info("Upsert mode: %s", "On" if self.config.upsert else "Off")
+        if self.config.upsert:
+            logger.info(
+                "Preservation settings: statistical_codes=%s, administrative_notes=%s, "
+                "temporary_locations=%s, temporary_loan_types=%s",
+                self.config.preserve_statistical_codes,
+                self.config.preserve_administrative_notes,
+                self.config.preserve_temporary_locations,
+                self.config.preserve_temporary_loan_types,
+            )
+
+        # Count total lines across all files for progress bar
+        total_lines = 0
+        for file_path in files_to_process:
+            with open(file_path, "rb") as f:
+                total_lines += sum(
+                    buf.count(b"\n") for buf in iter(lambda: f.read(1024 * 1024), b"")
+                )
+
+        # Set up progress reporting
+        with self.reporter:
+            self.task_id = self.reporter.start_task(
+                f"posting_{self.config.object_type}",
+                total=total_lines,
+                description=f"Posting {self.config.object_type}",
+            )
+
+            # Process each file
+            for idx, file_path in enumerate(files_to_process, start=1):
+                logger.info(
+                    "Processing file %d of %d: %s",
+                    idx,
+                    len(files_to_process),
+                    file_path.name,
+                )
+
+                try:
+                    await self.post_records(file_path)
+                except Exception as e:
+                    logger.error("Error processing file %s: %s", file_path, e, exc_info=True)
+                    raise
+
+        return self.stats
+
+    def get_stats(self) -> BatchPosterStats:
+        """
+        Get current posting statistics.
+
+        Returns:
+            Current statistics
+        """
+        return self.stats
+
+
+def get_human_readable_size(size: int, precision: int = 2) -> str:
+    """
+    Convert bytes to human-readable format.
+
+    Args:
+        size: Size in bytes
+        precision: Number of decimal places
+
+    Returns:
+        Human-readable size string
+    """
+    suffixes = ["B", "KB", "MB", "GB", "TB"]
+    suffix_index = 0
+    size_float = float(size)
+
+    while size_float >= 1024 and suffix_index < len(suffixes) - 1:
+        suffix_index += 1
+        size_float = size_float / 1024.0
+
+    return f"{size_float:.{precision}f}{suffixes[suffix_index]}"
+
+
+def get_req_size(response: httpx.Response):
+    size = response.request.method
+    size += str(response.request.url)
+    size += "\r\n".join(f"{k}{v}" for k, v in response.request.headers.items())
+    size += response.request.content.decode("utf-8") or ""
+    return get_human_readable_size(len(size.encode("utf-8")))
+
+
+def set_up_cli_logging() -> None:
+    """
+    This function sets up logging for the CLI.
+    """
+
+    logger.setLevel(logging.INFO)
+    logger.propagate = False
+
+    # Set up file and stream handlers
+    file_handler = logging.FileHandler(
+        "folio_batch_poster_{}.log".format(dt.now().strftime("%Y%m%d%H%M%S"))
+    )
+    file_handler.setLevel(logging.INFO)
+    file_formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
+    file_handler.setFormatter(file_formatter)
+    logger.addHandler(file_handler)
+
+    if not any(
+        isinstance(h, logging.StreamHandler) and h.stream == sys.stderr for h in logger.handlers
+    ):
+        stream_handler = RichHandler(
+            show_level=False,
+            show_time=False,
+            omit_repeated_times=False,
+            show_path=False,
+        )
+        stream_handler.setLevel(logging.INFO)
+        stream_formatter = logging.Formatter("%(message)s")
+        stream_handler.setFormatter(stream_formatter)
+        logger.addHandler(stream_handler)
+
+    # Stop httpx from logging info messages to the console
+    logging.getLogger("httpx").setLevel(logging.WARNING)
+
+
+app = cyclopts.App(default_parameter=cyclopts.Parameter(negative=()))
+
+
+@app.default
+def main(
+    config_file: Annotated[
+        Path | None, cyclopts.Parameter(group="Job Configuration Parameters")
+    ] = None,
+    *,
+    gateway_url: Annotated[
+        str | None,
+        cyclopts.Parameter(
+            env_var="FOLIO_GATEWAY_URL",
+            show_env_var=True,
+            group="FOLIO Connection Parameters",
+        ),
+    ] = None,
+    tenant_id: Annotated[
+        str | None,
+        cyclopts.Parameter(
+            env_var="FOLIO_TENANT_ID", show_env_var=True, group="FOLIO Connection Parameters"
+        ),
+    ] = None,
+    username: Annotated[
+        str | None,
+        cyclopts.Parameter(
+            env_var="FOLIO_USERNAME", show_env_var=True, group="FOLIO Connection Parameters"
+        ),
+    ] = None,
+    password: Annotated[
+        str | None,
+        cyclopts.Parameter(
+            env_var="FOLIO_PASSWORD", show_env_var=True, group="FOLIO Connection Parameters"
+        ),
+    ] = None,
+    member_tenant_id: Annotated[
+        str | None,
+        cyclopts.Parameter(
+            env_var="FOLIO_MEMBER_TENANT_ID",
+            show_env_var=True,
+            group="FOLIO Connection Parameters",
+        ),
+    ] = None,
+    object_type: Annotated[
+        Literal["Instances", "Holdings", "Items"] | None,
+        cyclopts.Parameter(group="Job Configuration Parameters"),
+    ] = None,
+    file_paths: Annotated[
+        tuple[Path, ...] | None,
+        cyclopts.Parameter(
+            name=["--file-paths", "--file-path"],
+            help="Path(s) to JSONL file(s). Accepts multiple values and glob patterns.",
+            group="Job Configuration Parameters",
+        ),
+    ] = None,
+    batch_size: Annotated[
+        int,
+        cyclopts.Parameter(group="Job Configuration Parameters"),
+    ] = 100,
+    upsert: Annotated[
+        bool,
+        cyclopts.Parameter(group="Job Configuration Parameters"),
+    ] = False,
+    preserve_statistical_codes: Annotated[
+        bool,
+        cyclopts.Parameter(group="Job Configuration Parameters: --upsert options"),
+    ] = False,
+    preserve_administrative_notes: Annotated[
+        bool,
+        cyclopts.Parameter(group="Job Configuration Parameters: --upsert options"),
+    ] = False,
+    preserve_temporary_locations: Annotated[
+        bool,
+        cyclopts.Parameter(group="Job Configuration Parameters: --upsert options"),
+    ] = False,
+    preserve_temporary_loan_types: Annotated[
+        bool,
+        cyclopts.Parameter(group="Job Configuration Parameters: --upsert options"),
+    ] = False,
+    overwrite_item_status: Annotated[
+        bool,
+        cyclopts.Parameter(group="Job Configuration Parameters: --upsert options"),
+    ] = False,
+    patch_existing_records: Annotated[
+        bool,
+        cyclopts.Parameter(group="Job Configuration Parameters: --upsert options"),
+    ] = False,
+    patch_paths: Annotated[
+        str | None,
+        cyclopts.Parameter(
+            help=(
+                "Comma-separated list of field paths to patch during upsert (e.g., barcode,status)"
+            ),
+            group="Job Configuration Parameters: --upsert options",
+        ),
+    ] = None,
+    failed_records_file: Annotated[
+        Path | None,
+        cyclopts.Parameter(group="Job Configuration Parameters"),
+    ] = None,
+    no_progress: Annotated[
+        bool,
+        cyclopts.Parameter(group="Job Configuration Parameters"),
+    ] = False,
+) -> None:
+    """
+    Command-line interface to batch post inventory records to FOLIO
+
+    Parameters:
+        config_file: Path to JSON config file (overrides CLI parameters).
+        gateway_url: The FOLIO API Gateway URL.
+        tenant_id: The tenant id.
+        username: The FOLIO username.
+        password: The FOLIO password.
+        member_tenant_id: The FOLIO ECS member tenant id (if applicable).
+        object_type: Type of inventory object (Instances, Holdings, or Items).
+        file_paths: Path(s) to JSONL file(s) to post.
+        batch_size: Number of records to include in each batch (1-1000).
+        upsert: Enable upsert mode to update existing records.
+        preserve_statistical_codes: Preserve existing statistical codes during upsert.
+        preserve_administrative_notes: Preserve existing administrative notes during upsert.
+        preserve_temporary_locations: Preserve temporary location assignments during upsert.
+        preserve_temporary_loan_types: Preserve temporary loan type assignments during upsert.
+        overwrite_item_status: Overwrite item status during upsert.
+        patch_existing_records: Enable selective field patching during upsert.
+        patch_paths: Comma-separated list of field paths to patch.
+        failed_records_file: Path to file for writing failed records.
+        no_progress: Disable progress bar display.
+    """
+    set_up_cli_logging()
+
+    gateway_url, tenant_id, username, password = get_folio_connection_parameters(
+        gateway_url, tenant_id, username, password
+    )
+    folio_client = folioclient.FolioClient(gateway_url, tenant_id, username, password)
+
+    if member_tenant_id:
+        folio_client.tenant_id = member_tenant_id
+
+    # Handle file path expansion
+    expanded_file_paths = expand_file_paths(file_paths)
+
+    expanded_file_paths.sort()
+
+    # Parse patch_paths if provided
+    patch_paths_list = parse_patch_paths(patch_paths)
+
+    try:
+        if config_file:
+            config, files_to_process = parse_config_file(config_file)
+        else:
+            if not object_type:
+                logger.critical("--object-type is required when not using a config file")
+                sys.exit(1)
+
+            if not expanded_file_paths:
+                logger.critical("No files found to process. Exiting.")
+                sys.exit(1)
+
+            config = BatchPoster.Config(
+                object_type=object_type,
+                batch_size=batch_size,
+                upsert=upsert,
+                preserve_statistical_codes=preserve_statistical_codes,
+                preserve_administrative_notes=preserve_administrative_notes,
+                preserve_temporary_locations=preserve_temporary_locations,
+                preserve_temporary_loan_types=preserve_temporary_loan_types,
+                preserve_item_status=not overwrite_item_status,
+                patch_existing_records=patch_existing_records,
+                patch_paths=patch_paths_list,
+                no_progress=no_progress,
+            )
+            files_to_process = expanded_file_paths
+
+        logger.info(f"Processing {len(files_to_process)} file(s)")
+        asyncio.run(run_batch_poster(folio_client, config, files_to_process, failed_records_file))
+
+    except Exception as e:
+        logger.critical(f"An error occurred: {e}", exc_info=True)
+        sys.exit(1)
+
+
+def parse_config_file(config_file):
+    with open(config_file, "r") as f:
+        config_data = json.load(f)
+    # Convert file_paths if present in config
+    if "file_paths" in config_data:
+        config_data["file_paths"] = [Path(p) for p in config_data["file_paths"]]
+    config = BatchPoster.Config(**config_data)
+    files_to_process = config_data.get("file_paths", [])
+    return config, files_to_process
+
+
+def parse_patch_paths(patch_paths):
+    patch_paths_list = None
+    if patch_paths:
+        patch_paths_list = [p.strip() for p in patch_paths.split(",") if p.strip()]
+    return patch_paths_list
+
+
+def expand_file_paths(file_paths):
+    expanded_paths: List[Path] = []
+    if file_paths:
+        for file_path in file_paths:
+            file_path_str = str(file_path)
+            if any(char in file_path_str for char in ["*", "?", "["]):
+                # It's a glob pattern - expand it
+                expanded = glob_module.glob(file_path_str)
+                expanded_paths.extend([Path(x) for x in expanded])
+            else:
+                # It's a regular path
+                expanded_paths.append(file_path)
+    return expanded_paths
+
+
+async def run_batch_poster(
+    folio_client: FolioClient,
+    config: "BatchPoster.Config",
+    files_to_process: List[Path],
+    failed_records_file: Path | None,
+):
+    """
+    Run the batch poster operation.
+
+    Args:
+        folio_client: Authenticated FOLIO client
+        config: BatchPoster configuration
+        files_to_process: List of file paths to process
+        failed_records_file: Optional path for failed records
+    """
+    async with folio_client:
+        try:
+            # Create progress reporter
+            reporter = (
+                NoOpProgressReporter()
+                if config.no_progress
+                else RichProgressReporter(show_speed=True, show_time=True)
+            )
+
+            poster = BatchPoster(
+                folio_client, config, failed_records_file=failed_records_file, reporter=reporter
+            )
+            async with poster:
+                await poster.do_work(files_to_process)
+            log_final_stats(poster)
+
+        except Exception as e:
+            logger.critical(f"Batch posting failed: {e}", exc_info=True)
+            raise
+
+
+def log_final_stats(poster: BatchPoster) -> None:
+    """
+    Log the final statistics after batch posting.
+
+    Args:
+        poster: The BatchPoster instance containing the stats
+    """
+    # Log final statistics
+    logger.info("=" * 60)
+    logger.info("Batch posting complete!")
+    logger.info("=" * 60)
+    total_processed = poster.stats.records_processed
+    logger.info("Total records processed: %d", total_processed)
+    logger.info("Records posted successfully: %d", poster.stats.records_posted)
+    logger.info("Records created: %d", poster.stats.records_created)
+    logger.info("Records updated: %d", poster.stats.records_updated)
+    logger.info("Records failed: %d", poster.stats.records_failed)
+    logger.info("Total batches posted: %d", poster.stats.batches_posted)
+    logger.info("Total batches failed: %d", poster.stats.batches_failed)
+    if poster._failed_records_path:
+        logger.info("Failed records written to: %s", poster._failed_records_path)
+
+
+if __name__ == "__main__":
+    app()
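
For reference, a minimal sketch of driving the new `BatchPoster` class directly from Python, assembled only from the classes and call signatures shown in the diff above (the `FolioClient` constructor arguments and the `async with` usage mirror the module's own CLI path). The gateway URL, tenant id, credentials, and file names below are placeholders, not values shipped with the package.

```python
# Illustrative sketch only (not part of the packaged module); assumes a reachable
# FOLIO tenant and a JSON-lines file of item records. All connection values are placeholders.
import asyncio

from folioclient import FolioClient

from folio_data_import.BatchPoster import BatchPoster


async def post_items() -> None:
    # Constructor arguments follow the order used by this module's CLI:
    # gateway URL, tenant id, username, password.
    folio_client = FolioClient(
        "https://folio-gateway.example.edu",  # placeholder gateway URL
        "mytenant",                           # placeholder tenant id
        "batch_user",                         # placeholder username
        "s3cret",                             # placeholder password
    )
    config = BatchPoster.Config(object_type="Items", batch_size=100, upsert=True)
    # Passing a path lets BatchPoster open and close the failed-records file itself.
    poster = BatchPoster(folio_client, config, failed_records_file="failed_items.jsonl")
    async with folio_client:
        async with poster:
            stats = await poster.do_work("items.jsonl")  # hypothetical input file
    print(f"Posted: {stats.records_posted}, Failed: {stats.records_failed}")


asyncio.run(post_items())
```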