folio_data_import 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- folio_data_import/BatchPoster.py +1466 -0
- folio_data_import/MARCDataImport.py +1207 -0
- folio_data_import/UserImport.py +1242 -0
- folio_data_import/__init__.py +141 -0
- folio_data_import/__main__.py +14 -0
- folio_data_import/_progress.py +739 -0
- folio_data_import/custom_exceptions.py +35 -0
- folio_data_import/marc_preprocessors/__init__.py +29 -0
- folio_data_import/marc_preprocessors/_preprocessors.py +530 -0
- folio_data_import-0.5.0.dist-info/METADATA +465 -0
- folio_data_import-0.5.0.dist-info/RECORD +13 -0
- folio_data_import-0.5.0.dist-info/WHEEL +4 -0
- folio_data_import-0.5.0.dist-info/entry_points.txt +6 -0
|
@@ -0,0 +1,1466 @@
|
|
|
1
|
+
"""
|
|
2
|
+
BatchPoster module for FOLIO inventory batch operations.
|
|
3
|
+
|
|
4
|
+
This module provides functionality for batch posting of Instances, Holdings, and Items
|
|
5
|
+
to FOLIO's inventory storage endpoints with support for upsert operations.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import asyncio
|
|
9
|
+
import glob as glob_module
|
|
10
|
+
import json
|
|
11
|
+
import logging
|
|
12
|
+
import sys
|
|
13
|
+
from io import TextIOWrapper
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import Annotated, Any, Dict, Generator, List, Literal, Union
|
|
16
|
+
|
|
17
|
+
import cyclopts
|
|
18
|
+
import folioclient
|
|
19
|
+
import httpx
|
|
20
|
+
from folioclient import FolioClient
|
|
21
|
+
from pydantic import BaseModel, Field
|
|
22
|
+
|
|
23
|
+
from folio_data_import import get_folio_connection_parameters, set_up_cli_logging
|
|
24
|
+
from folio_data_import._progress import (
|
|
25
|
+
NoOpProgressReporter,
|
|
26
|
+
ProgressReporter,
|
|
27
|
+
RichProgressReporter,
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
logger = logging.getLogger(__name__)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class BatchPosterStats(BaseModel):
    """Statistics for batch posting operations."""

    # Per-record counters.
    records_processed: int = 0  # total records read from the input
    records_posted: int = 0  # records accepted by FOLIO (created or updated)
    records_created: int = 0  # records posted as new
    records_updated: int = 0  # records posted as updates (upsert mode)
    records_failed: int = 0  # records that failed to post
    # Per-batch counters.
    batches_posted: int = 0
    batches_failed: int = 0
    # Counters for the optional one-at-a-time rerun of failed records.
    rerun_succeeded: int = 0
    rerun_still_failed: int = 0
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def get_api_info(object_type: str) -> Dict[str, Any]:
    """
    Get API endpoint information for a given object type.

    Args:
        object_type: The type of object (Instances, Holdings, Items)

    Returns:
        Dictionary containing API endpoint information

    Raises:
        ValueError: If object_type is not supported
    """
    # (object_name, storage module, query collection) for each supported type.
    # ShadowInstances share the Instances storage endpoints.
    storage_map = {
        "Items": ("items", "item-storage", "items"),
        "Holdings": ("holdingsRecords", "holdings-storage", "holdings"),
        "Instances": ("instances", "instance-storage", "instances"),
        "ShadowInstances": ("instances", "instance-storage", "instances"),
    }

    try:
        object_name, storage, collection = storage_map[object_type]
    except KeyError:
        raise ValueError(
            f"Unsupported object type: {object_type}. "
            f"Supported types: {', '.join(storage_map.keys())}"
        ) from None

    return {
        "object_name": object_name,
        "api_endpoint": f"/{storage}/batch/synchronous",
        "query_endpoint": f"/{storage}/{collection}",
        "is_batch": True,
        "supports_upsert": True,
    }
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def deep_update(target: dict, source: dict) -> None:
    """
    Recursively merge *source* into *target* in place.

    Keys whose values are dictionaries in both mappings are merged
    key-by-key; any other value from *source* overwrites the
    corresponding entry in *target*.

    Args:
        target: The dictionary to update
        source: The dictionary to merge into target
    """
    for key, incoming in source.items():
        current = target.get(key)
        if isinstance(incoming, dict) and isinstance(current, dict):
            # Both sides hold a dict — recurse instead of overwriting.
            deep_update(current, incoming)
        else:
            target[key] = incoming
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def extract_paths(record: dict, paths: List[str]) -> dict:
    """
    Extract specified top-level keys from a record.

    Args:
        record: The record to extract from
        paths: List of JSON paths to extract (e.g., ['statisticalCodeIds', 'status'])

    Returns:
        Dictionary containing only the specified paths
    """
    # Keys absent from the record are simply skipped.
    return {path: record[path] for path in paths if path in record}
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
class BatchPoster:
|
|
134
|
+
"""
|
|
135
|
+
Handles batch posting of inventory records to FOLIO.
|
|
136
|
+
|
|
137
|
+
This class provides functionality for posting Instances, Holdings, and Items
|
|
138
|
+
to FOLIO's batch inventory endpoints with support for upsert operations.
|
|
139
|
+
"""
|
|
140
|
+
|
|
141
|
+
class Config(BaseModel):
|
|
142
|
+
"""Configuration for BatchPoster operations."""
|
|
143
|
+
|
|
144
|
+
object_type: Annotated[
|
|
145
|
+
Literal["Instances", "Holdings", "Items", "ShadowInstances"],
|
|
146
|
+
Field(
|
|
147
|
+
title="Object type",
|
|
148
|
+
description=(
|
|
149
|
+
"The type of inventory object to post: Instances, Holdings, Items, "
|
|
150
|
+
"or ShadowInstances (for consortium shadow copies)"
|
|
151
|
+
),
|
|
152
|
+
),
|
|
153
|
+
]
|
|
154
|
+
batch_size: Annotated[
|
|
155
|
+
int,
|
|
156
|
+
Field(
|
|
157
|
+
title="Batch size",
|
|
158
|
+
description="Number of records to include in each batch (1-1000)",
|
|
159
|
+
),
|
|
160
|
+
] = 1
|
|
161
|
+
upsert: Annotated[
|
|
162
|
+
bool,
|
|
163
|
+
Field(
|
|
164
|
+
title="Upsert",
|
|
165
|
+
description=(
|
|
166
|
+
"Enable upsert mode to create new records or update existing ones. "
|
|
167
|
+
"When enabled, records with matching IDs will be updated instead "
|
|
168
|
+
"of causing errors."
|
|
169
|
+
),
|
|
170
|
+
),
|
|
171
|
+
] = False
|
|
172
|
+
preserve_statistical_codes: Annotated[
|
|
173
|
+
bool,
|
|
174
|
+
Field(
|
|
175
|
+
title="Preserve statistical codes",
|
|
176
|
+
description=(
|
|
177
|
+
"Preserve existing statistical codes during upsert. "
|
|
178
|
+
"When enabled, statistical codes from existing records will be retained "
|
|
179
|
+
"and merged with new codes."
|
|
180
|
+
),
|
|
181
|
+
),
|
|
182
|
+
] = False
|
|
183
|
+
preserve_administrative_notes: Annotated[
|
|
184
|
+
bool,
|
|
185
|
+
Field(
|
|
186
|
+
title="Preserve administrative notes",
|
|
187
|
+
description=(
|
|
188
|
+
"Preserve existing administrative notes during upsert. "
|
|
189
|
+
"When enabled, administrative notes from existing records will be retained "
|
|
190
|
+
"and merged with new notes."
|
|
191
|
+
),
|
|
192
|
+
),
|
|
193
|
+
] = False
|
|
194
|
+
preserve_temporary_locations: Annotated[
|
|
195
|
+
bool,
|
|
196
|
+
Field(
|
|
197
|
+
title="Preserve temporary locations",
|
|
198
|
+
description=(
|
|
199
|
+
"Preserve temporary location assignments on items during upsert. "
|
|
200
|
+
"Only applicable when object_type is 'Items'."
|
|
201
|
+
),
|
|
202
|
+
),
|
|
203
|
+
] = False
|
|
204
|
+
preserve_temporary_loan_types: Annotated[
|
|
205
|
+
bool,
|
|
206
|
+
Field(
|
|
207
|
+
title="Preserve temporary loan types",
|
|
208
|
+
description=(
|
|
209
|
+
"Preserve temporary loan type assignments on items during upsert. "
|
|
210
|
+
"Only applicable when object_type is 'Items'."
|
|
211
|
+
),
|
|
212
|
+
),
|
|
213
|
+
] = False
|
|
214
|
+
preserve_item_status: Annotated[
|
|
215
|
+
bool,
|
|
216
|
+
Field(
|
|
217
|
+
title="Preserve item status",
|
|
218
|
+
description=(
|
|
219
|
+
"Preserve item status during upsert. When enabled, the status "
|
|
220
|
+
"field from existing records will be retained. Only applicable "
|
|
221
|
+
"when object_type is 'Items'."
|
|
222
|
+
),
|
|
223
|
+
),
|
|
224
|
+
] = True
|
|
225
|
+
patch_existing_records: Annotated[
|
|
226
|
+
bool,
|
|
227
|
+
Field(
|
|
228
|
+
title="Patch existing records",
|
|
229
|
+
description=(
|
|
230
|
+
"Enable selective field patching during upsert. When enabled, only fields "
|
|
231
|
+
"specified in patch_paths will be updated, preserving all other fields."
|
|
232
|
+
),
|
|
233
|
+
),
|
|
234
|
+
] = False
|
|
235
|
+
patch_paths: Annotated[
|
|
236
|
+
List[str] | None,
|
|
237
|
+
Field(
|
|
238
|
+
title="Patch paths",
|
|
239
|
+
description=(
|
|
240
|
+
"List of field paths to patch during upsert "
|
|
241
|
+
"(e.g., ['barcode', 'status']). "
|
|
242
|
+
"If empty and patch_existing_records is True, all fields "
|
|
243
|
+
"will be patched. Use this to selectively update only "
|
|
244
|
+
"specific fields while preserving others."
|
|
245
|
+
),
|
|
246
|
+
),
|
|
247
|
+
] = None
|
|
248
|
+
rerun_failed_records: Annotated[
|
|
249
|
+
bool,
|
|
250
|
+
Field(
|
|
251
|
+
title="Rerun failed records",
|
|
252
|
+
description=(
|
|
253
|
+
"After the main run, reprocess any failed records one at a time. "
|
|
254
|
+
"Requires --failed-records-file to be set."
|
|
255
|
+
),
|
|
256
|
+
),
|
|
257
|
+
] = False
|
|
258
|
+
no_progress: Annotated[
|
|
259
|
+
bool,
|
|
260
|
+
Field(
|
|
261
|
+
title="No progress bar",
|
|
262
|
+
description="Disable the progress bar display (e.g., for CI environments)",
|
|
263
|
+
),
|
|
264
|
+
] = False
|
|
265
|
+
|
|
266
|
+
def __init__(
|
|
267
|
+
self,
|
|
268
|
+
folio_client: FolioClient,
|
|
269
|
+
config: "BatchPoster.Config",
|
|
270
|
+
failed_records_file=None,
|
|
271
|
+
reporter: ProgressReporter | None = None,
|
|
272
|
+
):
|
|
273
|
+
"""
|
|
274
|
+
Initialize BatchPoster.
|
|
275
|
+
|
|
276
|
+
Args:
|
|
277
|
+
folio_client: Authenticated FOLIO client
|
|
278
|
+
config: Configuration for batch posting
|
|
279
|
+
failed_records_file: Optional file handle or path for writing failed records.
|
|
280
|
+
Can be an open file handle (managed by caller) or a string/Path
|
|
281
|
+
(will be opened/closed by BatchPoster).
|
|
282
|
+
reporter: Optional progress reporter. If None, uses NoOpProgressReporter.
|
|
283
|
+
"""
|
|
284
|
+
self.folio_client = folio_client
|
|
285
|
+
self.config = config
|
|
286
|
+
self.reporter = reporter or NoOpProgressReporter()
|
|
287
|
+
self.api_info = get_api_info(config.object_type)
|
|
288
|
+
self.stats = BatchPosterStats()
|
|
289
|
+
|
|
290
|
+
# Handle failed records file
|
|
291
|
+
self._failed_records_file_handle: TextIOWrapper | None = None
|
|
292
|
+
self._failed_records_path: Path | None = None
|
|
293
|
+
self._owns_file_handle = False
|
|
294
|
+
|
|
295
|
+
if failed_records_file:
|
|
296
|
+
if hasattr(failed_records_file, "write"):
|
|
297
|
+
# It's a file handle - use it but don't close it
|
|
298
|
+
self._failed_records_file_handle = failed_records_file
|
|
299
|
+
self._owns_file_handle = False
|
|
300
|
+
else:
|
|
301
|
+
# It's a path - we'll open and manage it
|
|
302
|
+
self._failed_records_path = Path(failed_records_file)
|
|
303
|
+
self._owns_file_handle = True
|
|
304
|
+
|
|
305
|
+
# Validate upsert configuration
|
|
306
|
+
if config.upsert and not self.api_info["supports_upsert"]:
|
|
307
|
+
raise ValueError(f"Upsert is not supported for {config.object_type}")
|
|
308
|
+
|
|
309
|
+
async def __aenter__(self):
|
|
310
|
+
"""Async context manager entry."""
|
|
311
|
+
# Open the file if we own it and it's not already open
|
|
312
|
+
if (
|
|
313
|
+
self._owns_file_handle
|
|
314
|
+
and self._failed_records_path
|
|
315
|
+
and not self._failed_records_file_handle
|
|
316
|
+
):
|
|
317
|
+
self._failed_records_file_handle = open(
|
|
318
|
+
self._failed_records_path, "w", encoding="utf-8"
|
|
319
|
+
)
|
|
320
|
+
logger.info(f"Opened failed records file: {self._failed_records_path}")
|
|
321
|
+
return self
|
|
322
|
+
|
|
323
|
+
async def __aexit__(self, exc_type, exc_val, exc_tb):
|
|
324
|
+
"""Async context manager exit."""
|
|
325
|
+
# Only close the file if we opened it
|
|
326
|
+
if self._owns_file_handle and self._failed_records_file_handle:
|
|
327
|
+
self._failed_records_file_handle.close()
|
|
328
|
+
if self._failed_records_path:
|
|
329
|
+
logger.info(
|
|
330
|
+
f"Wrote {self.stats.records_failed} failed records "
|
|
331
|
+
f"to {self._failed_records_path}"
|
|
332
|
+
)
|
|
333
|
+
self._failed_records_file_handle = None
|
|
334
|
+
|
|
335
|
+
def _write_failed_record(self, record: dict) -> None:
|
|
336
|
+
"""
|
|
337
|
+
Write a single failed record to the file immediately.
|
|
338
|
+
|
|
339
|
+
Args:
|
|
340
|
+
record: The record that failed to post
|
|
341
|
+
"""
|
|
342
|
+
if self._failed_records_file_handle:
|
|
343
|
+
self._failed_records_file_handle.write(json.dumps(record) + "\n")
|
|
344
|
+
self._failed_records_file_handle.flush() # Ensure it's written immediately
|
|
345
|
+
|
|
346
|
+
def _write_failed_batch(self, batch: List[dict]) -> None:
|
|
347
|
+
"""
|
|
348
|
+
Write a batch of failed records to the file immediately.
|
|
349
|
+
|
|
350
|
+
Args:
|
|
351
|
+
batch: List of records that failed to post
|
|
352
|
+
"""
|
|
353
|
+
if self._failed_records_file_handle:
|
|
354
|
+
for record in batch:
|
|
355
|
+
self._failed_records_file_handle.write(json.dumps(record) + "\n")
|
|
356
|
+
self._failed_records_file_handle.flush() # Ensure they're written immediately
|
|
357
|
+
|
|
358
|
+
def handle_upsert_for_statistical_codes(self, updates: dict, keep_existing: dict) -> None:
|
|
359
|
+
"""
|
|
360
|
+
Handle statistical codes during upsert based on configuration.
|
|
361
|
+
|
|
362
|
+
Args:
|
|
363
|
+
updates: Dictionary being prepared for update
|
|
364
|
+
keep_existing: Dictionary of fields to preserve from existing record
|
|
365
|
+
"""
|
|
366
|
+
if not self.config.preserve_statistical_codes:
|
|
367
|
+
updates["statisticalCodeIds"] = []
|
|
368
|
+
keep_existing["statisticalCodeIds"] = []
|
|
369
|
+
else:
|
|
370
|
+
keep_existing["statisticalCodeIds"] = updates.pop("statisticalCodeIds", [])
|
|
371
|
+
updates["statisticalCodeIds"] = []
|
|
372
|
+
|
|
373
|
+
def handle_upsert_for_administrative_notes(self, updates: dict, keep_existing: dict) -> None:
|
|
374
|
+
"""
|
|
375
|
+
Handle administrative notes during upsert based on configuration.
|
|
376
|
+
|
|
377
|
+
Args:
|
|
378
|
+
updates: Dictionary being prepared for update
|
|
379
|
+
keep_existing: Dictionary of fields to preserve from existing record
|
|
380
|
+
"""
|
|
381
|
+
if not self.config.preserve_administrative_notes:
|
|
382
|
+
updates["administrativeNotes"] = []
|
|
383
|
+
keep_existing["administrativeNotes"] = []
|
|
384
|
+
else:
|
|
385
|
+
keep_existing["administrativeNotes"] = updates.pop("administrativeNotes", [])
|
|
386
|
+
updates["administrativeNotes"] = []
|
|
387
|
+
|
|
388
|
+
def handle_upsert_for_temporary_locations(self, updates: dict, keep_existing: dict) -> None:
|
|
389
|
+
"""
|
|
390
|
+
Handle temporary locations during upsert based on configuration.
|
|
391
|
+
|
|
392
|
+
Args:
|
|
393
|
+
updates: Dictionary being prepared for update
|
|
394
|
+
keep_existing: Dictionary of fields to preserve from existing record
|
|
395
|
+
"""
|
|
396
|
+
if self.config.preserve_temporary_locations:
|
|
397
|
+
keep_existing["temporaryLocationId"] = updates.pop("temporaryLocationId", None)
|
|
398
|
+
|
|
399
|
+
def handle_upsert_for_temporary_loan_types(self, updates: dict, keep_existing: dict) -> None:
|
|
400
|
+
"""
|
|
401
|
+
Handle temporary loan types during upsert based on configuration.
|
|
402
|
+
|
|
403
|
+
Args:
|
|
404
|
+
updates: Dictionary being prepared for update
|
|
405
|
+
keep_existing: Dictionary of fields to preserve from existing record
|
|
406
|
+
"""
|
|
407
|
+
if self.config.preserve_temporary_loan_types:
|
|
408
|
+
keep_existing["temporaryLoanTypeId"] = updates.pop("temporaryLoanTypeId", None)
|
|
409
|
+
|
|
410
|
+
def keep_existing_fields(self, updates: dict, existing_record: dict) -> None:
|
|
411
|
+
"""
|
|
412
|
+
Preserve specific fields from existing record during upsert.
|
|
413
|
+
|
|
414
|
+
Always preserves ``hrid`` (human-readable ID) and ``lastCheckIn`` (circulation data)
|
|
415
|
+
from existing records to prevent data loss. Optionally preserves ``status``
|
|
416
|
+
based on configuration.
|
|
417
|
+
|
|
418
|
+
Args:
|
|
419
|
+
updates: Dictionary being prepared for update
|
|
420
|
+
existing_record: The existing record in FOLIO
|
|
421
|
+
"""
|
|
422
|
+
# Always preserve these fields - they should never be overwritten
|
|
423
|
+
always_preserve = ["hrid", "lastCheckIn"]
|
|
424
|
+
for key in always_preserve:
|
|
425
|
+
if key in existing_record:
|
|
426
|
+
updates[key] = existing_record[key]
|
|
427
|
+
|
|
428
|
+
# Conditionally preserve item status
|
|
429
|
+
if self.config.preserve_item_status and "status" in existing_record:
|
|
430
|
+
updates["status"] = existing_record["status"]
|
|
431
|
+
|
|
432
|
+
    def patch_record(
        self, new_record: dict, existing_record: dict, patch_paths: List[str]
    ) -> None:
        """
        Update new_record with values from existing_record according to patch_paths.

        Builds a merged record starting from a copy of the existing FOLIO
        record, overlays the selected fields from the new record, then
        restores preserved array fields without duplicates. ``new_record`` is
        replaced in place with the merged result.

        Args:
            new_record: The new record to be updated
            existing_record: The existing record to patch from
            patch_paths: List of fields in JSON Path notation to patch during upsert
        """
        # Start from a shallow copy of the existing record; selected fields
        # from the new record are layered on top of it below.
        updates = {}
        updates.update(existing_record)
        keep_existing: Dict[str, Any] = {}

        # Handle special field preservation rules: these helpers move notes /
        # codes into keep_existing (or discard them, per config) and clear the
        # corresponding keys in updates.
        self.handle_upsert_for_administrative_notes(updates, keep_existing)
        self.handle_upsert_for_statistical_codes(updates, keep_existing)

        if self.config.object_type == "Items":
            self.handle_upsert_for_temporary_locations(updates, keep_existing)
            self.handle_upsert_for_temporary_loan_types(updates, keep_existing)

        # Determine which fields to keep from new record: an empty patch_paths
        # means every field the new record carries wins.
        if not patch_paths:
            keep_new = new_record
        else:
            keep_new = extract_paths(new_record, patch_paths)

        # Special handling for instance status: always taken from the new
        # record when present, even if not listed in patch_paths.
        if "instanceStatusId" in new_record:
            updates["instanceStatusId"] = new_record["instanceStatusId"]

        # Merge the updates (nested dicts merged key-by-key by deep_update).
        deep_update(updates, keep_new)

        # Merge arrays from keep_existing, avoiding duplicates.
        for key, value in keep_existing.items():
            if isinstance(value, list) and key in keep_new:
                # Combine arrays and remove duplicates, preserving order.
                updates[key] = list(dict.fromkeys(updates.get(key, []) + value))
            elif key not in keep_new:
                updates[key] = value

        # Apply item-specific preservation (hrid, lastCheckIn, optional status).
        if self.config.object_type == "Items":
            self.keep_existing_fields(updates, existing_record)

        # Update the new_record in place so the caller's reference sees the merge.
        new_record.clear()
        new_record.update(updates)
|
|
483
|
+
|
|
484
|
+
    def prepare_record_for_upsert(self, new_record: dict, existing_record: dict) -> None:
        """
        Prepare a record for upsert by adding version and patching fields.

        For MARC-sourced Instance records, only suppression flags, deleted status,
        statistical codes, administrative notes, and instance status are allowed
        to be patched. This protects MARC-managed fields from being overwritten.

        Args:
            new_record: The new record to prepare
            existing_record: The existing record in FOLIO
        """
        # Set the version for optimistic locking
        new_record["_version"] = existing_record.get("_version", 1)

        # Check if this is a MARC-sourced record (Instances only).
        # NOTE(review): the substring check also matches "CONSORTIUM-MARC" —
        # presumably intentional; confirm.
        is_marc_record = (
            self.config.object_type == "Instances"
            and "source" in existing_record
            and "MARC" in existing_record.get("source", "")
        )

        if is_marc_record:
            # For MARC records, only allow patching specific fields
            # Filter patch_paths to only include allowed fields
            allowed_marc_fields = {"discoverySuppress", "staffSuppress", "deleted"}
            user_patch_paths = set(self.config.patch_paths or [])

            # Only keep suppression/deleted fields from user's patch_paths
            # (matched case-insensitively).
            restricted_paths = [
                path
                for path in user_patch_paths
                if any(allowed.lower() == path.lower() for allowed in allowed_marc_fields)
            ]

            # Always allow these fields for MARC records
            restricted_paths.extend(
                ["statisticalCodeIds", "administrativeNotes", "instanceStatusId"]
            )

            if self.config.patch_existing_records and user_patch_paths:
                logger.debug(
                    "Record %s is MARC-sourced, restricting patch to: %s",
                    existing_record.get("id", "unknown"),
                    restricted_paths,
                )

            # NOTE(review): for MARC records the patch is applied even when
            # patch_existing_records is False — verify this is intended.
            self.patch_record(new_record, existing_record, restricted_paths)

        elif self.config.patch_existing_records:
            # Apply patching with user-specified paths
            self.patch_record(new_record, existing_record, self.config.patch_paths or [])
|
|
536
|
+
|
|
537
|
+
async def fetch_existing_records(self, record_ids: List[str]) -> Dict[str, dict]:
|
|
538
|
+
"""
|
|
539
|
+
Fetch existing records from FOLIO by their IDs.
|
|
540
|
+
|
|
541
|
+
Args:
|
|
542
|
+
record_ids: List of record IDs to fetch
|
|
543
|
+
|
|
544
|
+
Returns:
|
|
545
|
+
Dictionary mapping record IDs to their full records
|
|
546
|
+
"""
|
|
547
|
+
existing_records: Dict[str, dict] = {}
|
|
548
|
+
query_endpoint = self.api_info["query_endpoint"]
|
|
549
|
+
object_name = self.api_info["object_name"]
|
|
550
|
+
|
|
551
|
+
# Fetch in batches of 90 (FOLIO CQL limit for OR queries)
|
|
552
|
+
fetch_batch_size = 90
|
|
553
|
+
|
|
554
|
+
async def fetch_batch(batch_ids: List[str]) -> dict:
|
|
555
|
+
query = f"id==({' OR '.join(batch_ids)})"
|
|
556
|
+
params = {"query": query, "limit": fetch_batch_size}
|
|
557
|
+
try:
|
|
558
|
+
return await self.folio_client.folio_get_async(
|
|
559
|
+
query_endpoint, key=object_name, query_params=params
|
|
560
|
+
)
|
|
561
|
+
except folioclient.FolioClientError as e:
|
|
562
|
+
logger.error(f"FOLIO client error fetching existing records: {e}")
|
|
563
|
+
raise
|
|
564
|
+
except folioclient.FolioConnectionError as e:
|
|
565
|
+
logger.error(f"FOLIO connection error fetching existing records: {e}")
|
|
566
|
+
raise
|
|
567
|
+
except Exception as e:
|
|
568
|
+
logger.error(f"Failed to fetch existing records: {e}")
|
|
569
|
+
raise
|
|
570
|
+
|
|
571
|
+
# Create tasks for all batches
|
|
572
|
+
tasks = []
|
|
573
|
+
for i in range(0, len(record_ids), fetch_batch_size):
|
|
574
|
+
batch_slice = record_ids[i : i + fetch_batch_size]
|
|
575
|
+
tasks.append(fetch_batch(batch_slice))
|
|
576
|
+
|
|
577
|
+
# Fetch all batches concurrently
|
|
578
|
+
results = await asyncio.gather(*tasks)
|
|
579
|
+
|
|
580
|
+
# Process results
|
|
581
|
+
for result in results:
|
|
582
|
+
if isinstance(result, list):
|
|
583
|
+
for record in result:
|
|
584
|
+
existing_records[record["id"]] = record
|
|
585
|
+
|
|
586
|
+
return existing_records
|
|
587
|
+
|
|
588
|
+
@staticmethod
|
|
589
|
+
def set_consortium_source(record: dict) -> None:
|
|
590
|
+
"""
|
|
591
|
+
Convert source field for consortium shadow instances.
|
|
592
|
+
|
|
593
|
+
For shadow instances in ECS/consortium environments, the source field
|
|
594
|
+
must be prefixed with "CONSORTIUM-" to distinguish them from local records.
|
|
595
|
+
|
|
596
|
+
Args:
|
|
597
|
+
record: The record to modify (modified in place)
|
|
598
|
+
"""
|
|
599
|
+
source = record.get("source", "")
|
|
600
|
+
if source == "MARC":
|
|
601
|
+
record["source"] = "CONSORTIUM-MARC"
|
|
602
|
+
elif source == "FOLIO":
|
|
603
|
+
record["source"] = "CONSORTIUM-FOLIO"
|
|
604
|
+
|
|
605
|
+
async def set_versions_for_upsert(self, batch: List[dict]) -> None:
|
|
606
|
+
"""
|
|
607
|
+
Fetch existing record versions and prepare batch for upsert.
|
|
608
|
+
|
|
609
|
+
Only records that already exist in FOLIO will have their _version set
|
|
610
|
+
and be prepared for update. New records will not have _version set.
|
|
611
|
+
|
|
612
|
+
Args:
|
|
613
|
+
batch: List of records to prepare for upsert
|
|
614
|
+
"""
|
|
615
|
+
# Extract record IDs
|
|
616
|
+
record_ids = [record["id"] for record in batch if "id" in record]
|
|
617
|
+
|
|
618
|
+
if not record_ids:
|
|
619
|
+
return
|
|
620
|
+
|
|
621
|
+
# Fetch existing records
|
|
622
|
+
existing_records = await self.fetch_existing_records(record_ids)
|
|
623
|
+
|
|
624
|
+
# Only prepare records that already exist
|
|
625
|
+
for record in batch:
|
|
626
|
+
if "id" in record and record["id"] in existing_records:
|
|
627
|
+
self.prepare_record_for_upsert(record, existing_records[record["id"]])
|
|
628
|
+
|
|
629
|
+
async def post_batch(self, batch: List[dict]) -> tuple[httpx.Response, int, int]:
|
|
630
|
+
"""
|
|
631
|
+
Post a batch of records to FOLIO.
|
|
632
|
+
|
|
633
|
+
Args:
|
|
634
|
+
batch: List of records to post
|
|
635
|
+
|
|
636
|
+
Returns:
|
|
637
|
+
Tuple of (response data dict, number of creates, number of updates)
|
|
638
|
+
|
|
639
|
+
Raises:
|
|
640
|
+
folioclient.FolioClientError: If FOLIO API returns an error
|
|
641
|
+
folioclient.FolioConnectionError: If connection to FOLIO fails
|
|
642
|
+
"""
|
|
643
|
+
# Track creates vs updates before posting
|
|
644
|
+
num_creates = 0
|
|
645
|
+
num_updates = 0
|
|
646
|
+
|
|
647
|
+
# For ShadowInstances, convert source to consortium format
|
|
648
|
+
if self.config.object_type == "ShadowInstances":
|
|
649
|
+
for record in batch:
|
|
650
|
+
self.set_consortium_source(record)
|
|
651
|
+
|
|
652
|
+
# If upsert mode, set versions and track which are updates
|
|
653
|
+
if self.config.upsert:
|
|
654
|
+
await self.set_versions_for_upsert(batch)
|
|
655
|
+
# Count records with _version as updates, others as creates
|
|
656
|
+
for record in batch:
|
|
657
|
+
if "_version" in record:
|
|
658
|
+
num_updates += 1
|
|
659
|
+
else:
|
|
660
|
+
num_creates += 1
|
|
661
|
+
else:
|
|
662
|
+
# In create-only mode, all are creates
|
|
663
|
+
num_creates = len(batch)
|
|
664
|
+
|
|
665
|
+
# Prepare payload
|
|
666
|
+
object_name = self.api_info["object_name"]
|
|
667
|
+
payload = {object_name: batch}
|
|
668
|
+
|
|
669
|
+
# Prepare query parameters
|
|
670
|
+
query_params = {}
|
|
671
|
+
if self.config.upsert:
|
|
672
|
+
query_params["upsert"] = "true"
|
|
673
|
+
|
|
674
|
+
# Make the request
|
|
675
|
+
api_endpoint = self.api_info["api_endpoint"]
|
|
676
|
+
|
|
677
|
+
response_data = await self.folio_client.async_httpx_client.post(
|
|
678
|
+
api_endpoint, json=payload, params=query_params
|
|
679
|
+
)
|
|
680
|
+
response_data.raise_for_status()
|
|
681
|
+
logger.info(
|
|
682
|
+
(
|
|
683
|
+
"Posting successful! Total rows: %s Total failed: %s "
|
|
684
|
+
"in %ss "
|
|
685
|
+
"Batch Size: %s Request size: %s "
|
|
686
|
+
),
|
|
687
|
+
self.stats.records_processed,
|
|
688
|
+
self.stats.records_failed,
|
|
689
|
+
response_data.elapsed.total_seconds(),
|
|
690
|
+
len(batch),
|
|
691
|
+
get_req_size(response_data),
|
|
692
|
+
)
|
|
693
|
+
self.stats.records_posted += len(batch)
|
|
694
|
+
self.stats.batches_posted += 1
|
|
695
|
+
|
|
696
|
+
return response_data, num_creates, num_updates
|
|
697
|
+
|
|
698
|
+
async def post_records(self, records) -> None:
|
|
699
|
+
"""
|
|
700
|
+
Post records in batches.
|
|
701
|
+
|
|
702
|
+
Failed records will be written to the file handle provided during initialization.
|
|
703
|
+
|
|
704
|
+
Args:
|
|
705
|
+
records: Records to post. Can be:
|
|
706
|
+
- List of dict records
|
|
707
|
+
- File-like object containing JSON lines (one record per line)
|
|
708
|
+
- String/Path to a file containing JSON lines
|
|
709
|
+
"""
|
|
710
|
+
# Normalize input to an iterator
|
|
711
|
+
if isinstance(records, (str, Path)):
|
|
712
|
+
# It's a file path
|
|
713
|
+
record_iterator = self._read_records_from_path(records)
|
|
714
|
+
elif hasattr(records, "read"):
|
|
715
|
+
# It's a file-like object
|
|
716
|
+
record_iterator = self._read_records_from_file_handle(records)
|
|
717
|
+
elif isinstance(records, list):
|
|
718
|
+
# It's already a list - wrap in a generator
|
|
719
|
+
record_iterator = iter(records)
|
|
720
|
+
else:
|
|
721
|
+
raise TypeError(
|
|
722
|
+
f"records must be a list, file path, or file-like object, got {type(records)}"
|
|
723
|
+
)
|
|
724
|
+
|
|
725
|
+
# Process records in batches
|
|
726
|
+
batch = []
|
|
727
|
+
for record in record_iterator:
|
|
728
|
+
batch.append(record)
|
|
729
|
+
|
|
730
|
+
# Post when batch is full
|
|
731
|
+
if len(batch) >= self.config.batch_size:
|
|
732
|
+
await self._post_single_batch(batch)
|
|
733
|
+
batch = []
|
|
734
|
+
|
|
735
|
+
# Post any remaining records
|
|
736
|
+
if batch:
|
|
737
|
+
await self._post_single_batch(batch)
|
|
738
|
+
|
|
739
|
+
def _read_records_from_path(self, file_path: Union[str, Path]) -> Generator[dict, None, None]:
|
|
740
|
+
"""
|
|
741
|
+
Generator that yields records from a file path.
|
|
742
|
+
|
|
743
|
+
Args:
|
|
744
|
+
file_path: Path to file containing JSON lines
|
|
745
|
+
|
|
746
|
+
Yields:
|
|
747
|
+
Parsed record dictionaries
|
|
748
|
+
"""
|
|
749
|
+
file_path = Path(file_path)
|
|
750
|
+
if not file_path.exists():
|
|
751
|
+
raise FileNotFoundError(f"Input file not found: {file_path}")
|
|
752
|
+
|
|
753
|
+
logger.info(f"Reading records from {file_path}")
|
|
754
|
+
|
|
755
|
+
with open(file_path, "r", encoding="utf-8") as f:
|
|
756
|
+
yield from self._read_records_from_file_handle(f)
|
|
757
|
+
|
|
758
|
+
def _read_records_from_file_handle(self, file_handle) -> Generator[dict, None, None]:
|
|
759
|
+
"""
|
|
760
|
+
Generator that yields records from a file handle.
|
|
761
|
+
|
|
762
|
+
If a line cannot be parsed as JSON, writes the problematic line and all
|
|
763
|
+
remaining lines to the failed records file (if configured) before raising
|
|
764
|
+
an exception.
|
|
765
|
+
|
|
766
|
+
Args:
|
|
767
|
+
file_handle: File-like object containing JSON lines
|
|
768
|
+
|
|
769
|
+
Yields:
|
|
770
|
+
Parsed record dictionaries
|
|
771
|
+
|
|
772
|
+
Raises:
|
|
773
|
+
ValueError: If a line cannot be parsed as JSON
|
|
774
|
+
"""
|
|
775
|
+
for line_number, original_line in enumerate(file_handle, start=1):
|
|
776
|
+
line = original_line.strip()
|
|
777
|
+
if not line:
|
|
778
|
+
continue
|
|
779
|
+
|
|
780
|
+
try:
|
|
781
|
+
record = self._parse_json_line(line, line_number)
|
|
782
|
+
yield record
|
|
783
|
+
except ValueError:
|
|
784
|
+
# Write the failed line to failed records file
|
|
785
|
+
if self._failed_records_file_handle:
|
|
786
|
+
self._failed_records_file_handle.write(original_line)
|
|
787
|
+
# Write all remaining lines as-is
|
|
788
|
+
for remaining_line in file_handle:
|
|
789
|
+
self._failed_records_file_handle.write(remaining_line)
|
|
790
|
+
|
|
791
|
+
self._failed_records_file_handle.flush()
|
|
792
|
+
|
|
793
|
+
# Re-raise the exception
|
|
794
|
+
raise
|
|
795
|
+
|
|
796
|
+
async def _post_single_batch(self, batch: List[dict]) -> None:
|
|
797
|
+
"""
|
|
798
|
+
Post a single batch with error handling.
|
|
799
|
+
|
|
800
|
+
Args:
|
|
801
|
+
batch: List of records to post
|
|
802
|
+
"""
|
|
803
|
+
self.stats.records_processed += len(batch)
|
|
804
|
+
|
|
805
|
+
try:
|
|
806
|
+
_, num_creates, num_updates = await self.post_batch(batch)
|
|
807
|
+
|
|
808
|
+
# Success - update stats
|
|
809
|
+
self.stats.records_created += num_creates
|
|
810
|
+
self.stats.records_updated += num_updates
|
|
811
|
+
# Update progress bar if available
|
|
812
|
+
if hasattr(self, "reporter") and hasattr(self, "task_id"):
|
|
813
|
+
self.reporter.update_task(
|
|
814
|
+
self.task_id,
|
|
815
|
+
advance=len(batch),
|
|
816
|
+
posted=self.stats.records_posted,
|
|
817
|
+
created=self.stats.records_created,
|
|
818
|
+
updated=self.stats.records_updated,
|
|
819
|
+
failed=self.stats.records_failed,
|
|
820
|
+
)
|
|
821
|
+
|
|
822
|
+
except folioclient.FolioClientError as e:
|
|
823
|
+
logger.error(f"Batch failed: {e} - {e.response.text}")
|
|
824
|
+
self.stats.records_failed += len(batch)
|
|
825
|
+
self._write_failed_batch(batch)
|
|
826
|
+
|
|
827
|
+
# Update progress bar if available
|
|
828
|
+
if hasattr(self, "reporter") and hasattr(self, "task_id"):
|
|
829
|
+
self.reporter.update_task(
|
|
830
|
+
self.task_id,
|
|
831
|
+
advance=len(batch),
|
|
832
|
+
posted=self.stats.records_posted,
|
|
833
|
+
created=self.stats.records_created,
|
|
834
|
+
updated=self.stats.records_updated,
|
|
835
|
+
failed=self.stats.records_failed,
|
|
836
|
+
)
|
|
837
|
+
|
|
838
|
+
except folioclient.FolioConnectionError as e:
|
|
839
|
+
logger.error(f"Batch failed due to connection error: {e}")
|
|
840
|
+
self.stats.records_failed += len(batch)
|
|
841
|
+
self._write_failed_batch(batch)
|
|
842
|
+
|
|
843
|
+
# Update progress bar if available
|
|
844
|
+
if hasattr(self, "reporter") and hasattr(self, "task_id"):
|
|
845
|
+
self.reporter.update_task(
|
|
846
|
+
self.task_id,
|
|
847
|
+
advance=len(batch),
|
|
848
|
+
posted=self.stats.records_posted,
|
|
849
|
+
created=self.stats.records_created,
|
|
850
|
+
updated=self.stats.records_updated,
|
|
851
|
+
failed=self.stats.records_failed,
|
|
852
|
+
)
|
|
853
|
+
|
|
854
|
+
except Exception as e:
|
|
855
|
+
logger.error(f"Unexpected error during batch post: {e}")
|
|
856
|
+
if hasattr(e, "request"):
|
|
857
|
+
logger.debug(f"DEBUG: {e.request}, {e.request.content}")
|
|
858
|
+
self.stats.records_failed += len(batch)
|
|
859
|
+
self._write_failed_batch(batch)
|
|
860
|
+
|
|
861
|
+
# Update progress bar if available
|
|
862
|
+
if hasattr(self, "reporter") and hasattr(self, "task_id"):
|
|
863
|
+
self.reporter.update_task(
|
|
864
|
+
self.task_id,
|
|
865
|
+
advance=len(batch),
|
|
866
|
+
posted=self.stats.records_posted,
|
|
867
|
+
created=self.stats.records_created,
|
|
868
|
+
updated=self.stats.records_updated,
|
|
869
|
+
failed=self.stats.records_failed,
|
|
870
|
+
)
|
|
871
|
+
|
|
872
|
+
def _parse_json_line(self, line: str, line_number: int) -> dict:
|
|
873
|
+
"""
|
|
874
|
+
Parse a JSON line, handling both plain and tab-delimited formats.
|
|
875
|
+
|
|
876
|
+
Args:
|
|
877
|
+
line: Line to parse
|
|
878
|
+
line_number: Line number for error reporting
|
|
879
|
+
|
|
880
|
+
Returns:
|
|
881
|
+
Parsed record dictionary
|
|
882
|
+
|
|
883
|
+
Raises:
|
|
884
|
+
ValueError: If the line cannot be parsed as JSON
|
|
885
|
+
"""
|
|
886
|
+
try:
|
|
887
|
+
# Handle both plain JSON and tab-delimited format
|
|
888
|
+
# (tab-delimited: last field is the JSON)
|
|
889
|
+
json_str = line.split("\t")[-1] if "\t" in line else line
|
|
890
|
+
return json.loads(json_str)
|
|
891
|
+
except json.JSONDecodeError as e:
|
|
892
|
+
raise ValueError(
|
|
893
|
+
f"Invalid JSON at line {line_number}: {e}. Line content: {line[:100]}"
|
|
894
|
+
) from e
|
|
895
|
+
except Exception as e:
|
|
896
|
+
raise ValueError(f"Error processing line {line_number}: {e}") from e
|
|
897
|
+
|
|
898
|
+
async def do_work(
|
|
899
|
+
self,
|
|
900
|
+
file_paths: Union[str, Path, List[Union[str, Path]]],
|
|
901
|
+
) -> BatchPosterStats:
|
|
902
|
+
"""
|
|
903
|
+
Main orchestration method for processing files.
|
|
904
|
+
|
|
905
|
+
This is the primary entry point for batch posting from files. It handles:
|
|
906
|
+
|
|
907
|
+
- Single or multiple file processing
|
|
908
|
+
- Progress tracking and logging
|
|
909
|
+
- Failed record collection
|
|
910
|
+
- Statistics reporting
|
|
911
|
+
|
|
912
|
+
Mimics the folio_migration_tools BatchPoster.do_work() workflow.
|
|
913
|
+
|
|
914
|
+
Note:
|
|
915
|
+
To write failed records, pass a file handle or path to the
|
|
916
|
+
BatchPoster constructor's ``failed_records_file`` parameter.
|
|
917
|
+
|
|
918
|
+
Args:
|
|
919
|
+
file_paths: Path(s) to JSONL file(s) to process
|
|
920
|
+
|
|
921
|
+
Returns:
|
|
922
|
+
Final statistics from the posting operation
|
|
923
|
+
|
|
924
|
+
Example::
|
|
925
|
+
|
|
926
|
+
config = BatchPosterConfig(
|
|
927
|
+
object_type="Items",
|
|
928
|
+
batch_size=100,
|
|
929
|
+
upsert=True
|
|
930
|
+
)
|
|
931
|
+
|
|
932
|
+
reporter = RichProgressReporter(enabled=True)
|
|
933
|
+
|
|
934
|
+
# With failed records file
|
|
935
|
+
with open("failed_items.jsonl", "w") as failed_file:
|
|
936
|
+
poster = BatchPoster(
|
|
937
|
+
folio_client, config,
|
|
938
|
+
failed_records_file=failed_file,
|
|
939
|
+
reporter=reporter
|
|
940
|
+
)
|
|
941
|
+
async with poster:
|
|
942
|
+
stats = await poster.do_work(["items1.jsonl", "items2.jsonl"])
|
|
943
|
+
|
|
944
|
+
# Or let BatchPoster manage the file
|
|
945
|
+
poster = BatchPoster(
|
|
946
|
+
folio_client, config,
|
|
947
|
+
failed_records_file="failed_items.jsonl",
|
|
948
|
+
reporter=reporter
|
|
949
|
+
)
|
|
950
|
+
async with poster:
|
|
951
|
+
stats = await poster.do_work("items.jsonl")
|
|
952
|
+
|
|
953
|
+
print(f"Posted: {stats.records_posted}, Failed: {stats.records_failed}")
|
|
954
|
+
|
|
955
|
+
"""
|
|
956
|
+
# Reset statistics
|
|
957
|
+
self.stats = BatchPosterStats()
|
|
958
|
+
|
|
959
|
+
# Normalize file_paths to list
|
|
960
|
+
if isinstance(file_paths, (str, Path)):
|
|
961
|
+
files_to_process = [Path(file_paths)]
|
|
962
|
+
else:
|
|
963
|
+
files_to_process = [Path(p) for p in file_paths]
|
|
964
|
+
|
|
965
|
+
# Log start
|
|
966
|
+
logger.info(
|
|
967
|
+
"Starting batch posting of %d file(s) with batch_size=%d",
|
|
968
|
+
len(files_to_process),
|
|
969
|
+
self.config.batch_size,
|
|
970
|
+
)
|
|
971
|
+
logger.info("Object type: %s", self.config.object_type)
|
|
972
|
+
logger.info("Upsert mode: %s", "On" if self.config.upsert else "Off")
|
|
973
|
+
if self.config.upsert:
|
|
974
|
+
logger.info(
|
|
975
|
+
"Preservation settings: statistical_codes=%s, administrative_notes=%s, "
|
|
976
|
+
"temporary_locations=%s, temporary_loan_types=%s",
|
|
977
|
+
self.config.preserve_statistical_codes,
|
|
978
|
+
self.config.preserve_administrative_notes,
|
|
979
|
+
self.config.preserve_temporary_locations,
|
|
980
|
+
self.config.preserve_temporary_loan_types,
|
|
981
|
+
)
|
|
982
|
+
|
|
983
|
+
# Count total lines across all files for progress bar
|
|
984
|
+
total_lines = 0
|
|
985
|
+
for file_path in files_to_process:
|
|
986
|
+
with open(file_path, "rb") as f:
|
|
987
|
+
total_lines += sum(
|
|
988
|
+
buf.count(b"\n") for buf in iter(lambda: f.read(1024 * 1024), b"")
|
|
989
|
+
)
|
|
990
|
+
|
|
991
|
+
# Set up progress reporting
|
|
992
|
+
with self.reporter:
|
|
993
|
+
self.task_id = self.reporter.start_task(
|
|
994
|
+
f"posting_{self.config.object_type}",
|
|
995
|
+
total=total_lines,
|
|
996
|
+
description=f"Posting {self.config.object_type}",
|
|
997
|
+
)
|
|
998
|
+
|
|
999
|
+
# Process each file
|
|
1000
|
+
for idx, file_path in enumerate(files_to_process, start=1):
|
|
1001
|
+
logger.info(
|
|
1002
|
+
"Processing file %d of %d: %s",
|
|
1003
|
+
idx,
|
|
1004
|
+
len(files_to_process),
|
|
1005
|
+
file_path.name,
|
|
1006
|
+
)
|
|
1007
|
+
|
|
1008
|
+
try:
|
|
1009
|
+
await self.post_records(file_path)
|
|
1010
|
+
except Exception as e:
|
|
1011
|
+
logger.error("Error processing file %s: %s", file_path, e, exc_info=True)
|
|
1012
|
+
raise
|
|
1013
|
+
|
|
1014
|
+
return self.stats
|
|
1015
|
+
|
|
1016
|
+
    async def rerun_failed_records_one_by_one(self) -> None:
        """
        Reprocess failed records one at a time.

        Streams through the failed records file, processing each record
        individually. Records that still fail are written to a new file
        with '_rerun' suffix. This gives each record a second chance
        with individual error handling.
        """
        if not self._failed_records_path or not self._failed_records_path.exists():
            logger.warning("No failed records file to rerun")
            return

        # Close the file handle if we own it
        # (ensures buffered failures are on disk before we re-read the file).
        if self._owns_file_handle and self._failed_records_file_handle:
            self._failed_records_file_handle.close()
            self._failed_records_file_handle = None

        # Count records first for logging
        # NOTE(review): this counts newlines, so blank lines (skipped below)
        # inflate the progress total slightly.
        record_count = self._count_lines_in_file(self._failed_records_path)
        if record_count == 0:
            logger.info("No failed records to rerun")
            return

        # Create new file for rerun failures with _rerun suffix
        rerun_failed_path = self._failed_records_path.with_stem(
            f"{self._failed_records_path.stem}_rerun"
        )

        logger.info("=" * 60)
        logger.info("Rerunning %d failed records one at a time...", record_count)
        logger.info("=" * 60)

        # Stream through failed records and process one at a time
        rerun_success = 0
        rerun_failed = 0

        # Wrap in reporter context for progress display
        with self.reporter:
            # Start a new progress task for the rerun
            rerun_task_id = self.reporter.start_task(
                f"rerun_{self.config.object_type}",
                total=record_count,
                description=f"Rerunning failed {self.config.object_type}",
            )

            with (
                open(self._failed_records_path, "r", encoding="utf-8") as infile,
                open(rerun_failed_path, "w", encoding="utf-8") as outfile,
            ):
                for line in infile:
                    line = line.strip()
                    if not line:
                        continue

                    try:
                        record = json.loads(line)
                    except json.JSONDecodeError:
                        # Unparseable lines are carried forward verbatim so no
                        # data is lost between reruns.
                        logger.warning("Could not parse failed record line: %s", line[:100])
                        outfile.write(line + "\n")
                        rerun_failed += 1
                        self.reporter.update_task(
                            rerun_task_id,
                            advance=1,
                            succeeded=rerun_success,
                            failed=rerun_failed,
                        )
                        continue

                    record_id = record.get("id", "unknown")
                    try:
                        # Post as a batch of one so the shared batch path is
                        # reused but errors are attributable to a single record.
                        await self.post_batch([record])
                        rerun_success += 1
                        logger.debug("Rerun success for record %s", record_id)
                    except Exception as e:
                        outfile.write(json.dumps(record) + "\n")
                        rerun_failed += 1

                        logger.debug("Rerun failed for record %s: %s", record_id, e)

                    self.reporter.update_task(
                        rerun_task_id,
                        advance=1,
                        succeeded=rerun_success,
                        failed=rerun_failed,
                    )

            # Finish the rerun task
            self.reporter.finish_task(rerun_task_id)

        # Store rerun results in stats for final reporting
        self.stats.rerun_succeeded = rerun_success
        self.stats.rerun_still_failed = rerun_failed

        logger.info("Rerun complete: %d succeeded, %d still failing", rerun_success, rerun_failed)
        if rerun_failed > 0:
            logger.info("Still-failing records written to: %s", rerun_failed_path)
        else:
            # Remove empty rerun file
            rerun_failed_path.unlink(missing_ok=True)
def _count_lines_in_file(self, file_path: Path) -> int:
|
|
1118
|
+
"""Count lines in a file using efficient binary newline counting."""
|
|
1119
|
+
with open(file_path, "rb") as f:
|
|
1120
|
+
return sum(buf.count(b"\n") for buf in iter(lambda: f.read(1024 * 1024), b""))
|
|
1121
|
+
|
|
1122
|
+
def get_stats(self) -> BatchPosterStats:
|
|
1123
|
+
"""
|
|
1124
|
+
Get current posting statistics.
|
|
1125
|
+
|
|
1126
|
+
Returns:
|
|
1127
|
+
Current statistics
|
|
1128
|
+
"""
|
|
1129
|
+
return self.stats
|
|
1130
|
+
|
|
1131
|
+
|
|
1132
|
+
def get_human_readable_size(size: int, precision: int = 2) -> str:
    """
    Convert a byte count into a human-readable string.

    Args:
        size: Size in bytes
        precision: Number of decimal places

    Returns:
        Human-readable size string such as ``"1.50KB"``
    """
    units = ("B", "KB", "MB", "GB", "TB")
    value = float(size)
    # Step up through the units until the value fits, capping at TB.
    for unit in units[:-1]:
        if value < 1024:
            return f"{value:.{precision}f}{unit}"
        value /= 1024.0
    return f"{value:.{precision}f}{units[-1]}"
def get_req_size(response: httpx.Response) -> str:
    """
    Estimate the size of the request that produced *response*.

    Sums the byte lengths of the request method, URL, headers, and body and
    formats the total with :func:`get_human_readable_size`. The body length
    is taken directly from the raw bytes, so binary (non-UTF-8) request
    payloads no longer raise ``UnicodeDecodeError`` as they did when the
    content was decoded to a string first.

    Args:
        response: The httpx response whose originating request is measured

    Returns:
        Human-readable size string (e.g. ``"12.34KB"``)
    """
    request = response.request
    total = len(request.method.encode("utf-8"))
    total += len(str(request.url).encode("utf-8"))
    header_blob = "\r\n".join(f"{k}{v}" for k, v in request.headers.items())
    total += len(header_blob.encode("utf-8"))
    # request.content is already bytes; measure it directly instead of
    # round-tripping through decode()/encode().
    total += len(request.content)
    return get_human_readable_size(total)
# Cyclopts CLI application for the batch poster entry point.
# NOTE(review): ``negative=()`` appears intended to suppress cyclopts'
# auto-generated ``--no-*`` negative flags for boolean parameters --
# confirm against the cyclopts Parameter documentation.
app = cyclopts.App(default_parameter=cyclopts.Parameter(negative=()))
@app.default
def main(
    config_file: Annotated[
        Path | None, cyclopts.Parameter(group="Job Configuration Parameters")
    ] = None,
    *,
    gateway_url: Annotated[
        str | None,
        cyclopts.Parameter(
            env_var="FOLIO_GATEWAY_URL",
            show_env_var=True,
            group="FOLIO Connection Parameters",
        ),
    ] = None,
    tenant_id: Annotated[
        str | None,
        cyclopts.Parameter(
            env_var="FOLIO_TENANT_ID", show_env_var=True, group="FOLIO Connection Parameters"
        ),
    ] = None,
    username: Annotated[
        str | None,
        cyclopts.Parameter(
            env_var="FOLIO_USERNAME", show_env_var=True, group="FOLIO Connection Parameters"
        ),
    ] = None,
    password: Annotated[
        str | None,
        cyclopts.Parameter(
            env_var="FOLIO_PASSWORD", show_env_var=True, group="FOLIO Connection Parameters"
        ),
    ] = None,
    member_tenant_id: Annotated[
        str | None,
        cyclopts.Parameter(
            env_var="FOLIO_MEMBER_TENANT_ID",
            show_env_var=True,
            group="FOLIO Connection Parameters",
        ),
    ] = None,
    object_type: Annotated[
        Literal["Instances", "Holdings", "Items", "ShadowInstances"] | None,
        cyclopts.Parameter(group="Job Configuration Parameters"),
    ] = None,
    file_paths: Annotated[
        tuple[Path, ...] | None,
        cyclopts.Parameter(
            name=["--file-paths", "--file-path"],
            help="Path(s) to JSONL file(s). Accepts multiple values and glob patterns.",
            group="Job Configuration Parameters",
        ),
    ] = None,
    batch_size: Annotated[
        int,
        cyclopts.Parameter(group="Job Configuration Parameters"),
    ] = 100,
    upsert: Annotated[
        bool,
        cyclopts.Parameter(group="Job Configuration Parameters"),
    ] = False,
    preserve_statistical_codes: Annotated[
        bool,
        cyclopts.Parameter(group="Job Configuration Parameters: --upsert options"),
    ] = False,
    preserve_administrative_notes: Annotated[
        bool,
        cyclopts.Parameter(group="Job Configuration Parameters: --upsert options"),
    ] = False,
    preserve_temporary_locations: Annotated[
        bool,
        cyclopts.Parameter(group="Job Configuration Parameters: --upsert options"),
    ] = False,
    preserve_temporary_loan_types: Annotated[
        bool,
        cyclopts.Parameter(group="Job Configuration Parameters: --upsert options"),
    ] = False,
    overwrite_item_status: Annotated[
        bool,
        cyclopts.Parameter(group="Job Configuration Parameters: --upsert options"),
    ] = False,
    patch_existing_records: Annotated[
        bool,
        cyclopts.Parameter(group="Job Configuration Parameters: --upsert options"),
    ] = False,
    patch_paths: Annotated[
        str | None,
        cyclopts.Parameter(
            help=(
                "Comma-separated list of field paths to patch during upsert (e.g., barcode,status)"
            ),
            group="Job Configuration Parameters: --upsert options",
        ),
    ] = None,
    failed_records_file: Annotated[
        Path | None,
        cyclopts.Parameter(group="Job Configuration Parameters"),
    ] = None,
    rerun_failed_records: Annotated[
        bool,
        cyclopts.Parameter(
            help="After the main run, reprocess failed records one at a time.",
            group="Job Configuration Parameters",
        ),
    ] = False,
    no_progress: Annotated[
        bool,
        cyclopts.Parameter(group="Job Configuration Parameters"),
    ] = False,
    debug: Annotated[
        bool,
        cyclopts.Parameter(
            name=["--debug"], group="General Parameters", help="Enable debug logging"
        ),
    ] = False,
) -> None:
    """
    Command-line interface to batch post inventory records to FOLIO

    Parameters:
        config_file: Path to JSON config file (overrides CLI parameters).
        gateway_url: The FOLIO API Gateway URL.
        tenant_id: The tenant id.
        username: The FOLIO username.
        password: The FOLIO password.
        member_tenant_id: The FOLIO ECS member tenant id (if applicable).
        object_type: Type of inventory object (Instances, Holdings, or Items).
        file_paths: Path(s) to JSONL file(s) to post.
        batch_size: Number of records to include in each batch (1-1000).
        upsert: Enable upsert mode to update existing records.
        preserve_statistical_codes: Preserve existing statistical codes during upsert.
        preserve_administrative_notes: Preserve existing administrative notes during upsert.
        preserve_temporary_locations: Preserve temporary location assignments during upsert.
        preserve_temporary_loan_types: Preserve temporary loan type assignments during upsert.
        overwrite_item_status: Overwrite item status during upsert.
        patch_existing_records: Enable selective field patching during upsert.
        patch_paths: Comma-separated list of field paths to patch.
        failed_records_file: Path to file for writing failed records.
        rerun_failed_records: After the main run, reprocess failed records one at a time.
        no_progress: Disable progress bar display.
        debug: Enable debug logging.
    """
    set_up_cli_logging(logger, "folio_batch_poster", debug)

    gateway_url, tenant_id, username, password = get_folio_connection_parameters(
        gateway_url, tenant_id, username, password
    )
    folio_client = folioclient.FolioClient(gateway_url, tenant_id, username, password)

    # ECS deployments post against the member tenant, not the central one.
    if member_tenant_id:
        folio_client.tenant_id = member_tenant_id

    # Handle file path expansion
    expanded_file_paths = expand_file_paths(file_paths)

    # Deterministic processing order regardless of shell glob expansion.
    expanded_file_paths.sort()

    # Parse patch_paths if provided
    patch_paths_list = parse_patch_paths(patch_paths)

    # Validate rerun_failed_records requires failed_records_file
    if rerun_failed_records and not failed_records_file:
        logger.critical("--rerun-failed-records requires --failed-records-file to be set")
        sys.exit(1)

    try:
        if config_file:
            # A config file supersedes the individual CLI job parameters.
            config, files_to_process = parse_config_file(config_file)
        else:
            if not object_type:
                logger.critical("--object-type is required when not using a config file")
                sys.exit(1)

            if not expanded_file_paths:
                logger.critical("No files found to process. Exiting.")
                sys.exit(1)

            config = BatchPoster.Config(
                object_type=object_type,
                batch_size=batch_size,
                upsert=upsert,
                preserve_statistical_codes=preserve_statistical_codes,
                preserve_administrative_notes=preserve_administrative_notes,
                preserve_temporary_locations=preserve_temporary_locations,
                preserve_temporary_loan_types=preserve_temporary_loan_types,
                # The CLI exposes the inverse flag: preserving is the default.
                preserve_item_status=not overwrite_item_status,
                patch_existing_records=patch_existing_records,
                patch_paths=patch_paths_list,
                rerun_failed_records=rerun_failed_records,
                no_progress=no_progress,
            )
            files_to_process = expanded_file_paths

        logger.info(f"Processing {len(files_to_process)} file(s)")
        asyncio.run(run_batch_poster(folio_client, config, files_to_process, failed_records_file))

    except Exception as e:
        logger.critical(f"An error occurred: {e}", exc_info=True)
        sys.exit(1)
def parse_config_file(config_file):
    """
    Load a BatchPoster configuration from a JSON file.

    Args:
        config_file: Path to a JSON file whose keys are BatchPoster.Config
            fields, optionally including a ``file_paths`` list.

    Returns:
        Tuple of ``(BatchPoster.Config, files_to_process)`` where
        ``files_to_process`` is a list of ``Path`` objects (empty if the
        config declared no ``file_paths``).
    """
    # JSON is defined as UTF-8 (RFC 8259); don't rely on the platform's
    # default encoding.
    with open(config_file, "r", encoding="utf-8") as f:
        config_data = json.load(f)
    # Convert file_paths if present in config
    if "file_paths" in config_data:
        config_data["file_paths"] = [Path(p) for p in config_data["file_paths"]]
    config = BatchPoster.Config(**config_data)
    files_to_process = config_data.get("file_paths", [])
    return config, files_to_process
def parse_patch_paths(patch_paths):
    """Split a comma-separated *patch_paths* string into a clean list.

    Returns ``None`` when *patch_paths* is falsy; otherwise a list of the
    stripped, non-empty comma-separated segments.
    """
    if not patch_paths:
        return None
    return [segment.strip() for segment in patch_paths.split(",") if segment.strip()]
def expand_file_paths(file_paths):
    """Expand a mixed sequence of literal paths and glob patterns.

    Entries containing a glob metacharacter (``*``, ``?``, ``[``) are
    expanded via the :mod:`glob` module; all other entries are passed
    through unchanged. A falsy *file_paths* yields an empty list.
    """
    results: List[Path] = []
    for candidate in file_paths or []:
        as_text = str(candidate)
        if any(meta in as_text for meta in ("*", "?", "[")):
            # Glob pattern - expand to whatever currently matches on disk.
            results.extend(Path(match) for match in glob_module.glob(as_text))
        else:
            # Regular path - keep the original object untouched.
            results.append(candidate)
    return results
async def run_batch_poster(
    folio_client: FolioClient,
    config: "BatchPoster.Config",
    files_to_process: List[Path],
    failed_records_file: Path | None,
):
    """
    Run the batch poster operation.

    Args:
        folio_client: Authenticated FOLIO client
        config: BatchPoster configuration
        files_to_process: List of file paths to process
        failed_records_file: Optional path for failed records
    """
    async with folio_client:
        try:
            # Create progress reporter
            reporter = (
                NoOpProgressReporter()
                if config.no_progress
                else RichProgressReporter(show_speed=True, show_time=True)
            )

            poster = BatchPoster(
                folio_client, config, failed_records_file=failed_records_file, reporter=reporter
            )
            async with poster:
                await poster.do_work(files_to_process)

                # If rerun_failed_records is enabled and there are failures, reprocess them
                if config.rerun_failed_records and poster.stats.records_failed > 0:
                    await poster.rerun_failed_records_one_by_one()

            # Summarize after the poster context closes so its failed-records
            # file has been flushed/closed.
            log_final_stats(poster)

        except Exception as e:
            logger.critical(f"Batch posting failed: {e}", exc_info=True)
            raise
def log_final_stats(poster: BatchPoster) -> None:
    """
    Log a summary of the run's statistics after batch posting finishes.

    Args:
        poster: The BatchPoster instance containing the stats
    """
    stats = poster.stats
    logger.info("=" * 60)
    logger.info("Batch posting complete!")
    logger.info("=" * 60)
    summary = (
        ("Total records processed: %d", stats.records_processed),
        ("Records posted successfully: %d", stats.records_posted),
        ("Records created: %d", stats.records_created),
        ("Records updated: %d", stats.records_updated),
        ("Records failed: %d", stats.records_failed),
        ("Total batches posted: %d", stats.batches_posted),
        ("Total batches failed: %d", stats.batches_failed),
    )
    for template, value in summary:
        logger.info(template, value)
    if poster.config.rerun_failed_records:
        logger.info("Rerun succeeded: %d", stats.rerun_succeeded)
        logger.info("Rerun still failed: %d", stats.rerun_still_failed)
    if poster._failed_records_path:
        logger.info("Failed records written to: %s", poster._failed_records_path)
if __name__ == "__main__":
    # Invoke the cyclopts CLI entry point when run as a script.
    app()