folio_data_import 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1466 @@
+ """
+ BatchPoster module for FOLIO inventory batch operations.
+
+ This module provides functionality for batch posting of Instances, Holdings, and Items
+ to FOLIO's inventory storage endpoints with support for upsert operations.
+ """
+
+ import asyncio
+ import glob as glob_module
+ import json
+ import logging
+ import sys
+ from io import TextIOWrapper
+ from pathlib import Path
+ from typing import Annotated, Any, Dict, Generator, List, Literal, Union
+
+ import cyclopts
+ import folioclient
+ import httpx
+ from folioclient import FolioClient
+ from pydantic import BaseModel, Field
+
+ from folio_data_import import get_folio_connection_parameters, set_up_cli_logging
+ from folio_data_import._progress import (
+     NoOpProgressReporter,
+     ProgressReporter,
+     RichProgressReporter,
+ )
+
+ logger = logging.getLogger(__name__)
+
+
+ class BatchPosterStats(BaseModel):
+     """Statistics for batch posting operations."""
+
+     records_processed: int = 0
+     records_posted: int = 0
+     records_created: int = 0
+     records_updated: int = 0
+     records_failed: int = 0
+     batches_posted: int = 0
+     batches_failed: int = 0
+     rerun_succeeded: int = 0
+     rerun_still_failed: int = 0
+
+
+ def get_api_info(object_type: str) -> Dict[str, Any]:
+     """
+     Get API endpoint information for a given object type.
+
+     Args:
+         object_type: The type of object (Instances, Holdings, Items, or ShadowInstances)
+
+     Returns:
+         Dictionary containing API endpoint information
+
+     Raises:
+         ValueError: If object_type is not supported
+     """
+     api_info = {
+         "Items": {
+             "object_name": "items",
+             "api_endpoint": "/item-storage/batch/synchronous",
+             "query_endpoint": "/item-storage/items",
+             "is_batch": True,
+             "supports_upsert": True,
+         },
+         "Holdings": {
+             "object_name": "holdingsRecords",
+             "api_endpoint": "/holdings-storage/batch/synchronous",
+             "query_endpoint": "/holdings-storage/holdings",
+             "is_batch": True,
+             "supports_upsert": True,
+         },
+         "Instances": {
+             "object_name": "instances",
+             "api_endpoint": "/instance-storage/batch/synchronous",
+             "query_endpoint": "/instance-storage/instances",
+             "is_batch": True,
+             "supports_upsert": True,
+         },
+         "ShadowInstances": {
+             "object_name": "instances",
+             "api_endpoint": "/instance-storage/batch/synchronous",
+             "query_endpoint": "/instance-storage/instances",
+             "is_batch": True,
+             "supports_upsert": True,
+         },
+     }
+
+     if object_type not in api_info:
+         raise ValueError(
+             f"Unsupported object type: {object_type}. "
+             f"Supported types: {', '.join(api_info.keys())}"
+         )
+
+     return api_info[object_type]
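+
+ # A minimal illustration of the lookup (values taken from the table above):
+ #
+ #     >>> get_api_info("Items")["api_endpoint"]
+ #     '/item-storage/batch/synchronous'
+ #     >>> get_api_info("Holdings")["object_name"]
+ #     'holdingsRecords'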
+
+
+ def deep_update(target: dict, source: dict) -> None:
+     """
+     Recursively update target dictionary with values from source dictionary.
+
+     Args:
+         target: The dictionary to update
+         source: The dictionary to merge into target
+     """
+     for key, value in source.items():
+         if isinstance(value, dict) and key in target and isinstance(target[key], dict):
+             deep_update(target[key], value)
+         else:
+             target[key] = value
+
+
+ def extract_paths(record: dict, paths: List[str]) -> dict:
+     """
+     Extract specified paths from a record.
+
+     Args:
+         record: The record to extract from
+         paths: List of top-level field names to extract
+             (e.g., ['statisticalCodeIds', 'status'])
+
+     Returns:
+         Dictionary containing only the specified paths
+     """
+     result = {}
+     for path in paths:
+         if path in record:
+             result[path] = record[path]
+     return result
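+
+ # Minimal illustrations of the two helpers above, with hypothetical values:
+ #
+ #     >>> target = {"status": {"name": "Available"}, "barcode": "b1"}
+ #     >>> deep_update(target, {"status": {"date": "2024-01-01"}})
+ #     >>> target
+ #     {'status': {'name': 'Available', 'date': '2024-01-01'}, 'barcode': 'b1'}
+ #     >>> extract_paths(target, ["barcode", "missing"])
+ #     {'barcode': 'b1'}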
+
+
+ class BatchPoster:
+     """
+     Handles batch posting of inventory records to FOLIO.
+
+     This class provides functionality for posting Instances, Holdings, and Items
+     to FOLIO's batch inventory endpoints with support for upsert operations.
+     """
+
+     class Config(BaseModel):
+         """Configuration for BatchPoster operations."""
+
+         object_type: Annotated[
+             Literal["Instances", "Holdings", "Items", "ShadowInstances"],
+             Field(
+                 title="Object type",
+                 description=(
+                     "The type of inventory object to post: Instances, Holdings, Items, "
+                     "or ShadowInstances (for consortium shadow copies)"
+                 ),
+             ),
+         ]
+         batch_size: Annotated[
+             int,
+             Field(
+                 title="Batch size",
+                 description="Number of records to include in each batch (1-1000)",
+             ),
+         ] = 1
+         upsert: Annotated[
+             bool,
+             Field(
+                 title="Upsert",
+                 description=(
+                     "Enable upsert mode to create new records or update existing ones. "
+                     "When enabled, records with matching IDs will be updated instead "
+                     "of causing errors."
+                 ),
+             ),
+         ] = False
+         preserve_statistical_codes: Annotated[
+             bool,
+             Field(
+                 title="Preserve statistical codes",
+                 description=(
+                     "Preserve existing statistical codes during upsert. "
+                     "When enabled, statistical codes from existing records will be retained "
+                     "and merged with new codes."
+                 ),
+             ),
+         ] = False
+         preserve_administrative_notes: Annotated[
+             bool,
+             Field(
+                 title="Preserve administrative notes",
+                 description=(
+                     "Preserve existing administrative notes during upsert. "
+                     "When enabled, administrative notes from existing records will be retained "
+                     "and merged with new notes."
+                 ),
+             ),
+         ] = False
+         preserve_temporary_locations: Annotated[
+             bool,
+             Field(
+                 title="Preserve temporary locations",
+                 description=(
+                     "Preserve temporary location assignments on items during upsert. "
+                     "Only applicable when object_type is 'Items'."
+                 ),
+             ),
+         ] = False
+         preserve_temporary_loan_types: Annotated[
+             bool,
+             Field(
+                 title="Preserve temporary loan types",
+                 description=(
+                     "Preserve temporary loan type assignments on items during upsert. "
+                     "Only applicable when object_type is 'Items'."
+                 ),
+             ),
+         ] = False
+         preserve_item_status: Annotated[
+             bool,
+             Field(
+                 title="Preserve item status",
+                 description=(
+                     "Preserve item status during upsert. When enabled, the status "
+                     "field from existing records will be retained. Only applicable "
+                     "when object_type is 'Items'."
+                 ),
+             ),
+         ] = True
+         patch_existing_records: Annotated[
+             bool,
+             Field(
+                 title="Patch existing records",
+                 description=(
+                     "Enable selective field patching during upsert. When enabled, only fields "
+                     "specified in patch_paths will be updated, preserving all other fields."
+                 ),
+             ),
+         ] = False
+         patch_paths: Annotated[
+             List[str] | None,
+             Field(
+                 title="Patch paths",
+                 description=(
+                     "List of field paths to patch during upsert "
+                     "(e.g., ['barcode', 'status']). "
+                     "If empty and patch_existing_records is True, all fields "
+                     "will be patched. Use this to selectively update only "
+                     "specific fields while preserving others."
+                 ),
+             ),
+         ] = None
+         rerun_failed_records: Annotated[
+             bool,
+             Field(
+                 title="Rerun failed records",
+                 description=(
+                     "After the main run, reprocess any failed records one at a time. "
+                     "Requires --failed-records-file to be set."
+                 ),
+             ),
+         ] = False
+         no_progress: Annotated[
+             bool,
+             Field(
+                 title="No progress bar",
+                 description="Disable the progress bar display (e.g., for CI environments)",
+             ),
+         ] = False
+
+     def __init__(
+         self,
+         folio_client: FolioClient,
+         config: "BatchPoster.Config",
+         failed_records_file=None,
+         reporter: ProgressReporter | None = None,
+     ):
+         """
+         Initialize BatchPoster.
+
+         Args:
+             folio_client: Authenticated FOLIO client
+             config: Configuration for batch posting
+             failed_records_file: Optional file handle or path for writing failed records.
+                 Can be an open file handle (managed by caller) or a string/Path
+                 (will be opened/closed by BatchPoster).
+             reporter: Optional progress reporter. If None, uses NoOpProgressReporter.
+         """
+         self.folio_client = folio_client
+         self.config = config
+         self.reporter = reporter or NoOpProgressReporter()
+         self.api_info = get_api_info(config.object_type)
+         self.stats = BatchPosterStats()
+
+         # Handle failed records file
+         self._failed_records_file_handle: TextIOWrapper | None = None
+         self._failed_records_path: Path | None = None
+         self._owns_file_handle = False
+
+         if failed_records_file:
+             if hasattr(failed_records_file, "write"):
+                 # It's a file handle - use it but don't close it
+                 self._failed_records_file_handle = failed_records_file
+                 self._owns_file_handle = False
+             else:
+                 # It's a path - we'll open and manage it
+                 self._failed_records_path = Path(failed_records_file)
+                 self._owns_file_handle = True
+
+         # Validate upsert configuration
+         if config.upsert and not self.api_info["supports_upsert"]:
+             raise ValueError(f"Upsert is not supported for {config.object_type}")
+
+     async def __aenter__(self):
+         """Async context manager entry."""
+         # Open the file if we own it and it's not already open
+         if (
+             self._owns_file_handle
+             and self._failed_records_path
+             and not self._failed_records_file_handle
+         ):
+             self._failed_records_file_handle = open(
+                 self._failed_records_path, "w", encoding="utf-8"
+             )
+             logger.info(f"Opened failed records file: {self._failed_records_path}")
+         return self
+
+     async def __aexit__(self, exc_type, exc_val, exc_tb):
+         """Async context manager exit."""
+         # Only close the file if we opened it
+         if self._owns_file_handle and self._failed_records_file_handle:
+             self._failed_records_file_handle.close()
+             if self._failed_records_path:
+                 logger.info(
+                     f"Wrote {self.stats.records_failed} failed records "
+                     f"to {self._failed_records_path}"
+                 )
+             self._failed_records_file_handle = None
+
+     def _write_failed_record(self, record: dict) -> None:
+         """
+         Write a single failed record to the file immediately.
+
+         Args:
+             record: The record that failed to post
+         """
+         if self._failed_records_file_handle:
+             self._failed_records_file_handle.write(json.dumps(record) + "\n")
+             self._failed_records_file_handle.flush()  # Ensure it's written immediately
+
+     def _write_failed_batch(self, batch: List[dict]) -> None:
+         """
+         Write a batch of failed records to the file immediately.
+
+         Args:
+             batch: List of records that failed to post
+         """
+         if self._failed_records_file_handle:
+             for record in batch:
+                 self._failed_records_file_handle.write(json.dumps(record) + "\n")
+             self._failed_records_file_handle.flush()  # Ensure they're written immediately
+
+     def handle_upsert_for_statistical_codes(self, updates: dict, keep_existing: dict) -> None:
+         """
+         Handle statistical codes during upsert based on configuration.
+
+         Args:
+             updates: Dictionary being prepared for update
+             keep_existing: Dictionary of fields to preserve from existing record
+         """
+         if not self.config.preserve_statistical_codes:
+             updates["statisticalCodeIds"] = []
+             keep_existing["statisticalCodeIds"] = []
+         else:
+             keep_existing["statisticalCodeIds"] = updates.pop("statisticalCodeIds", [])
+             updates["statisticalCodeIds"] = []
+
+     def handle_upsert_for_administrative_notes(self, updates: dict, keep_existing: dict) -> None:
+         """
+         Handle administrative notes during upsert based on configuration.
+
+         Args:
+             updates: Dictionary being prepared for update
+             keep_existing: Dictionary of fields to preserve from existing record
+         """
+         if not self.config.preserve_administrative_notes:
+             updates["administrativeNotes"] = []
+             keep_existing["administrativeNotes"] = []
+         else:
+             keep_existing["administrativeNotes"] = updates.pop("administrativeNotes", [])
+             updates["administrativeNotes"] = []
+
+     def handle_upsert_for_temporary_locations(self, updates: dict, keep_existing: dict) -> None:
+         """
+         Handle temporary locations during upsert based on configuration.
+
+         Args:
+             updates: Dictionary being prepared for update
+             keep_existing: Dictionary of fields to preserve from existing record
+         """
+         if self.config.preserve_temporary_locations:
+             keep_existing["temporaryLocationId"] = updates.pop("temporaryLocationId", None)
+
+     def handle_upsert_for_temporary_loan_types(self, updates: dict, keep_existing: dict) -> None:
+         """
+         Handle temporary loan types during upsert based on configuration.
+
+         Args:
+             updates: Dictionary being prepared for update
+             keep_existing: Dictionary of fields to preserve from existing record
+         """
+         if self.config.preserve_temporary_loan_types:
+             keep_existing["temporaryLoanTypeId"] = updates.pop("temporaryLoanTypeId", None)
+
+     def keep_existing_fields(self, updates: dict, existing_record: dict) -> None:
+         """
+         Preserve specific fields from existing record during upsert.
+
+         Always preserves ``hrid`` (human-readable ID) and ``lastCheckIn`` (circulation data)
+         from existing records to prevent data loss. Optionally preserves ``status``
+         based on configuration.
+
+         Args:
+             updates: Dictionary being prepared for update
+             existing_record: The existing record in FOLIO
+         """
+         # Always preserve these fields - they should never be overwritten
+         always_preserve = ["hrid", "lastCheckIn"]
+         for key in always_preserve:
+             if key in existing_record:
+                 updates[key] = existing_record[key]
+
+         # Conditionally preserve item status
+         if self.config.preserve_item_status and "status" in existing_record:
+             updates["status"] = existing_record["status"]
+
+     def patch_record(
+         self, new_record: dict, existing_record: dict, patch_paths: List[str]
+     ) -> None:
+         """
+         Merge new_record onto existing_record, updating new_record in place.
+
+         Only the fields named in patch_paths are taken from the new record; all
+         other fields come from the existing record, subject to the preservation
+         handlers above.
+
+         Args:
+             new_record: The new record to be updated
+             existing_record: The existing record to patch from
+             patch_paths: List of fields in JSON Path notation to patch during upsert
+         """
+         updates = {}
+         updates.update(existing_record)
+         keep_existing: Dict[str, Any] = {}
+
+         # Handle special field preservation rules
+         self.handle_upsert_for_administrative_notes(updates, keep_existing)
+         self.handle_upsert_for_statistical_codes(updates, keep_existing)
+
+         if self.config.object_type == "Items":
+             self.handle_upsert_for_temporary_locations(updates, keep_existing)
+             self.handle_upsert_for_temporary_loan_types(updates, keep_existing)
+
+         # Determine which fields to keep from new record
+         if not patch_paths:
+             keep_new = new_record
+         else:
+             keep_new = extract_paths(new_record, patch_paths)
+
+         # Special handling for instance status
+         if "instanceStatusId" in new_record:
+             updates["instanceStatusId"] = new_record["instanceStatusId"]
+
+         # Merge the updates
+         deep_update(updates, keep_new)
+
+         # Merge arrays from keep_existing, avoiding duplicates
+         for key, value in keep_existing.items():
+             if isinstance(value, list) and key in keep_new:
+                 # Combine arrays and remove duplicates
+                 updates[key] = list(dict.fromkeys(updates.get(key, []) + value))
+             elif key not in keep_new:
+                 updates[key] = value
+
+         # Apply item-specific preservation
+         if self.config.object_type == "Items":
+             self.keep_existing_fields(updates, existing_record)
+
+         # Update the new_record in place
+         new_record.clear()
+         new_record.update(updates)
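+
+     # A worked sketch with hypothetical Item records and a poster using default
+     # preservation settings: with patch_paths=["barcode"], only the barcode is
+     # taken from the new record; other scalar fields come from the existing one.
+     #
+     #     new = {"id": "u1", "barcode": "NEW", "copyNumber": "c2"}
+     #     old = {"id": "u1", "barcode": "OLD", "copyNumber": "c1"}
+     #     poster.patch_record(new, old, ["barcode"])
+     #     # new["barcode"] == "NEW", new["copyNumber"] == "c1"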
+
+     def prepare_record_for_upsert(self, new_record: dict, existing_record: dict) -> None:
+         """
+         Prepare a record for upsert by adding version and patching fields.
+
+         For MARC-sourced Instance records, only suppression flags, deleted status,
+         statistical codes, administrative notes, and instance status are allowed
+         to be patched. This protects MARC-managed fields from being overwritten.
+
+         Args:
+             new_record: The new record to prepare
+             existing_record: The existing record in FOLIO
+         """
+         # Set the version for optimistic locking
+         new_record["_version"] = existing_record.get("_version", 1)
+
+         # Check if this is a MARC-sourced record (Instances only)
+         is_marc_record = (
+             self.config.object_type == "Instances"
+             and "source" in existing_record
+             and "MARC" in existing_record.get("source", "")
+         )
+
+         if is_marc_record:
+             # For MARC records, only allow patching specific fields
+             # Filter patch_paths to only include allowed fields
+             allowed_marc_fields = {"discoverySuppress", "staffSuppress", "deleted"}
+             user_patch_paths = set(self.config.patch_paths or [])
+
+             # Only keep suppression/deleted fields from user's patch_paths
+             restricted_paths = [
+                 path
+                 for path in user_patch_paths
+                 if any(allowed.lower() == path.lower() for allowed in allowed_marc_fields)
+             ]
+
+             # Always allow these fields for MARC records
+             restricted_paths.extend(
+                 ["statisticalCodeIds", "administrativeNotes", "instanceStatusId"]
+             )
+
+             if self.config.patch_existing_records and user_patch_paths:
+                 logger.debug(
+                     "Record %s is MARC-sourced, restricting patch to: %s",
+                     existing_record.get("id", "unknown"),
+                     restricted_paths,
+                 )
+
+             self.patch_record(new_record, existing_record, restricted_paths)
+
+         elif self.config.patch_existing_records:
+             # Apply patching with user-specified paths
+             self.patch_record(new_record, existing_record, self.config.patch_paths or [])
+
+     async def fetch_existing_records(self, record_ids: List[str]) -> Dict[str, dict]:
+         """
+         Fetch existing records from FOLIO by their IDs.
+
+         Args:
+             record_ids: List of record IDs to fetch
+
+         Returns:
+             Dictionary mapping record IDs to their full records
+         """
+         existing_records: Dict[str, dict] = {}
+         query_endpoint = self.api_info["query_endpoint"]
+         object_name = self.api_info["object_name"]
+
+         # Fetch in batches of 90 (FOLIO CQL limit for OR queries)
+         fetch_batch_size = 90
+
+         async def fetch_batch(batch_ids: List[str]) -> dict:
+             query = f"id==({' OR '.join(batch_ids)})"
+             params = {"query": query, "limit": fetch_batch_size}
+             try:
+                 return await self.folio_client.folio_get_async(
+                     query_endpoint, key=object_name, query_params=params
+                 )
+             except folioclient.FolioClientError as e:
+                 logger.error(f"FOLIO client error fetching existing records: {e}")
+                 raise
+             except folioclient.FolioConnectionError as e:
+                 logger.error(f"FOLIO connection error fetching existing records: {e}")
+                 raise
+             except Exception as e:
+                 logger.error(f"Failed to fetch existing records: {e}")
+                 raise
+
+         # Create tasks for all batches
+         tasks = []
+         for i in range(0, len(record_ids), fetch_batch_size):
+             batch_slice = record_ids[i : i + fetch_batch_size]
+             tasks.append(fetch_batch(batch_slice))
+
+         # Fetch all batches concurrently
+         results = await asyncio.gather(*tasks)
+
+         # Process results
+         for result in results:
+             if isinstance(result, list):
+                 for record in result:
+                     existing_records[record["id"]] = record
+
+         return existing_records
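+
+     # The generated CQL for one fetch batch looks like this (hypothetical ids;
+     # real values are record UUIDs):
+     #
+     #     id==(id-1 OR id-2 OR id-3)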
+
+     @staticmethod
+     def set_consortium_source(record: dict) -> None:
+         """
+         Convert source field for consortium shadow instances.
+
+         For shadow instances in ECS/consortium environments, the source field
+         must be prefixed with "CONSORTIUM-" to distinguish them from local records.
+
+         Args:
+             record: The record to modify (modified in place)
+         """
+         source = record.get("source", "")
+         if source == "MARC":
+             record["source"] = "CONSORTIUM-MARC"
+         elif source == "FOLIO":
+             record["source"] = "CONSORTIUM-FOLIO"
+
+     async def set_versions_for_upsert(self, batch: List[dict]) -> None:
+         """
+         Fetch existing record versions and prepare batch for upsert.
+
+         Only records that already exist in FOLIO will have their _version set
+         and be prepared for update. New records will not have _version set.
+
+         Args:
+             batch: List of records to prepare for upsert
+         """
+         # Extract record IDs
+         record_ids = [record["id"] for record in batch if "id" in record]
+
+         if not record_ids:
+             return
+
+         # Fetch existing records
+         existing_records = await self.fetch_existing_records(record_ids)
+
+         # Only prepare records that already exist
+         for record in batch:
+             if "id" in record and record["id"] in existing_records:
+                 self.prepare_record_for_upsert(record, existing_records[record["id"]])
+
+     async def post_batch(self, batch: List[dict]) -> tuple[httpx.Response, int, int]:
+         """
+         Post a batch of records to FOLIO.
+
+         Args:
+             batch: List of records to post
+
+         Returns:
+             Tuple of (HTTP response, number of creates, number of updates)
+
+         Raises:
+             folioclient.FolioClientError: If FOLIO API returns an error
+             folioclient.FolioConnectionError: If connection to FOLIO fails
+         """
+         # Track creates vs updates before posting
+         num_creates = 0
+         num_updates = 0
+
+         # For ShadowInstances, convert source to consortium format
+         if self.config.object_type == "ShadowInstances":
+             for record in batch:
+                 self.set_consortium_source(record)
+
+         # If upsert mode, set versions and track which are updates
+         if self.config.upsert:
+             await self.set_versions_for_upsert(batch)
+             # Count records with _version as updates, others as creates
+             for record in batch:
+                 if "_version" in record:
+                     num_updates += 1
+                 else:
+                     num_creates += 1
+         else:
+             # In create-only mode, all are creates
+             num_creates = len(batch)
+
+         # Prepare payload
+         object_name = self.api_info["object_name"]
+         payload = {object_name: batch}
+
+         # Prepare query parameters
+         query_params = {}
+         if self.config.upsert:
+             query_params["upsert"] = "true"
+
+         # Make the request
+         api_endpoint = self.api_info["api_endpoint"]
+
+         response = await self.folio_client.async_httpx_client.post(
+             api_endpoint, json=payload, params=query_params
+         )
+         response.raise_for_status()
+         logger.info(
+             "Posting successful! Total rows: %s Total failed: %s in %ss "
+             "Batch size: %s Request size: %s",
+             self.stats.records_processed,
+             self.stats.records_failed,
+             response.elapsed.total_seconds(),
+             len(batch),
+             get_req_size(response),
+         )
+         self.stats.records_posted += len(batch)
+         self.stats.batches_posted += 1
+
+         return response, num_creates, num_updates
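+
+     # The request body wraps the batch under the storage module's collection key.
+     # For Items in upsert mode, the call above amounts to (hypothetical records):
+     #
+     #     POST /item-storage/batch/synchronous?upsert=true
+     #     {"items": [{"id": "item-1", "_version": 2}, {"id": "item-2"}]}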
+
+     async def post_records(self, records) -> None:
+         """
+         Post records in batches.
+
+         Failed records will be written to the file handle provided during initialization.
+
+         Args:
+             records: Records to post. Can be:
+                 - List of dict records
+                 - File-like object containing JSON lines (one record per line)
+                 - String/Path to a file containing JSON lines
+         """
+         # Normalize input to an iterator
+         if isinstance(records, (str, Path)):
+             # It's a file path
+             record_iterator = self._read_records_from_path(records)
+         elif hasattr(records, "read"):
+             # It's a file-like object
+             record_iterator = self._read_records_from_file_handle(records)
+         elif isinstance(records, list):
+             # It's already a list - wrap in an iterator
+             record_iterator = iter(records)
+         else:
+             raise TypeError(
+                 f"records must be a list, file path, or file-like object, got {type(records)}"
+             )
+
+         # Process records in batches
+         batch = []
+         for record in record_iterator:
+             batch.append(record)
+
+             # Post when batch is full
+             if len(batch) >= self.config.batch_size:
+                 await self._post_single_batch(batch)
+                 batch = []
+
+         # Post any remaining records
+         if batch:
+             await self._post_single_batch(batch)
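+
+     # All three accepted input shapes funnel through the same batching loop,
+     # e.g. (awaited from async code; "poster" and the path are hypothetical):
+     #
+     #     await poster.post_records([{"id": "item-1"}, {"id": "item-2"}])
+     #     await poster.post_records("items.jsonl")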
+
+     def _read_records_from_path(self, file_path: Union[str, Path]) -> Generator[dict, None, None]:
+         """
+         Generator that yields records from a file path.
+
+         Args:
+             file_path: Path to file containing JSON lines
+
+         Yields:
+             Parsed record dictionaries
+         """
+         file_path = Path(file_path)
+         if not file_path.exists():
+             raise FileNotFoundError(f"Input file not found: {file_path}")
+
+         logger.info(f"Reading records from {file_path}")
+
+         with open(file_path, "r", encoding="utf-8") as f:
+             yield from self._read_records_from_file_handle(f)
+
+     def _read_records_from_file_handle(self, file_handle) -> Generator[dict, None, None]:
+         """
+         Generator that yields records from a file handle.
+
+         If a line cannot be parsed as JSON, writes the problematic line and all
+         remaining lines to the failed records file (if configured) before raising
+         an exception.
+
+         Args:
+             file_handle: File-like object containing JSON lines
+
+         Yields:
+             Parsed record dictionaries
+
+         Raises:
+             ValueError: If a line cannot be parsed as JSON
+         """
+         for line_number, original_line in enumerate(file_handle, start=1):
+             line = original_line.strip()
+             if not line:
+                 continue
+
+             try:
+                 record = self._parse_json_line(line, line_number)
+                 yield record
+             except ValueError:
+                 # Write the failed line to failed records file
+                 if self._failed_records_file_handle:
+                     self._failed_records_file_handle.write(original_line)
+                     # Write all remaining lines as-is
+                     for remaining_line in file_handle:
+                         self._failed_records_file_handle.write(remaining_line)
+
+                     self._failed_records_file_handle.flush()
+
+                 # Re-raise the exception
+                 raise
+
+     async def _post_single_batch(self, batch: List[dict]) -> None:
+         """
+         Post a single batch with error handling.
+
+         Args:
+             batch: List of records to post
+         """
+         self.stats.records_processed += len(batch)
+
+         try:
+             _, num_creates, num_updates = await self.post_batch(batch)
+
+             # Success - update stats
+             self.stats.records_created += num_creates
+             self.stats.records_updated += num_updates
+             # Update progress bar if available
+             if hasattr(self, "reporter") and hasattr(self, "task_id"):
+                 self.reporter.update_task(
+                     self.task_id,
+                     advance=len(batch),
+                     posted=self.stats.records_posted,
+                     created=self.stats.records_created,
+                     updated=self.stats.records_updated,
+                     failed=self.stats.records_failed,
+                 )
+
+         except folioclient.FolioClientError as e:
+             logger.error(f"Batch failed: {e} - {e.response.text}")
+             self.stats.records_failed += len(batch)
+             self._write_failed_batch(batch)
+
+             # Update progress bar if available
+             if hasattr(self, "reporter") and hasattr(self, "task_id"):
+                 self.reporter.update_task(
+                     self.task_id,
+                     advance=len(batch),
+                     posted=self.stats.records_posted,
+                     created=self.stats.records_created,
+                     updated=self.stats.records_updated,
+                     failed=self.stats.records_failed,
+                 )
+
+         except folioclient.FolioConnectionError as e:
+             logger.error(f"Batch failed due to connection error: {e}")
+             self.stats.records_failed += len(batch)
+             self._write_failed_batch(batch)
+
+             # Update progress bar if available
+             if hasattr(self, "reporter") and hasattr(self, "task_id"):
+                 self.reporter.update_task(
+                     self.task_id,
+                     advance=len(batch),
+                     posted=self.stats.records_posted,
+                     created=self.stats.records_created,
+                     updated=self.stats.records_updated,
+                     failed=self.stats.records_failed,
+                 )
+
+         except Exception as e:
+             logger.error(f"Unexpected error during batch post: {e}")
+             if hasattr(e, "request"):
+                 logger.debug(f"DEBUG: {e.request}, {e.request.content}")
+             self.stats.records_failed += len(batch)
+             self._write_failed_batch(batch)
+
+             # Update progress bar if available
+             if hasattr(self, "reporter") and hasattr(self, "task_id"):
+                 self.reporter.update_task(
+                     self.task_id,
+                     advance=len(batch),
+                     posted=self.stats.records_posted,
+                     created=self.stats.records_created,
+                     updated=self.stats.records_updated,
+                     failed=self.stats.records_failed,
+                 )
+
+     def _parse_json_line(self, line: str, line_number: int) -> dict:
+         """
+         Parse a JSON line, handling both plain and tab-delimited formats.
+
+         Args:
+             line: Line to parse
+             line_number: Line number for error reporting
+
+         Returns:
+             Parsed record dictionary
+
+         Raises:
+             ValueError: If the line cannot be parsed as JSON
+         """
+         try:
+             # Handle both plain JSON and tab-delimited format
+             # (tab-delimited: last field is the JSON)
+             json_str = line.split("\t")[-1] if "\t" in line else line
+             return json.loads(json_str)
+         except json.JSONDecodeError as e:
+             raise ValueError(
+                 f"Invalid JSON at line {line_number}: {e}. Line content: {line[:100]}"
+             ) from e
+         except Exception as e:
+             raise ValueError(f"Error processing line {line_number}: {e}") from e
+
+     async def do_work(
+         self,
+         file_paths: Union[str, Path, List[Union[str, Path]]],
+     ) -> BatchPosterStats:
+         """
+         Main orchestration method for processing files.
+
+         This is the primary entry point for batch posting from files. It handles:
+
+         - Single or multiple file processing
+         - Progress tracking and logging
+         - Failed record collection
+         - Statistics reporting
+
+         Mimics the folio_migration_tools BatchPoster.do_work() workflow.
+
+         Note:
+             To write failed records, pass a file handle or path to the
+             BatchPoster constructor's ``failed_records_file`` parameter.
+
+         Args:
+             file_paths: Path(s) to JSONL file(s) to process
+
+         Returns:
+             Final statistics from the posting operation
+
+         Example::
+
+             config = BatchPoster.Config(
+                 object_type="Items",
+                 batch_size=100,
+                 upsert=True
+             )
+
+             reporter = RichProgressReporter(enabled=True)
+
+             # With failed records file
+             with open("failed_items.jsonl", "w") as failed_file:
+                 poster = BatchPoster(
+                     folio_client, config,
+                     failed_records_file=failed_file,
+                     reporter=reporter
+                 )
+                 async with poster:
+                     stats = await poster.do_work(["items1.jsonl", "items2.jsonl"])
+
+             # Or let BatchPoster manage the file
+             poster = BatchPoster(
+                 folio_client, config,
+                 failed_records_file="failed_items.jsonl",
+                 reporter=reporter
+             )
+             async with poster:
+                 stats = await poster.do_work("items.jsonl")
+
+             print(f"Posted: {stats.records_posted}, Failed: {stats.records_failed}")
+
+         """
+         # Reset statistics
+         self.stats = BatchPosterStats()
+
+         # Normalize file_paths to list
+         if isinstance(file_paths, (str, Path)):
+             files_to_process = [Path(file_paths)]
+         else:
+             files_to_process = [Path(p) for p in file_paths]
+
+         # Log start
+         logger.info(
+             "Starting batch posting of %d file(s) with batch_size=%d",
+             len(files_to_process),
+             self.config.batch_size,
+         )
+         logger.info("Object type: %s", self.config.object_type)
+         logger.info("Upsert mode: %s", "On" if self.config.upsert else "Off")
+         if self.config.upsert:
+             logger.info(
+                 "Preservation settings: statistical_codes=%s, administrative_notes=%s, "
+                 "temporary_locations=%s, temporary_loan_types=%s",
+                 self.config.preserve_statistical_codes,
+                 self.config.preserve_administrative_notes,
+                 self.config.preserve_temporary_locations,
+                 self.config.preserve_temporary_loan_types,
+             )
+
+         # Count total lines across all files for progress bar
+         total_lines = 0
+         for file_path in files_to_process:
+             with open(file_path, "rb") as f:
+                 total_lines += sum(
+                     buf.count(b"\n") for buf in iter(lambda: f.read(1024 * 1024), b"")
+                 )
+
+         # Set up progress reporting
+         with self.reporter:
+             self.task_id = self.reporter.start_task(
+                 f"posting_{self.config.object_type}",
+                 total=total_lines,
+                 description=f"Posting {self.config.object_type}",
+             )
+
+             # Process each file
+             for idx, file_path in enumerate(files_to_process, start=1):
+                 logger.info(
+                     "Processing file %d of %d: %s",
+                     idx,
+                     len(files_to_process),
+                     file_path.name,
+                 )
+
+                 try:
+                     await self.post_records(file_path)
+                 except Exception as e:
+                     logger.error("Error processing file %s: %s", file_path, e, exc_info=True)
+                     raise
+
+         return self.stats
+
+     async def rerun_failed_records_one_by_one(self) -> None:
+         """
+         Reprocess failed records one at a time.
+
+         Streams through the failed records file, processing each record
+         individually. Records that still fail are written to a new file
+         with '_rerun' suffix. This gives each record a second chance
+         with individual error handling.
+         """
+         if not self._failed_records_path or not self._failed_records_path.exists():
+             logger.warning("No failed records file to rerun")
+             return
+
+         # Close the file handle if we own it
+         if self._owns_file_handle and self._failed_records_file_handle:
+             self._failed_records_file_handle.close()
+             self._failed_records_file_handle = None
+
+         # Count records first for logging
+         record_count = self._count_lines_in_file(self._failed_records_path)
+         if record_count == 0:
+             logger.info("No failed records to rerun")
+             return
+
+         # Create new file for rerun failures with _rerun suffix
+         rerun_failed_path = self._failed_records_path.with_stem(
+             f"{self._failed_records_path.stem}_rerun"
+         )
+
+         logger.info("=" * 60)
+         logger.info("Rerunning %d failed records one at a time...", record_count)
+         logger.info("=" * 60)
+
+         # Stream through failed records and process one at a time
+         rerun_success = 0
+         rerun_failed = 0
+
+         # Wrap in reporter context for progress display
+         with self.reporter:
+             # Start a new progress task for the rerun
+             rerun_task_id = self.reporter.start_task(
+                 f"rerun_{self.config.object_type}",
+                 total=record_count,
+                 description=f"Rerunning failed {self.config.object_type}",
+             )
+
+             with (
+                 open(self._failed_records_path, "r", encoding="utf-8") as infile,
+                 open(rerun_failed_path, "w", encoding="utf-8") as outfile,
+             ):
+                 for line in infile:
+                     line = line.strip()
+                     if not line:
+                         continue
+
+                     try:
+                         record = json.loads(line)
+                     except json.JSONDecodeError:
+                         logger.warning("Could not parse failed record line: %s", line[:100])
+                         outfile.write(line + "\n")
+                         rerun_failed += 1
+                         self.reporter.update_task(
+                             rerun_task_id,
+                             advance=1,
+                             succeeded=rerun_success,
+                             failed=rerun_failed,
+                         )
+                         continue
+
+                     record_id = record.get("id", "unknown")
+                     try:
+                         await self.post_batch([record])
+                         rerun_success += 1
+                         logger.debug("Rerun success for record %s", record_id)
+                     except Exception as e:
+                         outfile.write(json.dumps(record) + "\n")
+                         rerun_failed += 1
+
+                         logger.debug("Rerun failed for record %s: %s", record_id, e)
+
+                     self.reporter.update_task(
+                         rerun_task_id,
+                         advance=1,
+                         succeeded=rerun_success,
+                         failed=rerun_failed,
+                     )
+
+             # Finish the rerun task
+             self.reporter.finish_task(rerun_task_id)
+
+         # Store rerun results in stats for final reporting
+         self.stats.rerun_succeeded = rerun_success
+         self.stats.rerun_still_failed = rerun_failed
+
+         logger.info("Rerun complete: %d succeeded, %d still failing", rerun_success, rerun_failed)
+         if rerun_failed > 0:
+             logger.info("Still-failing records written to: %s", rerun_failed_path)
+         else:
+             # Remove empty rerun file
+             rerun_failed_path.unlink(missing_ok=True)
+
+     def _count_lines_in_file(self, file_path: Path) -> int:
+         """Count lines in a file using efficient binary newline counting."""
+         with open(file_path, "rb") as f:
+             return sum(buf.count(b"\n") for buf in iter(lambda: f.read(1024 * 1024), b""))
+
+     def get_stats(self) -> BatchPosterStats:
+         """
+         Get current posting statistics.
+
+         Returns:
+             Current statistics
+         """
+         return self.stats
+
+
+ def get_human_readable_size(size: int, precision: int = 2) -> str:
+     """
+     Convert bytes to human-readable format.
+
+     Args:
+         size: Size in bytes
+         precision: Number of decimal places
+
+     Returns:
+         Human-readable size string
+     """
+     suffixes = ["B", "KB", "MB", "GB", "TB"]
+     suffix_index = 0
+     size_float = float(size)
+
+     while size_float >= 1024 and suffix_index < len(suffixes) - 1:
+         suffix_index += 1
+         size_float = size_float / 1024.0
+
+     return f"{size_float:.{precision}f}{suffixes[suffix_index]}"
+
+
+ def get_req_size(response: httpx.Response) -> str:
+     """
+     Approximate the size of the HTTP request that produced a response.
+
+     Reconstructs the request line, headers, and body as text, so the result is
+     an estimate of the bytes sent rather than an exact wire measurement.
+     """
+     size = response.request.method
+     size += str(response.request.url)
+     size += "\r\n".join(f"{k}: {v}" for k, v in response.request.headers.items())
+     size += response.request.content.decode("utf-8")
+     return get_human_readable_size(len(size.encode("utf-8")))
+
+
+ app = cyclopts.App(default_parameter=cyclopts.Parameter(negative=()))
+
+
+ @app.default
+ def main(
+     config_file: Annotated[
+         Path | None, cyclopts.Parameter(group="Job Configuration Parameters")
+     ] = None,
+     *,
+     gateway_url: Annotated[
+         str | None,
+         cyclopts.Parameter(
+             env_var="FOLIO_GATEWAY_URL",
+             show_env_var=True,
+             group="FOLIO Connection Parameters",
+         ),
+     ] = None,
+     tenant_id: Annotated[
+         str | None,
+         cyclopts.Parameter(
+             env_var="FOLIO_TENANT_ID", show_env_var=True, group="FOLIO Connection Parameters"
+         ),
+     ] = None,
+     username: Annotated[
+         str | None,
+         cyclopts.Parameter(
+             env_var="FOLIO_USERNAME", show_env_var=True, group="FOLIO Connection Parameters"
+         ),
+     ] = None,
+     password: Annotated[
+         str | None,
+         cyclopts.Parameter(
+             env_var="FOLIO_PASSWORD", show_env_var=True, group="FOLIO Connection Parameters"
+         ),
+     ] = None,
+     member_tenant_id: Annotated[
+         str | None,
+         cyclopts.Parameter(
+             env_var="FOLIO_MEMBER_TENANT_ID",
+             show_env_var=True,
+             group="FOLIO Connection Parameters",
+         ),
+     ] = None,
+     object_type: Annotated[
+         Literal["Instances", "Holdings", "Items", "ShadowInstances"] | None,
+         cyclopts.Parameter(group="Job Configuration Parameters"),
+     ] = None,
+     file_paths: Annotated[
+         tuple[Path, ...] | None,
+         cyclopts.Parameter(
+             name=["--file-paths", "--file-path"],
+             help="Path(s) to JSONL file(s). Accepts multiple values and glob patterns.",
+             group="Job Configuration Parameters",
+         ),
+     ] = None,
+     batch_size: Annotated[
+         int,
+         cyclopts.Parameter(group="Job Configuration Parameters"),
+     ] = 100,
+     upsert: Annotated[
+         bool,
+         cyclopts.Parameter(group="Job Configuration Parameters"),
+     ] = False,
+     preserve_statistical_codes: Annotated[
+         bool,
+         cyclopts.Parameter(group="Job Configuration Parameters: --upsert options"),
+     ] = False,
+     preserve_administrative_notes: Annotated[
+         bool,
+         cyclopts.Parameter(group="Job Configuration Parameters: --upsert options"),
+     ] = False,
+     preserve_temporary_locations: Annotated[
+         bool,
+         cyclopts.Parameter(group="Job Configuration Parameters: --upsert options"),
+     ] = False,
+     preserve_temporary_loan_types: Annotated[
+         bool,
+         cyclopts.Parameter(group="Job Configuration Parameters: --upsert options"),
+     ] = False,
+     overwrite_item_status: Annotated[
+         bool,
+         cyclopts.Parameter(group="Job Configuration Parameters: --upsert options"),
+     ] = False,
+     patch_existing_records: Annotated[
+         bool,
+         cyclopts.Parameter(group="Job Configuration Parameters: --upsert options"),
+     ] = False,
+     patch_paths: Annotated[
+         str | None,
+         cyclopts.Parameter(
+             help=(
+                 "Comma-separated list of field paths to patch during upsert (e.g., barcode,status)"
+             ),
+             group="Job Configuration Parameters: --upsert options",
+         ),
+     ] = None,
+     failed_records_file: Annotated[
+         Path | None,
+         cyclopts.Parameter(group="Job Configuration Parameters"),
+     ] = None,
+     rerun_failed_records: Annotated[
+         bool,
+         cyclopts.Parameter(
+             help="After the main run, reprocess failed records one at a time.",
+             group="Job Configuration Parameters",
+         ),
+     ] = False,
+     no_progress: Annotated[
+         bool,
+         cyclopts.Parameter(group="Job Configuration Parameters"),
+     ] = False,
+     debug: Annotated[
+         bool,
+         cyclopts.Parameter(
+             name=["--debug"], group="General Parameters", help="Enable debug logging"
+         ),
+     ] = False,
+ ) -> None:
+     """
+     Command-line interface to batch post inventory records to FOLIO.
+
+     Parameters:
+         config_file: Path to JSON config file (overrides CLI parameters).
+         gateway_url: The FOLIO API Gateway URL.
+         tenant_id: The tenant id.
+         username: The FOLIO username.
+         password: The FOLIO password.
+         member_tenant_id: The FOLIO ECS member tenant id (if applicable).
+         object_type: Type of inventory object (Instances, Holdings, Items, or ShadowInstances).
+         file_paths: Path(s) to JSONL file(s) to post.
+         batch_size: Number of records to include in each batch (1-1000).
+         upsert: Enable upsert mode to update existing records.
+         preserve_statistical_codes: Preserve existing statistical codes during upsert.
+         preserve_administrative_notes: Preserve existing administrative notes during upsert.
+         preserve_temporary_locations: Preserve temporary location assignments during upsert.
+         preserve_temporary_loan_types: Preserve temporary loan type assignments during upsert.
+         overwrite_item_status: Overwrite item status during upsert.
+         patch_existing_records: Enable selective field patching during upsert.
+         patch_paths: Comma-separated list of field paths to patch.
+         failed_records_file: Path to file for writing failed records.
+         rerun_failed_records: After the main run, reprocess failed records one at a time.
+         no_progress: Disable progress bar display.
+         debug: Enable debug logging.
+     """
+     set_up_cli_logging(logger, "folio_batch_poster", debug)
+
+     gateway_url, tenant_id, username, password = get_folio_connection_parameters(
+         gateway_url, tenant_id, username, password
+     )
+     folio_client = folioclient.FolioClient(gateway_url, tenant_id, username, password)
+
+     if member_tenant_id:
+         folio_client.tenant_id = member_tenant_id
+
+     # Handle file path expansion
+     expanded_file_paths = expand_file_paths(file_paths)
+
+     expanded_file_paths.sort()
+
+     # Parse patch_paths if provided
+     patch_paths_list = parse_patch_paths(patch_paths)
+
+     # Validate rerun_failed_records requires failed_records_file
+     if rerun_failed_records and not failed_records_file:
+         logger.critical("--rerun-failed-records requires --failed-records-file to be set")
+         sys.exit(1)
+
+     try:
+         if config_file:
+             config, files_to_process = parse_config_file(config_file)
+         else:
+             if not object_type:
+                 logger.critical("--object-type is required when not using a config file")
+                 sys.exit(1)
+
+             if not expanded_file_paths:
+                 logger.critical("No files found to process. Exiting.")
+                 sys.exit(1)
+
+             config = BatchPoster.Config(
+                 object_type=object_type,
+                 batch_size=batch_size,
+                 upsert=upsert,
+                 preserve_statistical_codes=preserve_statistical_codes,
+                 preserve_administrative_notes=preserve_administrative_notes,
+                 preserve_temporary_locations=preserve_temporary_locations,
+                 preserve_temporary_loan_types=preserve_temporary_loan_types,
+                 preserve_item_status=not overwrite_item_status,
+                 patch_existing_records=patch_existing_records,
+                 patch_paths=patch_paths_list,
+                 rerun_failed_records=rerun_failed_records,
+                 no_progress=no_progress,
+             )
+             files_to_process = expanded_file_paths
+
+         logger.info(f"Processing {len(files_to_process)} file(s)")
+         asyncio.run(run_batch_poster(folio_client, config, files_to_process, failed_records_file))
+
+     except Exception as e:
+         logger.critical(f"An error occurred: {e}", exc_info=True)
+         sys.exit(1)
+
+
+ def parse_config_file(config_file):
+     """Load a BatchPoster.Config and file list from a JSON config file."""
+     with open(config_file, "r", encoding="utf-8") as f:
+         config_data = json.load(f)
+     # Convert file_paths if present in config
+     if "file_paths" in config_data:
+         config_data["file_paths"] = [Path(p) for p in config_data["file_paths"]]
+     config = BatchPoster.Config(**config_data)
+     files_to_process = config_data.get("file_paths", [])
+     return config, files_to_process
+
+
+ def parse_patch_paths(patch_paths):
+     """Split a comma-separated patch-paths string into a list, or return None."""
+     patch_paths_list = None
+     if patch_paths:
+         patch_paths_list = [p.strip() for p in patch_paths.split(",") if p.strip()]
+     return patch_paths_list
+
+
+ def expand_file_paths(file_paths):
+     """Expand glob patterns in the given paths; plain paths pass through unchanged."""
+     expanded_paths: List[Path] = []
+     if file_paths:
+         for file_path in file_paths:
+             file_path_str = str(file_path)
+             if any(char in file_path_str for char in ["*", "?", "["]):
+                 # It's a glob pattern - expand it
+                 expanded = glob_module.glob(file_path_str)
+                 expanded_paths.extend([Path(x) for x in expanded])
+             else:
+                 # It's a regular path
+                 expanded_paths.append(file_path)
+     return expanded_paths
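+
+ # For example, with hypothetical files items1.jsonl and items2.jsonl in the
+ # working directory:
+ #
+ #     expand_file_paths([Path("items*.jsonl"), Path("extra.jsonl")])
+ #     # -> the two matched item files plus Path("extra.jsonl")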
+
+
+ async def run_batch_poster(
+     folio_client: FolioClient,
+     config: "BatchPoster.Config",
+     files_to_process: List[Path],
+     failed_records_file: Path | None,
+ ):
+     """
+     Run the batch poster operation.
+
+     Args:
+         folio_client: Authenticated FOLIO client
+         config: BatchPoster configuration
+         files_to_process: List of file paths to process
+         failed_records_file: Optional path for failed records
+     """
+     async with folio_client:
+         try:
+             # Create progress reporter
+             reporter = (
+                 NoOpProgressReporter()
+                 if config.no_progress
+                 else RichProgressReporter(show_speed=True, show_time=True)
+             )
+
+             poster = BatchPoster(
+                 folio_client, config, failed_records_file=failed_records_file, reporter=reporter
+             )
+             async with poster:
+                 await poster.do_work(files_to_process)
+
+                 # If rerun_failed_records is enabled and there are failures, reprocess them
+                 if config.rerun_failed_records and poster.stats.records_failed > 0:
+                     await poster.rerun_failed_records_one_by_one()
+
+             log_final_stats(poster)
+
+         except Exception as e:
+             logger.critical(f"Batch posting failed: {e}", exc_info=True)
+             raise
+
+
+ def log_final_stats(poster: BatchPoster) -> None:
+     """
+     Log the final statistics after batch posting.
+
+     Args:
+         poster: The BatchPoster instance containing the stats
+     """
+     # Log final statistics
+     logger.info("=" * 60)
+     logger.info("Batch posting complete!")
+     logger.info("=" * 60)
+     total_processed = poster.stats.records_processed
+     logger.info("Total records processed: %d", total_processed)
+     logger.info("Records posted successfully: %d", poster.stats.records_posted)
+     logger.info("Records created: %d", poster.stats.records_created)
+     logger.info("Records updated: %d", poster.stats.records_updated)
+     logger.info("Records failed: %d", poster.stats.records_failed)
+     logger.info("Total batches posted: %d", poster.stats.batches_posted)
+     logger.info("Total batches failed: %d", poster.stats.batches_failed)
+     if poster.config.rerun_failed_records:
+         logger.info("Rerun succeeded: %d", poster.stats.rerun_succeeded)
+         logger.info("Rerun still failed: %d", poster.stats.rerun_still_failed)
+     if poster._failed_records_path:
+         logger.info("Failed records written to: %s", poster._failed_records_path)
+
+
+ if __name__ == "__main__":
+     app()