folio-data-import 0.5.0b3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,737 @@
1
+ """Progress reporting abstraction for FOLIO data import tasks.
2
+
3
+ This module provides a UI-agnostic progress reporting system that can be used
4
+ across all import tasks (BatchPoster, UserImport, MARCDataImport, etc.) with
5
+ support for multiple simultaneous tasks and easy backend swapping.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import json
11
+ import logging
12
+ from abc import ABC, abstractmethod
13
+ from datetime import datetime, timezone
14
+ from enum import Enum
15
+ from typing import Any, Protocol
16
+ from uuid import uuid4
17
+
18
+ from rich.progress import (
19
+ BarColumn,
20
+ MofNCompleteColumn,
21
+ Progress,
22
+ ProgressColumn,
23
+ SpinnerColumn,
24
+ Task,
25
+ TimeElapsedColumn,
26
+ TimeRemainingColumn,
27
+ )
28
+ from rich.text import Text
29
+
30
+ logger = logging.getLogger(__name__)
31
+
32
+ # Optional Redis support
33
+ try:
34
+ import redis
35
+
36
+ REDIS_AVAILABLE = True
37
+ except ImportError:
38
+ REDIS_AVAILABLE = False
39
+ redis = None # type: ignore[assignment]
40
+
41
+
42
+ # =============================================================================
43
+ # Core Abstractions
44
+ # =============================================================================
45
+
46
+
47
class TaskStatus(Enum):
    """Lifecycle states a progress task can be in.

    Each member's value is the lowercase string form of its name.
    """

    # Declaration order matters: iterating the enum yields members in
    # exactly this sequence.
    PENDING = "pending"
    RUNNING = "running"
    COMPLETED = "completed"
    FAILED = "failed"
54
+
55
+
56
class ProgressReporter(Protocol):
    """Structural interface that every progress reporter satisfies.

    Business logic should depend on this protocol only, so that the concrete
    front end (CLI, GUI, web) can be exchanged without touching the import
    code itself.
    """

    def start_task(
        self,
        name: str,
        total: int | None = None,
        description: str | None = None,
    ) -> str:
        """Register a new task and begin tracking it.

        Args:
            name: Unique identifier for the task.
            total: Number of items expected; ``None`` means indeterminate.
            description: Human-readable label for the task.

        Returns:
            The task ID to pass to ``update_task`` and ``finish_task``.
        """
        ...

    def update_task(
        self,
        task_id: str,
        advance: int = 0,
        total: int | None = None,
        description: str | None = None,
        **stats: Any,
    ) -> None:
        """Report progress and statistics for a task that was already started.

        Args:
            task_id: ID of the task being updated.
            advance: How many items to move the task forward by.
            total: Replacement total, when it has changed.
            description: Replacement description, when it has changed.
            **stats: Extra statistics to track (created, updated, failed, ...).
        """
        ...

    def finish_task(self, task_id: str, status: TaskStatus = TaskStatus.COMPLETED) -> None:
        """Close out a task.

        Args:
            task_id: ID of the task being finished.
            status: The final status to record for the task.
        """
        ...

    def is_active(self) -> bool:
        """Tell whether progress reporting is currently active."""
        ...
112
+
113
+
114
class BaseProgressReporter(ABC):
    """Shared foundation for concrete progress reporters.

    Holds the enabled/active flags and the per-task statistics dictionary,
    and declares the methods each backend has to implement.
    """

    def __init__(self, enabled: bool = True) -> None:
        """Set up the common reporter state.

        Args:
            enabled: Whether progress reporting is enabled.
        """
        self._enabled = enabled
        # Keyed by task ID; each value is that task's statistics dictionary.
        self._tasks: dict[str, dict[str, Any]] = {}
        self._active = False

    @abstractmethod
    def start_task(
        self,
        name: str,
        total: int | None = None,
        description: str | None = None,
    ) -> str:
        """Begin tracking a new task and return its ID."""
        ...

    @abstractmethod
    def update_task(
        self,
        task_id: str,
        advance: int = 0,
        total: int | None = None,
        description: str | None = None,
        **stats: Any,
    ) -> None:
        """Apply a progress/statistics update to a known task."""
        ...

    @abstractmethod
    def finish_task(self, task_id: str, status: TaskStatus = TaskStatus.COMPLETED) -> None:
        """Record the final status of a task."""
        ...

    def is_active(self) -> bool:
        """Report whether the reporter is both enabled and active."""
        return self._enabled and self._active

    def get_stats(self, task_id: str) -> dict[str, Any] | None:
        """Return the statistics recorded for ``task_id``, or ``None`` if unknown."""
        return self._tasks.get(task_id)

    def _update_stat_dict(self, stat_dict: dict[str, Any], stats: dict[str, Any]) -> None:
        """Fold ``stats`` into ``stat_dict`` in place.

        Args:
            stat_dict: The dictionary that receives the values.
            stats: New values; a key that already exists has the value added
                to it, a new key is simply stored.
        """
        for stat_name, amount in stats.items():
            if stat_name not in stat_dict:
                stat_dict[stat_name] = amount
                continue
            stat_dict[stat_name] += amount

    @abstractmethod
    def __enter__(self) -> BaseProgressReporter:
        """Start the reporting context."""
        ...

    @abstractmethod
    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Tear down the reporting context."""
        ...
187
+
188
+
189
+ # =============================================================================
190
+ # Rich CLI Implementation
191
+ # =============================================================================
192
+
193
+
194
class ItemsPerSecondColumn(ProgressColumn):
    """Progress column that shows task throughput in records per second."""

    def render(self, task: Task) -> Text:
        """Return ``<n>rec/s`` for the task, or ``?`` while no speed is known."""
        speed = task.speed
        label = "?" if speed is None else f"{speed:.0f}rec/s"
        return Text(label, style="progress.data.speed")
201
+
202
+
203
class UserStatsColumn(ProgressColumn):
    """Progress column showing created/updated/failed counters for a task."""

    def render(self, task: Task) -> Text:
        """Build ``(Created: n | Updated: n | Failed: n)`` from the task fields.

        A counter that is absent from ``task.fields`` is shown as 0.
        """
        fields = task.fields
        segments = (
            (f"Created: {fields.get('created', 0)}", "green"),
            (f"Updated: {fields.get('updated', 0)}", "cyan"),
            (f"Failed: {fields.get('failed', 0)}", "red"),
        )

        text = Text("(")
        for position, (segment, style) in enumerate(segments):
            if position:
                # Unstyled separator between neighbouring counters.
                text.append(" | ")
            text.append(segment, style=style)
        text.append(")")
        return text
219
+
220
+
221
class BatchPosterStatsColumn(ProgressColumn):
    """Progress column with the counters used by batch posting operations."""

    def render(self, task: Task) -> Text:
        """Build ``(Posted: n | Created: n | Updated: n | Failed: n)``.

        A counter that is absent from ``task.fields`` is shown as 0.
        """
        fields = task.fields
        segments = (
            (f"Posted: {fields.get('posted', 0)}", "bright_green"),
            (f"Created: {fields.get('created', 0)}", "green"),
            (f"Updated: {fields.get('updated', 0)}", "cyan"),
            (f"Failed: {fields.get('failed', 0)}", "red"),
        )

        text = Text("(")
        for position, (segment, style) in enumerate(segments):
            if position:
                # Unstyled separator between neighbouring counters.
                text.append(" | ")
            text.append(segment, style=style)
        text.append(")")
        return text
243
+
244
+
245
class GenericStatsColumn(ProgressColumn):
    """Progress column that shows whichever configured statistics are non-zero.

    ``stat_configs`` is a class attribute of ``(field key, label, style)``
    tuples. Change what is displayed either by subclassing or by assigning to
    / mutating the attribute directly.

    Example:
        # Customize via subclass
        class CustomStatsColumn(GenericStatsColumn):
            stat_configs = [
                ("imported", "Imported", "bright_blue"),
                ("skipped", "Skipped", "yellow"),
                ("failed", "Failed", "red"),
            ]

        # Or modify directly
        GenericStatsColumn.stat_configs.append(("custom_stat", "Custom", "magenta"))
    """

    # NOTE: this list lives on the class, so mutating it in place affects
    # every instance (and every subclass that does not override it).
    stat_configs: list[tuple[str, str, str]] = [
        ("posted", "Posted", "bright_green"),
        ("created", "Created", "green"),
        ("updated", "Updated", "cyan"),
        ("failed", "Failed", "red"),
        ("processed", "Processed", "blue"),
    ]

    def render(self, task: Task) -> Text:
        """Render the configured stats that are present and greater than zero.

        Returns an empty ``Text`` when nothing qualifies; otherwise a
        parenthesised list of entries separated by ``" | "``.
        """
        fields = task.fields
        visible = [
            (f"{label}: {fields[key]}", style)
            for key, label, style in self.stat_configs
            if key in fields and fields[key] > 0
        ]

        if not visible:
            return Text("")

        text = Text("(")
        needs_separator = False
        for stat_text, style in visible:
            if needs_separator:
                text.append(" | ")
            text.append(stat_text, style=style)
            needs_separator = True
        text.append(")")
        return text
290
+
291
+
292
class RichProgressReporter(BaseProgressReporter):
    """Rich terminal-based progress reporter.

    Drives a ``rich.progress.Progress`` display with support for several
    simultaneous tasks. The display exists only between ``__enter__`` and
    ``__exit__``; outside of that window (or when disabled) every method is
    a no-op.
    """

    def __init__(
        self,
        enabled: bool = True,
        show_speed: bool = True,
        show_time: bool = True,
    ) -> None:
        """Initialize the Rich progress reporter.

        Args:
            enabled: Whether progress reporting is enabled
            show_speed: Whether to show items/second
            show_time: Whether to show elapsed/remaining time
        """
        super().__init__(enabled)
        self._show_speed = show_speed
        self._show_time = show_time
        self._progress: Progress | None = None
        self._task_map: dict[str, Any] = {}  # Maps task names to Rich TaskIDs

    def _create_progress(self) -> Any:
        """Create a Rich Progress instance with the configured columns."""
        columns: list[Any] = [
            SpinnerColumn(),
            "[progress.description]{task.description}",
            BarColumn(),
            MofNCompleteColumn(),
        ]

        if self._show_speed:
            columns.append(ItemsPerSecondColumn())

        if self._show_time:
            columns.append(TimeElapsedColumn())
            columns.append(TimeRemainingColumn())

        columns.append(GenericStatsColumn())

        return Progress(*columns)

    def start_task(
        self,
        name: str,
        total: int | None = None,
        description: str | None = None,
    ) -> str:
        """Start a new progress task.

        Args:
            name: Task name; it doubles as the task ID returned to the caller
            total: Total number of items to process
            description: Label shown in the display (defaults to ``name``)

        Returns:
            ``name``, whether or not a Rich task was actually created
        """
        if not self._enabled or self._progress is None:
            return name

        desc = description or name
        # NOTE(review): a missing (or zero) total is shown to Rich as a
        # placeholder total of 100, while the internal record stores 0.
        task_id = self._progress.add_task(desc, total=total or 100)
        self._task_map[name] = task_id
        self._tasks[name] = {"total": total or 0, "completed": 0}

        return name

    def update_task(
        self,
        task_id: str,
        advance: int = 0,
        total: int | None = None,
        description: str | None = None,
        **stats: Any,
    ) -> None:
        """Update an existing task.

        Unknown task IDs are ignored, as is any call made while the reporter
        is disabled or has no live display.

        Args:
            task_id: ID returned by ``start_task``
            advance: Number of items to advance by (ignored unless > 0)
            total: New total, if it changed
            description: New description, if it changed
            **stats: Statistics forwarded to Rich as task fields and folded
                into the internal per-task record
        """
        if not self._enabled or self._progress is None or task_id not in self._task_map:
            return

        rich_task_id = self._task_map[task_id]

        update_kwargs = self._build_update_kwargs(advance, total, description, stats)
        if update_kwargs:
            self._progress.update(rich_task_id, **update_kwargs)

        self._update_internal_stats(task_id, advance, total, stats)

    def _build_update_kwargs(
        self,
        advance: int,
        total: int | None,
        description: str | None,
        stats: dict[str, Any],
    ) -> dict[str, Any]:
        """Build the keyword arguments for ``Progress.update``."""
        update_kwargs: dict[str, Any] = {}

        if advance > 0:
            update_kwargs["advance"] = advance
        if total is not None:
            update_kwargs["total"] = total
        if description is not None:
            update_kwargs["description"] = description

        # Statistics travel as Rich task fields.
        # NOTE(review): Rich replaces a field's value on each update, whereas
        # _update_internal_stats accumulates the same values - confirm which
        # of the two the callers expect.
        update_kwargs.update(stats)

        return update_kwargs

    def _update_internal_stats(
        self,
        task_id: str,
        advance: int,
        total: int | None,
        stats: dict[str, Any],
    ) -> None:
        """Mirror an update into the internal per-task statistics record."""
        if task_id not in self._tasks:
            return

        record = self._tasks[task_id]
        if advance > 0:
            record["completed"] += advance
        if total is not None:
            record["total"] = total

        self._update_stat_dict(record, stats)

    def _find_rich_task(self, rich_task_id: Any) -> Task | None:
        """Return the live Rich task with the given ID, or ``None``."""
        if self._progress is None:
            return None
        # Look the task up by ID instead of list position so the lookup does
        # not rely on Rich's task list never having entries removed.
        for candidate in self._progress.tasks:
            if candidate.id == rich_task_id:
                return candidate
        return None

    def finish_task(self, task_id: str, status: TaskStatus = TaskStatus.COMPLETED) -> None:
        """Finish a task.

        A completed task has its bar filled to the total; a failed task has
        its description rewritten in red with a cross marker.

        Args:
            task_id: ID returned by ``start_task``
            status: Final status of the task
        """
        if not self._enabled or self._progress is None or task_id not in self._task_map:
            return

        rich_task_id = self._task_map[task_id]

        # Resolve the Rich task up front: both branches below need it.
        # Previously it was only bound inside the COMPLETED branch, so
        # finishing a task as FAILED raised UnboundLocalError.
        task = self._find_rich_task(rich_task_id)
        if task is None:
            return

        if status == TaskStatus.COMPLETED:
            if task.total and task.completed < task.total:
                self._progress.update(rich_task_id, completed=task.total)
        elif status == TaskStatus.FAILED:
            self._progress.update(rich_task_id, description=f"[red]✗ {task.description}[/red]")

    def __enter__(self) -> RichProgressReporter:
        """Enter context manager, starting the live display when enabled."""
        if self._enabled:
            self._progress = self._create_progress()
            self._progress.start()

        self._active = True
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Exit context manager, stopping the display if one is running."""
        self._active = False

        if self._progress:
            self._progress.stop()
            self._progress = None

        # Rich task IDs are meaningless once the display is gone; the
        # internal statistics in self._tasks are left in place.
        self._task_map.clear()
454
+
455
+
456
class RedisProgressReporter(BaseProgressReporter):
    """Progress reporter that stores state in Redis for distributed access.

    The whole session is kept as one JSON document under
    ``progress:session:<session_id>`` and rewritten (with the TTL refreshed)
    on every change, so separate processes or API endpoints can read it.
    Requires the redis package to be installed.

    Example:
        >>> reporter = RedisProgressReporter(
        ...     redis_url="redis://localhost:6379",
        ...     session_id="import-123"
        ... )
        >>> with reporter:
        ...     task_id = reporter.start_task("users", total=100)
        ...     reporter.update_task(task_id, advance=10, created=5)
    """

    def __init__(
        self,
        enabled: bool = True,
        redis_url: str = "redis://localhost:6379",
        session_id: str | None = None,
        ttl: int = 3600,
    ) -> None:
        """Initialize the Redis progress reporter.

        Args:
            enabled: Whether progress reporting is enabled
            redis_url: Redis connection URL
            session_id: Unique identifier for this progress session
                (a random UUID is generated when omitted)
            ttl: Time-to-live for session data in seconds (default: 1 hour)

        Raises:
            ImportError: If redis package is not installed
        """
        super().__init__(enabled)

        if not REDIS_AVAILABLE:
            raise ImportError(
                "Redis support requires the redis package. Install with optional "
                "dependency: folio-data-import[redis]"
            )

        self._session_id = session_id or self._generate_session_id()
        self._ttl = ttl
        self._redis = redis.from_url(redis_url, decode_responses=True)
        self._session_key = f"progress:session:{self._session_id}"

        # Initialize session in Redis
        if self._enabled:
            self._init_session()

    def _generate_session_id(self) -> str:
        """Generate a unique session ID."""
        return str(uuid4())

    def _new_session_data(self) -> dict[str, Any]:
        """Build the skeleton document for a session that has no tasks yet."""
        now = self._get_timestamp()
        return {
            "session_id": self._session_id,
            "status": "pending",
            "tasks": {},
            "created_at": now,
            "updated_at": now,
        }

    def _load_session(self) -> dict[str, Any]:
        """Read the session document from Redis (``{}`` when it is missing)."""
        return json.loads(self._redis.get(self._session_key) or "{}")

    def _save_session(self, session_data: dict[str, Any]) -> None:
        """Write the session document back to Redis and refresh its TTL."""
        self._redis.set(self._session_key, json.dumps(session_data), ex=self._ttl)

    def _init_session(self) -> None:
        """Initialize session data in Redis."""
        self._save_session(self._new_session_data())

    def start_task(
        self,
        name: str,
        total: int | None = None,
        description: str | None = None,
    ) -> str:
        """Start a new progress task.

        The task is always recorded locally; it is published to Redis only
        when the reporter is enabled.

        Args:
            name: Task name; it doubles as the task ID returned to the caller
            total: Total number of items to process
            description: Human-readable label (defaults to ``name``)

        Returns:
            ``name``
        """
        self._tasks[name] = {"total": total or 0, "completed": 0}

        if self._enabled:
            # The stored session can be gone (TTL expiry, explicit delete).
            # Rebuild the skeleton instead of failing with KeyError on the
            # missing "tasks" entry.
            session_data = self._load_session() or self._new_session_data()
            tasks = session_data.setdefault("tasks", {})

            now = self._get_timestamp()
            tasks[name] = {
                "name": name,
                "description": description or name,
                "total": total,
                "completed": 0,
                "status": TaskStatus.RUNNING.value,
                "stats": {},
                "started_at": now,
            }
            session_data["updated_at"] = now
            session_data["status"] = "running"

            self._save_session(session_data)

        return name

    def update_task(
        self,
        task_id: str,
        advance: int = 0,
        total: int | None = None,
        description: str | None = None,
        **stats: Any,
    ) -> None:
        """Update an existing task.

        The local record is updated first; the Redis document is updated only
        if it still contains the task.

        Args:
            task_id: ID returned by ``start_task``
            advance: Number of items to advance by (ignored unless > 0)
            total: New total, if it changed
            description: New description, if it changed
            **stats: Statistics accumulated into the task's ``stats`` mapping
        """
        if not self._enabled or task_id not in self._tasks:
            return

        # Update internal stats
        record = self._tasks[task_id]
        if advance > 0:
            record["completed"] += advance
        if total is not None:
            record["total"] = total
        self._update_stat_dict(record, stats)

        # Mirror the change into Redis
        session_data = self._load_session()
        if task_id not in session_data.get("tasks", {}):
            return

        task_data = session_data["tasks"][task_id]
        task_data["completed"] = record["completed"]

        if total is not None:
            task_data["total"] = total
        if description is not None:
            task_data["description"] = description

        self._update_stat_dict(task_data.setdefault("stats", {}), stats)

        # Percentage is only meaningful with a positive total.
        task_total = task_data.get("total")
        if task_total and task_total > 0:
            task_data["progress_percent"] = (task_data["completed"] / task_total) * 100
        else:
            task_data["progress_percent"] = 0

        session_data["updated_at"] = self._get_timestamp()
        self._save_session(session_data)

    def finish_task(self, task_id: str, status: TaskStatus = TaskStatus.COMPLETED) -> None:
        """Finish a task.

        Once every task in the session is completed or failed, the session
        status becomes ``"completed"``.

        Args:
            task_id: ID returned by ``start_task``
            status: Final status of the task
        """
        if not self._enabled or task_id not in self._tasks:
            return

        session_data = self._load_session()
        if task_id not in session_data.get("tasks", {}):
            return

        task_data = session_data["tasks"][task_id]
        task_data["status"] = status.value
        task_data["finished_at"] = self._get_timestamp()

        # Check if all tasks are complete (a task without a status does not
        # count as finished).
        finished_states = (TaskStatus.COMPLETED.value, TaskStatus.FAILED.value)
        if all(t.get("status") in finished_states for t in session_data["tasks"].values()):
            session_data["status"] = "completed"

        session_data["updated_at"] = self._get_timestamp()
        self._save_session(session_data)

    def _get_timestamp(self) -> str:
        """Get the current UTC time as an ISO 8601 string."""
        return datetime.now(timezone.utc).isoformat()

    @classmethod
    def get_session(
        cls,
        session_id: str,
        redis_url: str = "redis://localhost:6379",
    ) -> dict[str, Any] | None:
        """Get the current state of a session from Redis.

        Args:
            session_id: The session ID to retrieve
            redis_url: Redis connection URL

        Returns:
            Session data dictionary or None if not found

        Raises:
            ImportError: If redis package is not installed
        """
        if not REDIS_AVAILABLE:
            raise ImportError(
                "Redis support requires the redis package. Install it with: pip install redis"
            )

        r = redis.from_url(redis_url, decode_responses=True)
        try:
            data = r.get(f"progress:session:{session_id}")
        finally:
            # A client is created per call; release its connections so
            # repeated polling does not accumulate open sockets.
            r.close()

        return json.loads(data) if data else None

    @classmethod
    def delete_session(
        cls,
        session_id: str,
        redis_url: str = "redis://localhost:6379",
    ) -> bool:
        """Delete a session from Redis.

        Args:
            session_id: The session ID to delete
            redis_url: Redis connection URL

        Returns:
            True if deleted, False if not found

        Raises:
            ImportError: If redis package is not installed
        """
        if not REDIS_AVAILABLE:
            raise ImportError(
                "Redis support requires the redis package. Install it with: pip install redis"
            )

        r = redis.from_url(redis_url, decode_responses=True)
        try:
            return bool(r.delete(f"progress:session:{session_id}"))
        finally:
            # Release the per-call client's connections.
            r.close()

    def __enter__(self) -> RedisProgressReporter:
        """Enter context manager."""
        self._active = True
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Exit context manager."""
        self._active = False
698
+
699
+
700
class NoOpProgressReporter(BaseProgressReporter):
    """Reporter that accepts every call and does nothing.

    Intended for runs where progress display is switched off; it is always
    constructed in the disabled state.
    """

    def __init__(self) -> None:
        """Create the reporter with reporting disabled."""
        super().__init__(enabled=False)

    def start_task(
        self,
        name: str,
        total: int | None = None,
        description: str | None = None,
    ) -> str:
        """Return ``name`` unchanged without recording anything."""
        return name

    def update_task(
        self,
        task_id: str,
        advance: int = 0,
        total: int | None = None,
        description: str | None = None,
        **stats: Any,
    ) -> None:
        """Discard the update."""
        return None

    def finish_task(self, task_id: str, status: TaskStatus = TaskStatus.COMPLETED) -> None:
        """Discard the finish notification."""
        return None

    def __enter__(self) -> NoOpProgressReporter:
        """Return the reporter itself; no state changes."""
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Leave the context; there is nothing to clean up."""
        return None