cardiac-shared 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. cardiac_shared-0.3.0/LICENSE +21 -0
  2. cardiac_shared-0.3.0/PKG-INFO +183 -0
  3. cardiac_shared-0.3.0/README.md +136 -0
  4. cardiac_shared-0.3.0/cardiac_shared/__init__.py +127 -0
  5. cardiac_shared-0.3.0/cardiac_shared/batch/__init__.py +162 -0
  6. cardiac_shared-0.3.0/cardiac_shared/cache/__init__.py +133 -0
  7. cardiac_shared-0.3.0/cardiac_shared/config/__init__.py +154 -0
  8. cardiac_shared-0.3.0/cardiac_shared/environment/__init__.py +46 -0
  9. cardiac_shared-0.3.0/cardiac_shared/environment/runtime_detector.py +399 -0
  10. cardiac_shared-0.3.0/cardiac_shared/hardware/__init__.py +100 -0
  11. cardiac_shared-0.3.0/cardiac_shared/hardware/cpu_optimizer.py +425 -0
  12. cardiac_shared-0.3.0/cardiac_shared/hardware/detector.py +533 -0
  13. cardiac_shared-0.3.0/cardiac_shared/hardware/optimizer.py +377 -0
  14. cardiac_shared-0.3.0/cardiac_shared/hardware/profiles.py +286 -0
  15. cardiac_shared-0.3.0/cardiac_shared/io/__init__.py +28 -0
  16. cardiac_shared-0.3.0/cardiac_shared/io/dicom.py +173 -0
  17. cardiac_shared-0.3.0/cardiac_shared/io/nifti.py +152 -0
  18. cardiac_shared-0.3.0/cardiac_shared/io/zip_handler.py +174 -0
  19. cardiac_shared-0.3.0/cardiac_shared/parallel/__init__.py +21 -0
  20. cardiac_shared-0.3.0/cardiac_shared/parallel/processor.py +468 -0
  21. cardiac_shared-0.3.0/cardiac_shared/preprocessing/__init__.py +3 -0
  22. cardiac_shared-0.3.0/cardiac_shared/progress/__init__.py +17 -0
  23. cardiac_shared-0.3.0/cardiac_shared/progress/tracker.py +440 -0
  24. cardiac_shared-0.3.0/cardiac_shared/utils/__init__.py +3 -0
  25. cardiac_shared-0.3.0/cardiac_shared.egg-info/PKG-INFO +183 -0
  26. cardiac_shared-0.3.0/cardiac_shared.egg-info/SOURCES.txt +32 -0
  27. cardiac_shared-0.3.0/cardiac_shared.egg-info/dependency_links.txt +1 -0
  28. cardiac_shared-0.3.0/cardiac_shared.egg-info/requires.txt +22 -0
  29. cardiac_shared-0.3.0/cardiac_shared.egg-info/top_level.txt +1 -0
  30. cardiac_shared-0.3.0/pyproject.toml +91 -0
  31. cardiac_shared-0.3.0/setup.cfg +4 -0
  32. cardiac_shared-0.3.0/tests/test_environment.py +60 -0
  33. cardiac_shared-0.3.0/tests/test_hardware.py +87 -0
  34. cardiac_shared-0.3.0/tests/test_imports.py +78 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025-2026 Rong Zhu
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,183 @@
1
+ Metadata-Version: 2.4
2
+ Name: cardiac-shared
3
+ Version: 0.3.0
4
+ Summary: Shared utilities for cardiac imaging analysis - hardware detection, IO, environment detection
5
+ Author-email: Rong Zhu <zhurong0525@gmail.com>
6
+ Maintainer-email: Rong Zhu <zhurong0525@gmail.com>
7
+ License: MIT
8
+ Project-URL: Homepage, https://github.com/zhurong2020/cardiac-shared
9
+ Project-URL: Documentation, https://github.com/zhurong2020/cardiac-shared/blob/main/README.md
10
+ Project-URL: Repository, https://github.com/zhurong2020/cardiac-shared
11
+ Project-URL: Bug Tracker, https://github.com/zhurong2020/cardiac-shared/issues
12
+ Project-URL: Changelog, https://github.com/zhurong2020/cardiac-shared/blob/main/CHANGELOG.md
13
+ Keywords: cardiac,medical-imaging,dicom,nifti,ct-scan,hardware-detection,gpu,totalsegmentator
14
+ Classifier: Development Status :: 3 - Alpha
15
+ Classifier: Intended Audience :: Science/Research
16
+ Classifier: Intended Audience :: Healthcare Industry
17
+ Classifier: License :: OSI Approved :: MIT License
18
+ Classifier: Operating System :: OS Independent
19
+ Classifier: Programming Language :: Python :: 3
20
+ Classifier: Programming Language :: Python :: 3.8
21
+ Classifier: Programming Language :: Python :: 3.9
22
+ Classifier: Programming Language :: Python :: 3.10
23
+ Classifier: Programming Language :: Python :: 3.11
24
+ Classifier: Programming Language :: Python :: 3.12
25
+ Classifier: Topic :: Scientific/Engineering :: Medical Science Apps.
26
+ Requires-Python: >=3.8
27
+ Description-Content-Type: text/markdown
28
+ License-File: LICENSE
29
+ Requires-Dist: numpy<2.0,>=1.19.0
30
+ Requires-Dist: psutil>=5.8.0
31
+ Provides-Extra: dicom
32
+ Requires-Dist: pydicom>=2.0.0; extra == "dicom"
33
+ Provides-Extra: nifti
34
+ Requires-Dist: nibabel>=3.2.0; extra == "nifti"
35
+ Provides-Extra: gpu
36
+ Requires-Dist: torch>=1.9.0; extra == "gpu"
37
+ Provides-Extra: all
38
+ Requires-Dist: pydicom>=2.0.0; extra == "all"
39
+ Requires-Dist: nibabel>=3.2.0; extra == "all"
40
+ Requires-Dist: torch>=1.9.0; extra == "all"
41
+ Provides-Extra: dev
42
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
43
+ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
44
+ Requires-Dist: black>=22.0.0; extra == "dev"
45
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
46
+ Dynamic: license-file
47
+
48
+ # Cardiac Shared
49
+
50
+ Shared utilities for cardiac imaging analysis projects.
51
+
52
+ **Version**: 0.3.0
53
+
54
+ ## Installation
55
+
56
+ ```bash
57
+ # Install with all features
58
+ pip install -e ".[all]"
59
+
60
+ # Install with DICOM support only
61
+ pip install -e ".[dicom]"
62
+
63
+ # Install with NIfTI support only
64
+ pip install -e ".[nifti]"
65
+ ```
66
+
67
+ ## Modules
68
+
69
+ ### IO Module
70
+
71
+ | Function | Description |
72
+ |----------|-------------|
73
+ | `read_dicom_series(path)` | Read DICOM series from directory |
74
+ | `get_dicom_metadata(ds)` | Extract metadata from DICOM dataset |
75
+ | `load_nifti(path)` | Load NIfTI file with metadata |
76
+ | `save_nifti(volume, path)` | Save numpy array as NIfTI |
77
+ | `extract_zip(path)` | Context manager for ZIP extraction |
78
+ | `find_dicom_root(path)` | Find DICOM directory in extracted ZIP |
79
+
80
+ ### Hardware Module (v0.2.0)
81
+
82
+ | Function | Description |
83
+ |----------|-------------|
84
+ | `detect_hardware()` | Detect complete hardware info (GPU/CPU/RAM) |
85
+ | `HardwareInfo` | Dataclass with GPU, CPU, RAM, environment info |
86
+ | `print_hardware_summary(hw)` | Print formatted hardware summary |
87
+ | `get_optimal_config(hw)` | Get optimal inference configuration |
88
+ | `CPUOptimizer` | CPU optimization for hospital deployments |
89
+ | `apply_cpu_optimizations(config)` | Apply PyTorch CPU optimizations |
90
+
91
+ ### Environment Module (v0.2.0)
92
+
93
+ | Function | Description |
94
+ |----------|-------------|
95
+ | `detect_runtime()` | Detect runtime environment |
96
+ | `RuntimeEnvironment` | Dataclass with environment info |
97
+ | `detect_colab()` | Check if running in Google Colab |
98
+ | `detect_wsl()` | Check if running in WSL |
99
+ | `print_environment_summary(env)` | Print environment summary |
100
+
101
+ ## Usage
102
+
103
+ ### IO Operations
104
+
105
+ ```python
106
+ from cardiac_shared.io import read_dicom_series, load_nifti, extract_zip, find_dicom_root
107
+
108
+ # Read DICOM series
109
+ volume, metadata = read_dicom_series("/path/to/dicom/")
110
+
111
+ # Read NIfTI file
112
+ volume, metadata = load_nifti("/path/to/file.nii.gz")
113
+
114
+ # Extract ZIP and read DICOM
115
+ with extract_zip("/path/to/data.zip") as extracted_dir:
116
+     dicom_root = find_dicom_root(extracted_dir)
117
+     volume, metadata = read_dicom_series(dicom_root)
118
+ ```
119
+
120
+ ### Hardware Detection
121
+
122
+ ```python
123
+ from cardiac_shared import detect_hardware, print_hardware_summary
124
+
125
+ hw = detect_hardware()
126
+ print_hardware_summary(hw)
127
+
128
+ print(f"Performance Tier: {hw.performance_tier}")
129
+ print(f"Recommended Device: {hw.recommended_device}")
130
+ print(f"GPU Available: {hw.gpu.available}")
131
+ print(f"CPU Cores: {hw.cpu.physical_cores}")
132
+ ```
133
+
134
+ ### Environment Detection
135
+
136
+ ```python
137
+ from cardiac_shared import detect_runtime, print_environment_summary
138
+
139
+ env = detect_runtime()
140
+ print_environment_summary(env)
141
+
142
+ print(f"Runtime Type: {env.runtime_type}")
143
+ print(f"Is WSL: {env.is_wsl}")
144
+ print(f"Is Hospital Environment: {env.is_hospital_environment}")
145
+ ```
146
+
147
+ ### CPU Optimization (Hospital Deployment)
148
+
149
+ ```python
150
+ from cardiac_shared import detect_hardware, detect_runtime, CPUOptimizer
151
+
152
+ hw = detect_hardware()
153
+ env = detect_runtime()
154
+
155
+ if env.is_hospital_environment and not hw.gpu.available:
156
+     optimizer = CPUOptimizer()
157
+     config = optimizer.get_optimal_config()
158
+
159
+     print(f"CPU Tier: {config.tier.value}")
160
+     print(f"Recommended Workers: {config.num_workers}")
161
+     print(f"Batch Size: {config.batch_size}")
162
+
163
+     # Apply PyTorch optimizations
164
+     optimizer.apply_torch_optimizations(config)
165
+ ```
166
+
167
+ ## Projects Using This Package
168
+
169
+ - cardiac-ml-research (main project)
170
+ - ai-cac-research (CAC scoring research)
171
+ - pcfa (Pericardial Fat Analysis)
172
+ - vbca (Vertebra Body Composition Analysis)
173
+
174
+ ## Changelog
175
+
176
+ ### v0.2.0 (2026-01-01)
177
+ - Added `hardware` module (detector, cpu_optimizer)
178
+ - Added `environment` module (runtime_detector)
179
+ - Migrated from cardiac-ml-research/shared/
180
+
181
+ ### v0.1.0 (2026-01-01)
182
+ - Initial release
183
+ - IO modules: dicom, nifti, zip_handler
@@ -0,0 +1,136 @@
1
+ # Cardiac Shared
2
+
3
+ Shared utilities for cardiac imaging analysis projects.
4
+
5
+ **Version**: 0.3.0
6
+
7
+ ## Installation
8
+
9
+ ```bash
10
+ # Install with all features
11
+ pip install -e ".[all]"
12
+
13
+ # Install with DICOM support only
14
+ pip install -e ".[dicom]"
15
+
16
+ # Install with NIfTI support only
17
+ pip install -e ".[nifti]"
18
+ ```
19
+
20
+ ## Modules
21
+
22
+ ### IO Module
23
+
24
+ | Function | Description |
25
+ |----------|-------------|
26
+ | `read_dicom_series(path)` | Read DICOM series from directory |
27
+ | `get_dicom_metadata(ds)` | Extract metadata from DICOM dataset |
28
+ | `load_nifti(path)` | Load NIfTI file with metadata |
29
+ | `save_nifti(volume, path)` | Save numpy array as NIfTI |
30
+ | `extract_zip(path)` | Context manager for ZIP extraction |
31
+ | `find_dicom_root(path)` | Find DICOM directory in extracted ZIP |
32
+
33
+ ### Hardware Module (v0.2.0)
34
+
35
+ | Function | Description |
36
+ |----------|-------------|
37
+ | `detect_hardware()` | Detect complete hardware info (GPU/CPU/RAM) |
38
+ | `HardwareInfo` | Dataclass with GPU, CPU, RAM, environment info |
39
+ | `print_hardware_summary(hw)` | Print formatted hardware summary |
40
+ | `get_optimal_config(hw)` | Get optimal inference configuration |
41
+ | `CPUOptimizer` | CPU optimization for hospital deployments |
42
+ | `apply_cpu_optimizations(config)` | Apply PyTorch CPU optimizations |
43
+
44
+ ### Environment Module (v0.2.0)
45
+
46
+ | Function | Description |
47
+ |----------|-------------|
48
+ | `detect_runtime()` | Detect runtime environment |
49
+ | `RuntimeEnvironment` | Dataclass with environment info |
50
+ | `detect_colab()` | Check if running in Google Colab |
51
+ | `detect_wsl()` | Check if running in WSL |
52
+ | `print_environment_summary(env)` | Print environment summary |
53
+
54
+ ## Usage
55
+
56
+ ### IO Operations
57
+
58
+ ```python
59
+ from cardiac_shared.io import read_dicom_series, load_nifti, extract_zip, find_dicom_root
60
+
61
+ # Read DICOM series
62
+ volume, metadata = read_dicom_series("/path/to/dicom/")
63
+
64
+ # Read NIfTI file
65
+ volume, metadata = load_nifti("/path/to/file.nii.gz")
66
+
67
+ # Extract ZIP and read DICOM
68
+ with extract_zip("/path/to/data.zip") as extracted_dir:
69
+     dicom_root = find_dicom_root(extracted_dir)
70
+     volume, metadata = read_dicom_series(dicom_root)
71
+ ```
72
+
73
+ ### Hardware Detection
74
+
75
+ ```python
76
+ from cardiac_shared import detect_hardware, print_hardware_summary
77
+
78
+ hw = detect_hardware()
79
+ print_hardware_summary(hw)
80
+
81
+ print(f"Performance Tier: {hw.performance_tier}")
82
+ print(f"Recommended Device: {hw.recommended_device}")
83
+ print(f"GPU Available: {hw.gpu.available}")
84
+ print(f"CPU Cores: {hw.cpu.physical_cores}")
85
+ ```
86
+
87
+ ### Environment Detection
88
+
89
+ ```python
90
+ from cardiac_shared import detect_runtime, print_environment_summary
91
+
92
+ env = detect_runtime()
93
+ print_environment_summary(env)
94
+
95
+ print(f"Runtime Type: {env.runtime_type}")
96
+ print(f"Is WSL: {env.is_wsl}")
97
+ print(f"Is Hospital Environment: {env.is_hospital_environment}")
98
+ ```
99
+
100
+ ### CPU Optimization (Hospital Deployment)
101
+
102
+ ```python
103
+ from cardiac_shared import detect_hardware, detect_runtime, CPUOptimizer
104
+
105
+ hw = detect_hardware()
106
+ env = detect_runtime()
107
+
108
+ if env.is_hospital_environment and not hw.gpu.available:
109
+     optimizer = CPUOptimizer()
110
+     config = optimizer.get_optimal_config()
111
+
112
+     print(f"CPU Tier: {config.tier.value}")
113
+     print(f"Recommended Workers: {config.num_workers}")
114
+     print(f"Batch Size: {config.batch_size}")
115
+
116
+     # Apply PyTorch optimizations
117
+     optimizer.apply_torch_optimizations(config)
118
+ ```
119
+
120
+ ## Projects Using This Package
121
+
122
+ - cardiac-ml-research (main project)
123
+ - ai-cac-research (CAC scoring research)
124
+ - pcfa (Pericardial Fat Analysis)
125
+ - vbca (Vertebra Body Composition Analysis)
126
+
127
+ ## Changelog
128
+
129
+ ### v0.2.0 (2026-01-01)
130
+ - Added `hardware` module (detector, cpu_optimizer)
131
+ - Added `environment` module (runtime_detector)
132
+ - Migrated from cardiac-ml-research/shared/
133
+
134
+ ### v0.1.0 (2026-01-01)
135
+ - Initial release
136
+ - IO modules: dicom, nifti, zip_handler
@@ -0,0 +1,127 @@
1
+ """
2
+ Cardiac Shared - Common utilities for cardiac imaging projects
3
+
4
+ This package provides shared IO, hardware detection, environment detection,
5
+ parallel processing, progress tracking, caching, and configuration management
6
+ for cardiac imaging analysis across multiple projects.
7
+
8
+ Modules:
9
+ - io: DICOM, NIfTI, and ZIP file handling
10
+ - hardware: Hardware detection and CPU optimization
11
+ - environment: Runtime environment detection
12
+ - parallel: Parallel processing with checkpoint support
13
+ - progress: Multi-level progress tracking
14
+ - cache: Result caching with resume capability
15
+ - batch: Batch processing framework
16
+ - config: YAML configuration management
17
+ """
18
+
19
+ __version__ = "0.3.0"
20
+
21
+ # IO modules
22
+ from cardiac_shared.io.dicom import read_dicom_series, get_dicom_metadata
23
+ from cardiac_shared.io.nifti import load_nifti, save_nifti
24
+ from cardiac_shared.io.zip_handler import extract_zip, find_dicom_root
25
+
26
+ # Hardware detection
27
+ from cardiac_shared.hardware import (
28
+ detect_hardware,
29
+ HardwareInfo,
30
+ GPUInfo,
31
+ CPUInfo,
32
+ RAMInfo,
33
+ print_hardware_summary,
34
+ get_optimal_config,
35
+ CPUOptimizer,
36
+ get_cpu_optimizer,
37
+ apply_cpu_optimizations,
38
+ )
39
+
40
+ # Environment detection
41
+ from cardiac_shared.environment import (
42
+ detect_runtime,
43
+ RuntimeEnvironment,
44
+ detect_colab,
45
+ detect_wsl,
46
+ print_environment_summary,
47
+ )
48
+
49
+ # Parallel processing
50
+ from cardiac_shared.parallel import (
51
+ ParallelProcessor,
52
+ ProcessingResult,
53
+ Checkpoint,
54
+ parallel_map,
55
+ parallel_map_with_checkpoint,
56
+ )
57
+
58
+ # Progress tracking
59
+ from cardiac_shared.progress import (
60
+ ProgressTracker,
61
+ ProgressLevel,
62
+ create_tracker,
63
+ )
64
+
65
+ # Cache management
66
+ from cardiac_shared.cache import CacheManager
67
+
68
+ # Batch processing
69
+ from cardiac_shared.batch import BatchProcessor, BatchConfig
70
+
71
+ # Configuration management
72
+ from cardiac_shared.config import ConfigManager, load_config
73
+
74
+ __all__ = [
75
+ # Version
76
+ '__version__',
77
+
78
+ # IO
79
+ 'read_dicom_series',
80
+ 'get_dicom_metadata',
81
+ 'load_nifti',
82
+ 'save_nifti',
83
+ 'extract_zip',
84
+ 'find_dicom_root',
85
+
86
+ # Hardware
87
+ 'detect_hardware',
88
+ 'HardwareInfo',
89
+ 'GPUInfo',
90
+ 'CPUInfo',
91
+ 'RAMInfo',
92
+ 'print_hardware_summary',
93
+ 'get_optimal_config',
94
+ 'CPUOptimizer',
95
+ 'get_cpu_optimizer',
96
+ 'apply_cpu_optimizations',
97
+
98
+ # Environment
99
+ 'detect_runtime',
100
+ 'RuntimeEnvironment',
101
+ 'detect_colab',
102
+ 'detect_wsl',
103
+ 'print_environment_summary',
104
+
105
+ # Parallel
106
+ 'ParallelProcessor',
107
+ 'ProcessingResult',
108
+ 'Checkpoint',
109
+ 'parallel_map',
110
+ 'parallel_map_with_checkpoint',
111
+
112
+ # Progress
113
+ 'ProgressTracker',
114
+ 'ProgressLevel',
115
+ 'create_tracker',
116
+
117
+ # Cache
118
+ 'CacheManager',
119
+
120
+ # Batch
121
+ 'BatchProcessor',
122
+ 'BatchConfig',
123
+
124
+ # Config
125
+ 'ConfigManager',
126
+ 'load_config',
127
+ ]
@@ -0,0 +1,162 @@
1
+ """
2
+ Batch Processing Module
3
+
4
+ Generic batch processor with resume capability for medical imaging pipelines.
5
+ """
6
+
7
+ from pathlib import Path
8
+ from typing import Callable, List, Any, Optional, Dict
9
+ from dataclasses import dataclass
10
+ import logging
11
+
12
logger = logging.getLogger(__name__)


@dataclass
class BatchConfig:
    """Settings container for :class:`BatchProcessor`.

    NOTE(review): within this module only ``log_interval`` is read (by
    ``BatchProcessor.process``) and ``cache_dir`` is written (by
    ``BatchProcessor.__init__``). ``enable_resume`` and ``save_intermediate``
    are stored but never consulted here -- confirm whether other modules
    use them.
    """
    # Flag for resume support; not read anywhere in this module.
    enable_resume: bool = True
    # Directory for cache files; may be overridden via BatchProcessor(cache_dir=...).
    cache_dir: Optional[Path] = None
    # Flag for saving intermediate output; not read anywhere in this module.
    save_intermediate: bool = True
    # Emit a progress log line every N items; values <= 0 disable the
    # periodic lines (the final "total/total" line is always emitted).
    log_interval: int = 10


class BatchProcessor:
    """
    Sequential batch processor that records a per-item outcome.

    Each item is passed to a user-supplied function; the outcome (result or
    error message) is stored as exactly one dictionary per item, and optional
    callbacks are invoked on success / failure. Progress is reported through
    the module logger.

    NOTE(review): the configuration carries resume/cache options, but the
    code in this class does not read or write any cache -- confirm where
    resume is implemented before relying on it.

    Example:
        processor = BatchProcessor(cache_dir=Path("cache"))

        results = processor.process(
            items=patient_list,
            process_func=analyze_patient,
            get_item_id=lambda p: p.id,
            desc="Analyzing patients"
        )

        processor.print_summary(results)
    """

    def __init__(self, config: Optional[BatchConfig] = None, cache_dir: Optional[Path] = None):
        """
        Initialize batch processor

        Args:
            config: BatchConfig instance (a default one is created if omitted)
            cache_dir: Directory for cache files; when given it overrides
                ``config.cache_dir`` on a copy, leaving the caller's
                ``config`` object untouched
        """
        # Local import: the module header only imports ``dataclass``.
        from dataclasses import replace

        resolved = config if config is not None else BatchConfig()
        if cache_dir:
            # Copy instead of assigning in place so a config object shared
            # between several processors is not silently modified.
            resolved = replace(resolved, cache_dir=Path(cache_dir))

        self.config = resolved
        self._results: List[Dict] = []

    def process(
        self,
        items: List[Any],
        process_func: Callable[[Any], Any],
        get_item_id: Optional[Callable[[Any], str]] = None,
        desc: str = "Processing",
        on_complete: Optional[Callable[[str, Any], None]] = None,
        on_error: Optional[Callable[[str, Exception], None]] = None
    ) -> List[Dict]:
        """
        Process items in batch

        Args:
            items: List of items to process
            process_func: Function to process each item
            get_item_id: Function to extract item ID (default: str)
            desc: Description for logging
            on_complete: Callback ``(item_id, result)`` on successful processing
            on_error: Callback ``(item_id, exception)`` on error

        Returns:
            List with exactly one dictionary per item, in input order:
            ``{'item_id', 'status': 'success', 'result'}`` or
            ``{'item_id', 'status': 'failed', 'error'}``.

        Notes:
            - An exception raised by ``process_func`` *or* by ``on_complete``
              marks the item as failed (``on_error`` is called, a warning is
              logged); the item is recorded once either way.
            - Exceptions raised by ``get_item_id`` or ``on_error`` are not
              caught and abort the batch.
        """
        if get_item_id is None:
            get_item_id = str

        # Fresh list per run: lists returned by earlier calls stay intact.
        self._results = []
        total = len(items)
        succeeded = 0
        log_every = self.config.log_interval

        logger.info("%s: Starting batch of %s items", desc, total)

        for position, item in enumerate(items, 1):
            item_id = get_item_id(item)

            try:
                result = process_func(item)
                if on_complete:
                    on_complete(item_id, result)
            except Exception as exc:
                self._results.append({
                    'item_id': item_id,
                    'status': 'failed',
                    'error': str(exc)
                })

                if on_error:
                    on_error(item_id, exc)

                logger.warning("Failed to process %s: %s", item_id, exc)
            else:
                # Recorded only after the callback returned, so a failing
                # callback cannot leave both a success and a failed entry.
                self._results.append({
                    'item_id': item_id,
                    'status': 'success',
                    'result': result
                })
                succeeded += 1

            # Log progress periodically; guard against log_interval <= 0,
            # which would otherwise raise ZeroDivisionError for 0.
            periodic = log_every > 0 and position % log_every == 0
            if periodic or position == total:
                logger.info(
                    "%s: %s/%s completed (%s successful)",
                    desc, position, total, succeeded
                )

        return self._results

    def get_statistics(self) -> Dict:
        """
        Get processing statistics for the most recent results

        Returns:
            Dict with ``total``, ``success``, ``failed`` (ints) and
            ``success_rate`` (float percentage, 0.0 when there are no results)
        """
        total = len(self._results)
        success = sum(1 for r in self._results if r['status'] == 'success')

        return {
            'total': total,
            'success': success,
            'failed': total - success,
            'success_rate': (success / total * 100) if total > 0 else 0.0
        }

    def print_summary(self, results: Optional[List[Dict]] = None):
        """
        Print processing summary to stdout

        Args:
            results: Result list to summarize. When provided (including an
                empty list) it replaces the stored results; when omitted the
                results of the last ``process`` call are used.
        """
        # ``is not None`` so that an explicitly passed empty list is honored
        # instead of falling back to stale results.
        if results is not None:
            self._results = results

        stats = self.get_statistics()

        print()
        print("=" * 60)
        print("Batch Processing Summary")
        print("=" * 60)
        print(f"Total: {stats['total']}")
        print(f"Success: {stats['success']} ({stats['success_rate']:.1f}%)")
        print(f"Failed: {stats['failed']}")
        print("=" * 60)

        if stats['failed'] > 0:
            print("\nFailed items:")
            for r in self._results:
                if r['status'] == 'failed':
                    print(f"  - {r['item_id']}: {r.get('error', 'Unknown error')}")
160
+
161
+
162
+ __all__ = ['BatchProcessor', 'BatchConfig']