gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.6__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
- gitflow_analytics/_version.py +1 -1
- gitflow_analytics/classification/__init__.py +31 -0
- gitflow_analytics/classification/batch_classifier.py +752 -0
- gitflow_analytics/classification/classifier.py +464 -0
- gitflow_analytics/classification/feature_extractor.py +725 -0
- gitflow_analytics/classification/linguist_analyzer.py +574 -0
- gitflow_analytics/classification/model.py +455 -0
- gitflow_analytics/cli.py +4108 -350
- gitflow_analytics/cli_rich.py +198 -48
- gitflow_analytics/config/__init__.py +43 -0
- gitflow_analytics/config/errors.py +261 -0
- gitflow_analytics/config/loader.py +904 -0
- gitflow_analytics/config/profiles.py +264 -0
- gitflow_analytics/config/repository.py +124 -0
- gitflow_analytics/config/schema.py +441 -0
- gitflow_analytics/config/validator.py +154 -0
- gitflow_analytics/config.py +44 -508
- gitflow_analytics/core/analyzer.py +1209 -98
- gitflow_analytics/core/cache.py +1337 -29
- gitflow_analytics/core/data_fetcher.py +1193 -0
- gitflow_analytics/core/identity.py +363 -14
- gitflow_analytics/core/metrics_storage.py +526 -0
- gitflow_analytics/core/progress.py +372 -0
- gitflow_analytics/core/schema_version.py +269 -0
- gitflow_analytics/extractors/ml_tickets.py +1100 -0
- gitflow_analytics/extractors/story_points.py +8 -1
- gitflow_analytics/extractors/tickets.py +749 -11
- gitflow_analytics/identity_llm/__init__.py +6 -0
- gitflow_analytics/identity_llm/analysis_pass.py +231 -0
- gitflow_analytics/identity_llm/analyzer.py +464 -0
- gitflow_analytics/identity_llm/models.py +76 -0
- gitflow_analytics/integrations/github_integration.py +175 -11
- gitflow_analytics/integrations/jira_integration.py +461 -24
- gitflow_analytics/integrations/orchestrator.py +124 -1
- gitflow_analytics/metrics/activity_scoring.py +322 -0
- gitflow_analytics/metrics/branch_health.py +470 -0
- gitflow_analytics/metrics/dora.py +379 -20
- gitflow_analytics/models/database.py +843 -53
- gitflow_analytics/pm_framework/__init__.py +115 -0
- gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
- gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
- gitflow_analytics/pm_framework/base.py +406 -0
- gitflow_analytics/pm_framework/models.py +211 -0
- gitflow_analytics/pm_framework/orchestrator.py +652 -0
- gitflow_analytics/pm_framework/registry.py +333 -0
- gitflow_analytics/qualitative/__init__.py +9 -10
- gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
- gitflow_analytics/qualitative/classifiers/__init__.py +3 -3
- gitflow_analytics/qualitative/classifiers/change_type.py +518 -244
- gitflow_analytics/qualitative/classifiers/domain_classifier.py +272 -165
- gitflow_analytics/qualitative/classifiers/intent_analyzer.py +321 -222
- gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
- gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
- gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
- gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
- gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
- gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
- gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
- gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
- gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
- gitflow_analytics/qualitative/classifiers/risk_analyzer.py +215 -189
- gitflow_analytics/qualitative/core/__init__.py +4 -4
- gitflow_analytics/qualitative/core/llm_fallback.py +239 -235
- gitflow_analytics/qualitative/core/nlp_engine.py +157 -148
- gitflow_analytics/qualitative/core/pattern_cache.py +214 -192
- gitflow_analytics/qualitative/core/processor.py +381 -248
- gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
- gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
- gitflow_analytics/qualitative/models/__init__.py +7 -7
- gitflow_analytics/qualitative/models/schemas.py +155 -121
- gitflow_analytics/qualitative/utils/__init__.py +4 -4
- gitflow_analytics/qualitative/utils/batch_processor.py +136 -123
- gitflow_analytics/qualitative/utils/cost_tracker.py +142 -140
- gitflow_analytics/qualitative/utils/metrics.py +172 -158
- gitflow_analytics/qualitative/utils/text_processing.py +146 -104
- gitflow_analytics/reports/__init__.py +100 -0
- gitflow_analytics/reports/analytics_writer.py +539 -14
- gitflow_analytics/reports/base.py +648 -0
- gitflow_analytics/reports/branch_health_writer.py +322 -0
- gitflow_analytics/reports/classification_writer.py +924 -0
- gitflow_analytics/reports/cli_integration.py +427 -0
- gitflow_analytics/reports/csv_writer.py +1676 -212
- gitflow_analytics/reports/data_models.py +504 -0
- gitflow_analytics/reports/database_report_generator.py +427 -0
- gitflow_analytics/reports/example_usage.py +344 -0
- gitflow_analytics/reports/factory.py +499 -0
- gitflow_analytics/reports/formatters.py +698 -0
- gitflow_analytics/reports/html_generator.py +1116 -0
- gitflow_analytics/reports/interfaces.py +489 -0
- gitflow_analytics/reports/json_exporter.py +2770 -0
- gitflow_analytics/reports/narrative_writer.py +2287 -158
- gitflow_analytics/reports/story_point_correlation.py +1144 -0
- gitflow_analytics/reports/weekly_trends_writer.py +389 -0
- gitflow_analytics/training/__init__.py +5 -0
- gitflow_analytics/training/model_loader.py +377 -0
- gitflow_analytics/training/pipeline.py +550 -0
- gitflow_analytics/tui/__init__.py +1 -1
- gitflow_analytics/tui/app.py +129 -126
- gitflow_analytics/tui/screens/__init__.py +3 -3
- gitflow_analytics/tui/screens/analysis_progress_screen.py +188 -179
- gitflow_analytics/tui/screens/configuration_screen.py +154 -178
- gitflow_analytics/tui/screens/loading_screen.py +100 -110
- gitflow_analytics/tui/screens/main_screen.py +89 -72
- gitflow_analytics/tui/screens/results_screen.py +305 -281
- gitflow_analytics/tui/widgets/__init__.py +2 -2
- gitflow_analytics/tui/widgets/data_table.py +67 -69
- gitflow_analytics/tui/widgets/export_modal.py +76 -76
- gitflow_analytics/tui/widgets/progress_widget.py +41 -46
- gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
- gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
- gitflow_analytics-1.0.3.dist-info/METADATA +0 -490
- gitflow_analytics-1.0.3.dist-info/RECORD +0 -62
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0
gitflow_analytics/qualitative/utils/batch_processor.py

```diff
@@ -1,25 +1,26 @@
 """Batch processing utilities for efficient commit analysis."""
 
 import logging
+import time
+from collections.abc import Iterator
 from concurrent.futures import ThreadPoolExecutor, as_completed
-from typing import Any, Callable, Dict, Iterator, List, Optional, TypeVar
 from threading import Lock
-import
+from typing import Any, Callable, Optional, TypeVar
 
-T = TypeVar(
-R = TypeVar(
+T = TypeVar("T")
+R = TypeVar("R")
 
 
 class BatchProcessor:
     """Efficient batch processing for commit analysis.
-
+
     This class provides utilities for processing large numbers of commits
     in batches with parallel execution, progress tracking, and error handling.
     """
-
+
     def __init__(self, batch_size: int = 1000, max_workers: int = 4):
         """Initialize batch processor.
-
+
         Args:
             batch_size: Number of items to process per batch
             max_workers: Maximum number of worker threads
@@ -29,257 +30,269 @@ class BatchProcessor:
         self.logger = logging.getLogger(__name__)
         self._stats_lock = Lock()
         self._processing_stats = {
-
-
-
-
+            "total_processed": 0,
+            "total_errors": 0,
+            "batch_times": [],
+            "start_time": None,
         }
-
-    def create_batches(self, items:
+
+    def create_batches(self, items: list[T], batch_size: Optional[int] = None) -> Iterator[list[T]]:
         """Split items into batches for processing.
-
+
         Args:
             items: List of items to batch
             batch_size: Override default batch size
-
+
         Yields:
             Batches of items
         """
         batch_size = batch_size or self.batch_size
-
+
         for i in range(0, len(items), batch_size):
-            yield items[i:i + batch_size]
-
-    def process_batches(
-
+            yield items[i : i + batch_size]
+
+    def process_batches(
+        self, items: list[T], processor_func: Callable[[list[T]], list[R]], parallel: bool = True
+    ) -> list[R]:
         """Process items in batches with optional parallelization.
-
+
         Args:
             items: Items to process
             processor_func: Function that processes a batch and returns results
             parallel: Whether to use parallel processing
-
+
         Returns:
             List of all processing results
         """
         if not items:
             return []
-
+
         self._reset_stats()
-        self._processing_stats[
-
+        self._processing_stats["start_time"] = time.time()
+
         batches = list(self.create_batches(items))
         self.logger.info(f"Processing {len(items)} items in {len(batches)} batches")
-
+
         all_results = []
-
+
         if parallel and len(batches) > 1:
             all_results = self._process_parallel(batches, processor_func)
         else:
             all_results = self._process_sequential(batches, processor_func)
-
+
         self._log_final_stats(len(items))
         return all_results
-
-    def process_with_callback(
-
+
+    def process_with_callback(
+        self,
+        items: list[T],
+        processor_func: Callable[[list[T]], list[R]],
+        progress_callback: Optional[Callable[[int, int], None]] = None,
+    ) -> list[R]:
         """Process batches with progress callback.
-
+
         Args:
             items: Items to process
             processor_func: Function that processes a batch
             progress_callback: Callback for progress updates (processed, total)
-
+
         Returns:
             List of all processing results
         """
         if not items:
             return []
-
+
         self._reset_stats()
         batches = list(self.create_batches(items))
         all_results = []
         processed_count = 0
-
+
         for i, batch in enumerate(batches):
             batch_start = time.time()
-
+
             try:
                 batch_results = processor_func(batch)
                 all_results.extend(batch_results)
                 processed_count += len(batch)
-
+
                 with self._stats_lock:
-                    self._processing_stats[
-                    self._processing_stats[
-
+                    self._processing_stats["total_processed"] += len(batch)
+                    self._processing_stats["batch_times"].append(time.time() - batch_start)
+
             except Exception as e:
                 self.logger.error(f"Error processing batch {i}: {e}")
                 with self._stats_lock:
-                    self._processing_stats[
-
+                    self._processing_stats["total_errors"] += len(batch)
+
             # Call progress callback if provided
             if progress_callback:
                 progress_callback(processed_count, len(items))
-
+
         return all_results
-
-    def _process_parallel(
-
+
+    def _process_parallel(
+        self, batches: list[list[T]], processor_func: Callable[[list[T]], list[R]]
+    ) -> list[R]:
         """Process batches in parallel using ThreadPoolExecutor.
-
+
         Args:
             batches: List of batches to process
             processor_func: Function to process each batch
-
+
         Returns:
             Combined results from all batches
         """
         all_results = []
-
+
         with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
             # Submit all batches
             future_to_batch = {
                 executor.submit(self._process_batch_with_timing, batch, processor_func): i
                 for i, batch in enumerate(batches)
             }
-
+
             # Collect results as they complete
             for future in as_completed(future_to_batch):
                 batch_idx = future_to_batch[future]
-
+
                 try:
                     batch_results, batch_time = future.result()
                     all_results.extend(batch_results)
-
+
                     with self._stats_lock:
-                        self._processing_stats[
-                        self._processing_stats[
-
+                        self._processing_stats["total_processed"] += len(batches[batch_idx])
+                        self._processing_stats["batch_times"].append(batch_time)
+
                 except Exception as e:
                     self.logger.error(f"Error processing batch {batch_idx}: {e}")
                     with self._stats_lock:
-                        self._processing_stats[
-
+                        self._processing_stats["total_errors"] += len(batches[batch_idx])
+
         return all_results
-
-    def _process_sequential(
-
+
+    def _process_sequential(
+        self, batches: list[list[T]], processor_func: Callable[[list[T]], list[R]]
+    ) -> list[R]:
         """Process batches sequentially.
-
+
         Args:
-            batches: List of batches to process
+            batches: List of batches to process
             processor_func: Function to process each batch
-
+
         Returns:
             Combined results from all batches
         """
         all_results = []
-
+
         for i, batch in enumerate(batches):
             try:
                 batch_results, batch_time = self._process_batch_with_timing(batch, processor_func)
                 all_results.extend(batch_results)
-
-                self._processing_stats[
-                self._processing_stats[
-
+
+                self._processing_stats["total_processed"] += len(batch)
+                self._processing_stats["batch_times"].append(batch_time)
+
             except Exception as e:
                 self.logger.error(f"Error processing batch {i}: {e}")
-                self._processing_stats[
-
+                self._processing_stats["total_errors"] += len(batch)
+
         return all_results
-
-    def _process_batch_with_timing(
-
+
+    def _process_batch_with_timing(
+        self, batch: list[T], processor_func: Callable[[list[T]], list[R]]
+    ) -> tuple[list[R], float]:
         """Process a single batch with timing.
-
+
         Args:
             batch: Batch to process
             processor_func: Processing function
-
+
         Returns:
             Tuple of (results, processing_time_seconds)
         """
         start_time = time.time()
         results = processor_func(batch)
         processing_time = time.time() - start_time
-
+
         return results, processing_time
-
+
     def _reset_stats(self) -> None:
         """Reset processing statistics."""
         with self._stats_lock:
             self._processing_stats = {
-
-
-
-
+                "total_processed": 0,
+                "total_errors": 0,
+                "batch_times": [],
+                "start_time": time.time(),
             }
-
+
     def _log_final_stats(self, total_items: int) -> None:
         """Log final processing statistics.
-
+
         Args:
             total_items: Total number of items processed
         """
         with self._stats_lock:
             stats = self._processing_stats.copy()
-
-        if not stats[
+
+        if not stats["batch_times"]:
             return
-
-        total_time = time.time() - stats[
-        avg_batch_time = sum(stats[
-        items_per_second = stats[
-
+
+        total_time = time.time() - stats["start_time"]
+        avg_batch_time = sum(stats["batch_times"]) / len(stats["batch_times"])
+        items_per_second = stats["total_processed"] / total_time if total_time > 0 else 0
+
         self.logger.info(
             f"Batch processing complete: {stats['total_processed']}/{total_items} items processed "
             f"in {total_time:.2f}s ({items_per_second:.1f} items/s), "
             f"{stats['total_errors']} errors, avg batch time: {avg_batch_time:.2f}s"
         )
-
-    def get_processing_stats(self) ->
+
+    def get_processing_stats(self) -> dict[str, Any]:
         """Get current processing statistics.
-
+
         Returns:
             Dictionary with processing statistics
         """
         with self._stats_lock:
             stats = self._processing_stats.copy()
-
-        if stats[
-            elapsed_time = time.time() - stats[
-            avg_batch_time = sum(stats[
-            items_per_second = stats[
-
+
+        if stats["start_time"] and stats["batch_times"]:
+            elapsed_time = time.time() - stats["start_time"]
+            avg_batch_time = sum(stats["batch_times"]) / len(stats["batch_times"])
+            items_per_second = stats["total_processed"] / elapsed_time if elapsed_time > 0 else 0
+
             return {
-
-
-
-
-
-
-
+                "total_processed": stats["total_processed"],
+                "total_errors": stats["total_errors"],
+                "elapsed_time_seconds": elapsed_time,
+                "avg_batch_time_seconds": avg_batch_time,
+                "items_per_second": items_per_second,
+                "batches_completed": len(stats["batch_times"]),
+                "error_rate": (
+                    stats["total_errors"] / (stats["total_processed"] + stats["total_errors"])
+                    if (stats["total_processed"] + stats["total_errors"]) > 0
+                    else 0.0
+                ),
             }
         else:
             return {
-
-
-
-
-
-
-
+                "total_processed": 0,
+                "total_errors": 0,
+                "elapsed_time_seconds": 0,
+                "avg_batch_time_seconds": 0,
+                "items_per_second": 0,
+                "batches_completed": 0,
+                "error_rate": 0.0,
             }
 
 
 class ProgressTracker:
     """Simple progress tracking for long-running operations."""
-
+
     def __init__(self, total: int, description: str = "Processing"):
         """Initialize progress tracker.
-
+
         Args:
             total: Total number of items to process
             description: Description of the operation
@@ -290,28 +303,28 @@ class ProgressTracker:
         self.start_time = time.time()
         self.last_report = 0
         self.logger = logging.getLogger(__name__)
-
+
     def update(self, count: int = 1) -> None:
         """Update progress count.
-
+
         Args:
             count: Number of items processed since last update
         """
         self.processed += count
-
+
         # Report progress every 10% or every 1000 items, whichever is less frequent
        report_interval = max(self.total // 10, 1000)
-
+
         if self.processed - self.last_report >= report_interval or self.processed >= self.total:
             self._report_progress()
             self.last_report = self.processed
-
+
     def _report_progress(self) -> None:
         """Report current progress."""
         elapsed_time = time.time() - self.start_time
         percentage = (self.processed / self.total) * 100 if self.total > 0 else 0
         rate = self.processed / elapsed_time if elapsed_time > 0 else 0
-
+
         # Estimate time remaining
         if rate > 0 and self.processed < self.total:
             remaining_items = self.total - self.processed
@@ -319,8 +332,8 @@ class ProgressTracker:
             eta_str = f", ETA: {eta_seconds:.0f}s"
         else:
             eta_str = ""
-
+
         self.logger.info(
             f"{self.description}: {self.processed}/{self.total} ({percentage:.1f}%) "
             f"at {rate:.1f} items/s{eta_str}"
-        )
+        )
```