gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. gitflow_analytics/_version.py +1 -1
  2. gitflow_analytics/classification/__init__.py +31 -0
  3. gitflow_analytics/classification/batch_classifier.py +752 -0
  4. gitflow_analytics/classification/classifier.py +464 -0
  5. gitflow_analytics/classification/feature_extractor.py +725 -0
  6. gitflow_analytics/classification/linguist_analyzer.py +574 -0
  7. gitflow_analytics/classification/model.py +455 -0
  8. gitflow_analytics/cli.py +4108 -350
  9. gitflow_analytics/cli_rich.py +198 -48
  10. gitflow_analytics/config/__init__.py +43 -0
  11. gitflow_analytics/config/errors.py +261 -0
  12. gitflow_analytics/config/loader.py +904 -0
  13. gitflow_analytics/config/profiles.py +264 -0
  14. gitflow_analytics/config/repository.py +124 -0
  15. gitflow_analytics/config/schema.py +441 -0
  16. gitflow_analytics/config/validator.py +154 -0
  17. gitflow_analytics/config.py +44 -508
  18. gitflow_analytics/core/analyzer.py +1209 -98
  19. gitflow_analytics/core/cache.py +1337 -29
  20. gitflow_analytics/core/data_fetcher.py +1193 -0
  21. gitflow_analytics/core/identity.py +363 -14
  22. gitflow_analytics/core/metrics_storage.py +526 -0
  23. gitflow_analytics/core/progress.py +372 -0
  24. gitflow_analytics/core/schema_version.py +269 -0
  25. gitflow_analytics/extractors/ml_tickets.py +1100 -0
  26. gitflow_analytics/extractors/story_points.py +8 -1
  27. gitflow_analytics/extractors/tickets.py +749 -11
  28. gitflow_analytics/identity_llm/__init__.py +6 -0
  29. gitflow_analytics/identity_llm/analysis_pass.py +231 -0
  30. gitflow_analytics/identity_llm/analyzer.py +464 -0
  31. gitflow_analytics/identity_llm/models.py +76 -0
  32. gitflow_analytics/integrations/github_integration.py +175 -11
  33. gitflow_analytics/integrations/jira_integration.py +461 -24
  34. gitflow_analytics/integrations/orchestrator.py +124 -1
  35. gitflow_analytics/metrics/activity_scoring.py +322 -0
  36. gitflow_analytics/metrics/branch_health.py +470 -0
  37. gitflow_analytics/metrics/dora.py +379 -20
  38. gitflow_analytics/models/database.py +843 -53
  39. gitflow_analytics/pm_framework/__init__.py +115 -0
  40. gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
  41. gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
  42. gitflow_analytics/pm_framework/base.py +406 -0
  43. gitflow_analytics/pm_framework/models.py +211 -0
  44. gitflow_analytics/pm_framework/orchestrator.py +652 -0
  45. gitflow_analytics/pm_framework/registry.py +333 -0
  46. gitflow_analytics/qualitative/__init__.py +9 -10
  47. gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
  48. gitflow_analytics/qualitative/classifiers/__init__.py +3 -3
  49. gitflow_analytics/qualitative/classifiers/change_type.py +518 -244
  50. gitflow_analytics/qualitative/classifiers/domain_classifier.py +272 -165
  51. gitflow_analytics/qualitative/classifiers/intent_analyzer.py +321 -222
  52. gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
  53. gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
  54. gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
  55. gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
  56. gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
  57. gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
  58. gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
  59. gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
  60. gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
  61. gitflow_analytics/qualitative/classifiers/risk_analyzer.py +215 -189
  62. gitflow_analytics/qualitative/core/__init__.py +4 -4
  63. gitflow_analytics/qualitative/core/llm_fallback.py +239 -235
  64. gitflow_analytics/qualitative/core/nlp_engine.py +157 -148
  65. gitflow_analytics/qualitative/core/pattern_cache.py +214 -192
  66. gitflow_analytics/qualitative/core/processor.py +381 -248
  67. gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
  68. gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
  69. gitflow_analytics/qualitative/models/__init__.py +7 -7
  70. gitflow_analytics/qualitative/models/schemas.py +155 -121
  71. gitflow_analytics/qualitative/utils/__init__.py +4 -4
  72. gitflow_analytics/qualitative/utils/batch_processor.py +136 -123
  73. gitflow_analytics/qualitative/utils/cost_tracker.py +142 -140
  74. gitflow_analytics/qualitative/utils/metrics.py +172 -158
  75. gitflow_analytics/qualitative/utils/text_processing.py +146 -104
  76. gitflow_analytics/reports/__init__.py +100 -0
  77. gitflow_analytics/reports/analytics_writer.py +539 -14
  78. gitflow_analytics/reports/base.py +648 -0
  79. gitflow_analytics/reports/branch_health_writer.py +322 -0
  80. gitflow_analytics/reports/classification_writer.py +924 -0
  81. gitflow_analytics/reports/cli_integration.py +427 -0
  82. gitflow_analytics/reports/csv_writer.py +1676 -212
  83. gitflow_analytics/reports/data_models.py +504 -0
  84. gitflow_analytics/reports/database_report_generator.py +427 -0
  85. gitflow_analytics/reports/example_usage.py +344 -0
  86. gitflow_analytics/reports/factory.py +499 -0
  87. gitflow_analytics/reports/formatters.py +698 -0
  88. gitflow_analytics/reports/html_generator.py +1116 -0
  89. gitflow_analytics/reports/interfaces.py +489 -0
  90. gitflow_analytics/reports/json_exporter.py +2770 -0
  91. gitflow_analytics/reports/narrative_writer.py +2287 -158
  92. gitflow_analytics/reports/story_point_correlation.py +1144 -0
  93. gitflow_analytics/reports/weekly_trends_writer.py +389 -0
  94. gitflow_analytics/training/__init__.py +5 -0
  95. gitflow_analytics/training/model_loader.py +377 -0
  96. gitflow_analytics/training/pipeline.py +550 -0
  97. gitflow_analytics/tui/__init__.py +1 -1
  98. gitflow_analytics/tui/app.py +129 -126
  99. gitflow_analytics/tui/screens/__init__.py +3 -3
  100. gitflow_analytics/tui/screens/analysis_progress_screen.py +188 -179
  101. gitflow_analytics/tui/screens/configuration_screen.py +154 -178
  102. gitflow_analytics/tui/screens/loading_screen.py +100 -110
  103. gitflow_analytics/tui/screens/main_screen.py +89 -72
  104. gitflow_analytics/tui/screens/results_screen.py +305 -281
  105. gitflow_analytics/tui/widgets/__init__.py +2 -2
  106. gitflow_analytics/tui/widgets/data_table.py +67 -69
  107. gitflow_analytics/tui/widgets/export_modal.py +76 -76
  108. gitflow_analytics/tui/widgets/progress_widget.py +41 -46
  109. gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
  110. gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
  111. gitflow_analytics-1.0.3.dist-info/METADATA +0 -490
  112. gitflow_analytics-1.0.3.dist-info/RECORD +0 -62
  113. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
  114. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
  115. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
  116. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,35 @@
1
"""LLM classifier module components.

This module provides modular components for LLM-based commit classification.
"""

# NOTE: import order mirrors the dependency layering of the package
# (base interfaces first, then processing components) — keep it stable.
from .base import BaseLLMClassifier, ClassificationResult, LLMProviderConfig
from .batch_processor import BatchConfig, BatchProcessor, BatchResult
from .cache import LLMCache
from .cost_tracker import CostRecord, CostTracker, ModelPricing
from .openai_client import OpenAIClassifier, OpenAIConfig
from .prompts import PromptGenerator, PromptTemplate, PromptVersion
from .response_parser import ResponseParser

# Public API of the LLM classifier package, grouped by role rather than
# alphabetically so related names read together.
__all__ = [
    # Base classes
    "BaseLLMClassifier",
    "ClassificationResult",
    "LLMProviderConfig",
    # Prompts
    "PromptGenerator",
    "PromptVersion",
    "PromptTemplate",
    # Providers
    "OpenAIClassifier",
    "OpenAIConfig",
    # Components
    "ResponseParser",
    "CostTracker",
    "ModelPricing",
    "CostRecord",
    "BatchProcessor",
    "BatchConfig",
    "BatchResult",
    "LLMCache",
]
@@ -0,0 +1,193 @@
1
+ """Base interface for LLM classifiers.
2
+
3
+ This module defines the abstract base class for all LLM-based classifiers,
4
+ establishing a consistent interface for different LLM providers.
5
+
6
+ WHY: Different LLM providers (OpenAI, Anthropic, OpenRouter, etc.) have different
7
+ APIs but should provide the same classification interface. This abstraction allows
8
+ easy switching between providers without changing the rest of the codebase.
9
+
10
+ DESIGN DECISIONS:
11
+ - Use ABC for enforcing interface implementation
12
+ - Define standard result format for all providers
13
+ - Include confidence scores and reasoning in results
14
+ - Support batch processing for efficiency
15
+ - Provide cost tracking interface
16
+ """
17
+
18
+ from abc import ABC, abstractmethod
19
+ from dataclasses import dataclass
20
+ from pathlib import Path
21
+ from typing import Any, Optional
22
+
23
+
24
@dataclass
class ClassificationResult:
    """Standard result format for LLM classification.

    WHY: Consistent result format across all LLM providers ensures
    downstream code doesn't need provider-specific handling.
    """

    category: str
    confidence: float
    method: str  # 'llm', 'cached', 'rule_fallback', etc.
    reasoning: str
    model: str
    alternatives: list[dict[str, Any]]  # Alternative classifications with scores
    processing_time_ms: float
    batch_id: Optional[str] = None

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary format for serialization."""
        serialized: dict[str, Any] = {
            field_name: getattr(self, field_name)
            for field_name in (
                "category",
                "confidence",
                "method",
                "reasoning",
                "model",
                "alternatives",
                "processing_time_ms",
            )
        }
        # batch_id is only included when it carries a value, keeping the
        # serialized form compact for non-batched classifications.
        if self.batch_id:
            serialized["batch_id"] = self.batch_id
        return serialized
55
+
56
+
57
@dataclass
class LLMProviderConfig:
    """Base configuration for LLM providers.

    WHY: Common configuration options that all providers need,
    with ability to extend for provider-specific settings.
    """

    api_key: Optional[str] = None
    model: str = "default"
    temperature: float = 0.1
    max_tokens: int = 50
    timeout_seconds: float = 30.0
    max_retries: int = 3
    retry_delay_seconds: float = 1.0

    # Rate limiting
    max_daily_requests: int = 1000
    max_requests_per_minute: int = 60

    # Cost tracking
    enable_cost_tracking: bool = True
    cost_warning_threshold: float = 10.0  # USD

    def validate(self) -> None:
        """Validate configuration settings.

        Raises:
            ValueError: If configuration is invalid
        """
        # Table-driven checks: each entry pairs a failure condition with
        # its error message; the first failing check raises.
        checks = (
            (
                self.temperature < 0 or self.temperature > 2,
                f"Temperature must be between 0 and 2, got {self.temperature}",
            ),
            (
                self.max_tokens < 1,
                f"max_tokens must be positive, got {self.max_tokens}",
            ),
            (
                self.timeout_seconds <= 0,
                f"timeout_seconds must be positive, got {self.timeout_seconds}",
            ),
        )
        for failed, message in checks:
            if failed:
                raise ValueError(message)
93
+
94
+
95
class BaseLLMClassifier(ABC):
    """Abstract base class for LLM-based classifiers.

    WHY: Defines the interface that all LLM providers must implement,
    ensuring consistency and allowing provider switching.
    """

    def __init__(self, config: LLMProviderConfig, cache_dir: Optional[Path] = None):
        """Initialize LLM classifier.

        Args:
            config: Provider-specific configuration (validated on entry).
            cache_dir: Directory for caching predictions. Defaults to
                ``.gitflow-cache`` in the current working directory.

        Raises:
            ValueError: If ``config`` fails validation.
        """
        config.validate()
        self.config = config
        self.cache_dir = cache_dir or Path(".gitflow-cache")
        # FIX: parents=True so a nested cache path (e.g. ~/.cache/gitflow/llm)
        # does not raise FileNotFoundError when intermediate directories are
        # missing; exist_ok keeps repeated initialization idempotent.
        self.cache_dir.mkdir(parents=True, exist_ok=True)

        # Cost-tracking counters; concrete providers are expected to update
        # these as they make API calls.
        self.total_tokens_used = 0
        self.total_cost = 0.0
        self.api_calls_made = 0

    @abstractmethod
    def classify_commit(
        self, message: str, files_changed: Optional[list[str]] = None
    ) -> ClassificationResult:
        """Classify a single commit message.

        Args:
            message: Commit message to classify
            files_changed: Optional list of changed files for context

        Returns:
            Classification result with category and metadata
        """
        pass

    @abstractmethod
    def classify_commits_batch(
        self, commits: list[dict[str, Any]], batch_id: Optional[str] = None
    ) -> list[ClassificationResult]:
        """Classify a batch of commits.

        Args:
            commits: List of commit dictionaries
            batch_id: Optional batch identifier for tracking

        Returns:
            List of classification results
        """
        pass

    @abstractmethod
    def get_provider_name(self) -> str:
        """Get the name of the LLM provider.

        Returns:
            Provider name (e.g., 'openai', 'anthropic', 'openrouter')
        """
        pass

    @abstractmethod
    def estimate_cost(self, text: str) -> float:
        """Estimate the cost of classifying the given text.

        Args:
            text: Text to be classified

        Returns:
            Estimated cost in USD
        """
        pass

    def get_statistics(self) -> dict[str, Any]:
        """Get usage statistics for this classifier.

        Returns:
            Dictionary with usage statistics
        """
        return {
            "provider": self.get_provider_name(),
            "model": self.config.model,
            "api_calls_made": self.api_calls_made,
            "total_tokens_used": self.total_tokens_used,
            "total_cost": self.total_cost,
            # Guard against division by zero before the first API call.
            "average_tokens_per_call": (
                self.total_tokens_used / self.api_calls_made if self.api_calls_made > 0 else 0
            ),
            "cost_warning_threshold": self.config.cost_warning_threshold,
            # True once spend crosses 80% of the configured warning threshold.
            "approaching_cost_limit": self.total_cost > self.config.cost_warning_threshold * 0.8,
        }

    def reset_statistics(self) -> None:
        """Reset usage statistics."""
        self.total_tokens_used = 0
        self.total_cost = 0.0
        self.api_calls_made = 0
@@ -0,0 +1,383 @@
1
+ """Batch processing logic for efficient LLM classification.
2
+
3
+ This module handles batch processing of commits for classification,
4
+ including progress tracking, error handling, and result aggregation.
5
+
6
+ WHY: Processing commits in batches improves efficiency, enables better
7
+ progress tracking, and allows for optimizations like parallel processing.
8
+
9
+ DESIGN DECISIONS:
10
+ - Support configurable batch sizes
11
+ - Provide detailed progress feedback
12
+ - Handle failures gracefully without losing progress
13
+ - Support resume from partial completion
14
+ - Track batch-level metrics
15
+ """
16
+
17
+ import hashlib
18
+ import logging
19
+ import time
20
+ from dataclasses import dataclass
21
+ from typing import Any, Callable, Optional
22
+
23
+ from ....core.progress import get_progress_service
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
+
28
@dataclass
class BatchConfig:
    """Configuration for batch processing.

    WHY: Centralizing batch configuration makes it easy to tune
    performance characteristics for different scenarios.
    """

    batch_size: int = 50  # Number of commits per batch
    max_parallel_batches: int = 1  # Currently serial, but structured for future parallel support
    retry_failed_batches: bool = True
    continue_on_batch_failure: bool = True
    show_progress: bool = True
    progress_nested: bool = True  # Show nested progress bars

    def validate(self) -> None:
        """Validate batch configuration."""
        # Table-driven checks; the first failing entry raises ValueError.
        checks = (
            (
                self.batch_size < 1,
                f"Batch size must be positive, got {self.batch_size}",
            ),
            (
                self.max_parallel_batches < 1,
                f"Max parallel batches must be positive, got {self.max_parallel_batches}",
            ),
        )
        for failed, message in checks:
            if failed:
                raise ValueError(message)
51
+
52
+
53
@dataclass
class BatchResult:
    """Result of processing a single batch.

    WHY: Structured batch results enable better error handling
    and performance analysis.
    """

    batch_id: str  # Unique identifier for the batch (see BatchProcessor._generate_batch_id)
    total_items: int  # Number of commits submitted in this batch
    successful_items: int  # Commits that produced a classification result
    failed_items: int  # Commits whose classification raised an exception
    results: list[dict[str, Any]]  # Per-commit classification dictionaries
    errors: list[dict[str, Any]]  # Per-commit error records for failures
    processing_time_seconds: float  # Wall-clock time spent on this batch
    retry_count: int = 0  # Times this batch was retried (0 = first attempt)
69
+
70
+
71
class BatchProcessor:
    """Processes commits in batches for LLM classification.

    WHY: Batch processing improves efficiency and provides better
    user feedback for large-scale classification tasks.
    """

    def __init__(self, config: Optional[BatchConfig] = None):
        """Initialize batch processor.

        Args:
            config: Batch processing configuration; defaults to BatchConfig().

        Raises:
            ValueError: If the supplied configuration is invalid.
        """
        self.config = config or BatchConfig()
        self.config.validate()

        # Cumulative statistics across all process_commits() calls.
        self.total_processed = 0
        self.total_successful = 0
        self.total_failed = 0
        self.batch_results: list[BatchResult] = []

    def process_commits(
        self,
        commits: list[dict[str, Any]],
        classifier_func: Callable[[dict[str, Any]], dict[str, Any]],
        job_description: str = "Processing commits",
    ) -> list[dict[str, Any]]:
        """Process commits in batches using the provided classifier.

        Args:
            commits: List of commits to process
            classifier_func: Function to classify a single commit
            job_description: Description for progress tracking

        Returns:
            List of classification results for all commits
        """
        if not commits:
            return []

        # Split into batches
        batches = self._create_batches(commits)
        logger.info(f"Processing {len(commits)} commits in {len(batches)} batches")

        progress = get_progress_service()
        all_results: list[dict[str, Any]] = []

        if self.config.show_progress:
            with progress.progress(
                total=len(batches), description=job_description, unit="batch", leave=True
            ) as batch_ctx:
                for i, batch in enumerate(batches, 1):
                    batch_id = self._generate_batch_id(batch, i)
                    progress.set_description(
                        batch_ctx, f"{job_description} (batch {i}/{len(batches)})"
                    )

                    batch_result = self._process_single_batch(
                        batch, batch_id, classifier_func, progress
                    )
                    all_results.extend(self._record_batch_result(batch_result))
                    progress.update(batch_ctx, 1)
        else:
            # Process without progress bars; bookkeeping is shared with the
            # progress path via _record_batch_result.
            for i, batch in enumerate(batches, 1):
                batch_id = self._generate_batch_id(batch, i)
                batch_result = self._process_single_batch(batch, batch_id, classifier_func, None)
                all_results.extend(self._record_batch_result(batch_result))

        # Log final summary
        logger.info(
            f"Batch processing complete: {self.total_successful}/{self.total_processed} successful"
        )

        return all_results

    def _record_batch_result(self, batch_result: BatchResult) -> list[dict[str, Any]]:
        """Record a finished batch: store it, update counters, log failures.

        WHY: Both the progress and non-progress paths need identical
        bookkeeping. Centralizing it removes duplication and FIXES a bug
        where the per-batch failure warning was only emitted when progress
        bars were enabled.

        Args:
            batch_result: Outcome of a single processed batch

        Returns:
            The batch's per-commit result dictionaries, for aggregation.
        """
        self.batch_results.append(batch_result)
        self.total_processed += batch_result.total_items
        self.total_successful += batch_result.successful_items
        self.total_failed += batch_result.failed_items

        if batch_result.failed_items > 0:
            logger.warning(
                f"Batch {batch_result.batch_id}: "
                f"{batch_result.failed_items}/{batch_result.total_items} failed"
            )
        return batch_result.results

    def _create_batches(self, commits: list[dict[str, Any]]) -> list[list[dict[str, Any]]]:
        """Split commits into batches.

        Args:
            commits: List of commits to batch

        Returns:
            List of commit batches, each at most config.batch_size long
        """
        return [
            commits[i : i + self.config.batch_size]
            for i in range(0, len(commits), self.config.batch_size)
        ]

    def _generate_batch_id(self, batch: list[dict[str, Any]], batch_num: int) -> str:
        """Generate a unique ID for a batch.

        Args:
            batch: Batch of commits
            batch_num: Batch number

        Returns:
            Unique batch ID (8-char hash, or a positional fallback)
        """
        # Hash the batch number plus first/last commit messages; md5 is used
        # only as a cheap fingerprint here, not for security.
        if batch:
            first_msg = batch[0].get("message", "")
            last_msg = batch[-1].get("message", "")
            content = f"{batch_num}:{first_msg}:{last_msg}"
            return hashlib.md5(content.encode()).hexdigest()[:8]
        return f"batch_{batch_num}"

    def _process_single_batch(
        self,
        batch: list[dict[str, Any]],
        batch_id: str,
        classifier_func: Callable[[dict[str, Any]], dict[str, Any]],
        progress: Optional[Any],
    ) -> BatchResult:
        """Process a single batch of commits.

        Args:
            batch: Batch of commits to process
            batch_id: Unique batch identifier
            classifier_func: Classification function
            progress: Progress tracking service (None disables nested bars)

        Returns:
            BatchResult with processing outcomes
        """
        start_time = time.time()
        results = []
        errors = []

        # Show nested progress for individual commits if configured
        if self.config.show_progress and self.config.progress_nested and progress:
            with progress.progress(
                total=len(batch),
                description=f"Batch {batch_id[:8]}",
                unit="commit",
                nested=True,
                leave=False,
            ) as commit_ctx:
                for j, commit in enumerate(batch, 1):
                    # Update progress description with a short message preview
                    message_preview = commit.get("message", "")[:30]
                    progress.set_description(
                        commit_ctx, f"Batch {batch_id[:8]} ({j}/{len(batch)}): {message_preview}..."
                    )

                    result, error = self._process_single_commit(commit, batch_id, classifier_func)

                    if result:
                        results.append(result)
                    if error:
                        errors.append(error)

                    progress.update(commit_ctx, 1)
        else:
            # Process without nested progress
            for commit in batch:
                result, error = self._process_single_commit(commit, batch_id, classifier_func)

                if result:
                    results.append(result)
                if error:
                    errors.append(error)

        processing_time = time.time() - start_time

        return BatchResult(
            batch_id=batch_id,
            total_items=len(batch),
            successful_items=len(results),
            failed_items=len(errors),
            results=results,
            errors=errors,
            processing_time_seconds=processing_time,
        )

    def _process_single_commit(
        self,
        commit: dict[str, Any],
        batch_id: str,
        classifier_func: Callable[[dict[str, Any]], dict[str, Any]],
    ) -> tuple[Optional[dict[str, Any]], Optional[dict[str, Any]]]:
        """Process a single commit within a batch.

        Args:
            commit: Commit to process
            batch_id: Batch identifier
            classifier_func: Classification function

        Returns:
            Tuple of (result, error); on success error is None, on failure
            result is a low-confidence fallback (or None when
            continue_on_batch_failure is False).
        """
        try:
            result = classifier_func(commit)

            # Tag the result with its batch and carry over commit metadata
            # the classifier may not have echoed back.
            result["batch_id"] = batch_id
            if "commit_hash" not in result and "hash" in commit:
                result["commit_hash"] = commit["hash"]
            if "author" not in result and "author" in commit:
                result["author"] = commit["author"]

            return result, None

        except Exception as e:
            logger.debug(f"Failed to classify commit: {e}")

            # Create error record
            error = {
                "batch_id": batch_id,
                "commit_hash": commit.get("hash", "unknown"),
                "message": commit.get("message", "")[:100],
                "error": str(e),
                "error_type": type(e).__name__,
            }

            # Return fallback result if configured to continue
            if self.config.continue_on_batch_failure:
                fallback_result = {
                    "category": "maintenance",
                    "confidence": 0.1,
                    "method": "error_fallback",
                    "reasoning": f"Classification failed: {str(e)}",
                    "batch_id": batch_id,
                    "commit_hash": commit.get("hash", "unknown"),
                    "error": True,
                }
                return fallback_result, error

            return None, error

    def get_statistics(self) -> dict[str, Any]:
        """Get batch processing statistics.

        Returns:
            Dictionary with processing statistics
        """
        if not self.batch_results:
            return {
                "total_batches": 0,
                "total_processed": 0,
                "total_successful": 0,
                "total_failed": 0,
                "success_rate": 0.0,
                "average_batch_time": 0.0,
            }

        total_time = sum(br.processing_time_seconds for br in self.batch_results)

        return {
            "total_batches": len(self.batch_results),
            "total_processed": self.total_processed,
            "total_successful": self.total_successful,
            "total_failed": self.total_failed,
            "success_rate": (
                self.total_successful / self.total_processed if self.total_processed > 0 else 0.0
            ),
            "average_batch_time": total_time / len(self.batch_results),
            "total_processing_time": total_time,
            "batch_size": self.config.batch_size,
            "batches_with_errors": sum(1 for br in self.batch_results if br.failed_items > 0),
        }

    def get_failed_commits(self) -> list[dict[str, Any]]:
        """Get list of all failed commits.

        Returns:
            List of error records for failed commits
        """
        failed = []
        for batch_result in self.batch_results:
            failed.extend(batch_result.errors)
        return failed

    def reset_statistics(self) -> None:
        """Reset all processing statistics."""
        self.total_processed = 0
        self.total_successful = 0
        self.total_failed = 0
        self.batch_results = []