gitflow-analytics 1.0.0__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gitflow_analytics/__init__.py +11 -9
- gitflow_analytics/_version.py +2 -2
- gitflow_analytics/cli.py +691 -243
- gitflow_analytics/cli_rich.py +353 -0
- gitflow_analytics/config.py +389 -96
- gitflow_analytics/core/analyzer.py +175 -78
- gitflow_analytics/core/branch_mapper.py +132 -132
- gitflow_analytics/core/cache.py +242 -173
- gitflow_analytics/core/identity.py +214 -178
- gitflow_analytics/extractors/base.py +13 -11
- gitflow_analytics/extractors/story_points.py +70 -59
- gitflow_analytics/extractors/tickets.py +111 -88
- gitflow_analytics/integrations/github_integration.py +91 -77
- gitflow_analytics/integrations/jira_integration.py +284 -0
- gitflow_analytics/integrations/orchestrator.py +99 -72
- gitflow_analytics/metrics/dora.py +183 -179
- gitflow_analytics/models/database.py +191 -54
- gitflow_analytics/qualitative/__init__.py +30 -0
- gitflow_analytics/qualitative/classifiers/__init__.py +13 -0
- gitflow_analytics/qualitative/classifiers/change_type.py +468 -0
- gitflow_analytics/qualitative/classifiers/domain_classifier.py +399 -0
- gitflow_analytics/qualitative/classifiers/intent_analyzer.py +436 -0
- gitflow_analytics/qualitative/classifiers/risk_analyzer.py +412 -0
- gitflow_analytics/qualitative/core/__init__.py +13 -0
- gitflow_analytics/qualitative/core/llm_fallback.py +653 -0
- gitflow_analytics/qualitative/core/nlp_engine.py +373 -0
- gitflow_analytics/qualitative/core/pattern_cache.py +457 -0
- gitflow_analytics/qualitative/core/processor.py +540 -0
- gitflow_analytics/qualitative/models/__init__.py +25 -0
- gitflow_analytics/qualitative/models/schemas.py +272 -0
- gitflow_analytics/qualitative/utils/__init__.py +13 -0
- gitflow_analytics/qualitative/utils/batch_processor.py +326 -0
- gitflow_analytics/qualitative/utils/cost_tracker.py +343 -0
- gitflow_analytics/qualitative/utils/metrics.py +347 -0
- gitflow_analytics/qualitative/utils/text_processing.py +243 -0
- gitflow_analytics/reports/analytics_writer.py +25 -8
- gitflow_analytics/reports/csv_writer.py +60 -32
- gitflow_analytics/reports/narrative_writer.py +21 -15
- gitflow_analytics/tui/__init__.py +5 -0
- gitflow_analytics/tui/app.py +721 -0
- gitflow_analytics/tui/screens/__init__.py +8 -0
- gitflow_analytics/tui/screens/analysis_progress_screen.py +487 -0
- gitflow_analytics/tui/screens/configuration_screen.py +547 -0
- gitflow_analytics/tui/screens/loading_screen.py +358 -0
- gitflow_analytics/tui/screens/main_screen.py +304 -0
- gitflow_analytics/tui/screens/results_screen.py +698 -0
- gitflow_analytics/tui/widgets/__init__.py +7 -0
- gitflow_analytics/tui/widgets/data_table.py +257 -0
- gitflow_analytics/tui/widgets/export_modal.py +301 -0
- gitflow_analytics/tui/widgets/progress_widget.py +192 -0
- gitflow_analytics-1.0.3.dist-info/METADATA +490 -0
- gitflow_analytics-1.0.3.dist-info/RECORD +62 -0
- gitflow_analytics-1.0.0.dist-info/METADATA +0 -201
- gitflow_analytics-1.0.0.dist-info/RECORD +0 -30
- {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/WHEEL +0 -0
- {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/entry_points.txt +0 -0
- {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/licenses/LICENSE +0 -0
- {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,540 @@
|
|
|
1
|
+
"""Main orchestrator for qualitative analysis of Git commits."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import time
|
|
5
|
+
from typing import Dict, List, Any, Tuple, Optional
|
|
6
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from ...models.database import Database
|
|
10
|
+
from ..models.schemas import QualitativeConfig, QualitativeCommitData
|
|
11
|
+
from .nlp_engine import NLPEngine
|
|
12
|
+
from .llm_fallback import LLMFallback
|
|
13
|
+
from .pattern_cache import PatternCache
|
|
14
|
+
from ..utils.batch_processor import BatchProcessor, ProgressTracker
|
|
15
|
+
from ..utils.metrics import PerformanceMetrics
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class QualitativeProcessor:
    """Coordinate the qualitative-analysis pipeline for Git commits.

    The pipeline runs in five stages:

    1. Pattern-cache lookup for commit patterns seen before.
    2. Fast NLP classification for the bulk of commits.
    3. Selective LLM fallback for low-confidence cases.
    4. Pattern learning to grow the cache from fresh results.
    5. Performance tracking and periodic cache optimization.

    It is designed to handle 10,000+ commits in under 60 seconds while
    keeping accuracy high and LLM spend low.
    """

    def __init__(self, config: QualitativeConfig, database: Database):
        """Wire up the processor and its collaborating components.

        Args:
            config: Configuration for qualitative analysis.
            database: Database instance for caching and storage.
        """
        self.config = config
        self.database = database
        self.logger = logging.getLogger(__name__)

        # Core analysis components.
        self.nlp_engine = NLPEngine(config.nlp_config)
        self.pattern_cache = PatternCache(config.cache_config, database)

        # The LLM fallback is optional: it needs an OpenRouter API key and a
        # failed initialization must not take the whole processor down.
        self.llm_fallback = None
        if config.llm_config.openrouter_api_key:
            try:
                self.llm_fallback = LLMFallback(config.llm_config)
                self.logger.info("LLM fallback system initialized")
            except Exception as e:
                self.logger.warning(f"LLM fallback initialization failed: {e}")
        else:
            self.logger.info("LLM fallback disabled (no API key configured)")

        # Batch execution and performance bookkeeping.
        self.batch_processor = BatchProcessor(
            batch_size=config.batch_size,
            max_workers=config.nlp_config.max_workers,
        )
        self.metrics = PerformanceMetrics()

        # Running counters describing how commits were classified.
        self.processing_stats = {
            'total_processed': 0,
            'cache_hits': 0,
            'nlp_processed': 0,
            'llm_processed': 0,
            'processing_start_time': None,
            'last_optimization': None,
        }

        self.logger.info("Qualitative processor initialized")
|
|
76
|
+
|
|
77
|
+
def process_commits(self, commits: List[Dict[str, Any]],
|
|
78
|
+
show_progress: bool = True) -> List[QualitativeCommitData]:
|
|
79
|
+
"""Process commits with qualitative analysis.
|
|
80
|
+
|
|
81
|
+
Args:
|
|
82
|
+
commits: List of commit dictionaries from GitFlow Analytics
|
|
83
|
+
show_progress: Whether to show progress information
|
|
84
|
+
|
|
85
|
+
Returns:
|
|
86
|
+
List of QualitativeCommitData with analysis results
|
|
87
|
+
"""
|
|
88
|
+
if not commits:
|
|
89
|
+
return []
|
|
90
|
+
|
|
91
|
+
if not self.config.enabled:
|
|
92
|
+
self.logger.info("Qualitative analysis disabled in configuration")
|
|
93
|
+
return self._create_disabled_results(commits)
|
|
94
|
+
|
|
95
|
+
self.processing_stats['processing_start_time'] = time.time()
|
|
96
|
+
self.logger.info(f"Starting qualitative analysis of {len(commits)} commits")
|
|
97
|
+
|
|
98
|
+
# Setup progress tracking
|
|
99
|
+
progress_tracker = ProgressTracker(
|
|
100
|
+
total=len(commits),
|
|
101
|
+
description="Qualitative Analysis"
|
|
102
|
+
) if show_progress else None
|
|
103
|
+
|
|
104
|
+
# Step 1: Check cache for known patterns
|
|
105
|
+
cached_results, uncached_commits = self._check_cache(commits, progress_tracker)
|
|
106
|
+
self.logger.info(f"Cache provided {len(cached_results)} results, processing {len(uncached_commits)} commits")
|
|
107
|
+
|
|
108
|
+
# Step 2: Process uncached commits with NLP
|
|
109
|
+
nlp_results = []
|
|
110
|
+
if uncached_commits:
|
|
111
|
+
nlp_results = self._process_with_nlp(uncached_commits, progress_tracker)
|
|
112
|
+
|
|
113
|
+
# Step 3: Identify uncertain cases for LLM processing
|
|
114
|
+
confident_results, uncertain_commits = self._separate_by_confidence(nlp_results)
|
|
115
|
+
self.logger.info(f"NLP confident: {len(confident_results)}, uncertain: {len(uncertain_commits)}")
|
|
116
|
+
|
|
117
|
+
# Step 4: Process uncertain cases with LLM if available
|
|
118
|
+
llm_results = []
|
|
119
|
+
if uncertain_commits and self.llm_fallback:
|
|
120
|
+
llm_results = self._process_with_llm(uncertain_commits, progress_tracker)
|
|
121
|
+
else:
|
|
122
|
+
# If no LLM available, keep uncertain results with lower confidence
|
|
123
|
+
llm_results = [self._convert_to_uncertain_result(commit) for commit in uncertain_commits]
|
|
124
|
+
|
|
125
|
+
# Step 5: Update cache with new high-confidence patterns
|
|
126
|
+
all_new_results = confident_results + llm_results
|
|
127
|
+
if self.config.cache_config.enable_pattern_learning:
|
|
128
|
+
self.pattern_cache.learn_from_results(all_new_results)
|
|
129
|
+
|
|
130
|
+
# Step 6: Combine all results
|
|
131
|
+
all_results = cached_results + confident_results + llm_results
|
|
132
|
+
|
|
133
|
+
# Update processing statistics
|
|
134
|
+
self._update_processing_stats(len(commits), len(cached_results),
|
|
135
|
+
len(confident_results), len(llm_results))
|
|
136
|
+
|
|
137
|
+
# Periodic cache optimization
|
|
138
|
+
if self._should_optimize_cache():
|
|
139
|
+
self._optimize_system()
|
|
140
|
+
|
|
141
|
+
self.logger.info(f"Qualitative analysis completed in {time.time() - self.processing_stats['processing_start_time']:.2f}s")
|
|
142
|
+
return all_results
|
|
143
|
+
|
|
144
|
+
def _check_cache(self, commits: List[Dict[str, Any]],
|
|
145
|
+
progress_tracker: Optional[ProgressTracker]) -> Tuple[List[QualitativeCommitData], List[Dict[str, Any]]]:
|
|
146
|
+
"""Check pattern cache for known commit patterns.
|
|
147
|
+
|
|
148
|
+
Args:
|
|
149
|
+
commits: List of commit dictionaries
|
|
150
|
+
progress_tracker: Optional progress tracker
|
|
151
|
+
|
|
152
|
+
Returns:
|
|
153
|
+
Tuple of (cached_results, uncached_commits)
|
|
154
|
+
"""
|
|
155
|
+
cached_results = []
|
|
156
|
+
uncached_commits = []
|
|
157
|
+
|
|
158
|
+
for commit in commits:
|
|
159
|
+
cached_result = self.pattern_cache.lookup_pattern(
|
|
160
|
+
commit.get('message', ''),
|
|
161
|
+
commit.get('files_changed', [])
|
|
162
|
+
)
|
|
163
|
+
|
|
164
|
+
if cached_result:
|
|
165
|
+
# Convert cached result to QualitativeCommitData
|
|
166
|
+
result = self._create_result_from_cache(commit, cached_result)
|
|
167
|
+
cached_results.append(result)
|
|
168
|
+
self.processing_stats['cache_hits'] += 1
|
|
169
|
+
else:
|
|
170
|
+
uncached_commits.append(commit)
|
|
171
|
+
|
|
172
|
+
if progress_tracker:
|
|
173
|
+
progress_tracker.update(1)
|
|
174
|
+
|
|
175
|
+
return cached_results, uncached_commits
|
|
176
|
+
|
|
177
|
+
def _process_with_nlp(self, commits: List[Dict[str, Any]],
|
|
178
|
+
progress_tracker: Optional[ProgressTracker]) -> List[QualitativeCommitData]:
|
|
179
|
+
"""Process commits using NLP engine.
|
|
180
|
+
|
|
181
|
+
Args:
|
|
182
|
+
commits: List of commit dictionaries
|
|
183
|
+
progress_tracker: Optional progress tracker
|
|
184
|
+
|
|
185
|
+
Returns:
|
|
186
|
+
List of QualitativeCommitData from NLP processing
|
|
187
|
+
"""
|
|
188
|
+
if not commits:
|
|
189
|
+
return []
|
|
190
|
+
|
|
191
|
+
def process_batch_with_progress(batch: List[Dict[str, Any]]) -> List[QualitativeCommitData]:
|
|
192
|
+
results = self.nlp_engine.process_batch(batch)
|
|
193
|
+
if progress_tracker:
|
|
194
|
+
progress_tracker.update(len(batch))
|
|
195
|
+
return results
|
|
196
|
+
|
|
197
|
+
# Use batch processing for efficiency
|
|
198
|
+
if self.config.nlp_config.enable_parallel_processing and len(commits) > 1000:
|
|
199
|
+
all_results = self.batch_processor.process_batches(
|
|
200
|
+
commits,
|
|
201
|
+
process_batch_with_progress,
|
|
202
|
+
parallel=True
|
|
203
|
+
)
|
|
204
|
+
else:
|
|
205
|
+
all_results = self.batch_processor.process_batches(
|
|
206
|
+
commits,
|
|
207
|
+
process_batch_with_progress,
|
|
208
|
+
parallel=False
|
|
209
|
+
)
|
|
210
|
+
|
|
211
|
+
self.processing_stats['nlp_processed'] += len(commits)
|
|
212
|
+
return all_results
|
|
213
|
+
|
|
214
|
+
def _separate_by_confidence(self, results: List[QualitativeCommitData]) -> Tuple[List[QualitativeCommitData], List[Dict[str, Any]]]:
|
|
215
|
+
"""Separate results by confidence threshold.
|
|
216
|
+
|
|
217
|
+
Args:
|
|
218
|
+
results: List of NLP analysis results
|
|
219
|
+
|
|
220
|
+
Returns:
|
|
221
|
+
Tuple of (confident_results, uncertain_commit_dicts)
|
|
222
|
+
"""
|
|
223
|
+
confident_results = []
|
|
224
|
+
uncertain_commits = []
|
|
225
|
+
|
|
226
|
+
for result in results:
|
|
227
|
+
if result.confidence_score >= self.config.confidence_threshold:
|
|
228
|
+
confident_results.append(result)
|
|
229
|
+
else:
|
|
230
|
+
# Convert back to commit dict for LLM processing
|
|
231
|
+
commit_dict = {
|
|
232
|
+
'hash': result.hash,
|
|
233
|
+
'message': result.message,
|
|
234
|
+
'author_name': result.author_name,
|
|
235
|
+
'author_email': result.author_email,
|
|
236
|
+
'timestamp': result.timestamp,
|
|
237
|
+
'files_changed': result.files_changed,
|
|
238
|
+
'insertions': result.insertions,
|
|
239
|
+
'deletions': result.deletions
|
|
240
|
+
}
|
|
241
|
+
uncertain_commits.append(commit_dict)
|
|
242
|
+
|
|
243
|
+
return confident_results, uncertain_commits
|
|
244
|
+
|
|
245
|
+
def _process_with_llm(self, commits: List[Dict[str, Any]],
|
|
246
|
+
progress_tracker: Optional[ProgressTracker]) -> List[QualitativeCommitData]:
|
|
247
|
+
"""Process uncertain commits with LLM fallback.
|
|
248
|
+
|
|
249
|
+
Args:
|
|
250
|
+
commits: List of uncertain commit dictionaries
|
|
251
|
+
progress_tracker: Optional progress tracker
|
|
252
|
+
|
|
253
|
+
Returns:
|
|
254
|
+
List of QualitativeCommitData from LLM processing
|
|
255
|
+
"""
|
|
256
|
+
if not commits or not self.llm_fallback:
|
|
257
|
+
return []
|
|
258
|
+
|
|
259
|
+
# Check LLM usage limits
|
|
260
|
+
max_llm_commits = int(len(commits) * self.config.max_llm_fallback_pct)
|
|
261
|
+
if len(commits) > max_llm_commits:
|
|
262
|
+
self.logger.warning(
|
|
263
|
+
f"LLM limit reached: processing {max_llm_commits} of {len(commits)} uncertain commits"
|
|
264
|
+
)
|
|
265
|
+
commits = commits[:max_llm_commits]
|
|
266
|
+
|
|
267
|
+
# Group similar commits for batch processing
|
|
268
|
+
grouped_commits = self.llm_fallback.group_similar_commits(commits)
|
|
269
|
+
self.logger.debug(f"Grouped {len(commits)} commits into {len(grouped_commits)} groups for LLM processing")
|
|
270
|
+
|
|
271
|
+
all_results = []
|
|
272
|
+
|
|
273
|
+
for group in grouped_commits:
|
|
274
|
+
try:
|
|
275
|
+
group_results = self.llm_fallback.process_group(group)
|
|
276
|
+
all_results.extend(group_results)
|
|
277
|
+
|
|
278
|
+
if progress_tracker:
|
|
279
|
+
progress_tracker.update(len(group))
|
|
280
|
+
|
|
281
|
+
except Exception as e:
|
|
282
|
+
self.logger.error(f"LLM processing failed for group of {len(group)} commits: {e}")
|
|
283
|
+
# Create fallback results for this group
|
|
284
|
+
fallback_results = [self._convert_to_uncertain_result(commit) for commit in group]
|
|
285
|
+
all_results.extend(fallback_results)
|
|
286
|
+
|
|
287
|
+
if progress_tracker:
|
|
288
|
+
progress_tracker.update(len(group))
|
|
289
|
+
|
|
290
|
+
self.processing_stats['llm_processed'] += len(commits)
|
|
291
|
+
return all_results
|
|
292
|
+
|
|
293
|
+
def _create_result_from_cache(self, commit: Dict[str, Any],
|
|
294
|
+
cached_data: Dict[str, Any]) -> QualitativeCommitData:
|
|
295
|
+
"""Create QualitativeCommitData from cached pattern.
|
|
296
|
+
|
|
297
|
+
Args:
|
|
298
|
+
commit: Original commit dictionary
|
|
299
|
+
cached_data: Cached classification data
|
|
300
|
+
|
|
301
|
+
Returns:
|
|
302
|
+
QualitativeCommitData object
|
|
303
|
+
"""
|
|
304
|
+
return QualitativeCommitData(
|
|
305
|
+
# Copy commit fields
|
|
306
|
+
hash=commit.get('hash', ''),
|
|
307
|
+
message=commit.get('message', ''),
|
|
308
|
+
author_name=commit.get('author_name', ''),
|
|
309
|
+
author_email=commit.get('author_email', ''),
|
|
310
|
+
timestamp=commit.get('timestamp', time.time()),
|
|
311
|
+
files_changed=commit.get('files_changed', []),
|
|
312
|
+
insertions=commit.get('insertions', 0),
|
|
313
|
+
deletions=commit.get('deletions', 0),
|
|
314
|
+
|
|
315
|
+
# Use cached classification data
|
|
316
|
+
change_type=cached_data.get('change_type', 'unknown'),
|
|
317
|
+
change_type_confidence=cached_data.get('change_type_confidence', 0.5),
|
|
318
|
+
business_domain=cached_data.get('business_domain', 'unknown'),
|
|
319
|
+
domain_confidence=cached_data.get('domain_confidence', 0.5),
|
|
320
|
+
risk_level=cached_data.get('risk_level', 'medium'),
|
|
321
|
+
risk_factors=cached_data.get('risk_factors', []),
|
|
322
|
+
intent_signals=cached_data.get('intent_signals', {}),
|
|
323
|
+
collaboration_patterns=cached_data.get('collaboration_patterns', {}),
|
|
324
|
+
technical_context={'processing_method': 'cached'},
|
|
325
|
+
|
|
326
|
+
# Processing metadata
|
|
327
|
+
processing_method='cache',
|
|
328
|
+
processing_time_ms=0.5, # Very fast for cached results
|
|
329
|
+
confidence_score=cached_data.get('confidence_score', 0.5)
|
|
330
|
+
)
|
|
331
|
+
|
|
332
|
+
def _convert_to_uncertain_result(self, commit: Dict[str, Any]) -> QualitativeCommitData:
|
|
333
|
+
"""Convert commit to uncertain result when LLM is unavailable.
|
|
334
|
+
|
|
335
|
+
Args:
|
|
336
|
+
commit: Commit dictionary
|
|
337
|
+
|
|
338
|
+
Returns:
|
|
339
|
+
QualitativeCommitData with uncertain classifications
|
|
340
|
+
"""
|
|
341
|
+
return QualitativeCommitData(
|
|
342
|
+
# Copy commit fields
|
|
343
|
+
hash=commit.get('hash', ''),
|
|
344
|
+
message=commit.get('message', ''),
|
|
345
|
+
author_name=commit.get('author_name', ''),
|
|
346
|
+
author_email=commit.get('author_email', ''),
|
|
347
|
+
timestamp=commit.get('timestamp', time.time()),
|
|
348
|
+
files_changed=commit.get('files_changed', []),
|
|
349
|
+
insertions=commit.get('insertions', 0),
|
|
350
|
+
deletions=commit.get('deletions', 0),
|
|
351
|
+
|
|
352
|
+
# Uncertain classifications
|
|
353
|
+
change_type='unknown',
|
|
354
|
+
change_type_confidence=0.3,
|
|
355
|
+
business_domain='unknown',
|
|
356
|
+
domain_confidence=0.3,
|
|
357
|
+
risk_level='medium',
|
|
358
|
+
risk_factors=['low_confidence_classification'],
|
|
359
|
+
intent_signals={'confidence': 0.3},
|
|
360
|
+
collaboration_patterns={},
|
|
361
|
+
technical_context={'processing_method': 'uncertain_fallback'},
|
|
362
|
+
|
|
363
|
+
# Processing metadata
|
|
364
|
+
processing_method='nlp',
|
|
365
|
+
processing_time_ms=1.0,
|
|
366
|
+
confidence_score=0.3
|
|
367
|
+
)
|
|
368
|
+
|
|
369
|
+
def _create_disabled_results(self, commits: List[Dict[str, Any]]) -> List[QualitativeCommitData]:
|
|
370
|
+
"""Create disabled results when qualitative analysis is turned off.
|
|
371
|
+
|
|
372
|
+
Args:
|
|
373
|
+
commits: List of commit dictionaries
|
|
374
|
+
|
|
375
|
+
Returns:
|
|
376
|
+
List of QualitativeCommitData with disabled status
|
|
377
|
+
"""
|
|
378
|
+
results = []
|
|
379
|
+
|
|
380
|
+
for commit in commits:
|
|
381
|
+
result = QualitativeCommitData(
|
|
382
|
+
# Copy commit fields
|
|
383
|
+
hash=commit.get('hash', ''),
|
|
384
|
+
message=commit.get('message', ''),
|
|
385
|
+
author_name=commit.get('author_name', ''),
|
|
386
|
+
author_email=commit.get('author_email', ''),
|
|
387
|
+
timestamp=commit.get('timestamp', time.time()),
|
|
388
|
+
files_changed=commit.get('files_changed', []),
|
|
389
|
+
insertions=commit.get('insertions', 0),
|
|
390
|
+
deletions=commit.get('deletions', 0),
|
|
391
|
+
|
|
392
|
+
# Disabled classifications
|
|
393
|
+
change_type='disabled',
|
|
394
|
+
change_type_confidence=0.0,
|
|
395
|
+
business_domain='disabled',
|
|
396
|
+
domain_confidence=0.0,
|
|
397
|
+
risk_level='unknown',
|
|
398
|
+
risk_factors=['qualitative_analysis_disabled'],
|
|
399
|
+
intent_signals={'disabled': True},
|
|
400
|
+
collaboration_patterns={},
|
|
401
|
+
technical_context={'processing_method': 'disabled'},
|
|
402
|
+
|
|
403
|
+
# Processing metadata
|
|
404
|
+
processing_method='disabled',
|
|
405
|
+
processing_time_ms=0.0,
|
|
406
|
+
confidence_score=0.0
|
|
407
|
+
)
|
|
408
|
+
results.append(result)
|
|
409
|
+
|
|
410
|
+
return results
|
|
411
|
+
|
|
412
|
+
def _update_processing_stats(self, total_commits: int, cached: int,
|
|
413
|
+
nlp_processed: int, llm_processed: int) -> None:
|
|
414
|
+
"""Update processing statistics.
|
|
415
|
+
|
|
416
|
+
Args:
|
|
417
|
+
total_commits: Total number of commits processed
|
|
418
|
+
cached: Number of cache hits
|
|
419
|
+
nlp_processed: Number processed by NLP
|
|
420
|
+
llm_processed: Number processed by LLM
|
|
421
|
+
"""
|
|
422
|
+
self.processing_stats['total_processed'] += total_commits
|
|
423
|
+
self.processing_stats['cache_hits'] += cached
|
|
424
|
+
self.processing_stats['nlp_processed'] += nlp_processed
|
|
425
|
+
self.processing_stats['llm_processed'] += llm_processed
|
|
426
|
+
|
|
427
|
+
# Log processing breakdown
|
|
428
|
+
cache_pct = (cached / total_commits) * 100 if total_commits > 0 else 0
|
|
429
|
+
nlp_pct = (nlp_processed / total_commits) * 100 if total_commits > 0 else 0
|
|
430
|
+
llm_pct = (llm_processed / total_commits) * 100 if total_commits > 0 else 0
|
|
431
|
+
|
|
432
|
+
self.logger.info(
|
|
433
|
+
f"Processing breakdown: {cache_pct:.1f}% cached, "
|
|
434
|
+
f"{nlp_pct:.1f}% NLP, {llm_pct:.1f}% LLM"
|
|
435
|
+
)
|
|
436
|
+
|
|
437
|
+
def _should_optimize_cache(self) -> bool:
|
|
438
|
+
"""Check if cache optimization should be performed.
|
|
439
|
+
|
|
440
|
+
Returns:
|
|
441
|
+
True if optimization should be performed
|
|
442
|
+
"""
|
|
443
|
+
# Optimize every 10,000 commits or every hour
|
|
444
|
+
if (self.processing_stats['total_processed'] % 10000 == 0 or
|
|
445
|
+
(self.processing_stats['last_optimization'] is None) or
|
|
446
|
+
(time.time() - self.processing_stats['last_optimization'] > 3600)):
|
|
447
|
+
return True
|
|
448
|
+
return False
|
|
449
|
+
|
|
450
|
+
def _optimize_system(self) -> None:
|
|
451
|
+
"""Perform system optimization."""
|
|
452
|
+
self.logger.info("Performing system optimization...")
|
|
453
|
+
|
|
454
|
+
# Optimize pattern cache
|
|
455
|
+
self.pattern_cache.optimize_cache()
|
|
456
|
+
|
|
457
|
+
# Update last optimization time
|
|
458
|
+
self.processing_stats['last_optimization'] = time.time()
|
|
459
|
+
|
|
460
|
+
self.logger.info("System optimization completed")
|
|
461
|
+
|
|
462
|
+
def get_processing_statistics(self) -> Dict[str, Any]:
|
|
463
|
+
"""Get comprehensive processing statistics.
|
|
464
|
+
|
|
465
|
+
Returns:
|
|
466
|
+
Dictionary with processing statistics
|
|
467
|
+
"""
|
|
468
|
+
# Get component statistics
|
|
469
|
+
cache_stats = self.pattern_cache.get_cache_statistics()
|
|
470
|
+
nlp_stats = self.nlp_engine.get_performance_stats()
|
|
471
|
+
|
|
472
|
+
# Calculate processing rates
|
|
473
|
+
total_time = time.time() - (self.processing_stats['processing_start_time'] or time.time())
|
|
474
|
+
commits_per_second = self.processing_stats['total_processed'] / total_time if total_time > 0 else 0
|
|
475
|
+
|
|
476
|
+
# Calculate method percentages
|
|
477
|
+
total = self.processing_stats['total_processed']
|
|
478
|
+
method_percentages = {
|
|
479
|
+
'cache': (self.processing_stats['cache_hits'] / total * 100) if total > 0 else 0,
|
|
480
|
+
'nlp': (self.processing_stats['nlp_processed'] / total * 100) if total > 0 else 0,
|
|
481
|
+
'llm': (self.processing_stats['llm_processed'] / total * 100) if total > 0 else 0
|
|
482
|
+
}
|
|
483
|
+
|
|
484
|
+
stats = {
|
|
485
|
+
'processing_summary': {
|
|
486
|
+
'total_commits_processed': self.processing_stats['total_processed'],
|
|
487
|
+
'commits_per_second': commits_per_second,
|
|
488
|
+
'total_processing_time_seconds': total_time,
|
|
489
|
+
'method_breakdown': method_percentages
|
|
490
|
+
},
|
|
491
|
+
'cache_statistics': cache_stats,
|
|
492
|
+
'nlp_statistics': nlp_stats,
|
|
493
|
+
'configuration': {
|
|
494
|
+
'enabled': self.config.enabled,
|
|
495
|
+
'confidence_threshold': self.config.confidence_threshold,
|
|
496
|
+
'max_llm_fallback_pct': self.config.max_llm_fallback_pct,
|
|
497
|
+
'batch_size': self.config.batch_size
|
|
498
|
+
}
|
|
499
|
+
}
|
|
500
|
+
|
|
501
|
+
# Add LLM statistics if available
|
|
502
|
+
if self.llm_fallback:
|
|
503
|
+
stats['llm_statistics'] = {
|
|
504
|
+
'cost_tracking': self.llm_fallback.cost_tracker.get_usage_stats(),
|
|
505
|
+
'model_usage': 'available'
|
|
506
|
+
}
|
|
507
|
+
else:
|
|
508
|
+
stats['llm_statistics'] = {'model_usage': 'disabled'}
|
|
509
|
+
|
|
510
|
+
return stats
|
|
511
|
+
|
|
512
|
+
def validate_setup(self) -> Tuple[bool, List[str]]:
|
|
513
|
+
"""Validate processor setup and dependencies.
|
|
514
|
+
|
|
515
|
+
Returns:
|
|
516
|
+
Tuple of (is_valid, list_of_issues)
|
|
517
|
+
"""
|
|
518
|
+
issues = []
|
|
519
|
+
|
|
520
|
+
# Validate NLP engine
|
|
521
|
+
nlp_valid, nlp_issues = self.nlp_engine.validate_setup()
|
|
522
|
+
if not nlp_valid:
|
|
523
|
+
issues.extend([f"NLP: {issue}" for issue in nlp_issues])
|
|
524
|
+
|
|
525
|
+
# Validate LLM fallback if configured
|
|
526
|
+
if self.config.llm_config.openrouter_api_key and self.llm_fallback is None:
|
|
527
|
+
issues.append("LLM: API key configured but fallback system failed to initialize")
|
|
528
|
+
|
|
529
|
+
# Validate configuration
|
|
530
|
+
config_warnings = self.config.validate()
|
|
531
|
+
issues.extend([f"Config: {warning}" for warning in config_warnings])
|
|
532
|
+
|
|
533
|
+
# Test database connection
|
|
534
|
+
try:
|
|
535
|
+
with self.database.get_session() as session:
|
|
536
|
+
session.execute("SELECT 1")
|
|
537
|
+
except Exception as e:
|
|
538
|
+
issues.append(f"Database: Connection failed - {e}")
|
|
539
|
+
|
|
540
|
+
return len(issues) == 0, issues
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""Data models and schemas for qualitative analysis."""
|
|
2
|
+
|
|
3
|
+
from .schemas import (
|
|
4
|
+
QualitativeCommitData,
|
|
5
|
+
QualitativeConfig,
|
|
6
|
+
NLPConfig,
|
|
7
|
+
LLMConfig,
|
|
8
|
+
CacheConfig,
|
|
9
|
+
ChangeTypeConfig,
|
|
10
|
+
IntentConfig,
|
|
11
|
+
DomainConfig,
|
|
12
|
+
RiskConfig,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
# Names re-exported by ``from gitflow_analytics.qualitative.models import *``.
__all__ = [
    "QualitativeCommitData", "QualitativeConfig",
    "NLPConfig", "LLMConfig", "CacheConfig",
    "ChangeTypeConfig", "IntentConfig", "DomainConfig", "RiskConfig",
]
|