gitflow_analytics-1.0.0-py3-none-any.whl → gitflow_analytics-1.0.3-py3-none-any.whl
This diff shows the changes between two package versions as published to a supported public registry. It is provided for informational purposes only and reflects the packages' contents as they appear in that registry.
- gitflow_analytics/__init__.py +11 -9
- gitflow_analytics/_version.py +2 -2
- gitflow_analytics/cli.py +691 -243
- gitflow_analytics/cli_rich.py +353 -0
- gitflow_analytics/config.py +389 -96
- gitflow_analytics/core/analyzer.py +175 -78
- gitflow_analytics/core/branch_mapper.py +132 -132
- gitflow_analytics/core/cache.py +242 -173
- gitflow_analytics/core/identity.py +214 -178
- gitflow_analytics/extractors/base.py +13 -11
- gitflow_analytics/extractors/story_points.py +70 -59
- gitflow_analytics/extractors/tickets.py +111 -88
- gitflow_analytics/integrations/github_integration.py +91 -77
- gitflow_analytics/integrations/jira_integration.py +284 -0
- gitflow_analytics/integrations/orchestrator.py +99 -72
- gitflow_analytics/metrics/dora.py +183 -179
- gitflow_analytics/models/database.py +191 -54
- gitflow_analytics/qualitative/__init__.py +30 -0
- gitflow_analytics/qualitative/classifiers/__init__.py +13 -0
- gitflow_analytics/qualitative/classifiers/change_type.py +468 -0
- gitflow_analytics/qualitative/classifiers/domain_classifier.py +399 -0
- gitflow_analytics/qualitative/classifiers/intent_analyzer.py +436 -0
- gitflow_analytics/qualitative/classifiers/risk_analyzer.py +412 -0
- gitflow_analytics/qualitative/core/__init__.py +13 -0
- gitflow_analytics/qualitative/core/llm_fallback.py +653 -0
- gitflow_analytics/qualitative/core/nlp_engine.py +373 -0
- gitflow_analytics/qualitative/core/pattern_cache.py +457 -0
- gitflow_analytics/qualitative/core/processor.py +540 -0
- gitflow_analytics/qualitative/models/__init__.py +25 -0
- gitflow_analytics/qualitative/models/schemas.py +272 -0
- gitflow_analytics/qualitative/utils/__init__.py +13 -0
- gitflow_analytics/qualitative/utils/batch_processor.py +326 -0
- gitflow_analytics/qualitative/utils/cost_tracker.py +343 -0
- gitflow_analytics/qualitative/utils/metrics.py +347 -0
- gitflow_analytics/qualitative/utils/text_processing.py +243 -0
- gitflow_analytics/reports/analytics_writer.py +25 -8
- gitflow_analytics/reports/csv_writer.py +60 -32
- gitflow_analytics/reports/narrative_writer.py +21 -15
- gitflow_analytics/tui/__init__.py +5 -0
- gitflow_analytics/tui/app.py +721 -0
- gitflow_analytics/tui/screens/__init__.py +8 -0
- gitflow_analytics/tui/screens/analysis_progress_screen.py +487 -0
- gitflow_analytics/tui/screens/configuration_screen.py +547 -0
- gitflow_analytics/tui/screens/loading_screen.py +358 -0
- gitflow_analytics/tui/screens/main_screen.py +304 -0
- gitflow_analytics/tui/screens/results_screen.py +698 -0
- gitflow_analytics/tui/widgets/__init__.py +7 -0
- gitflow_analytics/tui/widgets/data_table.py +257 -0
- gitflow_analytics/tui/widgets/export_modal.py +301 -0
- gitflow_analytics/tui/widgets/progress_widget.py +192 -0
- gitflow_analytics-1.0.3.dist-info/METADATA +490 -0
- gitflow_analytics-1.0.3.dist-info/RECORD +62 -0
- gitflow_analytics-1.0.0.dist-info/METADATA +0 -201
- gitflow_analytics-1.0.0.dist-info/RECORD +0 -30
- {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/WHEEL +0 -0
- {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/entry_points.txt +0 -0
- {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/licenses/LICENSE +0 -0
- {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/top_level.txt +0 -0
gitflow_analytics/qualitative/core/nlp_engine.py (new file)

@@ -0,0 +1,373 @@
"""NLP processing engine using spaCy for fast commit analysis."""

import logging
import time
from typing import Dict, List, Optional, Tuple, Any

from ..models.schemas import NLPConfig, QualitativeCommitData
from ..classifiers.change_type import ChangeTypeClassifier
from ..classifiers.domain_classifier import DomainClassifier
from ..classifiers.intent_analyzer import IntentAnalyzer
from ..classifiers.risk_analyzer import RiskAnalyzer
from ..utils.text_processing import TextProcessor
from ..utils.metrics import PerformanceMetrics

try:
    import spacy
    from spacy.tokens import Doc
    SPACY_AVAILABLE = True
except ImportError:
    SPACY_AVAILABLE = False
    Doc = Any  # Type hint fallback


class NLPEngine:
    """Core NLP processing engine using spaCy for fast commit analysis.

    This engine provides the primary classification pipeline for commit analysis,
    handling 85-90% of commits through fast NLP processing without requiring
    expensive LLM calls.

    The engine orchestrates multiple specialized classifiers:
    - ChangeTypeClassifier: Determines commit type (feature, bugfix, etc.)
    - DomainClassifier: Identifies business domain (frontend, backend, etc.)
    - IntentAnalyzer: Extracts intent signals and urgency
    - RiskAnalyzer: Assesses commit risk level
    """

    def __init__(self, config: NLPConfig):
        """Initialize NLP engine with spaCy pipeline.

        Args:
            config: NLP configuration

        Raises:
            ImportError: If spaCy is not available
            OSError: If spaCy model is not installed
        """
        if not SPACY_AVAILABLE:
            raise ImportError(
                "spaCy is required for NLP processing. Install with: pip install spacy"
            )

        self.config = config
        self.logger = logging.getLogger(__name__)

        # Initialize spaCy pipeline
        self._init_spacy_pipeline()

        # Initialize text processor
        self.text_processor = TextProcessor()

        # Initialize classifiers
        self.change_classifier = ChangeTypeClassifier(config.change_type_config)
        self.domain_classifier = DomainClassifier(config.domain_config)
        self.intent_analyzer = IntentAnalyzer(config.intent_config)
        self.risk_analyzer = RiskAnalyzer(config.risk_config)

        # Performance tracking
        self.metrics = PerformanceMetrics()
        self.processing_times = []

        self.logger.info(f"NLP engine initialized with model: {config.spacy_model}")

    def _init_spacy_pipeline(self) -> None:
        """Initialize spaCy NLP pipeline with optimizations."""
        try:
            self.nlp = spacy.load(self.config.spacy_model)

            # Optimize pipeline for speed if in fast mode
            if self.config.fast_mode:
                # Disable expensive components we don't need
                disabled_components = []
                if 'parser' in self.nlp.pipe_names:
                    disabled_components.append('parser')
                if 'ner' in self.nlp.pipe_names:
                    disabled_components.append('ner')

                if disabled_components:
                    self.nlp.disable_pipes(*disabled_components)
                    self.logger.info(f"Disabled spaCy components for speed: {disabled_components}")

        except OSError as e:
            raise OSError(
                f"spaCy model '{self.config.spacy_model}' not found. "
                f"Install with: python -m spacy download {self.config.spacy_model}"
            ) from e

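    # Illustrative note, not part of the released file: the same speed-up can
    # be requested at load time instead of disabling pipes after loading. A
    # minimal sketch; "en_core_web_sm" is an assumed model name, since the
    # engine actually reads it from NLPConfig.spacy_model:
    #
    #   nlp = spacy.load("en_core_web_sm", disable=["parser", "ner"])
    #   doc = nlp("fix: handle empty config file")
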
    def process_batch(self, commits: List[Dict[str, Any]]) -> List[QualitativeCommitData]:
        """Process a batch of commits efficiently using spaCy pipeline.

        This method leverages spaCy's batch processing capabilities to analyze
        multiple commit messages simultaneously for maximum efficiency.

        Args:
            commits: List of commit dictionaries with message, files_changed, etc.

        Returns:
            List of QualitativeCommitData with analysis results
        """
        if not commits:
            return []

        start_time = time.time()

        # Extract messages for batch processing
        messages = [commit.get('message', '') for commit in commits]

        # Process all messages through spaCy pipeline at once
        try:
            docs = list(self.nlp.pipe(
                messages,
                batch_size=self.config.spacy_batch_size,
                disable=[] if not self.config.fast_mode else ['parser', 'ner']
            ))
        except Exception as e:
            self.logger.error(f"spaCy processing failed: {e}")
            # Fallback to individual processing
            docs = []
            for message in messages:
                try:
                    docs.append(self.nlp(message))
                except Exception:
                    # Create empty doc as fallback
                    docs.append(self.nlp(""))

        # Analyze each commit with its processed document
        results = []
        for commit, doc in zip(commits, docs):
            try:
                result = self._analyze_commit(commit, doc)
                results.append(result)
            except Exception as e:
                self.logger.error(f"Error analyzing commit {commit.get('hash', 'unknown')}: {e}")
                # Create fallback result
                results.append(self._create_fallback_result(commit))

        # Track performance
        processing_time = (time.time() - start_time) * 1000  # ms
        self.processing_times.append(processing_time)

        # Record metrics
        avg_confidence = sum(r.confidence_score for r in results) / len(results) if results else 0.0
        self.metrics.record_processing(
            operation='nlp_batch',
            processing_time_ms=processing_time,
            items_processed=len(commits),
            confidence_score=avg_confidence,
            method_used='nlp'
        )

        self.logger.debug(
            f"Processed {len(commits)} commits in {processing_time:.1f}ms "
            f"({len(commits) * 1000 / processing_time:.1f} commits/sec)"
        )

        return results

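    # Illustrative input shape, inferred from the .get() calls above rather
    # than from a documented schema (all field values are made up):
    #
    #   commit = {
    #       "hash": "abc123",
    #       "message": "fix: handle empty config",
    #       "author_name": "Dev",
    #       "author_email": "dev@example.com",
    #       "timestamp": 1700000000.0,
    #       "files_changed": ["gitflow_analytics/config.py"],
    #       "insertions": 12,
    #       "deletions": 3,
    #   }
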
    def _analyze_commit(self, commit: Dict[str, Any], doc: Doc) -> QualitativeCommitData:
        """Analyze a single commit with all classifiers.

        Args:
            commit: Commit dictionary with message, files, etc.
            doc: spaCy processed document

        Returns:
            QualitativeCommitData with analysis results
        """
        analysis_start = time.time()

        # Extract basic commit info
        message = commit.get('message', '')
        files_changed = commit.get('files_changed', [])

        # Run all classifiers
        change_type, change_confidence = self.change_classifier.classify(
            message, doc, files_changed
        )

        domain, domain_confidence = self.domain_classifier.classify(
            message, doc, files_changed
        )

        intent_signals = self.intent_analyzer.analyze(message, doc)

        risk_assessment = self.risk_analyzer.assess(commit, doc)

        # Calculate overall confidence score
        overall_confidence = self._calculate_overall_confidence(
            change_confidence,
            domain_confidence,
            intent_signals.get('confidence', 0.5)
        )

        # Extract technical context
        technical_context = {
            'file_patterns': self.text_processor.extract_file_patterns(files_changed),
            'complexity_metrics': self.text_processor.calculate_commit_complexity(
                message, files_changed,
                commit.get('insertions', 0),
                commit.get('deletions', 0)
            ),
            'semantic_fingerprint': self.text_processor.create_semantic_fingerprint(
                message, files_changed
            )
        }

        processing_time = (time.time() - analysis_start) * 1000  # ms

        return QualitativeCommitData(
            # Copy existing commit fields
            hash=commit.get('hash', ''),
            message=message,
            author_name=commit.get('author_name', ''),
            author_email=commit.get('author_email', ''),
            timestamp=commit.get('timestamp', time.time()),
            files_changed=files_changed,
            insertions=commit.get('insertions', 0),
            deletions=commit.get('deletions', 0),

            # Qualitative analysis results
            change_type=change_type,
            change_type_confidence=change_confidence,
            business_domain=domain,
            domain_confidence=domain_confidence,
            risk_level=risk_assessment['level'],
            risk_factors=risk_assessment['factors'],
            intent_signals=intent_signals,
            collaboration_patterns={},  # TODO: Implement collaboration analysis
            technical_context=technical_context,

            # Processing metadata
            processing_method='nlp',
            processing_time_ms=processing_time,
            confidence_score=overall_confidence
        )

    def _calculate_overall_confidence(self, change_confidence: float,
                                      domain_confidence: float,
                                      intent_confidence: float) -> float:
        """Calculate weighted overall confidence score.

        Args:
            change_confidence: Change type classification confidence
            domain_confidence: Domain classification confidence
            intent_confidence: Intent analysis confidence

        Returns:
            Overall confidence score (0.0 to 1.0)
        """
        # Weighted average with change_type being most important
        weights = {
            'change': 0.5,   # Change type is most critical
            'domain': 0.3,   # Domain is important for reporting
            'intent': 0.2    # Intent is supplementary
        }

        overall = (
            change_confidence * weights['change'] +
            domain_confidence * weights['domain'] +
            intent_confidence * weights['intent']
        )

        return min(1.0, max(0.0, overall))

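    # Worked example (illustrative values): change=0.9, domain=0.6, intent=0.5
    # gives 0.9 * 0.5 + 0.6 * 0.3 + 0.5 * 0.2 = 0.45 + 0.18 + 0.10 = 0.73,
    # which the final clamp leaves unchanged since it already lies in [0.0, 1.0].
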
    def _create_fallback_result(self, commit: Dict[str, Any]) -> QualitativeCommitData:
        """Create a fallback result when analysis fails.

        Args:
            commit: Commit dictionary

        Returns:
            QualitativeCommitData with default values
        """
        return QualitativeCommitData(
            # Basic commit info
            hash=commit.get('hash', ''),
            message=commit.get('message', ''),
            author_name=commit.get('author_name', ''),
            author_email=commit.get('author_email', ''),
            timestamp=commit.get('timestamp', time.time()),
            files_changed=commit.get('files_changed', []),
            insertions=commit.get('insertions', 0),
            deletions=commit.get('deletions', 0),

            # Default classifications
            change_type='unknown',
            change_type_confidence=0.0,
            business_domain='unknown',
            domain_confidence=0.0,
            risk_level='medium',
            risk_factors=['analysis_failed'],
            intent_signals={'confidence': 0.0, 'signals': []},
            collaboration_patterns={},
            technical_context={},

            # Processing metadata
            processing_method='nlp',
            processing_time_ms=0.0,
            confidence_score=0.0
        )

    def get_performance_stats(self) -> Dict[str, Any]:
        """Get NLP engine performance statistics.

        Returns:
            Dictionary with performance metrics
        """
        if not self.processing_times:
            return {
                'total_batches': 0,
                'avg_processing_time_ms': 0.0,
                'min_processing_time_ms': 0.0,
                'max_processing_time_ms': 0.0,
                'total_processing_time_ms': 0.0
            }

        return {
            'total_batches': len(self.processing_times),
            'avg_processing_time_ms': sum(self.processing_times) / len(self.processing_times),
            'min_processing_time_ms': min(self.processing_times),
            'max_processing_time_ms': max(self.processing_times),
            'total_processing_time_ms': sum(self.processing_times),
            'spacy_model': self.config.spacy_model,
            'fast_mode': self.config.fast_mode,
            'batch_size': self.config.spacy_batch_size
        }

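    # Illustrative return value (all values are made up; model and batch size
    # simply echo the config): after two batches, get_performance_stats()
    # might return
    #   {'total_batches': 2, 'avg_processing_time_ms': 41.5,
    #    'min_processing_time_ms': 38.0, 'max_processing_time_ms': 45.0,
    #    'total_processing_time_ms': 83.0, 'spacy_model': 'en_core_web_sm',
    #    'fast_mode': True, 'batch_size': 64}
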
    def validate_setup(self) -> Tuple[bool, List[str]]:
        """Validate NLP engine setup and dependencies.

        Returns:
            Tuple of (is_valid, list_of_issues)
        """
        issues = []

        # Check spaCy availability
        if not SPACY_AVAILABLE:
            issues.append("spaCy not installed")
            return False, issues

        # Check model availability
        try:
            test_nlp = spacy.load(self.config.spacy_model)
            # Test basic functionality
            test_doc = test_nlp("test commit message")
            if not test_doc:
                issues.append("spaCy model not functioning properly")
        except OSError:
            issues.append(f"spaCy model '{self.config.spacy_model}' not installed")
        except Exception as e:
            issues.append(f"spaCy model error: {e}")

        # Check classifier initialization
        for classifier_name, classifier in [
            ('change_type', self.change_classifier),
            ('domain', self.domain_classifier),
            ('intent', self.intent_analyzer),
            ('risk', self.risk_analyzer)
        ]:
            if not hasattr(classifier, 'classify') and not hasattr(classifier, 'analyze') and not hasattr(classifier, 'assess'):
                issues.append(f"{classifier_name} classifier not properly initialized")

        return len(issues) == 0, issues
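
A minimal end-to-end sketch of how this engine might be driven. This is not taken from the package's documentation: it assumes NLPConfig is constructible with defaults, and the commit-field values are made up; the import paths match the file listing above.

    from gitflow_analytics.qualitative.core.nlp_engine import NLPEngine
    from gitflow_analytics.qualitative.models.schemas import NLPConfig

    # Assumed default-constructible; supplies spacy_model, fast_mode,
    # spacy_batch_size, and the per-classifier configs.
    config = NLPConfig()
    engine = NLPEngine(config)

    is_valid, issues = engine.validate_setup()
    if not is_valid:
        raise SystemExit(f"NLP setup issues: {issues}")

    results = engine.process_batch([
        {
            "hash": "abc123",
            "message": "fix: null check in config parser",
            "files_changed": ["gitflow_analytics/config.py"],
            "insertions": 4,
            "deletions": 1,
        }
    ])
    for r in results:
        print(r.change_type, r.business_domain, f"{r.confidence_score:.2f}")
    print(engine.get_performance_stats())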