gitflow-analytics 1.0.1-py3-none-any.whl → 1.0.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. gitflow_analytics/__init__.py +11 -11
  2. gitflow_analytics/_version.py +2 -2
  3. gitflow_analytics/cli.py +612 -258
  4. gitflow_analytics/cli_rich.py +353 -0
  5. gitflow_analytics/config.py +251 -141
  6. gitflow_analytics/core/analyzer.py +140 -103
  7. gitflow_analytics/core/branch_mapper.py +132 -132
  8. gitflow_analytics/core/cache.py +240 -169
  9. gitflow_analytics/core/identity.py +210 -173
  10. gitflow_analytics/extractors/base.py +13 -11
  11. gitflow_analytics/extractors/story_points.py +70 -59
  12. gitflow_analytics/extractors/tickets.py +101 -87
  13. gitflow_analytics/integrations/github_integration.py +84 -77
  14. gitflow_analytics/integrations/jira_integration.py +116 -104
  15. gitflow_analytics/integrations/orchestrator.py +86 -85
  16. gitflow_analytics/metrics/dora.py +181 -177
  17. gitflow_analytics/models/database.py +190 -53
  18. gitflow_analytics/qualitative/__init__.py +30 -0
  19. gitflow_analytics/qualitative/classifiers/__init__.py +13 -0
  20. gitflow_analytics/qualitative/classifiers/change_type.py +468 -0
  21. gitflow_analytics/qualitative/classifiers/domain_classifier.py +399 -0
  22. gitflow_analytics/qualitative/classifiers/intent_analyzer.py +436 -0
  23. gitflow_analytics/qualitative/classifiers/risk_analyzer.py +412 -0
  24. gitflow_analytics/qualitative/core/__init__.py +13 -0
  25. gitflow_analytics/qualitative/core/llm_fallback.py +653 -0
  26. gitflow_analytics/qualitative/core/nlp_engine.py +373 -0
  27. gitflow_analytics/qualitative/core/pattern_cache.py +457 -0
  28. gitflow_analytics/qualitative/core/processor.py +540 -0
  29. gitflow_analytics/qualitative/models/__init__.py +25 -0
  30. gitflow_analytics/qualitative/models/schemas.py +272 -0
  31. gitflow_analytics/qualitative/utils/__init__.py +13 -0
  32. gitflow_analytics/qualitative/utils/batch_processor.py +326 -0
  33. gitflow_analytics/qualitative/utils/cost_tracker.py +343 -0
  34. gitflow_analytics/qualitative/utils/metrics.py +347 -0
  35. gitflow_analytics/qualitative/utils/text_processing.py +243 -0
  36. gitflow_analytics/reports/analytics_writer.py +11 -4
  37. gitflow_analytics/reports/csv_writer.py +51 -31
  38. gitflow_analytics/reports/narrative_writer.py +16 -14
  39. gitflow_analytics/tui/__init__.py +5 -0
  40. gitflow_analytics/tui/app.py +721 -0
  41. gitflow_analytics/tui/screens/__init__.py +8 -0
  42. gitflow_analytics/tui/screens/analysis_progress_screen.py +487 -0
  43. gitflow_analytics/tui/screens/configuration_screen.py +547 -0
  44. gitflow_analytics/tui/screens/loading_screen.py +358 -0
  45. gitflow_analytics/tui/screens/main_screen.py +304 -0
  46. gitflow_analytics/tui/screens/results_screen.py +698 -0
  47. gitflow_analytics/tui/widgets/__init__.py +7 -0
  48. gitflow_analytics/tui/widgets/data_table.py +257 -0
  49. gitflow_analytics/tui/widgets/export_modal.py +301 -0
  50. gitflow_analytics/tui/widgets/progress_widget.py +192 -0
  51. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.0.3.dist-info}/METADATA +31 -4
  52. gitflow_analytics-1.0.3.dist-info/RECORD +62 -0
  53. gitflow_analytics-1.0.1.dist-info/RECORD +0 -31
  54. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.0.3.dist-info}/WHEEL +0 -0
  55. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.0.3.dist-info}/entry_points.txt +0 -0
  56. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.0.3.dist-info}/licenses/LICENSE +0 -0
  57. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.0.3.dist-info}/top_level.txt +0 -0

gitflow_analytics/qualitative/core/nlp_engine.py (new file)
@@ -0,0 +1,373 @@
+"""NLP processing engine using spaCy for fast commit analysis."""
+
+import logging
+import time
+from typing import Dict, List, Optional, Tuple, Any
+
+from ..models.schemas import NLPConfig, QualitativeCommitData
+from ..classifiers.change_type import ChangeTypeClassifier
+from ..classifiers.domain_classifier import DomainClassifier
+from ..classifiers.intent_analyzer import IntentAnalyzer
+from ..classifiers.risk_analyzer import RiskAnalyzer
+from ..utils.text_processing import TextProcessor
+from ..utils.metrics import PerformanceMetrics
+
+try:
+    import spacy
+    from spacy.tokens import Doc
+    SPACY_AVAILABLE = True
+except ImportError:
+    SPACY_AVAILABLE = False
+    Doc = Any  # Type hint fallback
+
+
+class NLPEngine:
+    """Core NLP processing engine using spaCy for fast commit analysis.
+
+    This engine provides the primary classification pipeline for commit analysis,
+    handling 85-90% of commits through fast NLP processing without requiring
+    expensive LLM calls.
+
+    The engine orchestrates multiple specialized classifiers:
+    - ChangeTypeClassifier: Determines commit type (feature, bugfix, etc.)
+    - DomainClassifier: Identifies business domain (frontend, backend, etc.)
+    - IntentAnalyzer: Extracts intent signals and urgency
+    - RiskAnalyzer: Assesses commit risk level
+    """
+
+    def __init__(self, config: NLPConfig):
+        """Initialize NLP engine with spaCy pipeline.
+
+        Args:
+            config: NLP configuration
+
+        Raises:
+            ImportError: If spaCy is not available
+            OSError: If spaCy model is not installed
+        """
+        if not SPACY_AVAILABLE:
+            raise ImportError(
+                "spaCy is required for NLP processing. Install with: pip install spacy"
+            )
+
+        self.config = config
+        self.logger = logging.getLogger(__name__)
+
+        # Initialize spaCy pipeline
+        self._init_spacy_pipeline()
+
+        # Initialize text processor
+        self.text_processor = TextProcessor()
+
+        # Initialize classifiers
+        self.change_classifier = ChangeTypeClassifier(config.change_type_config)
+        self.domain_classifier = DomainClassifier(config.domain_config)
+        self.intent_analyzer = IntentAnalyzer(config.intent_config)
+        self.risk_analyzer = RiskAnalyzer(config.risk_config)
+
+        # Performance tracking
+        self.metrics = PerformanceMetrics()
+        self.processing_times = []
+
+        self.logger.info(f"NLP engine initialized with model: {config.spacy_model}")
+
+    def _init_spacy_pipeline(self) -> None:
+        """Initialize spaCy NLP pipeline with optimizations."""
+        try:
+            self.nlp = spacy.load(self.config.spacy_model)
+
+            # Optimize pipeline for speed if in fast mode
+            if self.config.fast_mode:
+                # Disable expensive components we don't need
+                disabled_components = []
+                if 'parser' in self.nlp.pipe_names:
+                    disabled_components.append('parser')
+                if 'ner' in self.nlp.pipe_names:
+                    disabled_components.append('ner')
+
+                if disabled_components:
+                    self.nlp.disable_pipes(*disabled_components)
+                    self.logger.info(f"Disabled spaCy components for speed: {disabled_components}")
+
+        except OSError as e:
+            raise OSError(
+                f"spaCy model '{self.config.spacy_model}' not found. "
+                f"Install with: python -m spacy download {self.config.spacy_model}"
+            ) from e
+
+    def process_batch(self, commits: List[Dict[str, Any]]) -> List[QualitativeCommitData]:
+        """Process a batch of commits efficiently using spaCy pipeline.
+
+        This method leverages spaCy's batch processing capabilities to analyze
+        multiple commit messages simultaneously for maximum efficiency.
+
+        Args:
+            commits: List of commit dictionaries with message, files_changed, etc.
+
+        Returns:
+            List of QualitativeCommitData with analysis results
+        """
+        if not commits:
+            return []
+
+        start_time = time.time()
+
+        # Extract messages for batch processing
+        messages = [commit.get('message', '') for commit in commits]
+
+        # Process all messages through spaCy pipeline at once
+        try:
+            docs = list(self.nlp.pipe(
+                messages,
+                batch_size=self.config.spacy_batch_size,
+                disable=[] if not self.config.fast_mode else ['parser', 'ner']
+            ))
+        except Exception as e:
+            self.logger.error(f"spaCy processing failed: {e}")
+            # Fallback to individual processing
+            docs = []
+            for message in messages:
+                try:
+                    docs.append(self.nlp(message))
+                except Exception:
+                    # Create empty doc as fallback
+                    docs.append(self.nlp(""))
+
+        # Analyze each commit with its processed document
+        results = []
+        for commit, doc in zip(commits, docs):
+            try:
+                result = self._analyze_commit(commit, doc)
+                results.append(result)
+            except Exception as e:
+                self.logger.error(f"Error analyzing commit {commit.get('hash', 'unknown')}: {e}")
+                # Create fallback result
+                results.append(self._create_fallback_result(commit))
+
+        # Track performance
+        processing_time = (time.time() - start_time) * 1000  # ms
+        self.processing_times.append(processing_time)
+
+        # Record metrics
+        avg_confidence = sum(r.confidence_score for r in results) / len(results) if results else 0.0
+        self.metrics.record_processing(
+            operation='nlp_batch',
+            processing_time_ms=processing_time,
+            items_processed=len(commits),
+            confidence_score=avg_confidence,
+            method_used='nlp'
+        )
+
+        self.logger.debug(
+            f"Processed {len(commits)} commits in {processing_time:.1f}ms "
+            f"({len(commits) * 1000 / processing_time:.1f} commits/sec)"
+        )
+
+        return results
+
+    def _analyze_commit(self, commit: Dict[str, Any], doc: Doc) -> QualitativeCommitData:
+        """Analyze a single commit with all classifiers.
+
+        Args:
+            commit: Commit dictionary with message, files, etc.
+            doc: spaCy processed document
+
+        Returns:
+            QualitativeCommitData with analysis results
+        """
+        analysis_start = time.time()
+
+        # Extract basic commit info
+        message = commit.get('message', '')
+        files_changed = commit.get('files_changed', [])
+
+        # Run all classifiers
+        change_type, change_confidence = self.change_classifier.classify(
+            message, doc, files_changed
+        )
+
+        domain, domain_confidence = self.domain_classifier.classify(
+            message, doc, files_changed
+        )
+
+        intent_signals = self.intent_analyzer.analyze(message, doc)
+
+        risk_assessment = self.risk_analyzer.assess(commit, doc)
+
+        # Calculate overall confidence score
+        overall_confidence = self._calculate_overall_confidence(
+            change_confidence,
+            domain_confidence,
+            intent_signals.get('confidence', 0.5)
+        )
+
+        # Extract technical context
+        technical_context = {
+            'file_patterns': self.text_processor.extract_file_patterns(files_changed),
+            'complexity_metrics': self.text_processor.calculate_commit_complexity(
+                message, files_changed,
+                commit.get('insertions', 0),
+                commit.get('deletions', 0)
+            ),
+            'semantic_fingerprint': self.text_processor.create_semantic_fingerprint(
+                message, files_changed
+            )
+        }
+
+        processing_time = (time.time() - analysis_start) * 1000  # ms
+
+        return QualitativeCommitData(
+            # Copy existing commit fields
+            hash=commit.get('hash', ''),
+            message=message,
+            author_name=commit.get('author_name', ''),
+            author_email=commit.get('author_email', ''),
+            timestamp=commit.get('timestamp', time.time()),
+            files_changed=files_changed,
+            insertions=commit.get('insertions', 0),
+            deletions=commit.get('deletions', 0),
+
+            # Qualitative analysis results
+            change_type=change_type,
+            change_type_confidence=change_confidence,
+            business_domain=domain,
+            domain_confidence=domain_confidence,
+            risk_level=risk_assessment['level'],
+            risk_factors=risk_assessment['factors'],
+            intent_signals=intent_signals,
+            collaboration_patterns={},  # TODO: Implement collaboration analysis
+            technical_context=technical_context,
+
+            # Processing metadata
+            processing_method='nlp',
+            processing_time_ms=processing_time,
+            confidence_score=overall_confidence
+        )
+
+    def _calculate_overall_confidence(self, change_confidence: float,
+                                      domain_confidence: float,
+                                      intent_confidence: float) -> float:
+        """Calculate weighted overall confidence score.
+
+        Args:
+            change_confidence: Change type classification confidence
+            domain_confidence: Domain classification confidence
+            intent_confidence: Intent analysis confidence
+
+        Returns:
+            Overall confidence score (0.0 to 1.0)
+        """
+        # Weighted average with change_type being most important
+        weights = {
+            'change': 0.5,  # Change type is most critical
+            'domain': 0.3,  # Domain is important for reporting
+            'intent': 0.2   # Intent is supplementary
+        }
+
+        overall = (
+            change_confidence * weights['change'] +
+            domain_confidence * weights['domain'] +
+            intent_confidence * weights['intent']
+        )
+
+        return min(1.0, max(0.0, overall))
+
+    def _create_fallback_result(self, commit: Dict[str, Any]) -> QualitativeCommitData:
+        """Create a fallback result when analysis fails.
+
+        Args:
+            commit: Commit dictionary
+
+        Returns:
+            QualitativeCommitData with default values
+        """
+        return QualitativeCommitData(
+            # Basic commit info
+            hash=commit.get('hash', ''),
+            message=commit.get('message', ''),
+            author_name=commit.get('author_name', ''),
+            author_email=commit.get('author_email', ''),
+            timestamp=commit.get('timestamp', time.time()),
+            files_changed=commit.get('files_changed', []),
+            insertions=commit.get('insertions', 0),
+            deletions=commit.get('deletions', 0),
+
+            # Default classifications
+            change_type='unknown',
+            change_type_confidence=0.0,
+            business_domain='unknown',
+            domain_confidence=0.0,
+            risk_level='medium',
+            risk_factors=['analysis_failed'],
+            intent_signals={'confidence': 0.0, 'signals': []},
+            collaboration_patterns={},
+            technical_context={},
+
+            # Processing metadata
+            processing_method='nlp',
+            processing_time_ms=0.0,
+            confidence_score=0.0
+        )
+
+    def get_performance_stats(self) -> Dict[str, Any]:
+        """Get NLP engine performance statistics.
+
+        Returns:
+            Dictionary with performance metrics
+        """
+        if not self.processing_times:
+            return {
+                'total_batches': 0,
+                'avg_processing_time_ms': 0.0,
+                'min_processing_time_ms': 0.0,
+                'max_processing_time_ms': 0.0,
+                'total_processing_time_ms': 0.0
+            }
+
+        return {
+            'total_batches': len(self.processing_times),
+            'avg_processing_time_ms': sum(self.processing_times) / len(self.processing_times),
+            'min_processing_time_ms': min(self.processing_times),
+            'max_processing_time_ms': max(self.processing_times),
+            'total_processing_time_ms': sum(self.processing_times),
+            'spacy_model': self.config.spacy_model,
+            'fast_mode': self.config.fast_mode,
+            'batch_size': self.config.spacy_batch_size
+        }
+
+    def validate_setup(self) -> Tuple[bool, List[str]]:
+        """Validate NLP engine setup and dependencies.
+
+        Returns:
+            Tuple of (is_valid, list_of_issues)
+        """
+        issues = []
+
+        # Check spaCy availability
+        if not SPACY_AVAILABLE:
+            issues.append("spaCy not installed")
+            return False, issues
+
+        # Check model availability
+        try:
+            test_nlp = spacy.load(self.config.spacy_model)
+            # Test basic functionality
+            test_doc = test_nlp("test commit message")
+            if not test_doc:
+                issues.append("spaCy model not functioning properly")
+        except OSError:
+            issues.append(f"spaCy model '{self.config.spacy_model}' not installed")
+        except Exception as e:
+            issues.append(f"spaCy model error: {e}")
+
+        # Check classifier initialization
+        for classifier_name, classifier in [
+            ('change_type', self.change_classifier),
+            ('domain', self.domain_classifier),
+            ('intent', self.intent_analyzer),
+            ('risk', self.risk_analyzer)
+        ]:
+            if not hasattr(classifier, 'classify') and not hasattr(classifier, 'analyze') and not hasattr(classifier, 'assess'):
+                issues.append(f"{classifier_name} classifier not properly initialized")
+
+        return len(issues) == 0, issues
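
Usage note: the minimal sketch below shows how the pieces of this new engine fit together, based only on the code in this hunk. The bare `NLPConfig()` construction is an assumption; its actual fields and defaults live in gitflow_analytics/qualitative/models/schemas.py (added in this same release) and are not shown here.

    # Hypothetical usage sketch for the new NLPEngine (not from the package docs).
    # NLPConfig() with default values is assumed; check qualitative/models/schemas.py.
    from gitflow_analytics.qualitative.models.schemas import NLPConfig
    from gitflow_analytics.qualitative.core.nlp_engine import NLPEngine

    engine = NLPEngine(NLPConfig())

    # validate_setup() returns (is_valid, issues) instead of raising, so it can
    # report a missing spaCy model or a mis-initialized classifier up front.
    ok, issues = engine.validate_setup()
    if not ok:
        raise SystemExit(f"NLP setup issues: {issues}")

    # process_batch() takes plain commit dicts; missing keys fall back to
    # defaults via commit.get(...), so 'message' is the main driver.
    commits = [{
        "hash": "abc1234",
        "message": "fix: handle empty ticket IDs in extractor",
        "files_changed": ["gitflow_analytics/extractors/tickets.py"],
        "insertions": 12,
        "deletions": 3,
    }]
    for result in engine.process_batch(commits):
        # confidence_score is the fixed 0.5/0.3/0.2 blend computed by
        # _calculate_overall_confidence: e.g. change=0.8, domain=0.6,
        # intent=0.5 gives 0.8*0.5 + 0.6*0.3 + 0.5*0.2 = 0.68.
        print(result.change_type, result.business_domain,
              result.risk_level, result.confidence_score)

    print(engine.get_performance_stats())

Note that `process_batch` is written to be non-raising: a spaCy batch failure falls back to per-message parsing, and a per-commit analysis failure yields a `_create_fallback_result` entry (change_type 'unknown', risk_level 'medium', confidence 0.0), so callers should always get one result per input commit.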