gitflow-analytics 1.0.1__py3-none-any.whl → 1.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119)
  1. gitflow_analytics/__init__.py +11 -11
  2. gitflow_analytics/_version.py +2 -2
  3. gitflow_analytics/classification/__init__.py +31 -0
  4. gitflow_analytics/classification/batch_classifier.py +752 -0
  5. gitflow_analytics/classification/classifier.py +464 -0
  6. gitflow_analytics/classification/feature_extractor.py +725 -0
  7. gitflow_analytics/classification/linguist_analyzer.py +574 -0
  8. gitflow_analytics/classification/model.py +455 -0
  9. gitflow_analytics/cli.py +4490 -378
  10. gitflow_analytics/cli_rich.py +503 -0
  11. gitflow_analytics/config/__init__.py +43 -0
  12. gitflow_analytics/config/errors.py +261 -0
  13. gitflow_analytics/config/loader.py +904 -0
  14. gitflow_analytics/config/profiles.py +264 -0
  15. gitflow_analytics/config/repository.py +124 -0
  16. gitflow_analytics/config/schema.py +441 -0
  17. gitflow_analytics/config/validator.py +154 -0
  18. gitflow_analytics/config.py +44 -398
  19. gitflow_analytics/core/analyzer.py +1320 -172
  20. gitflow_analytics/core/branch_mapper.py +132 -132
  21. gitflow_analytics/core/cache.py +1554 -175
  22. gitflow_analytics/core/data_fetcher.py +1193 -0
  23. gitflow_analytics/core/identity.py +571 -185
  24. gitflow_analytics/core/metrics_storage.py +526 -0
  25. gitflow_analytics/core/progress.py +372 -0
  26. gitflow_analytics/core/schema_version.py +269 -0
  27. gitflow_analytics/extractors/base.py +13 -11
  28. gitflow_analytics/extractors/ml_tickets.py +1100 -0
  29. gitflow_analytics/extractors/story_points.py +77 -59
  30. gitflow_analytics/extractors/tickets.py +841 -89
  31. gitflow_analytics/identity_llm/__init__.py +6 -0
  32. gitflow_analytics/identity_llm/analysis_pass.py +231 -0
  33. gitflow_analytics/identity_llm/analyzer.py +464 -0
  34. gitflow_analytics/identity_llm/models.py +76 -0
  35. gitflow_analytics/integrations/github_integration.py +258 -87
  36. gitflow_analytics/integrations/jira_integration.py +572 -123
  37. gitflow_analytics/integrations/orchestrator.py +206 -82
  38. gitflow_analytics/metrics/activity_scoring.py +322 -0
  39. gitflow_analytics/metrics/branch_health.py +470 -0
  40. gitflow_analytics/metrics/dora.py +542 -179
  41. gitflow_analytics/models/database.py +986 -59
  42. gitflow_analytics/pm_framework/__init__.py +115 -0
  43. gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
  44. gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
  45. gitflow_analytics/pm_framework/base.py +406 -0
  46. gitflow_analytics/pm_framework/models.py +211 -0
  47. gitflow_analytics/pm_framework/orchestrator.py +652 -0
  48. gitflow_analytics/pm_framework/registry.py +333 -0
  49. gitflow_analytics/qualitative/__init__.py +29 -0
  50. gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
  51. gitflow_analytics/qualitative/classifiers/__init__.py +13 -0
  52. gitflow_analytics/qualitative/classifiers/change_type.py +742 -0
  53. gitflow_analytics/qualitative/classifiers/domain_classifier.py +506 -0
  54. gitflow_analytics/qualitative/classifiers/intent_analyzer.py +535 -0
  55. gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
  56. gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
  57. gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
  58. gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
  59. gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
  60. gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
  61. gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
  62. gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
  63. gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
  64. gitflow_analytics/qualitative/classifiers/risk_analyzer.py +438 -0
  65. gitflow_analytics/qualitative/core/__init__.py +13 -0
  66. gitflow_analytics/qualitative/core/llm_fallback.py +657 -0
  67. gitflow_analytics/qualitative/core/nlp_engine.py +382 -0
  68. gitflow_analytics/qualitative/core/pattern_cache.py +479 -0
  69. gitflow_analytics/qualitative/core/processor.py +673 -0
  70. gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
  71. gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
  72. gitflow_analytics/qualitative/models/__init__.py +25 -0
  73. gitflow_analytics/qualitative/models/schemas.py +306 -0
  74. gitflow_analytics/qualitative/utils/__init__.py +13 -0
  75. gitflow_analytics/qualitative/utils/batch_processor.py +339 -0
  76. gitflow_analytics/qualitative/utils/cost_tracker.py +345 -0
  77. gitflow_analytics/qualitative/utils/metrics.py +361 -0
  78. gitflow_analytics/qualitative/utils/text_processing.py +285 -0
  79. gitflow_analytics/reports/__init__.py +100 -0
  80. gitflow_analytics/reports/analytics_writer.py +550 -18
  81. gitflow_analytics/reports/base.py +648 -0
  82. gitflow_analytics/reports/branch_health_writer.py +322 -0
  83. gitflow_analytics/reports/classification_writer.py +924 -0
  84. gitflow_analytics/reports/cli_integration.py +427 -0
  85. gitflow_analytics/reports/csv_writer.py +1700 -216
  86. gitflow_analytics/reports/data_models.py +504 -0
  87. gitflow_analytics/reports/database_report_generator.py +427 -0
  88. gitflow_analytics/reports/example_usage.py +344 -0
  89. gitflow_analytics/reports/factory.py +499 -0
  90. gitflow_analytics/reports/formatters.py +698 -0
  91. gitflow_analytics/reports/html_generator.py +1116 -0
  92. gitflow_analytics/reports/interfaces.py +489 -0
  93. gitflow_analytics/reports/json_exporter.py +2770 -0
  94. gitflow_analytics/reports/narrative_writer.py +2289 -158
  95. gitflow_analytics/reports/story_point_correlation.py +1144 -0
  96. gitflow_analytics/reports/weekly_trends_writer.py +389 -0
  97. gitflow_analytics/training/__init__.py +5 -0
  98. gitflow_analytics/training/model_loader.py +377 -0
  99. gitflow_analytics/training/pipeline.py +550 -0
  100. gitflow_analytics/tui/__init__.py +5 -0
  101. gitflow_analytics/tui/app.py +724 -0
  102. gitflow_analytics/tui/screens/__init__.py +8 -0
  103. gitflow_analytics/tui/screens/analysis_progress_screen.py +496 -0
  104. gitflow_analytics/tui/screens/configuration_screen.py +523 -0
  105. gitflow_analytics/tui/screens/loading_screen.py +348 -0
  106. gitflow_analytics/tui/screens/main_screen.py +321 -0
  107. gitflow_analytics/tui/screens/results_screen.py +722 -0
  108. gitflow_analytics/tui/widgets/__init__.py +7 -0
  109. gitflow_analytics/tui/widgets/data_table.py +255 -0
  110. gitflow_analytics/tui/widgets/export_modal.py +301 -0
  111. gitflow_analytics/tui/widgets/progress_widget.py +187 -0
  112. gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
  113. gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
  114. gitflow_analytics-1.0.1.dist-info/METADATA +0 -463
  115. gitflow_analytics-1.0.1.dist-info/RECORD +0 -31
  116. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
  117. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
  118. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
  119. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0
--- /dev/null
+++ b/gitflow_analytics/qualitative/core/nlp_engine.py
@@ -0,0 +1,382 @@
+"""NLP processing engine using spaCy for fast commit analysis."""
+
+import logging
+import time
+from datetime import datetime
+from typing import Any
+
+from ..classifiers.change_type import ChangeTypeClassifier
+from ..classifiers.domain_classifier import DomainClassifier
+from ..classifiers.intent_analyzer import IntentAnalyzer
+from ..classifiers.risk_analyzer import RiskAnalyzer
+from ..models.schemas import NLPConfig, QualitativeCommitData
+from ..utils.metrics import PerformanceMetrics
+from ..utils.text_processing import TextProcessor
+
+try:
+    import spacy
+    from spacy.tokens import Doc
+
+    SPACY_AVAILABLE = True
+except ImportError:
+    SPACY_AVAILABLE = False
+    Doc = Any  # Type hint fallback
+
+
+class NLPEngine:
+    """Core NLP processing engine using spaCy for fast commit analysis.
+
+    This engine provides the primary classification pipeline for commit analysis,
+    handling 85-90% of commits through fast NLP processing without requiring
+    expensive LLM calls.
+
+    The engine orchestrates multiple specialized classifiers:
+    - ChangeTypeClassifier: Determines commit type (feature, bugfix, etc.)
+    - DomainClassifier: Identifies business domain (frontend, backend, etc.)
+    - IntentAnalyzer: Extracts intent signals and urgency
+    - RiskAnalyzer: Assesses commit risk level
+    """
+
+    def __init__(self, config: NLPConfig):
+        """Initialize NLP engine with spaCy pipeline.
+
+        Args:
+            config: NLP configuration
+
+        Raises:
+            ImportError: If spaCy is not available
+            OSError: If spaCy model is not installed
+        """
+        if not SPACY_AVAILABLE:
+            # Create a temporary logger since self.logger doesn't exist yet
+            temp_logger = logging.getLogger(__name__)
+            temp_logger.warning(
+                "spaCy is not available. NLP processing will be disabled. "
+                "To enable ML features, install spaCy: pip install spacy"
+            )
+            raise ImportError(
+                "spaCy is required for NLP processing. Install with: pip install spacy"
+            )
+
+        self.config = config
+        self.logger = logging.getLogger(__name__)
+
+        # Initialize spaCy pipeline
+        self._init_spacy_pipeline()
+
+        # Initialize text processor
+        self.text_processor = TextProcessor()
+
+        # Initialize classifiers
+        self.change_classifier = ChangeTypeClassifier(config.change_type_config)
+        self.domain_classifier = DomainClassifier(config.domain_config)
+        self.intent_analyzer = IntentAnalyzer(config.intent_config)
+        self.risk_analyzer = RiskAnalyzer(config.risk_config)
+
+        # Performance tracking
+        self.metrics = PerformanceMetrics()
+        self.processing_times = []
+
+        self.logger.info(f"NLP engine initialized with model: {config.spacy_model}")
+
+    def _init_spacy_pipeline(self) -> None:
+        """Initialize spaCy NLP pipeline with optimizations."""
+        try:
+            self.nlp = spacy.load(self.config.spacy_model)
+
+            # Optimize pipeline for speed if in fast mode
+            if self.config.fast_mode:
+                # Disable expensive components we don't need
+                disabled_components = []
+                if "parser" in self.nlp.pipe_names:
+                    disabled_components.append("parser")
+                if "ner" in self.nlp.pipe_names:
+                    disabled_components.append("ner")
+
+                if disabled_components:
+                    self.nlp.disable_pipes(*disabled_components)
+                    self.logger.info(f"Disabled spaCy components for speed: {disabled_components}")
+
+        except OSError as e:
+            self.logger.warning(
+                f"spaCy model '{self.config.spacy_model}' not found. "
+                f"ML features will be disabled. To enable, install with: python -m spacy download {self.config.spacy_model}"
+            )
+            # Raise the original error since the NLP engine requires spaCy
+            raise OSError(
+                f"spaCy model '{self.config.spacy_model}' not found. "
+                f"Install with: python -m spacy download {self.config.spacy_model}"
+            ) from e
+
+    def process_batch(self, commits: list[dict[str, Any]]) -> list[QualitativeCommitData]:
+        """Process a batch of commits efficiently using spaCy pipeline.
+
+        This method leverages spaCy's batch processing capabilities to analyze
+        multiple commit messages simultaneously for maximum efficiency.
+
+        Args:
+            commits: List of commit dictionaries with message, files_changed, etc.
+
+        Returns:
+            List of QualitativeCommitData with analysis results
+        """
+        if not commits:
+            return []
+
+        start_time = time.time()
+
+        # Extract messages for batch processing
+        messages = [commit.get("message", "") for commit in commits]
+
+        # Process all messages through spaCy pipeline at once
+        try:
+            docs = list(
+                self.nlp.pipe(
+                    messages,
+                    batch_size=self.config.spacy_batch_size,
+                    disable=[] if not self.config.fast_mode else ["parser", "ner"],
+                )
+            )
+        except Exception as e:
+            self.logger.error(f"spaCy processing failed: {e}")
+            # Fallback to individual processing
+            docs = []
+            for message in messages:
+                try:
+                    docs.append(self.nlp(message))
+                except Exception:
+                    # Create empty doc as fallback
+                    docs.append(self.nlp(""))
+
+        # Analyze each commit with its processed document
+        results = []
+        for commit, doc in zip(commits, docs):
+            try:
+                result = self._analyze_commit(commit, doc)
+                results.append(result)
+            except Exception as e:
+                self.logger.error(f"Error analyzing commit {commit.get('hash', 'unknown')}: {e}")
+                # Create fallback result
+                results.append(self._create_fallback_result(commit))
+
+        # Track performance
+        processing_time = (time.time() - start_time) * 1000  # ms
+        self.processing_times.append(processing_time)
+
+        # Record metrics
+        avg_confidence = sum(r.confidence_score for r in results) / len(results) if results else 0.0
+        self.metrics.record_processing(
+            operation="nlp_batch",
+            processing_time_ms=processing_time,
+            items_processed=len(commits),
+            confidence_score=avg_confidence,
+            method_used="nlp",
+        )
+
+        self.logger.debug(
+            f"Processed {len(commits)} commits in {processing_time:.1f}ms "
+            f"({(len(commits) * 1000 / processing_time if processing_time else 0.0):.1f} commits/sec)"  # guard zero elapsed time
+        )
+
+        return results
+
+    def _analyze_commit(self, commit: dict[str, Any], doc: Doc) -> QualitativeCommitData:
+        """Analyze a single commit with all classifiers.
+
+        Args:
+            commit: Commit dictionary with message, files, etc.
+            doc: spaCy processed document
+
+        Returns:
+            QualitativeCommitData with analysis results
+        """
+        analysis_start = time.time()
+
+        # Extract basic commit info
+        message = commit.get("message", "")
+        files_changed = commit.get("files_changed", [])
+
+        # Run all classifiers
+        change_type, change_confidence = self.change_classifier.classify(
+            message, doc, files_changed
+        )
+
+        domain, domain_confidence = self.domain_classifier.classify(message, doc, files_changed)
+
+        intent_signals = self.intent_analyzer.analyze(message, doc)
+
+        risk_assessment = self.risk_analyzer.assess(commit, doc)
+
+        # Calculate overall confidence score
+        overall_confidence = self._calculate_overall_confidence(
+            change_confidence, domain_confidence, intent_signals.get("confidence", 0.5)
+        )
+
+        # Extract technical context
+        technical_context = {
+            "file_patterns": self.text_processor.extract_file_patterns(files_changed),
+            "complexity_metrics": self.text_processor.calculate_commit_complexity(
+                message, files_changed, commit.get("insertions", 0), commit.get("deletions", 0)
+            ),
+            "semantic_fingerprint": self.text_processor.create_semantic_fingerprint(
+                message, files_changed
+            ),
+        }
+
+        processing_time = (time.time() - analysis_start) * 1000  # ms
+
+        return QualitativeCommitData(
+            # Copy existing commit fields
+            hash=commit.get("hash", ""),
+            message=message,
+            author_name=commit.get("author_name", ""),
+            author_email=commit.get("author_email", ""),
+            timestamp=commit.get("timestamp", datetime.now()),
+            files_changed=files_changed,
+            insertions=commit.get("insertions", 0),
+            deletions=commit.get("deletions", 0),
+            # Qualitative analysis results
+            change_type=change_type,
+            change_type_confidence=change_confidence,
+            business_domain=domain,
+            domain_confidence=domain_confidence,
+            risk_level=risk_assessment["level"],
+            risk_factors=risk_assessment["factors"],
+            intent_signals=intent_signals,
+            collaboration_patterns={},  # TODO: Implement collaboration analysis
+            technical_context=technical_context,
+            # Processing metadata
+            processing_method="nlp",
+            processing_time_ms=processing_time,
+            confidence_score=overall_confidence,
+        )
+
+    def _calculate_overall_confidence(
+        self, change_confidence: float, domain_confidence: float, intent_confidence: float
+    ) -> float:
+        """Calculate weighted overall confidence score.
+
+        Args:
+            change_confidence: Change type classification confidence
+            domain_confidence: Domain classification confidence
+            intent_confidence: Intent analysis confidence
+
+        Returns:
+            Overall confidence score (0.0 to 1.0)
+        """
+        # Weighted average with change_type being most important
+        weights = {
+            "change": 0.5,  # Change type is most critical
+            "domain": 0.3,  # Domain is important for reporting
+            "intent": 0.2,  # Intent is supplementary
+        }
+
+        overall = (
+            change_confidence * weights["change"]
+            + domain_confidence * weights["domain"]
+            + intent_confidence * weights["intent"]
+        )
+
+        return min(1.0, max(0.0, overall))
+
+    def _create_fallback_result(self, commit: dict[str, Any]) -> QualitativeCommitData:
+        """Create a fallback result when analysis fails.
+
+        Args:
+            commit: Commit dictionary
+
+        Returns:
+            QualitativeCommitData with default values
+        """
+        return QualitativeCommitData(
+            # Basic commit info
+            hash=commit.get("hash", ""),
+            message=commit.get("message", ""),
+            author_name=commit.get("author_name", ""),
+            author_email=commit.get("author_email", ""),
+            timestamp=commit.get("timestamp", datetime.now()),  # datetime default (was time.time()), matching _analyze_commit
+            files_changed=commit.get("files_changed", []),
+            insertions=commit.get("insertions", 0),
+            deletions=commit.get("deletions", 0),
+            # Default classifications
+            change_type="unknown",
+            change_type_confidence=0.0,
+            business_domain="unknown",
+            domain_confidence=0.0,
+            risk_level="medium",
+            risk_factors=["analysis_failed"],
+            intent_signals={"confidence": 0.0, "signals": []},
+            collaboration_patterns={},
+            technical_context={},
+            # Processing metadata
+            processing_method="nlp",
+            processing_time_ms=0.0,
+            confidence_score=0.0,
+        )
+
+    def get_performance_stats(self) -> dict[str, Any]:
+        """Get NLP engine performance statistics.
+
+        Returns:
+            Dictionary with performance metrics
+        """
+        if not self.processing_times:
+            return {
+                "total_batches": 0,
+                "avg_processing_time_ms": 0.0,
+                "min_processing_time_ms": 0.0,
+                "max_processing_time_ms": 0.0,
+                "total_processing_time_ms": 0.0,
+            }
+
+        return {
+            "total_batches": len(self.processing_times),
+            "avg_processing_time_ms": sum(self.processing_times) / len(self.processing_times),
+            "min_processing_time_ms": min(self.processing_times),
+            "max_processing_time_ms": max(self.processing_times),
+            "total_processing_time_ms": sum(self.processing_times),
+            "spacy_model": self.config.spacy_model,
+            "fast_mode": self.config.fast_mode,
+            "batch_size": self.config.spacy_batch_size,
+        }
+
+    def validate_setup(self) -> tuple[bool, list[str]]:
+        """Validate NLP engine setup and dependencies.
+
+        Returns:
+            Tuple of (is_valid, list_of_issues)
+        """
+        issues = []
+
+        # Check spaCy availability
+        if not SPACY_AVAILABLE:
+            issues.append("spaCy not installed")
+            return False, issues
+
+        # Check model availability
+        try:
+            test_nlp = spacy.load(self.config.spacy_model)
+            # Test basic functionality
+            test_doc = test_nlp("test commit message")
+            if not test_doc:
+                issues.append("spaCy model not functioning properly")
+        except OSError:
+            issues.append(f"spaCy model '{self.config.spacy_model}' not installed")
+        except Exception as e:
+            issues.append(f"spaCy model error: {e}")
+
+        # Check classifier initialization
+        for classifier_name, classifier in [
+            ("change_type", self.change_classifier),
+            ("domain", self.domain_classifier),
+            ("intent", self.intent_analyzer),
+            ("risk", self.risk_analyzer),
+        ]:
+            if (
+                not hasattr(classifier, "classify")
+                and not hasattr(classifier, "analyze")
+                and not hasattr(classifier, "assess")
+            ):
+                issues.append(f"{classifier_name} classifier not properly initialized")
+
+        return len(issues) == 0, issues
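
To make the new module's API concrete, here is a minimal usage sketch. It is ours, not from the package: it assumes NLPConfig can be constructed with defaults and that the spaCy model it names is installed; the method names, result fields, and commit-dict keys come straight from the code above.

# Hedged usage sketch for NLPEngine; commit values below are hypothetical.
from datetime import datetime

from gitflow_analytics.qualitative.core.nlp_engine import NLPEngine
from gitflow_analytics.qualitative.models.schemas import NLPConfig

config = NLPConfig()  # assumption: constructible with defaults (spacy_model, fast_mode, spacy_batch_size, ...)
engine = NLPEngine(config)  # raises ImportError/OSError if spaCy or the model is missing

is_valid, issues = engine.validate_setup()
if not is_valid:
    raise SystemExit(f"NLP engine not ready: {issues}")

# Keys are the ones process_batch()/_analyze_commit() read.
commits = [
    {
        "hash": "abc1234",  # hypothetical commit
        "message": "fix: handle empty commit messages in batch classifier",
        "author_name": "Dev Example",
        "author_email": "dev@example.com",
        "timestamp": datetime.now(),
        "files_changed": ["gitflow_analytics/classification/batch_classifier.py"],
        "insertions": 12,
        "deletions": 3,
    }
]

for result in engine.process_batch(commits):
    # confidence_score is the clamped weighted blend from _calculate_overall_confidence:
    # 0.5 * change + 0.3 * domain + 0.2 * intent
    print(result.change_type, result.business_domain, result.risk_level, result.confidence_score)

print(engine.get_performance_stats())  # batch timings plus model name, fast_mode, batch size

Note that process_batch() is designed not to raise per commit: when analysis fails, _create_fallback_result() records change_type "unknown" with confidence_score 0.0, so downstream consumers can filter low-confidence rows.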