gitflow-analytics 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as they appear in a supported public registry. It is provided for informational purposes only.
Files changed (57)
  1. gitflow_analytics/__init__.py +11 -11
  2. gitflow_analytics/_version.py +2 -2
  3. gitflow_analytics/cli.py +612 -258
  4. gitflow_analytics/cli_rich.py +353 -0
  5. gitflow_analytics/config.py +251 -141
  6. gitflow_analytics/core/analyzer.py +140 -103
  7. gitflow_analytics/core/branch_mapper.py +132 -132
  8. gitflow_analytics/core/cache.py +240 -169
  9. gitflow_analytics/core/identity.py +210 -173
  10. gitflow_analytics/extractors/base.py +13 -11
  11. gitflow_analytics/extractors/story_points.py +70 -59
  12. gitflow_analytics/extractors/tickets.py +101 -87
  13. gitflow_analytics/integrations/github_integration.py +84 -77
  14. gitflow_analytics/integrations/jira_integration.py +116 -104
  15. gitflow_analytics/integrations/orchestrator.py +86 -85
  16. gitflow_analytics/metrics/dora.py +181 -177
  17. gitflow_analytics/models/database.py +190 -53
  18. gitflow_analytics/qualitative/__init__.py +30 -0
  19. gitflow_analytics/qualitative/classifiers/__init__.py +13 -0
  20. gitflow_analytics/qualitative/classifiers/change_type.py +468 -0
  21. gitflow_analytics/qualitative/classifiers/domain_classifier.py +399 -0
  22. gitflow_analytics/qualitative/classifiers/intent_analyzer.py +436 -0
  23. gitflow_analytics/qualitative/classifiers/risk_analyzer.py +412 -0
  24. gitflow_analytics/qualitative/core/__init__.py +13 -0
  25. gitflow_analytics/qualitative/core/llm_fallback.py +653 -0
  26. gitflow_analytics/qualitative/core/nlp_engine.py +373 -0
  27. gitflow_analytics/qualitative/core/pattern_cache.py +457 -0
  28. gitflow_analytics/qualitative/core/processor.py +540 -0
  29. gitflow_analytics/qualitative/models/__init__.py +25 -0
  30. gitflow_analytics/qualitative/models/schemas.py +272 -0
  31. gitflow_analytics/qualitative/utils/__init__.py +13 -0
  32. gitflow_analytics/qualitative/utils/batch_processor.py +326 -0
  33. gitflow_analytics/qualitative/utils/cost_tracker.py +343 -0
  34. gitflow_analytics/qualitative/utils/metrics.py +347 -0
  35. gitflow_analytics/qualitative/utils/text_processing.py +243 -0
  36. gitflow_analytics/reports/analytics_writer.py +11 -4
  37. gitflow_analytics/reports/csv_writer.py +51 -31
  38. gitflow_analytics/reports/narrative_writer.py +16 -14
  39. gitflow_analytics/tui/__init__.py +5 -0
  40. gitflow_analytics/tui/app.py +721 -0
  41. gitflow_analytics/tui/screens/__init__.py +8 -0
  42. gitflow_analytics/tui/screens/analysis_progress_screen.py +487 -0
  43. gitflow_analytics/tui/screens/configuration_screen.py +547 -0
  44. gitflow_analytics/tui/screens/loading_screen.py +358 -0
  45. gitflow_analytics/tui/screens/main_screen.py +304 -0
  46. gitflow_analytics/tui/screens/results_screen.py +698 -0
  47. gitflow_analytics/tui/widgets/__init__.py +7 -0
  48. gitflow_analytics/tui/widgets/data_table.py +257 -0
  49. gitflow_analytics/tui/widgets/export_modal.py +301 -0
  50. gitflow_analytics/tui/widgets/progress_widget.py +192 -0
  51. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.0.3.dist-info}/METADATA +31 -4
  52. gitflow_analytics-1.0.3.dist-info/RECORD +62 -0
  53. gitflow_analytics-1.0.1.dist-info/RECORD +0 -31
  54. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.0.3.dist-info}/WHEEL +0 -0
  55. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.0.3.dist-info}/entry_points.txt +0 -0
  56. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.0.3.dist-info}/licenses/LICENSE +0 -0
  57. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.0.3.dist-info}/top_level.txt +0 -0
gitflow_analytics/qualitative/classifiers/change_type.py (new file)
@@ -0,0 +1,468 @@
+"""Change type classifier using semantic analysis of commit messages."""
+
+import logging
+import re
+from typing import Dict, List, Tuple, Set, Any, Optional
+from pathlib import Path
+
+from ..models.schemas import ChangeTypeConfig
+
+try:
+    import spacy
+    from spacy.tokens import Doc
+    SPACY_AVAILABLE = True
+except ImportError:
+    SPACY_AVAILABLE = False
+    Doc = Any
+
+
+class ChangeTypeClassifier:
+    """Classify commits by change type using semantic analysis.
+
+    This classifier determines the type of change represented by a commit
+    (feature, bugfix, refactor, etc.) by analyzing the commit message semantics
+    and file patterns.
+
+    The classification uses a combination of:
+    - Semantic keyword matching with action/object/context patterns
+    - File pattern analysis for additional signals
+    - Rule-based patterns for common commit message formats
+    """
+
+    def __init__(self, config: ChangeTypeConfig):
+        """Initialize change type classifier.
+
+        Args:
+            config: Configuration for change type classification
+        """
+        self.config = config
+        self.logger = logging.getLogger(__name__)
+
+        # Define semantic patterns for each change type
+        self.change_patterns = {
+            'feature': {
+                'action_words': {
+                    'add', 'implement', 'create', 'build', 'introduce', 'develop',
+                    'enable', 'support', 'allow', 'provide', 'include'
+                },
+                'object_words': {
+                    'feature', 'functionality', 'capability', 'component', 'module',
+                    'endpoint', 'api', 'service', 'interface', 'system'
+                },
+                'context_words': {
+                    'new', 'initial', 'first', 'user', 'client', 'support',
+                    'enhancement', 'improvement'
+                }
+            },
+            'bugfix': {
+                'action_words': {
+                    'fix', 'resolve', 'correct', 'repair', 'patch', 'address',
+                    'handle', 'solve', 'debug', 'prevent'
+                },
+                'object_words': {
+                    'bug', 'issue', 'problem', 'error', 'defect', 'exception',
+                    'crash', 'failure', 'leak', 'regression'
+                },
+                'context_words': {
+                    'broken', 'failing', 'incorrect', 'wrong', 'invalid',
+                    'missing', 'null', 'undefined'
+                }
+            },
+            'refactor': {
+                'action_words': {
+                    'refactor', 'restructure', 'reorganize', 'cleanup', 'simplify',
+                    'optimize', 'improve', 'enhance', 'streamline', 'consolidate'
+                },
+                'object_words': {
+                    'code', 'structure', 'architecture', 'design', 'logic',
+                    'method', 'function', 'class', 'module'
+                },
+                'context_words': {
+                    'better', 'cleaner', 'simpler', 'efficient', 'maintainable',
+                    'readable', 'performance'
+                }
+            },
+            'docs': {
+                'action_words': {
+                    'update', 'add', 'improve', 'write', 'document', 'clarify',
+                    'explain', 'describe', 'detail'
+                },
+                'object_words': {
+                    'documentation', 'readme', 'docs', 'comment', 'docstring',
+                    'guide', 'tutorial', 'example', 'specification'
+                },
+                'context_words': {
+                    'explain', 'clarify', 'describe', 'instruction', 'help'
+                }
+            },
+            'test': {
+                'action_words': {
+                    'add', 'update', 'fix', 'improve', 'write', 'create',
+                    'enhance', 'extend'
+                },
+                'object_words': {
+                    'test', 'spec', 'coverage', 'unit', 'integration', 'e2e',
+                    'testing', 'mock', 'stub', 'fixture'
+                },
+                'context_words': {
+                    'testing', 'verify', 'validate', 'check', 'ensure',
+                    'coverage', 'assertion'
+                }
+            },
+            'chore': {
+                'action_words': {
+                    'update', 'bump', 'upgrade', 'configure', 'setup', 'install',
+                    'remove', 'delete', 'clean'
+                },
+                'object_words': {
+                    'dependency', 'package', 'config', 'configuration', 'build',
+                    'version', 'tool', 'script', 'workflow'
+                },
+                'context_words': {
+                    'maintenance', 'housekeeping', 'routine', 'automated',
+                    'ci', 'cd', 'pipeline'
+                }
+            },
+            'security': {
+                'action_words': {
+                    'fix', 'secure', 'protect', 'validate', 'sanitize',
+                    'encrypt', 'authenticate', 'authorize'
+                },
+                'object_words': {
+                    'security', 'vulnerability', 'exploit', 'xss', 'csrf',
+                    'injection', 'authentication', 'authorization', 'permission'
+                },
+                'context_words': {
+                    'secure', 'safe', 'protected', 'validated', 'sanitized',
+                    'encrypted', 'threat', 'attack'
+                }
+            },
+            'hotfix': {
+                'action_words': {
+                    'hotfix', 'fix', 'patch', 'urgent', 'critical', 'emergency'
+                },
+                'object_words': {
+                    'production', 'critical', 'urgent', 'emergency', 'hotfix',
+                    'issue', 'bug', 'problem'
+                },
+                'context_words': {
+                    'urgent', 'critical', 'immediate', 'production', 'live',
+                    'emergency', 'asap'
+                }
+            },
+            'config': {
+                'action_words': {
+                    'configure', 'setup', 'adjust', 'modify', 'change',
+                    'update', 'tweak'
+                },
+                'object_words': {
+                    'config', 'configuration', 'settings', 'environment',
+                    'parameter', 'option', 'flag', 'variable'
+                },
+                'context_words': {
+                    'environment', 'production', 'development', 'staging',
+                    'deployment', 'setup'
+                }
+            }
+        }
+
+        # File pattern signals for change types
+        self.file_patterns = {
+            'test': [
+                r'.*test.*\.py$', r'.*spec.*\.js$', r'.*test.*\.java$',
+                r'test_.*\.py$', r'.*_test\.go$', r'.*\.test\.(js|ts)$',
+                r'__tests__/.*', r'tests?/.*', r'spec/.*'
+            ],
+            'docs': [
+                r'.*\.md$', r'.*\.rst$', r'.*\.txt$', r'README.*',
+                r'CHANGELOG.*', r'docs?/.*', r'documentation/.*'
+            ],
+            'config': [
+                r'.*\.ya?ml$', r'.*\.json$', r'.*\.toml$', r'.*\.ini$',
+                r'.*\.env.*', r'Dockerfile.*', r'.*config.*', r'\.github/.*'
+            ],
+            'chore': [
+                r'package.*\.json$', r'requirements.*\.txt$', r'Pipfile.*',
+                r'pom\.xml$', r'build\.gradle$', r'.*\.lock$'
+            ]
+        }
+
+        # Compile regex patterns for efficiency
+        self._compile_file_patterns()
+
+        # Common commit message prefixes
+        self.prefix_patterns = {
+            'feat': 'feature',
+            'feature': 'feature',
+            'fix': 'bugfix',
+            'bugfix': 'bugfix',
+            'refactor': 'refactor',
+            'docs': 'docs',
+            'test': 'test',
+            'chore': 'chore',
+            'security': 'security',
+            'hotfix': 'hotfix',
+            'config': 'config',
+            'style': 'chore',  # Style changes are usually chores
+            'perf': 'refactor',  # Performance improvements are refactoring
+            'build': 'chore',
+            'ci': 'chore'
+        }
+
+    def _compile_file_patterns(self) -> None:
+        """Compile regex patterns for file matching."""
+        self.compiled_file_patterns = {}
+        for change_type, patterns in self.file_patterns.items():
+            self.compiled_file_patterns[change_type] = [
+                re.compile(pattern, re.IGNORECASE) for pattern in patterns
+            ]
+
+    def classify(self, message: str, doc: Doc, files: List[str]) -> Tuple[str, float]:
+        """Classify commit change type with confidence score.
+
+        Args:
+            message: Commit message
+            doc: spaCy processed document
+            files: List of changed files
+
+        Returns:
+            Tuple of (change_type, confidence_score)
+        """
+        if not message:
+            return 'unknown', 0.0
+
+        # Step 1: Check for conventional commit prefixes
+        prefix_result = self._check_conventional_prefix(message)
+        if prefix_result:
+            change_type, confidence = prefix_result
+            if confidence >= self.config.min_confidence:
+                return change_type, confidence
+
+        # Step 2: Semantic analysis of message content
+        semantic_scores = self._analyze_semantic_content(message, doc)
+
+        # Step 3: File pattern analysis
+        file_scores = self._analyze_file_patterns(files)
+
+        # Step 4: Combine scores with weights
+        combined_scores = self._combine_scores(semantic_scores, file_scores)
+
+        # Step 5: Select best match
+        if not combined_scores:
+            return 'unknown', 0.0
+
+        best_type = max(combined_scores.keys(), key=lambda k: combined_scores[k])
+        confidence = combined_scores[best_type]
+
+        # Apply confidence threshold
+        if confidence < self.config.min_confidence:
+            return 'unknown', confidence
+
+        return best_type, confidence
+
+    def _check_conventional_prefix(self, message: str) -> Optional[Tuple[str, float]]:
+        """Check for conventional commit message prefixes.
+
+        Args:
+            message: Commit message
+
+        Returns:
+            Tuple of (change_type, confidence) if found, None otherwise
+        """
+        # Look for conventional commit format: type(scope): description
+        conventional_pattern = r'^(\w+)(?:\([^)]*\))?\s*:\s*(.+)'
+        match = re.match(conventional_pattern, message.strip(), re.IGNORECASE)
+
+        if match:
+            prefix = match.group(1).lower()
+            if prefix in self.prefix_patterns:
+                return self.prefix_patterns[prefix], 0.9  # High confidence for explicit prefixes
+
+        # Check for simple prefixes at start of message
+        words = message.lower().split()
+        if words:
+            first_word = words[0].rstrip(':').rstrip('-')
+            if first_word in self.prefix_patterns:
+                return self.prefix_patterns[first_word], 0.8
+
+        return None
+
+    def _analyze_semantic_content(self, message: str, doc: Doc) -> Dict[str, float]:
+        """Analyze semantic content of commit message.
+
+        Args:
+            message: Commit message
+            doc: spaCy processed document
+
+        Returns:
+            Dictionary of change_type -> confidence_score
+        """
+        if not SPACY_AVAILABLE or not doc:
+            # Fallback to simple keyword matching
+            return self._simple_keyword_analysis(message.lower())
+
+        # Extract semantic features from spaCy doc
+        features = self._extract_semantic_features(doc)
+
+        # Calculate similarity to each change type
+        scores = {}
+        for change_type, patterns in self.change_patterns.items():
+            similarity = self._calculate_semantic_similarity(features, patterns)
+            if similarity > 0:
+                scores[change_type] = similarity
+
+        return scores
+
+    def _extract_semantic_features(self, doc: Doc) -> Dict[str, Set[str]]:
+        """Extract semantic features from spaCy document.
+
+        Args:
+            doc: spaCy processed document
+
+        Returns:
+            Dictionary of feature_type -> set_of_words
+        """
+        features = {
+            'verbs': set(),
+            'nouns': set(),
+            'adjectives': set(),
+            'entities': set(),
+            'lemmas': set()
+        }
+
+        for token in doc:
+            if token.is_stop or token.is_punct or len(token.text) < 2:
+                continue
+
+            lemma = token.lemma_.lower()
+            features['lemmas'].add(lemma)
+
+            if token.pos_ == 'VERB':
+                features['verbs'].add(lemma)
+            elif token.pos_ in ['NOUN', 'PROPN']:
+                features['nouns'].add(lemma)
+            elif token.pos_ == 'ADJ':
+                features['adjectives'].add(lemma)
+
+        # Add named entities
+        for ent in doc.ents:
+            features['entities'].add(ent.text.lower())
+
+        return features
+
+    def _calculate_semantic_similarity(self, features: Dict[str, Set[str]],
+                                       patterns: Dict[str, Set[str]]) -> float:
+        """Calculate semantic similarity between features and patterns.
+
+        Args:
+            features: Extracted semantic features
+            patterns: Change type patterns
+
+        Returns:
+            Similarity score (0.0 to 1.0)
+        """
+        similarity_score = 0.0
+
+        # Action words (verbs) - highest weight
+        action_matches = len(features['verbs'].intersection(patterns['action_words']))
+        if action_matches > 0:
+            similarity_score += action_matches * 0.5
+
+        # Object words (nouns) - medium weight
+        object_matches = len(features['nouns'].intersection(patterns['object_words']))
+        if object_matches > 0:
+            similarity_score += object_matches * 0.3
+
+        # Context words (any lemma) - lower weight
+        all_lemmas = features['lemmas']
+        context_matches = len(all_lemmas.intersection(patterns['context_words']))
+        if context_matches > 0:
+            similarity_score += context_matches * 0.2
+
+        # Normalize by maximum possible score
+        max_possible = len(patterns['action_words']) * 0.5 + \
+                       len(patterns['object_words']) * 0.3 + \
+                       len(patterns['context_words']) * 0.2
+
+        return min(1.0, similarity_score / max_possible) if max_possible > 0 else 0.0
+
+    def _simple_keyword_analysis(self, message: str) -> Dict[str, float]:
+        """Simple keyword-based analysis fallback.
+
+        Args:
+            message: Lowercase commit message
+
+        Returns:
+            Dictionary of change_type -> confidence_score
+        """
+        scores = {}
+        words = set(re.findall(r'\b\w+\b', message))
+
+        for change_type, patterns in self.change_patterns.items():
+            all_pattern_words = patterns['action_words'] | patterns['object_words'] | patterns['context_words']
+            matches = len(words.intersection(all_pattern_words))
+
+            if matches > 0:
+                # Simple scoring based on keyword matches
+                scores[change_type] = min(1.0, matches / 5.0)  # Scale to 0-1
+
+        return scores
+
+    def _analyze_file_patterns(self, files: List[str]) -> Dict[str, float]:
+        """Analyze file patterns for change type signals.
+
+        Args:
+            files: List of changed file paths
+
+        Returns:
+            Dictionary of change_type -> confidence_score
+        """
+        if not files:
+            return {}
+
+        scores = {}
+
+        for change_type, patterns in self.compiled_file_patterns.items():
+            matching_files = 0
+
+            for file_path in files:
+                for pattern in patterns:
+                    if pattern.search(file_path):
+                        matching_files += 1
+                        break  # Don't double-count same file
+
+            if matching_files > 0:
+                # File pattern confidence based on proportion of matching files
+                confidence = min(1.0, matching_files / len(files))
+                scores[change_type] = confidence
+
+        return scores
+
+    def _combine_scores(self, semantic_scores: Dict[str, float],
+                        file_scores: Dict[str, float]) -> Dict[str, float]:
+        """Combine semantic and file pattern scores.
+
+        Args:
+            semantic_scores: Scores from semantic analysis
+            file_scores: Scores from file pattern analysis
+
+        Returns:
+            Combined scores dictionary
+        """
+        combined = {}
+        all_types = set(semantic_scores.keys()) | set(file_scores.keys())
+
+        for change_type in all_types:
+            semantic_score = semantic_scores.get(change_type, 0.0)
+            file_score = file_scores.get(change_type, 0.0)
+
+            # Weighted combination
+            combined_score = (
+                semantic_score * self.config.semantic_weight +
+                file_score * self.config.file_pattern_weight
+            )
+
+            if combined_score > 0:
+                combined[change_type] = combined_score
+
+        return combined
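
For reference, a minimal usage sketch of the classifier introduced in this release. StubConfig is a hypothetical stand-in for ChangeTypeConfig (defined in gitflow_analytics/qualitative/models/schemas.py, whose constructor is not shown in this diff); it stubs only the three attributes the classifier reads. Passing doc=None exercises the keyword-fallback path, so spaCy is not required.

from dataclasses import dataclass

from gitflow_analytics.qualitative.classifiers.change_type import ChangeTypeClassifier


# Hypothetical stand-in for ChangeTypeConfig; only these three fields
# are read by ChangeTypeClassifier in the code above.
@dataclass
class StubConfig:
    min_confidence: float = 0.3       # below this, classify() returns 'unknown'
    semantic_weight: float = 0.7      # weight for message-based scores
    file_pattern_weight: float = 0.3  # weight for file-pattern scores


classifier = ChangeTypeClassifier(StubConfig())

# Conventional-commit prefix: matched directly with confidence 0.9.
print(classifier.classify("fix: handle null pointer in parser", doc=None, files=[]))
# -> ('bugfix', 0.9)

# No recognized prefix: doc=None skips spaCy, so keyword and
# file-pattern scores are combined with the configured weights.
print(classifier.classify(
    "add unit coverage for the cache layer",
    doc=None,
    files=["tests/test_cache.py"],
))
# -> ('test', 0.72)

Tracing the second call through the code above: the keyword score for 'test' is 3 matches ("add", "unit", "coverage") scaled by 1/5 to 0.6, the file-pattern score is 1.0 (tests/test_cache.py matches tests?/.*), and the weighted combination is 0.6 * 0.7 + 1.0 * 0.3 = 0.72, which clears the 0.3 threshold.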