gitflow-analytics 1.0.0-py3-none-any.whl → 1.0.3-py3-none-any.whl

This diff shows the changes between publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (58)
  1. gitflow_analytics/__init__.py +11 -9
  2. gitflow_analytics/_version.py +2 -2
  3. gitflow_analytics/cli.py +691 -243
  4. gitflow_analytics/cli_rich.py +353 -0
  5. gitflow_analytics/config.py +389 -96
  6. gitflow_analytics/core/analyzer.py +175 -78
  7. gitflow_analytics/core/branch_mapper.py +132 -132
  8. gitflow_analytics/core/cache.py +242 -173
  9. gitflow_analytics/core/identity.py +214 -178
  10. gitflow_analytics/extractors/base.py +13 -11
  11. gitflow_analytics/extractors/story_points.py +70 -59
  12. gitflow_analytics/extractors/tickets.py +111 -88
  13. gitflow_analytics/integrations/github_integration.py +91 -77
  14. gitflow_analytics/integrations/jira_integration.py +284 -0
  15. gitflow_analytics/integrations/orchestrator.py +99 -72
  16. gitflow_analytics/metrics/dora.py +183 -179
  17. gitflow_analytics/models/database.py +191 -54
  18. gitflow_analytics/qualitative/__init__.py +30 -0
  19. gitflow_analytics/qualitative/classifiers/__init__.py +13 -0
  20. gitflow_analytics/qualitative/classifiers/change_type.py +468 -0
  21. gitflow_analytics/qualitative/classifiers/domain_classifier.py +399 -0
  22. gitflow_analytics/qualitative/classifiers/intent_analyzer.py +436 -0
  23. gitflow_analytics/qualitative/classifiers/risk_analyzer.py +412 -0
  24. gitflow_analytics/qualitative/core/__init__.py +13 -0
  25. gitflow_analytics/qualitative/core/llm_fallback.py +653 -0
  26. gitflow_analytics/qualitative/core/nlp_engine.py +373 -0
  27. gitflow_analytics/qualitative/core/pattern_cache.py +457 -0
  28. gitflow_analytics/qualitative/core/processor.py +540 -0
  29. gitflow_analytics/qualitative/models/__init__.py +25 -0
  30. gitflow_analytics/qualitative/models/schemas.py +272 -0
  31. gitflow_analytics/qualitative/utils/__init__.py +13 -0
  32. gitflow_analytics/qualitative/utils/batch_processor.py +326 -0
  33. gitflow_analytics/qualitative/utils/cost_tracker.py +343 -0
  34. gitflow_analytics/qualitative/utils/metrics.py +347 -0
  35. gitflow_analytics/qualitative/utils/text_processing.py +243 -0
  36. gitflow_analytics/reports/analytics_writer.py +25 -8
  37. gitflow_analytics/reports/csv_writer.py +60 -32
  38. gitflow_analytics/reports/narrative_writer.py +21 -15
  39. gitflow_analytics/tui/__init__.py +5 -0
  40. gitflow_analytics/tui/app.py +721 -0
  41. gitflow_analytics/tui/screens/__init__.py +8 -0
  42. gitflow_analytics/tui/screens/analysis_progress_screen.py +487 -0
  43. gitflow_analytics/tui/screens/configuration_screen.py +547 -0
  44. gitflow_analytics/tui/screens/loading_screen.py +358 -0
  45. gitflow_analytics/tui/screens/main_screen.py +304 -0
  46. gitflow_analytics/tui/screens/results_screen.py +698 -0
  47. gitflow_analytics/tui/widgets/__init__.py +7 -0
  48. gitflow_analytics/tui/widgets/data_table.py +257 -0
  49. gitflow_analytics/tui/widgets/export_modal.py +301 -0
  50. gitflow_analytics/tui/widgets/progress_widget.py +192 -0
  51. gitflow_analytics-1.0.3.dist-info/METADATA +490 -0
  52. gitflow_analytics-1.0.3.dist-info/RECORD +62 -0
  53. gitflow_analytics-1.0.0.dist-info/METADATA +0 -201
  54. gitflow_analytics-1.0.0.dist-info/RECORD +0 -30
  55. {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/WHEEL +0 -0
  56. {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/entry_points.txt +0 -0
  57. {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/licenses/LICENSE +0 -0
  58. {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/top_level.txt +0 -0
gitflow_analytics/qualitative/classifiers/domain_classifier.py (new file, #21 above)
@@ -0,0 +1,399 @@
+"""Domain classifier for identifying technical domains of commits."""
+
+import logging
+import re
+from collections import defaultdict
+from typing import Dict, List, Tuple, Set, Any
+from pathlib import Path
+
+from ..models.schemas import DomainConfig
+
+try:
+    import spacy
+    from spacy.tokens import Doc
+    SPACY_AVAILABLE = True
+except ImportError:
+    SPACY_AVAILABLE = False
+    Doc = Any
+
+
+class DomainClassifier:
+    """Classify commits by technical domain (frontend, backend, etc.).
+
+    This classifier determines the technical domain or business area
+    affected by a commit by analyzing both the commit message content
+    and the patterns of files that were changed.
+
+    Domains identified:
+    - frontend: UI/UX, client-side code
+    - backend: Server-side logic, APIs
+    - database: Data models, migrations, queries
+    - infrastructure: Deployment, configuration, DevOps
+    - mobile: Mobile app development
+    - devops: CI/CD, build tools, automation
+    """
+
+    def __init__(self, config: DomainConfig):
+        """Initialize domain classifier.
+
+        Args:
+            config: Configuration for domain classification
+        """
+        self.config = config
+        self.logger = logging.getLogger(__name__)
+
+        # Compile file patterns for efficient matching
+        self._compile_file_patterns()
+
+        # Keyword patterns for semantic analysis
+        self.keyword_patterns = config.keyword_patterns
+
+        # Directory patterns that strongly indicate domains
+        self.directory_indicators = {
+            'frontend': {
+                'src/components', 'src/pages', 'src/views', 'public', 'assets',
+                'static', 'styles', 'css', 'scss', 'ui', 'components', 'pages'
+            },
+            'backend': {
+                'src/controllers', 'src/services', 'src/api', 'api', 'server',
+                'controllers', 'services', 'handlers', 'routes', 'middleware'
+            },
+            'database': {
+                'migrations', 'models', 'schemas', 'seeds', 'data', 'sql',
+                'database', 'db', 'repositories'
+            },
+            'infrastructure': {
+                'terraform', 'ansible', 'k8s', 'kubernetes', 'helm', 'charts',
+                'infrastructure', 'deploy', 'deployment', 'ops'
+            },
+            'mobile': {
+                'android', 'ios', 'mobile', 'app', 'native', 'react-native',
+                'flutter', 'swift', 'kotlin'
+            },
+            'devops': {
+                '.github', '.gitlab', 'ci', 'cd', 'scripts', 'build', 'docker',
+                'jenkins', 'actions', 'workflows'
+            }
+        }
+
+        # Technology stack indicators
+        self.tech_indicators = {
+            'frontend': {
+                'react', 'vue', 'angular', 'svelte', 'jquery', 'bootstrap',
+                'tailwind', 'css', 'html', 'javascript', 'typescript', 'jsx', 'tsx'
+            },
+            'backend': {
+                'django', 'flask', 'fastapi', 'express', 'spring', 'rails',
+                'laravel', 'api', 'endpoint', 'service', 'controller'
+            },
+            'database': {
+                'mysql', 'postgresql', 'mongodb', 'redis', 'elasticsearch',
+                'migration', 'schema', 'query', 'orm', 'sql'
+            },
+            'infrastructure': {
+                'aws', 'gcp', 'azure', 'docker', 'kubernetes', 'terraform',
+                'ansible', 'helm', 'nginx', 'apache'
+            },
+            'mobile': {
+                'android', 'ios', 'swift', 'kotlin', 'flutter', 'react-native',
+                'xamarin', 'cordova', 'ionic'
+            },
+            'devops': {
+                'jenkins', 'gitlab-ci', 'github-actions', 'circleci', 'travis',
+                'docker', 'kubernetes', 'helm', 'terraform'
+            }
+        }
+
+    def _compile_file_patterns(self) -> None:
+        """Compile file extension patterns for efficient matching."""
+        self.compiled_file_patterns = {}
+
+        for domain, patterns in self.config.file_patterns.items():
+            compiled_patterns = []
+            for pattern in patterns:
+                try:
+                    # Convert glob patterns to regex
+                    regex_pattern = self._glob_to_regex(pattern)
+                    compiled_patterns.append(re.compile(regex_pattern, re.IGNORECASE))
+                except re.error as e:
+                    self.logger.warning(f"Invalid file pattern '{pattern}' for domain {domain}: {e}")
+
+            self.compiled_file_patterns[domain] = compiled_patterns
+
+    def _glob_to_regex(self, pattern: str) -> str:
+        """Convert glob pattern to regex.
+
+        Args:
+            pattern: Glob pattern (e.g., '*.js', '**/models/**')
+
+        Returns:
+            Equivalent regex pattern
+        """
+        # Simple glob to regex conversion
+        pattern = pattern.replace('.', r'\.')
+        pattern = pattern.replace('*', '.*')
+        pattern = pattern.replace('?', '.')
+        pattern = f'^{pattern}$'
+        return pattern
+
+    def classify(self, message: str, doc: Doc, files: List[str]) -> Tuple[str, float]:
+        """Classify commit domain with confidence score.
+
+        Args:
+            message: Commit message
+            doc: spaCy processed document (may be None)
+            files: List of changed files
+
+        Returns:
+            Tuple of (domain, confidence_score)
+        """
+        if not message and not files:
+            return 'unknown', 0.0
+
+        # Analyze file patterns (primary signal)
+        file_scores = self._analyze_file_patterns(files)
+
+        # Analyze directory patterns
+        dir_scores = self._analyze_directory_patterns(files)
+
+        # Analyze message content
+        message_scores = self._analyze_message_content(message, doc)
+
+        # Combine all signals
+        combined_scores = self._combine_domain_scores(file_scores, dir_scores, message_scores)
+
+        if not combined_scores:
+            return 'unknown', 0.0
+
+        # Select best domain
+        best_domain = max(combined_scores.keys(), key=lambda k: combined_scores[k])
+        confidence = combined_scores[best_domain]
+
+        # Apply confidence threshold
+        if confidence < self.config.min_confidence:
+            return 'unknown', confidence
+
+        return best_domain, confidence
+
+    def _analyze_file_patterns(self, files: List[str]) -> Dict[str, float]:
+        """Analyze file patterns to determine domain.
+
+        Args:
+            files: List of file paths
+
+        Returns:
+            Dictionary of domain -> confidence_score
+        """
+        if not files:
+            return {}
+
+        domain_matches = defaultdict(int)
+
+        for file_path in files:
+            for domain, patterns in self.compiled_file_patterns.items():
+                for pattern in patterns:
+                    if pattern.search(file_path):
+                        domain_matches[domain] += 1
+                        break  # Don't double-count same file for same domain
+
+        # Convert to confidence scores
+        scores = {}
+        total_files = len(files)
+
+        for domain, matches in domain_matches.items():
+            # Confidence based on proportion of matching files
+            confidence = matches / total_files
+            scores[domain] = min(1.0, confidence * 2)  # Boost confidence for strong signals
+
+        return scores
+
+    def _analyze_directory_patterns(self, files: List[str]) -> Dict[str, float]:
+        """Analyze directory patterns for domain signals.
+
+        Args:
+            files: List of file paths
+
+        Returns:
+            Dictionary of domain -> confidence_score
+        """
+        if not files:
+            return {}
+
+        domain_scores = defaultdict(float)
+
+        for file_path in files:
+            # Normalize path separators and convert to lowercase
+            normalized_path = file_path.replace('\\', '/').lower()
+            path_parts = normalized_path.split('/')
+
+            # Check each domain's directory indicators
+            for domain, indicators in self.directory_indicators.items():
+                for indicator in indicators:
+                    # Check if indicator appears in any part of the path
+                    if any(indicator in part for part in path_parts):
+                        domain_scores[domain] += 1.0
+                        break
+                    # Also check full path contains indicator
+                    elif indicator in normalized_path:
+                        domain_scores[domain] += 0.5
+
+        # Normalize scores
+        scores = {}
+        max_score = max(domain_scores.values()) if domain_scores else 0
+
+        if max_score > 0:
+            for domain, score in domain_scores.items():
+                scores[domain] = min(1.0, score / max_score)
+
+        return scores
+
+    def _analyze_message_content(self, message: str, doc: Doc) -> Dict[str, float]:
+        """Analyze commit message content for domain keywords.
+
+        Args:
+            message: Commit message
+            doc: spaCy processed document (may be None)
+
+        Returns:
+            Dictionary of domain -> confidence_score
+        """
+        if not message:
+            return {}
+
+        # Convert message to lowercase for analysis
+        message_lower = message.lower()
+
+        # Extract keywords from message
+        if SPACY_AVAILABLE and doc:
+            # Use spaCy for better keyword extraction
+            keywords = self._extract_keywords_from_doc(doc)
+        else:
+            # Fallback to simple word extraction
+            keywords = set(re.findall(r'\b\w+\b', message_lower))
+
+        # Score domains based on keyword matches
+        domain_scores = {}
+
+        for domain, domain_keywords in self.keyword_patterns.items():
+            keyword_matches = len(keywords.intersection(set(word.lower() for word in domain_keywords)))
+
+            if keyword_matches > 0:
+                # Base score from keyword matches
+                base_score = min(1.0, keyword_matches / 3.0)  # Scale to 0-1
+
+                # Boost score for technology indicators
+                tech_keywords = self.tech_indicators.get(domain, set())
+                tech_matches = len(keywords.intersection(tech_keywords))
+                tech_boost = min(0.3, tech_matches * 0.1)
+
+                domain_scores[domain] = min(1.0, base_score + tech_boost)
+
+        return domain_scores
+
+    def _extract_keywords_from_doc(self, doc: Doc) -> Set[str]:
+        """Extract meaningful keywords from spaCy document.
+
+        Args:
+            doc: spaCy processed document
+
+        Returns:
+            Set of extracted keywords
+        """
+        keywords = set()
+
+        for token in doc:
+            if (not token.is_stop and
+                not token.is_punct and
+                len(token.text) > 2 and
+                token.pos_ in ['NOUN', 'PROPN', 'ADJ', 'VERB']):
+                keywords.add(token.lemma_.lower())
+
+        # Add named entities
+        for ent in doc.ents:
+            if len(ent.text) > 2:
+                keywords.add(ent.text.lower())
+
+        return keywords
+
+    def _combine_domain_scores(self, file_scores: Dict[str, float],
+                               dir_scores: Dict[str, float],
+                               message_scores: Dict[str, float]) -> Dict[str, float]:
+        """Combine scores from different analysis methods.
+
+        Args:
+            file_scores: Scores from file pattern analysis
+            dir_scores: Scores from directory pattern analysis
+            message_scores: Scores from message content analysis
+
+        Returns:
+            Combined scores dictionary
+        """
+        all_domains = set(file_scores.keys()) | set(dir_scores.keys()) | set(message_scores.keys())
+        combined_scores = {}
+
+        # Weights for different signal types
+        weights = {
+            'file': 0.5,       # File patterns are strongest signal
+            'directory': 0.3,  # Directory patterns are also strong
+            'message': 0.2     # Message content provides additional context
+        }
+
+        for domain in all_domains:
+            file_score = file_scores.get(domain, 0.0)
+            dir_score = dir_scores.get(domain, 0.0)
+            message_score = message_scores.get(domain, 0.0)
+
+            # Weighted combination
+            combined_score = (
+                file_score * weights['file'] +
+                dir_score * weights['directory'] +
+                message_score * weights['message']
+            )
+
+            # Bonus for multiple signal types agreeing
+            signal_count = sum(1 for score in [file_score, dir_score, message_score] if score > 0)
+            if signal_count > 1:
+                combined_score *= (1.0 + (signal_count - 1) * 0.1)  # 10% bonus per additional signal
+
+            if combined_score > 0:
+                combined_scores[domain] = min(1.0, combined_score)
+
+        return combined_scores
+
+    def get_domain_statistics(self, files: List[str]) -> Dict[str, Any]:
+        """Get detailed domain analysis statistics for debugging.
+
+        Args:
+            files: List of file paths
+
+        Returns:
+            Dictionary with detailed analysis breakdown
+        """
+        stats = {
+            'total_files': len(files),
+            'file_analysis': self._analyze_file_patterns(files),
+            'directory_analysis': self._analyze_directory_patterns(files),
+            'file_extensions': {},
+            'directory_breakdown': {},
+        }
+
+        # File extension breakdown
+        extensions = defaultdict(int)
+        directories = defaultdict(int)
+
+        for file_path in files:
+            # Extract extension
+            if '.' in file_path:
+                ext = file_path.split('.')[-1].lower()
+                extensions[ext] += 1
+
+            # Extract directories
+            path_parts = file_path.split('/')
+            for part in path_parts[:-1]:  # Exclude filename
+                if part:
+                    directories[part] += 1
+
+        stats['file_extensions'] = dict(extensions)
+        stats['directory_breakdown'] = dict(directories)
+
+        return stats
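
For reviewers who want to exercise the new classifier by hand, the sketch below drives DomainClassifier.classify() directly. It is a minimal example under stated assumptions: the DomainConfig field names (file_patterns, keyword_patterns, min_confidence) are inferred from the attribute accesses in the hunk above, and a dataclass-style keyword constructor is assumed rather than taken from the package's documentation; the config values are illustrative only.

# Minimal usage sketch. Assumption: DomainConfig accepts its fields as
# keyword arguments (field names inferred from the attribute accesses in
# domain_classifier.py above); the patterns and threshold are illustrative.
from gitflow_analytics.qualitative.classifiers.domain_classifier import DomainClassifier
from gitflow_analytics.qualitative.models.schemas import DomainConfig

config = DomainConfig(
    file_patterns={
        'frontend': ['*.tsx', '*.css'],
        'backend': ['*.py'],
    },
    keyword_patterns={
        'frontend': ['ui', 'button', 'style'],
        'backend': ['api', 'endpoint', 'service'],
    },
    min_confidence=0.3,
)

classifier = DomainClassifier(config)

# Passing doc=None exercises the plain-regex keyword fallback, so no spaCy
# model needs to be installed.
domain, confidence = classifier.classify(
    message="fix: align submit button styles on the settings page",
    doc=None,
    files=["src/components/SettingsButton.tsx", "styles/settings.css"],
)
print(domain, round(confidence, 2))  # should print: frontend 1.0

Tracing the weights explains the 1.0: both files match frontend file patterns (score 1.0 at weight 0.5) and frontend directory indicators (1.0 at weight 0.3), and "button" matches a frontend keyword (about 0.33 at weight 0.2), giving roughly 0.87; with three signal types agreeing, the 10%-per-extra-signal bonus multiplies by 1.2, and the result is capped at 1.0.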