gitflow-analytics: 1.0.0-py3-none-any.whl → 1.0.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. gitflow_analytics/__init__.py +11 -9
  2. gitflow_analytics/_version.py +2 -2
  3. gitflow_analytics/cli.py +691 -243
  4. gitflow_analytics/cli_rich.py +353 -0
  5. gitflow_analytics/config.py +389 -96
  6. gitflow_analytics/core/analyzer.py +175 -78
  7. gitflow_analytics/core/branch_mapper.py +132 -132
  8. gitflow_analytics/core/cache.py +242 -173
  9. gitflow_analytics/core/identity.py +214 -178
  10. gitflow_analytics/extractors/base.py +13 -11
  11. gitflow_analytics/extractors/story_points.py +70 -59
  12. gitflow_analytics/extractors/tickets.py +111 -88
  13. gitflow_analytics/integrations/github_integration.py +91 -77
  14. gitflow_analytics/integrations/jira_integration.py +284 -0
  15. gitflow_analytics/integrations/orchestrator.py +99 -72
  16. gitflow_analytics/metrics/dora.py +183 -179
  17. gitflow_analytics/models/database.py +191 -54
  18. gitflow_analytics/qualitative/__init__.py +30 -0
  19. gitflow_analytics/qualitative/classifiers/__init__.py +13 -0
  20. gitflow_analytics/qualitative/classifiers/change_type.py +468 -0
  21. gitflow_analytics/qualitative/classifiers/domain_classifier.py +399 -0
  22. gitflow_analytics/qualitative/classifiers/intent_analyzer.py +436 -0
  23. gitflow_analytics/qualitative/classifiers/risk_analyzer.py +412 -0
  24. gitflow_analytics/qualitative/core/__init__.py +13 -0
  25. gitflow_analytics/qualitative/core/llm_fallback.py +653 -0
  26. gitflow_analytics/qualitative/core/nlp_engine.py +373 -0
  27. gitflow_analytics/qualitative/core/pattern_cache.py +457 -0
  28. gitflow_analytics/qualitative/core/processor.py +540 -0
  29. gitflow_analytics/qualitative/models/__init__.py +25 -0
  30. gitflow_analytics/qualitative/models/schemas.py +272 -0
  31. gitflow_analytics/qualitative/utils/__init__.py +13 -0
  32. gitflow_analytics/qualitative/utils/batch_processor.py +326 -0
  33. gitflow_analytics/qualitative/utils/cost_tracker.py +343 -0
  34. gitflow_analytics/qualitative/utils/metrics.py +347 -0
  35. gitflow_analytics/qualitative/utils/text_processing.py +243 -0
  36. gitflow_analytics/reports/analytics_writer.py +25 -8
  37. gitflow_analytics/reports/csv_writer.py +60 -32
  38. gitflow_analytics/reports/narrative_writer.py +21 -15
  39. gitflow_analytics/tui/__init__.py +5 -0
  40. gitflow_analytics/tui/app.py +721 -0
  41. gitflow_analytics/tui/screens/__init__.py +8 -0
  42. gitflow_analytics/tui/screens/analysis_progress_screen.py +487 -0
  43. gitflow_analytics/tui/screens/configuration_screen.py +547 -0
  44. gitflow_analytics/tui/screens/loading_screen.py +358 -0
  45. gitflow_analytics/tui/screens/main_screen.py +304 -0
  46. gitflow_analytics/tui/screens/results_screen.py +698 -0
  47. gitflow_analytics/tui/widgets/__init__.py +7 -0
  48. gitflow_analytics/tui/widgets/data_table.py +257 -0
  49. gitflow_analytics/tui/widgets/export_modal.py +301 -0
  50. gitflow_analytics/tui/widgets/progress_widget.py +192 -0
  51. gitflow_analytics-1.0.3.dist-info/METADATA +490 -0
  52. gitflow_analytics-1.0.3.dist-info/RECORD +62 -0
  53. gitflow_analytics-1.0.0.dist-info/METADATA +0 -201
  54. gitflow_analytics-1.0.0.dist-info/RECORD +0 -30
  55. {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/WHEEL +0 -0
  56. {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/entry_points.txt +0 -0
  57. {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/licenses/LICENSE +0 -0
  58. {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/top_level.txt +0 -0
gitflow_analytics/qualitative/classifiers/risk_analyzer.py
@@ -0,0 +1,412 @@
+ """Risk analyzer for assessing commit risk levels."""
+
+ import logging
+ import re
+ from typing import Any, Dict, List, Optional
+
+ from ..models.schemas import RiskConfig
+
+ try:
+     import spacy
+     from spacy.tokens import Doc
+     SPACY_AVAILABLE = True
+ except ImportError:
+     SPACY_AVAILABLE = False
+     Doc = Any
+
+
+ class RiskAnalyzer:
+     """Analyze commits to assess risk level and identify risk factors.
+
+     This analyzer evaluates multiple dimensions of risk:
+     - Content risk: Security-sensitive keywords, critical system changes
+     - Size risk: Large commits affecting many files/lines
+     - Context risk: Production deployments, emergency fixes
+     - Pattern risk: File patterns indicating high-risk areas
+
+     Risk levels: low, medium, high, critical
+     """
+
+     def __init__(self, config: RiskConfig):
+         """Initialize risk analyzer.
+
+         Args:
+             config: Configuration for risk analysis
+         """
+         self.config = config
+         self.logger = logging.getLogger(__name__)
+
+         # Compile file risk patterns for efficiency
+         self._compile_file_patterns()
+
+         # Additional risk patterns not in config.
+         # Note: message matching is substring-based, so short keywords
+         # such as 'key' or 'auth' can over-match (e.g. 'monkey', 'author').
+         self.critical_keywords = {
+             'password', 'secret', 'key', 'token', 'credential', 'auth',
+             'admin', 'root', 'sudo', 'permission', 'access', 'security'
+         }
+
+         self.production_keywords = {
+             'production', 'prod', 'live', 'release', 'deploy', 'deployment',
+             'critical', 'urgent', 'emergency', 'hotfix', 'immediate'
+         }
+
+         self.database_keywords = {
+             'database', 'db', 'migration', 'schema', 'table', 'column',
+             'index', 'constraint', 'trigger', 'procedure'
+         }
+
+         # File extension risk mapping
+         self.extension_risk = {
+             # High risk extensions
+             '.sql': 'high',
+             '.py': 'medium',  # Could be config or critical logic
+             '.js': 'medium',
+             '.php': 'medium',
+             '.java': 'medium',
+             '.cs': 'medium',
+             '.go': 'medium',
+             '.rb': 'medium',
+
+             # Configuration files
+             '.yml': 'medium',
+             '.yaml': 'medium',
+             '.json': 'medium',
+             '.toml': 'medium',
+             '.ini': 'medium',
+             '.conf': 'medium',
+             '.config': 'medium',
+
+             # Low risk extensions
+             '.md': 'low',
+             '.txt': 'low',
+             '.rst': 'low',
+             '.css': 'low',
+             '.scss': 'low',
+             '.less': 'low',
+         }
+
+     def _compile_file_patterns(self) -> None:
+         """Compile file risk patterns for efficient matching."""
+         self.compiled_file_patterns = {}
+
+         for pattern, risk_level in self.config.file_risk_patterns.items():
+             try:
+                 # Convert glob pattern to regex
+                 regex_pattern = self._glob_to_regex(pattern)
+                 self.compiled_file_patterns[re.compile(regex_pattern, re.IGNORECASE)] = risk_level
+             except re.error as e:
+                 self.logger.warning(f"Invalid risk pattern '{pattern}': {e}")
+
+     def _glob_to_regex(self, pattern: str) -> str:
+         """Convert a simple glob pattern to an anchored regex.
+
+         Only '.', '*' and '?' are handled; any other regex metacharacters
+         in the pattern are passed through unescaped.
+         """
+         pattern = pattern.replace('.', r'\.')
+         pattern = pattern.replace('*', '.*')
+         pattern = pattern.replace('?', '.')
+         return f'^{pattern}$'
+
+     def assess(self, commit: Dict[str, Any], doc: Optional[Doc]) -> Dict[str, Any]:
+         """Assess risk level and identify risk factors for a commit.
+
+         Args:
+             commit: Commit dictionary with message, files, stats, etc.
+             doc: spaCy processed document (may be None)
+
+         Returns:
+             Dictionary with 'level', 'factors', 'score' and 'breakdown' keys
+         """
+         risk_factors = []
+         risk_scores = []
+
+         # Analyze message content for risk keywords
+         message_risk = self._analyze_message_risk(commit.get('message', ''), doc)
+         risk_factors.extend(message_risk['factors'])
+         risk_scores.append(message_risk['score'])
+
+         # Analyze file patterns for risk
+         file_risk = self._analyze_file_risk(commit.get('files_changed', []))
+         risk_factors.extend(file_risk['factors'])
+         risk_scores.append(file_risk['score'])
+
+         # Analyze commit size for risk
+         size_risk = self._analyze_size_risk(commit)
+         risk_factors.extend(size_risk['factors'])
+         risk_scores.append(size_risk['score'])
+
+         # Analyze timing and context
+         context_risk = self._analyze_context_risk(commit)
+         risk_factors.extend(context_risk['factors'])
+         risk_scores.append(context_risk['score'])
+
+         # Overall risk is driven by the highest-scoring dimension
+         max_risk_score = max(risk_scores) if risk_scores else 0.0
+         risk_level = self._score_to_level(max_risk_score)
+
+         return {
+             'level': risk_level,
+             'factors': list(set(risk_factors)),  # Remove duplicates
+             'score': max_risk_score,
+             'breakdown': {
+                 'message_risk': message_risk['score'],
+                 'file_risk': file_risk['score'],
+                 'size_risk': size_risk['score'],
+                 'context_risk': context_risk['score']
+             }
+         }
+
+     def _analyze_message_risk(self, message: str, doc: Optional[Doc]) -> Dict[str, Any]:
+         """Analyze commit message for risk indicators.
+
+         Args:
+             message: Commit message
+             doc: spaCy processed document (may be None; currently unused)
+
+         Returns:
+             Dictionary with score and factors
+         """
+         if not message:
+             return {'score': 0.0, 'factors': []}
+
+         message_lower = message.lower()
+         factors = []
+         risk_score = 0.0
+
+         # Check for high-risk patterns
+         for pattern in self.config.high_risk_patterns:
+             if pattern.lower() in message_lower:
+                 factors.append(f"high_risk_keyword:{pattern}")
+                 risk_score = max(risk_score, 0.8)  # High risk
+
+         # Check for medium-risk patterns
+         for pattern in self.config.medium_risk_patterns:
+             if pattern.lower() in message_lower:
+                 factors.append(f"medium_risk_keyword:{pattern}")
+                 risk_score = max(risk_score, 0.5)  # Medium risk
+
+         # Check for critical security keywords
+         for keyword in self.critical_keywords:
+             if keyword in message_lower:
+                 factors.append(f"security_keyword:{keyword}")
+                 risk_score = max(risk_score, 0.9)  # Critical risk
+
+         # Check for production-related keywords
+         for keyword in self.production_keywords:
+             if keyword in message_lower:
+                 factors.append(f"production_keyword:{keyword}")
+                 risk_score = max(risk_score, 0.7)  # High risk
+
+         # Check for database-related keywords
+         for keyword in self.database_keywords:
+             if keyword in message_lower:
+                 factors.append(f"database_keyword:{keyword}")
+                 risk_score = max(risk_score, 0.6)  # Medium-high risk
+
+         # Check for urgency indicators
+         urgency_patterns = [
+             r'\b(urgent|critical|emergency|asap|immediate)\b',
+             r'\b(hotfix|quickfix|patch)\b',
+             r'\b(breaking|major)\b'
+         ]
+
+         for pattern in urgency_patterns:
+             if re.search(pattern, message_lower):
+                 factors.append(f"urgency_indicator:{pattern}")
+                 risk_score = max(risk_score, 0.6)
+
+         return {'score': risk_score, 'factors': factors}
+
+     def _analyze_file_risk(self, files: List[str]) -> Dict[str, Any]:
+         """Analyze changed files for risk indicators.
+
+         Args:
+             files: List of file paths
+
+         Returns:
+             Dictionary with score and factors
+         """
+         if not files:
+             return {'score': 0.0, 'factors': []}
+
+         factors = []
+         risk_score = 0.0
+
+         for file_path in files:
+             file_lower = file_path.lower()
+
+             # Check compiled file risk patterns
+             for pattern, risk_level in self.compiled_file_patterns.items():
+                 if pattern.search(file_path):
+                     factors.append(f"file_pattern:{risk_level}:{file_path}")
+                     if risk_level == 'critical':
+                         risk_score = max(risk_score, 1.0)
+                     elif risk_level == 'high':
+                         risk_score = max(risk_score, 0.8)
+                     elif risk_level == 'medium':
+                         risk_score = max(risk_score, 0.5)
+
+             # Check file extensions
+             if '.' in file_path:
+                 ext = '.' + file_path.split('.')[-1].lower()
+                 if ext in self.extension_risk:
+                     ext_risk = self.extension_risk[ext]
+                     factors.append(f"file_extension:{ext_risk}:{ext}")
+                     if ext_risk == 'high':
+                         risk_score = max(risk_score, 0.7)
+                     elif ext_risk == 'medium':
+                         risk_score = max(risk_score, 0.4)
+
+             # Check for sensitive file names
+             sensitive_patterns = [
+                 r'.*password.*', r'.*secret.*', r'.*key.*', r'.*token.*',
+                 r'.*config.*', r'.*env.*', r'.*credential.*'
+             ]
+
+             for pattern in sensitive_patterns:
+                 if re.search(pattern, file_lower):
+                     factors.append(f"sensitive_filename:{file_path}")
+                     risk_score = max(risk_score, 0.8)
+                     break
+
+         return {'score': risk_score, 'factors': factors}
+
+     def _analyze_size_risk(self, commit: Dict[str, Any]) -> Dict[str, Any]:
+         """Analyze commit size for risk indicators.
+
+         Args:
+             commit: Commit dictionary
+
+         Returns:
+             Dictionary with score and factors
+         """
+         factors = []
+         risk_score = 0.0
+
+         files_changed = len(commit.get('files_changed', []))
+         insertions = commit.get('insertions', 0)
+         deletions = commit.get('deletions', 0)
+         total_changes = insertions + deletions
+
+         # Check file count thresholds
+         if files_changed >= self.config.size_thresholds['large_commit_files']:
+             factors.append(f"large_file_count:{files_changed}")
+             if files_changed >= 50:  # Very large
+                 risk_score = max(risk_score, 0.8)
+             else:
+                 risk_score = max(risk_score, 0.6)
+
+         # Check line change thresholds
+         if total_changes >= self.config.size_thresholds['massive_commit_lines']:
+             factors.append(f"massive_changes:{total_changes}")
+             risk_score = max(risk_score, 0.9)
+         elif total_changes >= self.config.size_thresholds['large_commit_lines']:
+             factors.append(f"large_changes:{total_changes}")
+             risk_score = max(risk_score, 0.6)
+
+         # Check deletion ratio (high deletion ratio can be risky)
+         if total_changes > 0:
+             deletion_ratio = deletions / total_changes
+             if deletion_ratio > 0.7:  # More than 70% deletions
+                 factors.append(f"high_deletion_ratio:{deletion_ratio:.2f}")
+                 risk_score = max(risk_score, 0.5)
+
+         return {'score': risk_score, 'factors': factors}
+
+     def _analyze_context_risk(self, commit: Dict[str, Any]) -> Dict[str, Any]:
+         """Analyze commit context for risk indicators.
+
+         Args:
+             commit: Commit dictionary
+
+         Returns:
+             Dictionary with score and factors
+         """
+         factors = []
+         risk_score = 0.0
+
+         # Check branch context if available
+         branch = commit.get('branch', '').lower()
+         if branch:
+             if any(term in branch for term in ['main', 'master', 'prod', 'production']):
+                 factors.append(f"main_branch:{branch}")
+                 risk_score = max(risk_score, 0.6)
+             elif 'hotfix' in branch:
+                 factors.append(f"hotfix_branch:{branch}")
+                 risk_score = max(risk_score, 0.8)
+
+         # Commit timing is not yet analyzed; weekend/night commits might be
+         # higher risk, and commit.get('timestamp') would support such a check.
+
+         # Check for merge commits
+         if commit.get('is_merge', False):
+             factors.append("merge_commit")
+             # Merges can be risky depending on what's being merged
+             risk_score = max(risk_score, 0.3)
+
+         return {'score': risk_score, 'factors': factors}
+
+     def _score_to_level(self, score: float) -> str:
+         """Convert risk score to risk level.
+
+         Args:
+             score: Risk score (0.0 to 1.0)
+
+         Returns:
+             Risk level string
+         """
+         if score >= 0.9:
+             return 'critical'
+         elif score >= 0.7:
+             return 'high'
+         elif score >= 0.4:
+             return 'medium'
+         else:
+             return 'low'
+
+     def get_risk_statistics(self, commits: List[Dict[str, Any]]) -> Dict[str, Any]:
+         """Get risk analysis statistics for a set of commits.
+
+         Args:
+             commits: List of commit dictionaries
+
+         Returns:
+             Dictionary with risk statistics
+         """
+         if not commits:
+             return {'total_commits': 0}
+
+         risk_levels = {'low': 0, 'medium': 0, 'high': 0, 'critical': 0}
+         all_factors = []
+
+         for commit in commits:
+             # Quick risk assessment without full doc processing
+             risk_result = self.assess(commit, None)
+             risk_levels[risk_result['level']] += 1
+             all_factors.extend(risk_result['factors'])
+
+         # Count factor frequencies
+         factor_counts = {}
+         for factor in all_factors:
+             factor_type = factor.split(':')[0] if ':' in factor else factor
+             factor_counts[factor_type] = factor_counts.get(factor_type, 0) + 1
+
+         return {
+             'total_commits': len(commits),
+             'risk_distribution': risk_levels,
+             'risk_percentages': {
+                 level: (count / len(commits)) * 100
+                 for level, count in risk_levels.items()
+             },
+             'common_risk_factors': sorted(
+                 factor_counts.items(),
+                 key=lambda x: x[1],
+                 reverse=True
+             )[:10],
+             'high_risk_commits': risk_levels['high'] + risk_levels['critical'],
+             'high_risk_percentage': ((risk_levels['high'] + risk_levels['critical']) / len(commits)) * 100
+         }
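
For orientation, here is a minimal sketch (not part of the package diff) of how the new RiskAnalyzer might be driven. The RiskConfig field values below are hypothetical; the real schema is defined in gitflow_analytics/qualitative/models/schemas.py, also added in this release, and the analyzer only assumes the attributes referenced above exist.

    from gitflow_analytics.qualitative.classifiers.risk_analyzer import RiskAnalyzer
    from gitflow_analytics.qualitative.models.schemas import RiskConfig

    # Hypothetical configuration values; see schemas.py for the real defaults.
    config = RiskConfig(
        high_risk_patterns=['rollback', 'revert'],
        medium_risk_patterns=['refactor'],
        file_risk_patterns={'*migration*': 'high'},
        size_thresholds={
            'large_commit_files': 20,
            'large_commit_lines': 500,
            'massive_commit_lines': 2000,
        },
    )

    analyzer = RiskAnalyzer(config)
    result = analyzer.assess(
        {
            'message': 'hotfix: rotate leaked API token',
            'files_changed': ['config/secrets.yml'],
            'insertions': 4,
            'deletions': 2,
            'branch': 'hotfix/token-rotation',
        },
        doc=None,  # the spaCy doc is optional
    )
    print(result['level'], sorted(result['factors']))

Because 'token' is among the analyzer's built-in security keywords, this commit scores 0.9 on message risk and comes back as 'critical'.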
gitflow_analytics/qualitative/core/__init__.py
@@ -0,0 +1,13 @@
+ """Core processing components for qualitative analysis."""
+
+ from .processor import QualitativeProcessor
+ from .nlp_engine import NLPEngine
+ from .llm_fallback import LLMFallback
+ from .pattern_cache import PatternCache
+
+ __all__ = [
+     "QualitativeProcessor",
+     "NLPEngine",
+     "LLMFallback",
+     "PatternCache",
+ ]
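
With these re-exports in place, downstream code can import the core components from the subpackage directly, for example:

    from gitflow_analytics.qualitative.core import QualitativeProcessor, NLPEngine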