gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. gitflow_analytics/_version.py +1 -1
  2. gitflow_analytics/classification/__init__.py +31 -0
  3. gitflow_analytics/classification/batch_classifier.py +752 -0
  4. gitflow_analytics/classification/classifier.py +464 -0
  5. gitflow_analytics/classification/feature_extractor.py +725 -0
  6. gitflow_analytics/classification/linguist_analyzer.py +574 -0
  7. gitflow_analytics/classification/model.py +455 -0
  8. gitflow_analytics/cli.py +4158 -350
  9. gitflow_analytics/cli_rich.py +198 -48
  10. gitflow_analytics/config/__init__.py +43 -0
  11. gitflow_analytics/config/errors.py +261 -0
  12. gitflow_analytics/config/loader.py +905 -0
  13. gitflow_analytics/config/profiles.py +264 -0
  14. gitflow_analytics/config/repository.py +124 -0
  15. gitflow_analytics/config/schema.py +444 -0
  16. gitflow_analytics/config/validator.py +154 -0
  17. gitflow_analytics/config.py +44 -508
  18. gitflow_analytics/core/analyzer.py +1209 -98
  19. gitflow_analytics/core/cache.py +1337 -29
  20. gitflow_analytics/core/data_fetcher.py +1285 -0
  21. gitflow_analytics/core/identity.py +363 -14
  22. gitflow_analytics/core/metrics_storage.py +526 -0
  23. gitflow_analytics/core/progress.py +372 -0
  24. gitflow_analytics/core/schema_version.py +269 -0
  25. gitflow_analytics/extractors/ml_tickets.py +1100 -0
  26. gitflow_analytics/extractors/story_points.py +8 -1
  27. gitflow_analytics/extractors/tickets.py +749 -11
  28. gitflow_analytics/identity_llm/__init__.py +6 -0
  29. gitflow_analytics/identity_llm/analysis_pass.py +231 -0
  30. gitflow_analytics/identity_llm/analyzer.py +464 -0
  31. gitflow_analytics/identity_llm/models.py +76 -0
  32. gitflow_analytics/integrations/github_integration.py +175 -11
  33. gitflow_analytics/integrations/jira_integration.py +461 -24
  34. gitflow_analytics/integrations/orchestrator.py +124 -1
  35. gitflow_analytics/metrics/activity_scoring.py +322 -0
  36. gitflow_analytics/metrics/branch_health.py +470 -0
  37. gitflow_analytics/metrics/dora.py +379 -20
  38. gitflow_analytics/models/database.py +843 -53
  39. gitflow_analytics/pm_framework/__init__.py +115 -0
  40. gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
  41. gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
  42. gitflow_analytics/pm_framework/base.py +406 -0
  43. gitflow_analytics/pm_framework/models.py +211 -0
  44. gitflow_analytics/pm_framework/orchestrator.py +652 -0
  45. gitflow_analytics/pm_framework/registry.py +333 -0
  46. gitflow_analytics/qualitative/__init__.py +9 -10
  47. gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
  48. gitflow_analytics/qualitative/classifiers/__init__.py +3 -3
  49. gitflow_analytics/qualitative/classifiers/change_type.py +518 -244
  50. gitflow_analytics/qualitative/classifiers/domain_classifier.py +272 -165
  51. gitflow_analytics/qualitative/classifiers/intent_analyzer.py +321 -222
  52. gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
  53. gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
  54. gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
  55. gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
  56. gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
  57. gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
  58. gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
  59. gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
  60. gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
  61. gitflow_analytics/qualitative/classifiers/risk_analyzer.py +215 -189
  62. gitflow_analytics/qualitative/core/__init__.py +4 -4
  63. gitflow_analytics/qualitative/core/llm_fallback.py +239 -235
  64. gitflow_analytics/qualitative/core/nlp_engine.py +157 -148
  65. gitflow_analytics/qualitative/core/pattern_cache.py +214 -192
  66. gitflow_analytics/qualitative/core/processor.py +381 -248
  67. gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
  68. gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
  69. gitflow_analytics/qualitative/models/__init__.py +7 -7
  70. gitflow_analytics/qualitative/models/schemas.py +155 -121
  71. gitflow_analytics/qualitative/utils/__init__.py +4 -4
  72. gitflow_analytics/qualitative/utils/batch_processor.py +136 -123
  73. gitflow_analytics/qualitative/utils/cost_tracker.py +142 -140
  74. gitflow_analytics/qualitative/utils/metrics.py +172 -158
  75. gitflow_analytics/qualitative/utils/text_processing.py +146 -104
  76. gitflow_analytics/reports/__init__.py +100 -0
  77. gitflow_analytics/reports/analytics_writer.py +539 -14
  78. gitflow_analytics/reports/base.py +648 -0
  79. gitflow_analytics/reports/branch_health_writer.py +322 -0
  80. gitflow_analytics/reports/classification_writer.py +924 -0
  81. gitflow_analytics/reports/cli_integration.py +427 -0
  82. gitflow_analytics/reports/csv_writer.py +1676 -212
  83. gitflow_analytics/reports/data_models.py +504 -0
  84. gitflow_analytics/reports/database_report_generator.py +427 -0
  85. gitflow_analytics/reports/example_usage.py +344 -0
  86. gitflow_analytics/reports/factory.py +499 -0
  87. gitflow_analytics/reports/formatters.py +698 -0
  88. gitflow_analytics/reports/html_generator.py +1116 -0
  89. gitflow_analytics/reports/interfaces.py +489 -0
  90. gitflow_analytics/reports/json_exporter.py +2770 -0
  91. gitflow_analytics/reports/narrative_writer.py +2287 -158
  92. gitflow_analytics/reports/story_point_correlation.py +1144 -0
  93. gitflow_analytics/reports/weekly_trends_writer.py +389 -0
  94. gitflow_analytics/training/__init__.py +5 -0
  95. gitflow_analytics/training/model_loader.py +377 -0
  96. gitflow_analytics/training/pipeline.py +550 -0
  97. gitflow_analytics/tui/__init__.py +1 -1
  98. gitflow_analytics/tui/app.py +129 -126
  99. gitflow_analytics/tui/screens/__init__.py +3 -3
  100. gitflow_analytics/tui/screens/analysis_progress_screen.py +188 -179
  101. gitflow_analytics/tui/screens/configuration_screen.py +154 -178
  102. gitflow_analytics/tui/screens/loading_screen.py +100 -110
  103. gitflow_analytics/tui/screens/main_screen.py +89 -72
  104. gitflow_analytics/tui/screens/results_screen.py +305 -281
  105. gitflow_analytics/tui/widgets/__init__.py +2 -2
  106. gitflow_analytics/tui/widgets/data_table.py +67 -69
  107. gitflow_analytics/tui/widgets/export_modal.py +76 -76
  108. gitflow_analytics/tui/widgets/progress_widget.py +41 -46
  109. gitflow_analytics-1.3.11.dist-info/METADATA +1015 -0
  110. gitflow_analytics-1.3.11.dist-info/RECORD +122 -0
  111. gitflow_analytics-1.0.3.dist-info/METADATA +0 -490
  112. gitflow_analytics-1.0.3.dist-info/RECORD +0 -62
  113. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/WHEEL +0 -0
  114. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/entry_points.txt +0 -0
  115. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/licenses/LICENSE +0 -0
  116. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/top_level.txt +0 -0
gitflow_analytics/qualitative/classifiers/risk_analyzer.py
@@ -1,95 +1,120 @@
 """Risk analyzer for assessing commit risk levels."""
 
+import importlib.util
 import logging
 import re
-from typing import Dict, List, Any, Set
-from pathlib import Path
+from typing import Any
 
 from ..models.schemas import RiskConfig
 
-try:
-    import spacy
+# Check if spacy is available without importing it
+SPACY_AVAILABLE = importlib.util.find_spec("spacy") is not None
+
+if SPACY_AVAILABLE:
     from spacy.tokens import Doc
-    SPACY_AVAILABLE = True
-except ImportError:
-    SPACY_AVAILABLE = False
+else:
     Doc = Any
 
 
 class RiskAnalyzer:
     """Analyze commits to assess risk level and identify risk factors.
-
+
     This analyzer evaluates multiple dimensions of risk:
     - Content risk: Security-sensitive keywords, critical system changes
     - Size risk: Large commits affecting many files/lines
     - Context risk: Production deployments, emergency fixes
     - Pattern risk: File patterns indicating high-risk areas
-
+
     Risk levels: low, medium, high, critical
     """
-
+
     def __init__(self, config: RiskConfig):
         """Initialize risk analyzer.
-
+
         Args:
             config: Configuration for risk analysis
         """
         self.config = config
         self.logger = logging.getLogger(__name__)
-
+
         # Compile file risk patterns for efficiency
         self._compile_file_patterns()
-
+
         # Additional risk patterns not in config
         self.critical_keywords = {
-            'password', 'secret', 'key', 'token', 'credential', 'auth',
-            'admin', 'root', 'sudo', 'permission', 'access', 'security'
+            "password",
+            "secret",
+            "key",
+            "token",
+            "credential",
+            "auth",
+            "admin",
+            "root",
+            "sudo",
+            "permission",
+            "access",
+            "security",
         }
-
+
         self.production_keywords = {
-            'production', 'prod', 'live', 'release', 'deploy', 'deployment',
-            'critical', 'urgent', 'emergency', 'hotfix', 'immediate'
+            "production",
+            "prod",
+            "live",
+            "release",
+            "deploy",
+            "deployment",
+            "critical",
+            "urgent",
+            "emergency",
+            "hotfix",
+            "immediate",
        }
-
+
         self.database_keywords = {
-            'database', 'db', 'migration', 'schema', 'table', 'column',
-            'index', 'constraint', 'trigger', 'procedure'
+            "database",
+            "db",
+            "migration",
+            "schema",
+            "table",
+            "column",
+            "index",
+            "constraint",
+            "trigger",
+            "procedure",
         }
-
+
         # File extension risk mapping
         self.extension_risk = {
             # High risk extensions
-            '.sql': 'high',
-            '.py': 'medium',  # Could be config or critical logic
-            '.js': 'medium',
-            '.php': 'medium',
-            '.java': 'medium',
-            '.cs': 'medium',
-            '.go': 'medium',
-            '.rb': 'medium',
-
+            ".sql": "high",
+            ".py": "medium",  # Could be config or critical logic
+            ".js": "medium",
+            ".php": "medium",
+            ".java": "medium",
+            ".cs": "medium",
+            ".go": "medium",
+            ".rb": "medium",
             # Configuration files
-            '.yml': 'medium',
-            '.yaml': 'medium',
-            '.json': 'medium',
-            '.toml': 'medium',
-            '.ini': 'medium',
-            '.conf': 'medium',
-            '.config': 'medium',
-
+            ".yml": "medium",
+            ".yaml": "medium",
+            ".json": "medium",
+            ".toml": "medium",
+            ".ini": "medium",
+            ".conf": "medium",
+            ".config": "medium",
             # Low risk extensions
-            '.md': 'low',
-            '.txt': 'low',
-            '.rst': 'low',
-            '.css': 'low',
-            '.scss': 'low',
-            '.less': 'low',
+            ".md": "low",
+            ".txt": "low",
+            ".rst": "low",
+            ".css": "low",
+            ".scss": "low",
+            ".less": "low",
         }
-
+
     def _compile_file_patterns(self) -> None:
         """Compile file risk patterns for efficient matching."""
         self.compiled_file_patterns = {}
-
+
         for pattern, risk_level in self.config.file_risk_patterns.items():
             try:
                 # Convert glob pattern to regex
@@ -97,316 +122,317 @@ class RiskAnalyzer:
                 self.compiled_file_patterns[re.compile(regex_pattern, re.IGNORECASE)] = risk_level
             except re.error as e:
                 self.logger.warning(f"Invalid risk pattern '{pattern}': {e}")
-
+
     def _glob_to_regex(self, pattern: str) -> str:
         """Convert glob pattern to regex."""
-        pattern = pattern.replace('.', r'\.')
-        pattern = pattern.replace('*', '.*')
-        pattern = pattern.replace('?', '.')
-        pattern = f'^{pattern}$'
+        pattern = pattern.replace(".", r"\.")
+        pattern = pattern.replace("*", ".*")
+        pattern = pattern.replace("?", ".")
+        pattern = f"^{pattern}$"
         return pattern
-
-    def assess(self, commit: Dict[str, Any], doc: Doc) -> Dict[str, Any]:
+
+    def assess(self, commit: dict[str, Any], doc: Doc) -> dict[str, Any]:
         """Assess risk level and identify risk factors for a commit.
-
+
         Args:
             commit: Commit dictionary with message, files, stats, etc.
             doc: spaCy processed document (may be None)
-
-        Returns:
+
+        Returns:
             Dictionary with 'level' and 'factors' keys
         """
         risk_factors = []
         risk_scores = []
-
+
         # Analyze message content for risk keywords
-        message_risk = self._analyze_message_risk(commit.get('message', ''), doc)
-        risk_factors.extend(message_risk['factors'])
-        risk_scores.append(message_risk['score'])
-
+        message_risk = self._analyze_message_risk(commit.get("message", ""), doc)
+        risk_factors.extend(message_risk["factors"])
+        risk_scores.append(message_risk["score"])
+
         # Analyze file patterns for risk
-        file_risk = self._analyze_file_risk(commit.get('files_changed', []))
-        risk_factors.extend(file_risk['factors'])
-        risk_scores.append(file_risk['score'])
-
+        file_risk = self._analyze_file_risk(commit.get("files_changed", []))
+        risk_factors.extend(file_risk["factors"])
+        risk_scores.append(file_risk["score"])
+
         # Analyze commit size for risk
         size_risk = self._analyze_size_risk(commit)
-        risk_factors.extend(size_risk['factors'])
-        risk_scores.append(size_risk['score'])
-
+        risk_factors.extend(size_risk["factors"])
+        risk_scores.append(size_risk["score"])
+
         # Analyze timing and context
         context_risk = self._analyze_context_risk(commit)
-        risk_factors.extend(context_risk['factors'])
-        risk_scores.append(context_risk['score'])
-
+        risk_factors.extend(context_risk["factors"])
+        risk_scores.append(context_risk["score"])
+
         # Calculate overall risk level
         max_risk_score = max(risk_scores) if risk_scores else 0.0
         risk_level = self._score_to_level(max_risk_score)
-
+
         return {
-            'level': risk_level,
-            'factors': list(set(risk_factors)),  # Remove duplicates
-            'score': max_risk_score,
-            'breakdown': {
-                'message_risk': message_risk['score'],
-                'file_risk': file_risk['score'],
-                'size_risk': size_risk['score'],
-                'context_risk': context_risk['score']
-            }
+            "level": risk_level,
+            "factors": list(set(risk_factors)),  # Remove duplicates
+            "score": max_risk_score,
+            "breakdown": {
+                "message_risk": message_risk["score"],
+                "file_risk": file_risk["score"],
+                "size_risk": size_risk["score"],
+                "context_risk": context_risk["score"],
+            },
         }
-
-    def _analyze_message_risk(self, message: str, doc: Doc) -> Dict[str, Any]:
+
+    def _analyze_message_risk(self, message: str, doc: Doc) -> dict[str, Any]:
         """Analyze commit message for risk indicators.
-
+
         Args:
             message: Commit message
             doc: spaCy processed document
-
+
         Returns:
             Dictionary with score and factors
         """
         if not message:
-            return {'score': 0.0, 'factors': []}
-
+            return {"score": 0.0, "factors": []}
+
         message_lower = message.lower()
         factors = []
         risk_score = 0.0
-
+
         # Check for high-risk patterns
         for pattern in self.config.high_risk_patterns:
             if pattern.lower() in message_lower:
                 factors.append(f"high_risk_keyword:{pattern}")
                 risk_score = max(risk_score, 0.8)  # High risk
-
+
         # Check for medium-risk patterns
         for pattern in self.config.medium_risk_patterns:
             if pattern.lower() in message_lower:
                 factors.append(f"medium_risk_keyword:{pattern}")
                 risk_score = max(risk_score, 0.5)  # Medium risk
-
+
         # Check for critical security keywords
         for keyword in self.critical_keywords:
             if keyword in message_lower:
                 factors.append(f"security_keyword:{keyword}")
                 risk_score = max(risk_score, 0.9)  # Critical risk
-
+
         # Check for production-related keywords
         for keyword in self.production_keywords:
             if keyword in message_lower:
                 factors.append(f"production_keyword:{keyword}")
                 risk_score = max(risk_score, 0.7)  # High risk
-
+
         # Check for database-related keywords
         for keyword in self.database_keywords:
             if keyword in message_lower:
                 factors.append(f"database_keyword:{keyword}")
                 risk_score = max(risk_score, 0.6)  # Medium-high risk
-
+
         # Check for urgency indicators
         urgency_patterns = [
-            r'\b(urgent|critical|emergency|asap|immediate)\b',
-            r'\b(hotfix|quickfix|patch)\b',
-            r'\b(breaking|major)\b'
+            r"\b(urgent|critical|emergency|asap|immediate)\b",
+            r"\b(hotfix|quickfix|patch)\b",
+            r"\b(breaking|major)\b",
         ]
-
+
         for pattern in urgency_patterns:
             if re.search(pattern, message_lower):
                 factors.append(f"urgency_indicator:{pattern}")
                 risk_score = max(risk_score, 0.6)
-
-        return {'score': risk_score, 'factors': factors}
-
-    def _analyze_file_risk(self, files: List[str]) -> Dict[str, Any]:
+
+        return {"score": risk_score, "factors": factors}
+
+    def _analyze_file_risk(self, files: list[str]) -> dict[str, Any]:
         """Analyze changed files for risk indicators.
-
+
         Args:
             files: List of file paths
-
+
         Returns:
             Dictionary with score and factors
         """
         if not files:
-            return {'score': 0.0, 'factors': []}
-
+            return {"score": 0.0, "factors": []}
+
         factors = []
         risk_score = 0.0
-
+
         for file_path in files:
             file_lower = file_path.lower()
-
+
             # Check compiled file risk patterns
             for pattern, risk_level in self.compiled_file_patterns.items():
                 if pattern.search(file_path):
                     factors.append(f"file_pattern:{risk_level}:{file_path}")
-                    if risk_level == 'critical':
+                    if risk_level == "critical":
                         risk_score = max(risk_score, 1.0)
-                    elif risk_level == 'high':
+                    elif risk_level == "high":
                         risk_score = max(risk_score, 0.8)
-                    elif risk_level == 'medium':
+                    elif risk_level == "medium":
                         risk_score = max(risk_score, 0.5)
-
+
             # Check file extensions
-            if '.' in file_path:
-                ext = '.' + file_path.split('.')[-1].lower()
+            if "." in file_path:
+                ext = "." + file_path.split(".")[-1].lower()
                 if ext in self.extension_risk:
                     ext_risk = self.extension_risk[ext]
                     factors.append(f"file_extension:{ext_risk}:{ext}")
-                    if ext_risk == 'high':
+                    if ext_risk == "high":
                         risk_score = max(risk_score, 0.7)
-                    elif ext_risk == 'medium':
+                    elif ext_risk == "medium":
                         risk_score = max(risk_score, 0.4)
-
+
             # Check for sensitive file names
             sensitive_patterns = [
-                r'.*password.*', r'.*secret.*', r'.*key.*', r'.*token.*',
-                r'.*config.*', r'.*env.*', r'.*credential.*'
+                r".*password.*",
+                r".*secret.*",
+                r".*key.*",
+                r".*token.*",
+                r".*config.*",
+                r".*env.*",
+                r".*credential.*",
             ]
-
+
             for pattern in sensitive_patterns:
                 if re.search(pattern, file_lower):
                     factors.append(f"sensitive_filename:{file_path}")
                     risk_score = max(risk_score, 0.8)
                     break
-
-        return {'score': risk_score, 'factors': factors}
-
-    def _analyze_size_risk(self, commit: Dict[str, Any]) -> Dict[str, Any]:
+
+        return {"score": risk_score, "factors": factors}
+
+    def _analyze_size_risk(self, commit: dict[str, Any]) -> dict[str, Any]:
         """Analyze commit size for risk indicators.
-
+
         Args:
             commit: Commit dictionary
-
+
         Returns:
             Dictionary with score and factors
         """
         factors = []
         risk_score = 0.0
-
-        files_changed = len(commit.get('files_changed', []))
-        insertions = commit.get('insertions', 0)
-        deletions = commit.get('deletions', 0)
+
+        files_changed = len(commit.get("files_changed", []))
+        insertions = commit.get("insertions", 0)
+        deletions = commit.get("deletions", 0)
         total_changes = insertions + deletions
-
+
         # Check file count thresholds
-        if files_changed >= self.config.size_thresholds['large_commit_files']:
+        if files_changed >= self.config.size_thresholds["large_commit_files"]:
             factors.append(f"large_file_count:{files_changed}")
-            if files_changed >= 50:  # Very large
-                risk_score = max(risk_score, 0.8)
-            else:
-                risk_score = max(risk_score, 0.6)
-
+            # Very large commits get higher risk score
+            risk_score = max(risk_score, 0.8) if files_changed >= 50 else max(risk_score, 0.6)
+
         # Check line change thresholds
-        if total_changes >= self.config.size_thresholds['massive_commit_lines']:
+        if total_changes >= self.config.size_thresholds["massive_commit_lines"]:
             factors.append(f"massive_changes:{total_changes}")
             risk_score = max(risk_score, 0.9)
-        elif total_changes >= self.config.size_thresholds['large_commit_lines']:
+        elif total_changes >= self.config.size_thresholds["large_commit_lines"]:
             factors.append(f"large_changes:{total_changes}")
             risk_score = max(risk_score, 0.6)
-
+
         # Check deletion ratio (high deletion ratio can be risky)
         if total_changes > 0:
             deletion_ratio = deletions / total_changes
             if deletion_ratio > 0.7:  # More than 70% deletions
                 factors.append(f"high_deletion_ratio:{deletion_ratio:.2f}")
                 risk_score = max(risk_score, 0.5)
-
-        return {'score': risk_score, 'factors': factors}
-
-    def _analyze_context_risk(self, commit: Dict[str, Any]) -> Dict[str, Any]:
+
+        return {"score": risk_score, "factors": factors}
+
+    def _analyze_context_risk(self, commit: dict[str, Any]) -> dict[str, Any]:
         """Analyze commit context for risk indicators.
-
+
         Args:
             commit: Commit dictionary
-
+
         Returns:
             Dictionary with score and factors
         """
         factors = []
         risk_score = 0.0
-
+
         # Check branch context if available
-        branch = commit.get('branch', '').lower()
+        branch = commit.get("branch", "").lower()
         if branch:
-            if any(term in branch for term in ['main', 'master', 'prod', 'production']):
+            if any(term in branch for term in ["main", "master", "prod", "production"]):
                 factors.append(f"main_branch:{branch}")
                 risk_score = max(risk_score, 0.6)
-            elif 'hotfix' in branch:
+            elif "hotfix" in branch:
                 factors.append(f"hotfix_branch:{branch}")
                 risk_score = max(risk_score, 0.8)
-
+
         # Check commit timing (if timestamp available)
         # Weekend/night commits might be higher risk
-        timestamp = commit.get('timestamp')
+        timestamp = commit.get("timestamp")
         if timestamp:
             # This would require datetime analysis
             # For now, skip this check
             pass
-
+
         # Check for merge commits
-        if commit.get('is_merge', False):
+        if commit.get("is_merge", False):
             factors.append("merge_commit")
             # Merges can be risky depending on what's being merged
             risk_score = max(risk_score, 0.3)
-
-        return {'score': risk_score, 'factors': factors}
-
+
+        return {"score": risk_score, "factors": factors}
+
     def _score_to_level(self, score: float) -> str:
         """Convert risk score to risk level.
-
+
         Args:
             score: Risk score (0.0 to 1.0)
-
+
         Returns:
             Risk level string
         """
         if score >= 0.9:
-            return 'critical'
+            return "critical"
         elif score >= 0.7:
-            return 'high'
+            return "high"
        elif score >= 0.4:
-            return 'medium'
+            return "medium"
         else:
-            return 'low'
-
-    def get_risk_statistics(self, commits: List[Dict[str, Any]]) -> Dict[str, Any]:
+            return "low"
+
+    def get_risk_statistics(self, commits: list[dict[str, Any]]) -> dict[str, Any]:
         """Get risk analysis statistics for a set of commits.
-
+
         Args:
             commits: List of commit dictionaries
-
+
         Returns:
             Dictionary with risk statistics
         """
         if not commits:
-            return {'total_commits': 0}
-
-        risk_levels = {'low': 0, 'medium': 0, 'high': 0, 'critical': 0}
+            return {"total_commits": 0}
+
+        risk_levels = {"low": 0, "medium": 0, "high": 0, "critical": 0}
         all_factors = []
-
+
         for commit in commits:
             # Quick risk assessment without full doc processing
             risk_result = self.assess(commit, None)
-            risk_levels[risk_result['level']] += 1
-            all_factors.extend(risk_result['factors'])
-
+            risk_levels[risk_result["level"]] += 1
+            all_factors.extend(risk_result["factors"])
+
         # Count factor frequencies
         factor_counts = {}
         for factor in all_factors:
-            factor_type = factor.split(':')[0] if ':' in factor else factor
+            factor_type = factor.split(":")[0] if ":" in factor else factor
             factor_counts[factor_type] = factor_counts.get(factor_type, 0) + 1
-
+
         return {
-            'total_commits': len(commits),
-            'risk_distribution': risk_levels,
-            'risk_percentages': {
-                level: (count / len(commits)) * 100
-                for level, count in risk_levels.items()
+            "total_commits": len(commits),
+            "risk_distribution": risk_levels,
+            "risk_percentages": {
+                level: (count / len(commits)) * 100 for level, count in risk_levels.items()
             },
-            'common_risk_factors': sorted(
-                factor_counts.items(),
-                key=lambda x: x[1],
-                reverse=True
-            )[:10],
-            'high_risk_commits': risk_levels['high'] + risk_levels['critical'],
-            'high_risk_percentage': ((risk_levels['high'] + risk_levels['critical']) / len(commits)) * 100
-        }
+            "common_risk_factors": sorted(factor_counts.items(), key=lambda x: x[1], reverse=True)[
+                :10
+            ],
+            "high_risk_commits": risk_levels["high"] + risk_levels["critical"],
+            "high_risk_percentage": ((risk_levels["high"] + risk_levels["critical"]) / len(commits))
+            * 100,
+        }
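Taken together, the risk_analyzer.py changes are a formatting and typing modernization (double quotes, built-in dict/list generics, one-entry-per-line keyword sets) plus a lighter spaCy availability probe via importlib.util.find_spec; the return shape of assess() is unchanged. A minimal usage sketch against 1.3.11 might look like the following. The import paths, the assess(commit, doc) signature, and the commit keys it reads (message, files_changed, insertions, deletions, branch, is_merge) come from the diff above; the no-argument RiskConfig() construction and the sample commit are assumptions for illustration only.

# Hypothetical sketch; assumes RiskConfig() provides default patterns and size thresholds.
from gitflow_analytics.qualitative.classifiers.risk_analyzer import RiskAnalyzer
from gitflow_analytics.qualitative.models.schemas import RiskConfig

analyzer = RiskAnalyzer(RiskConfig())

commit = {
    "message": "hotfix: rotate leaked token before prod deploy",
    "files_changed": ["config/secrets.yml", "migrations/0042_add_index.sql"],
    "insertions": 40,
    "deletions": 310,
    "branch": "hotfix/token-rotation",
    "is_merge": False,
}

result = analyzer.assess(commit, None)  # doc may be None; get_risk_statistics() passes None too
print(result["level"], result["score"])  # e.g. "critical" once any factor reaches 0.9
print(result["breakdown"])               # per-dimension scores: message, file, size, context

Per _score_to_level(), scores of at least 0.9 map to critical, at least 0.7 to high, at least 0.4 to medium, and anything lower to low.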
gitflow_analytics/qualitative/core/__init__.py
@@ -1,13 +1,13 @@
 """Core processing components for qualitative analysis."""
 
-from .processor import QualitativeProcessor
-from .nlp_engine import NLPEngine
 from .llm_fallback import LLMFallback
+from .nlp_engine import NLPEngine
 from .pattern_cache import PatternCache
+from .processor import QualitativeProcessor
 
 __all__ = [
     "QualitativeProcessor",
-    "NLPEngine",
+    "NLPEngine",
     "LLMFallback",
     "PatternCache",
-]
+]