gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.6__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in a supported public registry. It is provided for informational purposes only.
- gitflow_analytics/_version.py +1 -1
- gitflow_analytics/classification/__init__.py +31 -0
- gitflow_analytics/classification/batch_classifier.py +752 -0
- gitflow_analytics/classification/classifier.py +464 -0
- gitflow_analytics/classification/feature_extractor.py +725 -0
- gitflow_analytics/classification/linguist_analyzer.py +574 -0
- gitflow_analytics/classification/model.py +455 -0
- gitflow_analytics/cli.py +4108 -350
- gitflow_analytics/cli_rich.py +198 -48
- gitflow_analytics/config/__init__.py +43 -0
- gitflow_analytics/config/errors.py +261 -0
- gitflow_analytics/config/loader.py +904 -0
- gitflow_analytics/config/profiles.py +264 -0
- gitflow_analytics/config/repository.py +124 -0
- gitflow_analytics/config/schema.py +441 -0
- gitflow_analytics/config/validator.py +154 -0
- gitflow_analytics/config.py +44 -508
- gitflow_analytics/core/analyzer.py +1209 -98
- gitflow_analytics/core/cache.py +1337 -29
- gitflow_analytics/core/data_fetcher.py +1193 -0
- gitflow_analytics/core/identity.py +363 -14
- gitflow_analytics/core/metrics_storage.py +526 -0
- gitflow_analytics/core/progress.py +372 -0
- gitflow_analytics/core/schema_version.py +269 -0
- gitflow_analytics/extractors/ml_tickets.py +1100 -0
- gitflow_analytics/extractors/story_points.py +8 -1
- gitflow_analytics/extractors/tickets.py +749 -11
- gitflow_analytics/identity_llm/__init__.py +6 -0
- gitflow_analytics/identity_llm/analysis_pass.py +231 -0
- gitflow_analytics/identity_llm/analyzer.py +464 -0
- gitflow_analytics/identity_llm/models.py +76 -0
- gitflow_analytics/integrations/github_integration.py +175 -11
- gitflow_analytics/integrations/jira_integration.py +461 -24
- gitflow_analytics/integrations/orchestrator.py +124 -1
- gitflow_analytics/metrics/activity_scoring.py +322 -0
- gitflow_analytics/metrics/branch_health.py +470 -0
- gitflow_analytics/metrics/dora.py +379 -20
- gitflow_analytics/models/database.py +843 -53
- gitflow_analytics/pm_framework/__init__.py +115 -0
- gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
- gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
- gitflow_analytics/pm_framework/base.py +406 -0
- gitflow_analytics/pm_framework/models.py +211 -0
- gitflow_analytics/pm_framework/orchestrator.py +652 -0
- gitflow_analytics/pm_framework/registry.py +333 -0
- gitflow_analytics/qualitative/__init__.py +9 -10
- gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
- gitflow_analytics/qualitative/classifiers/__init__.py +3 -3
- gitflow_analytics/qualitative/classifiers/change_type.py +518 -244
- gitflow_analytics/qualitative/classifiers/domain_classifier.py +272 -165
- gitflow_analytics/qualitative/classifiers/intent_analyzer.py +321 -222
- gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
- gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
- gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
- gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
- gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
- gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
- gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
- gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
- gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
- gitflow_analytics/qualitative/classifiers/risk_analyzer.py +215 -189
- gitflow_analytics/qualitative/core/__init__.py +4 -4
- gitflow_analytics/qualitative/core/llm_fallback.py +239 -235
- gitflow_analytics/qualitative/core/nlp_engine.py +157 -148
- gitflow_analytics/qualitative/core/pattern_cache.py +214 -192
- gitflow_analytics/qualitative/core/processor.py +381 -248
- gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
- gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
- gitflow_analytics/qualitative/models/__init__.py +7 -7
- gitflow_analytics/qualitative/models/schemas.py +155 -121
- gitflow_analytics/qualitative/utils/__init__.py +4 -4
- gitflow_analytics/qualitative/utils/batch_processor.py +136 -123
- gitflow_analytics/qualitative/utils/cost_tracker.py +142 -140
- gitflow_analytics/qualitative/utils/metrics.py +172 -158
- gitflow_analytics/qualitative/utils/text_processing.py +146 -104
- gitflow_analytics/reports/__init__.py +100 -0
- gitflow_analytics/reports/analytics_writer.py +539 -14
- gitflow_analytics/reports/base.py +648 -0
- gitflow_analytics/reports/branch_health_writer.py +322 -0
- gitflow_analytics/reports/classification_writer.py +924 -0
- gitflow_analytics/reports/cli_integration.py +427 -0
- gitflow_analytics/reports/csv_writer.py +1676 -212
- gitflow_analytics/reports/data_models.py +504 -0
- gitflow_analytics/reports/database_report_generator.py +427 -0
- gitflow_analytics/reports/example_usage.py +344 -0
- gitflow_analytics/reports/factory.py +499 -0
- gitflow_analytics/reports/formatters.py +698 -0
- gitflow_analytics/reports/html_generator.py +1116 -0
- gitflow_analytics/reports/interfaces.py +489 -0
- gitflow_analytics/reports/json_exporter.py +2770 -0
- gitflow_analytics/reports/narrative_writer.py +2287 -158
- gitflow_analytics/reports/story_point_correlation.py +1144 -0
- gitflow_analytics/reports/weekly_trends_writer.py +389 -0
- gitflow_analytics/training/__init__.py +5 -0
- gitflow_analytics/training/model_loader.py +377 -0
- gitflow_analytics/training/pipeline.py +550 -0
- gitflow_analytics/tui/__init__.py +1 -1
- gitflow_analytics/tui/app.py +129 -126
- gitflow_analytics/tui/screens/__init__.py +3 -3
- gitflow_analytics/tui/screens/analysis_progress_screen.py +188 -179
- gitflow_analytics/tui/screens/configuration_screen.py +154 -178
- gitflow_analytics/tui/screens/loading_screen.py +100 -110
- gitflow_analytics/tui/screens/main_screen.py +89 -72
- gitflow_analytics/tui/screens/results_screen.py +305 -281
- gitflow_analytics/tui/widgets/__init__.py +2 -2
- gitflow_analytics/tui/widgets/data_table.py +67 -69
- gitflow_analytics/tui/widgets/export_modal.py +76 -76
- gitflow_analytics/tui/widgets/progress_widget.py +41 -46
- gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
- gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
- gitflow_analytics-1.0.3.dist-info/METADATA +0 -490
- gitflow_analytics-1.0.3.dist-info/RECORD +0 -62
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0

gitflow_analytics/qualitative/classifiers/risk_analyzer.py
@@ -1,95 +1,120 @@
 """Risk analyzer for assessing commit risk levels."""
 
+import importlib.util
 import logging
 import re
-from typing import …
-from pathlib import Path
+from typing import Any
 
 from ..models.schemas import RiskConfig
 
-…
-…
+# Check if spacy is available without importing it
+SPACY_AVAILABLE = importlib.util.find_spec("spacy") is not None
+
+if SPACY_AVAILABLE:
     from spacy.tokens import Doc
-…
-except ImportError:
-    SPACY_AVAILABLE = False
+else:
     Doc = Any
 
 
 class RiskAnalyzer:
     """Analyze commits to assess risk level and identify risk factors.
-
+
     This analyzer evaluates multiple dimensions of risk:
     - Content risk: Security-sensitive keywords, critical system changes
     - Size risk: Large commits affecting many files/lines
     - Context risk: Production deployments, emergency fixes
     - Pattern risk: File patterns indicating high-risk areas
-
+
     Risk levels: low, medium, high, critical
     """
-
+
     def __init__(self, config: RiskConfig):
         """Initialize risk analyzer.
-
+
         Args:
             config: Configuration for risk analysis
         """
         self.config = config
         self.logger = logging.getLogger(__name__)
-
+
         # Compile file risk patterns for efficiency
         self._compile_file_patterns()
-
+
         # Additional risk patterns not in config
         self.critical_keywords = {
-…
-…
+            "password",
+            "secret",
+            "key",
+            "token",
+            "credential",
+            "auth",
+            "admin",
+            "root",
+            "sudo",
+            "permission",
+            "access",
+            "security",
         }
-
+
         self.production_keywords = {
-…
-…
+            "production",
+            "prod",
+            "live",
+            "release",
+            "deploy",
+            "deployment",
+            "critical",
+            "urgent",
+            "emergency",
+            "hotfix",
+            "immediate",
         }
-
+
         self.database_keywords = {
-…
-…
+            "database",
+            "db",
+            "migration",
+            "schema",
+            "table",
+            "column",
+            "index",
+            "constraint",
+            "trigger",
+            "procedure",
         }
-
+
         # File extension risk mapping
         self.extension_risk = {
             # High risk extensions
-…
-…
-…
-…
-…
-…
-…
-…
-…
+            ".sql": "high",
+            ".py": "medium",  # Could be config or critical logic
+            ".js": "medium",
+            ".php": "medium",
+            ".java": "medium",
+            ".cs": "medium",
+            ".go": "medium",
+            ".rb": "medium",
             # Configuration files
-…
-…
-…
-…
-…
-…
-…
-…
+            ".yml": "medium",
+            ".yaml": "medium",
+            ".json": "medium",
+            ".toml": "medium",
+            ".ini": "medium",
+            ".conf": "medium",
+            ".config": "medium",
             # Low risk extensions
-…
-…
-…
-…
-…
-…
+            ".md": "low",
+            ".txt": "low",
+            ".rst": "low",
+            ".css": "low",
+            ".scss": "low",
+            ".less": "low",
         }
-
+
     def _compile_file_patterns(self) -> None:
         """Compile file risk patterns for efficient matching."""
         self.compiled_file_patterns = {}
-
+
         for pattern, risk_level in self.config.file_risk_patterns.items():
             try:
                 # Convert glob pattern to regex
@@ -97,316 +122,317 @@ class RiskAnalyzer:
                 self.compiled_file_patterns[re.compile(regex_pattern, re.IGNORECASE)] = risk_level
             except re.error as e:
                 self.logger.warning(f"Invalid risk pattern '{pattern}': {e}")
-
+
     def _glob_to_regex(self, pattern: str) -> str:
         """Convert glob pattern to regex."""
-        pattern = pattern.replace(…
-        pattern = pattern.replace(…
-        pattern = pattern.replace(…
-        pattern = f…
+        pattern = pattern.replace(".", r"\.")
+        pattern = pattern.replace("*", ".*")
+        pattern = pattern.replace("?", ".")
+        pattern = f"^{pattern}$"
         return pattern
-
-    def assess(self, commit: …
+
+    def assess(self, commit: dict[str, Any], doc: Doc) -> dict[str, Any]:
         """Assess risk level and identify risk factors for a commit.
-
+
         Args:
             commit: Commit dictionary with message, files, stats, etc.
             doc: spaCy processed document (may be None)
-
-        Returns:
+
+        Returns:
             Dictionary with 'level' and 'factors' keys
         """
         risk_factors = []
         risk_scores = []
-
+
         # Analyze message content for risk keywords
-        message_risk = self._analyze_message_risk(commit.get(…
-        risk_factors.extend(message_risk[…
-        risk_scores.append(message_risk[…
-
+        message_risk = self._analyze_message_risk(commit.get("message", ""), doc)
+        risk_factors.extend(message_risk["factors"])
+        risk_scores.append(message_risk["score"])
+
         # Analyze file patterns for risk
-        file_risk = self._analyze_file_risk(commit.get(…
-        risk_factors.extend(file_risk[…
-        risk_scores.append(file_risk[…
-
+        file_risk = self._analyze_file_risk(commit.get("files_changed", []))
+        risk_factors.extend(file_risk["factors"])
+        risk_scores.append(file_risk["score"])
+
         # Analyze commit size for risk
         size_risk = self._analyze_size_risk(commit)
-        risk_factors.extend(size_risk[…
-        risk_scores.append(size_risk[…
-
+        risk_factors.extend(size_risk["factors"])
+        risk_scores.append(size_risk["score"])
+
         # Analyze timing and context
         context_risk = self._analyze_context_risk(commit)
-        risk_factors.extend(context_risk[…
-        risk_scores.append(context_risk[…
-
+        risk_factors.extend(context_risk["factors"])
+        risk_scores.append(context_risk["score"])
+
         # Calculate overall risk level
         max_risk_score = max(risk_scores) if risk_scores else 0.0
         risk_level = self._score_to_level(max_risk_score)
-
+
         return {
-…
-…
-…
-…
-…
-…
-…
-…
-        }
+            "level": risk_level,
+            "factors": list(set(risk_factors)),  # Remove duplicates
+            "score": max_risk_score,
+            "breakdown": {
+                "message_risk": message_risk["score"],
+                "file_risk": file_risk["score"],
+                "size_risk": size_risk["score"],
+                "context_risk": context_risk["score"],
+            },
         }
-
-    def _analyze_message_risk(self, message: str, doc: Doc) -> …
+
+    def _analyze_message_risk(self, message: str, doc: Doc) -> dict[str, Any]:
         """Analyze commit message for risk indicators.
-
+
         Args:
             message: Commit message
             doc: spaCy processed document
-
+
         Returns:
             Dictionary with score and factors
         """
         if not message:
-            return {…
-
+            return {"score": 0.0, "factors": []}
+
         message_lower = message.lower()
         factors = []
         risk_score = 0.0
-
+
         # Check for high-risk patterns
         for pattern in self.config.high_risk_patterns:
             if pattern.lower() in message_lower:
                 factors.append(f"high_risk_keyword:{pattern}")
                 risk_score = max(risk_score, 0.8)  # High risk
-
+
         # Check for medium-risk patterns
         for pattern in self.config.medium_risk_patterns:
             if pattern.lower() in message_lower:
                 factors.append(f"medium_risk_keyword:{pattern}")
                 risk_score = max(risk_score, 0.5)  # Medium risk
-
+
         # Check for critical security keywords
         for keyword in self.critical_keywords:
             if keyword in message_lower:
                 factors.append(f"security_keyword:{keyword}")
                 risk_score = max(risk_score, 0.9)  # Critical risk
-
+
         # Check for production-related keywords
         for keyword in self.production_keywords:
             if keyword in message_lower:
                 factors.append(f"production_keyword:{keyword}")
                 risk_score = max(risk_score, 0.7)  # High risk
-
+
         # Check for database-related keywords
         for keyword in self.database_keywords:
             if keyword in message_lower:
                 factors.append(f"database_keyword:{keyword}")
                 risk_score = max(risk_score, 0.6)  # Medium-high risk
-
+
         # Check for urgency indicators
         urgency_patterns = [
-            r…
-            r…
-            r…
+            r"\b(urgent|critical|emergency|asap|immediate)\b",
+            r"\b(hotfix|quickfix|patch)\b",
+            r"\b(breaking|major)\b",
         ]
-
+
         for pattern in urgency_patterns:
             if re.search(pattern, message_lower):
                 factors.append(f"urgency_indicator:{pattern}")
                 risk_score = max(risk_score, 0.6)
-
-        return {…
-
-    def _analyze_file_risk(self, files: …
+
+        return {"score": risk_score, "factors": factors}
+
+    def _analyze_file_risk(self, files: list[str]) -> dict[str, Any]:
         """Analyze changed files for risk indicators.
-
+
         Args:
             files: List of file paths
-
+
         Returns:
             Dictionary with score and factors
         """
         if not files:
-            return {…
-
+            return {"score": 0.0, "factors": []}
+
         factors = []
         risk_score = 0.0
-
+
         for file_path in files:
             file_lower = file_path.lower()
-
+
             # Check compiled file risk patterns
             for pattern, risk_level in self.compiled_file_patterns.items():
                 if pattern.search(file_path):
                     factors.append(f"file_pattern:{risk_level}:{file_path}")
-                    if risk_level == …
+                    if risk_level == "critical":
                         risk_score = max(risk_score, 1.0)
-                    elif risk_level == …
+                    elif risk_level == "high":
                         risk_score = max(risk_score, 0.8)
-                    elif risk_level == …
+                    elif risk_level == "medium":
                         risk_score = max(risk_score, 0.5)
-
+
             # Check file extensions
-            if …
-                ext = …
+            if "." in file_path:
+                ext = "." + file_path.split(".")[-1].lower()
                 if ext in self.extension_risk:
                     ext_risk = self.extension_risk[ext]
                     factors.append(f"file_extension:{ext_risk}:{ext}")
-                    if ext_risk == …
+                    if ext_risk == "high":
                         risk_score = max(risk_score, 0.7)
-                    elif ext_risk == …
+                    elif ext_risk == "medium":
                         risk_score = max(risk_score, 0.4)
-
+
             # Check for sensitive file names
             sensitive_patterns = [
-                r…
-                r…
+                r".*password.*",
+                r".*secret.*",
+                r".*key.*",
+                r".*token.*",
+                r".*config.*",
+                r".*env.*",
+                r".*credential.*",
             ]
-
+
             for pattern in sensitive_patterns:
                 if re.search(pattern, file_lower):
                     factors.append(f"sensitive_filename:{file_path}")
                     risk_score = max(risk_score, 0.8)
                     break
-
-        return {…
-
-    def _analyze_size_risk(self, commit: …
+
+        return {"score": risk_score, "factors": factors}
+
+    def _analyze_size_risk(self, commit: dict[str, Any]) -> dict[str, Any]:
         """Analyze commit size for risk indicators.
-
+
         Args:
             commit: Commit dictionary
-
+
         Returns:
             Dictionary with score and factors
         """
         factors = []
         risk_score = 0.0
-
-        files_changed = len(commit.get(…
-        insertions = commit.get(…
-        deletions = commit.get(…
+
+        files_changed = len(commit.get("files_changed", []))
+        insertions = commit.get("insertions", 0)
+        deletions = commit.get("deletions", 0)
         total_changes = insertions + deletions
-
+
         # Check file count thresholds
-        if files_changed >= self.config.size_thresholds[…
+        if files_changed >= self.config.size_thresholds["large_commit_files"]:
             factors.append(f"large_file_count:{files_changed}")
-…
-…
-…
-            risk_score = max(risk_score, 0.6)
-
+            # Very large commits get higher risk score
+            risk_score = max(risk_score, 0.8) if files_changed >= 50 else max(risk_score, 0.6)
+
         # Check line change thresholds
-        if total_changes >= self.config.size_thresholds[…
+        if total_changes >= self.config.size_thresholds["massive_commit_lines"]:
             factors.append(f"massive_changes:{total_changes}")
             risk_score = max(risk_score, 0.9)
-        elif total_changes >= self.config.size_thresholds[…
+        elif total_changes >= self.config.size_thresholds["large_commit_lines"]:
             factors.append(f"large_changes:{total_changes}")
             risk_score = max(risk_score, 0.6)
-
+
         # Check deletion ratio (high deletion ratio can be risky)
         if total_changes > 0:
             deletion_ratio = deletions / total_changes
             if deletion_ratio > 0.7:  # More than 70% deletions
                 factors.append(f"high_deletion_ratio:{deletion_ratio:.2f}")
                 risk_score = max(risk_score, 0.5)
-
-        return {…
-
-    def _analyze_context_risk(self, commit: …
+
+        return {"score": risk_score, "factors": factors}
+
+    def _analyze_context_risk(self, commit: dict[str, Any]) -> dict[str, Any]:
         """Analyze commit context for risk indicators.
-
+
         Args:
             commit: Commit dictionary
-
+
         Returns:
             Dictionary with score and factors
         """
         factors = []
         risk_score = 0.0
-
+
         # Check branch context if available
-        branch = commit.get(…
+        branch = commit.get("branch", "").lower()
         if branch:
-            if any(term in branch for term in […
+            if any(term in branch for term in ["main", "master", "prod", "production"]):
                 factors.append(f"main_branch:{branch}")
                 risk_score = max(risk_score, 0.6)
-            elif …
+            elif "hotfix" in branch:
                 factors.append(f"hotfix_branch:{branch}")
                 risk_score = max(risk_score, 0.8)
-
+
         # Check commit timing (if timestamp available)
         # Weekend/night commits might be higher risk
-        timestamp = commit.get(…
+        timestamp = commit.get("timestamp")
         if timestamp:
             # This would require datetime analysis
             # For now, skip this check
             pass
-
+
         # Check for merge commits
-        if commit.get(…
+        if commit.get("is_merge", False):
             factors.append("merge_commit")
             # Merges can be risky depending on what's being merged
             risk_score = max(risk_score, 0.3)
-
-        return {…
-
+
+        return {"score": risk_score, "factors": factors}
+
     def _score_to_level(self, score: float) -> str:
         """Convert risk score to risk level.
-
+
         Args:
             score: Risk score (0.0 to 1.0)
-
+
         Returns:
             Risk level string
         """
         if score >= 0.9:
-            return …
+            return "critical"
         elif score >= 0.7:
-            return …
+            return "high"
         elif score >= 0.4:
-            return …
+            return "medium"
         else:
-            return …
-
-    def get_risk_statistics(self, commits: …
+            return "low"
+
+    def get_risk_statistics(self, commits: list[dict[str, Any]]) -> dict[str, Any]:
         """Get risk analysis statistics for a set of commits.
-
+
         Args:
             commits: List of commit dictionaries
-
+
         Returns:
             Dictionary with risk statistics
         """
         if not commits:
-            return {…
-
-        risk_levels = {…
+            return {"total_commits": 0}
+
+        risk_levels = {"low": 0, "medium": 0, "high": 0, "critical": 0}
         all_factors = []
-
+
         for commit in commits:
             # Quick risk assessment without full doc processing
             risk_result = self.assess(commit, None)
-            risk_levels[risk_result[…
-            all_factors.extend(risk_result[…
-
+            risk_levels[risk_result["level"]] += 1
+            all_factors.extend(risk_result["factors"])
+
         # Count factor frequencies
         factor_counts = {}
         for factor in all_factors:
-            factor_type = factor.split(…
+            factor_type = factor.split(":")[0] if ":" in factor else factor
             factor_counts[factor_type] = factor_counts.get(factor_type, 0) + 1
-
+
         return {
-…
-…
-…
-                level: (count / len(commits)) * 100
-                for level, count in risk_levels.items()
+            "total_commits": len(commits),
+            "risk_distribution": risk_levels,
+            "risk_percentages": {
+                level: (count / len(commits)) * 100 for level, count in risk_levels.items()
             },
-…
-…
-…
-…
-…
-…
-…
-        }
+            "common_risk_factors": sorted(factor_counts.items(), key=lambda x: x[1], reverse=True)[
+                :10
+            ],
+            "high_risk_commits": risk_levels["high"] + risk_levels["critical"],
+            "high_risk_percentage": ((risk_levels["high"] + risk_levels["critical"]) / len(commits))
+            * 100,
+        }
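
For orientation, a minimal sketch of how the reworked assess() API above might be exercised. The import paths follow the file listing; the RiskConfig() construction and the example commit dict are illustrative assumptions, not taken from the package.

# Hypothetical usage sketch -- RiskConfig construction details are not visible in this diff.
from gitflow_analytics.qualitative.models.schemas import RiskConfig
from gitflow_analytics.qualitative.classifiers.risk_analyzer import RiskAnalyzer

analyzer = RiskAnalyzer(RiskConfig())  # assumes RiskConfig provides usable defaults

commit = {
    "message": "hotfix: rotate leaked auth token before prod deploy",
    "files_changed": ["config/secrets.yml", "db/migration_042.sql"],
    "insertions": 40,
    "deletions": 12,
    "branch": "hotfix/token-rotation",
    "is_merge": False,
}

# doc may be None; get_risk_statistics() itself calls assess(commit, None).
result = analyzer.assess(commit, None)
print(result["level"])      # "critical" -- "auth"/"token" hit the 0.9 security-keyword tier
print(result["factors"])    # e.g. security_keyword:token, hotfix_branch:hotfix/token-rotation
print(result["breakdown"])  # per-dimension scores: message/file/size/context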

gitflow_analytics/qualitative/core/__init__.py
@@ -1,13 +1,13 @@
 """Core processing components for qualitative analysis."""
 
-from .processor import QualitativeProcessor
-from .nlp_engine import NLPEngine
 from .llm_fallback import LLMFallback
+from .nlp_engine import NLPEngine
 from .pattern_cache import PatternCache
+from .processor import QualitativeProcessor
 
 __all__ = [
     "QualitativeProcessor",
-    "NLPEngine",
+    "NLPEngine",
     "LLMFallback",
     "PatternCache",
-]
+]