gitflow-analytics 1.3.6__py3-none-any.whl → 3.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gitflow_analytics/_version.py +1 -1
- gitflow_analytics/classification/batch_classifier.py +156 -4
- gitflow_analytics/cli.py +897 -179
- gitflow_analytics/config/loader.py +40 -1
- gitflow_analytics/config/schema.py +4 -0
- gitflow_analytics/core/cache.py +20 -0
- gitflow_analytics/core/data_fetcher.py +1254 -228
- gitflow_analytics/core/git_auth.py +169 -0
- gitflow_analytics/core/git_timeout_wrapper.py +347 -0
- gitflow_analytics/core/metrics_storage.py +12 -3
- gitflow_analytics/core/progress.py +219 -18
- gitflow_analytics/core/subprocess_git.py +145 -0
- gitflow_analytics/extractors/ml_tickets.py +3 -2
- gitflow_analytics/extractors/tickets.py +93 -8
- gitflow_analytics/integrations/jira_integration.py +1 -1
- gitflow_analytics/integrations/orchestrator.py +47 -29
- gitflow_analytics/metrics/branch_health.py +3 -2
- gitflow_analytics/models/database.py +72 -1
- gitflow_analytics/pm_framework/adapters/jira_adapter.py +12 -5
- gitflow_analytics/pm_framework/orchestrator.py +8 -3
- gitflow_analytics/qualitative/classifiers/llm/openai_client.py +24 -4
- gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +3 -1
- gitflow_analytics/qualitative/core/llm_fallback.py +34 -2
- gitflow_analytics/reports/narrative_writer.py +118 -74
- gitflow_analytics/security/__init__.py +11 -0
- gitflow_analytics/security/config.py +189 -0
- gitflow_analytics/security/extractors/__init__.py +7 -0
- gitflow_analytics/security/extractors/dependency_checker.py +379 -0
- gitflow_analytics/security/extractors/secret_detector.py +197 -0
- gitflow_analytics/security/extractors/vulnerability_scanner.py +333 -0
- gitflow_analytics/security/llm_analyzer.py +347 -0
- gitflow_analytics/security/reports/__init__.py +5 -0
- gitflow_analytics/security/reports/security_report.py +358 -0
- gitflow_analytics/security/security_analyzer.py +414 -0
- gitflow_analytics/tui/app.py +3 -1
- gitflow_analytics/tui/progress_adapter.py +313 -0
- gitflow_analytics/tui/screens/analysis_progress_screen.py +407 -46
- gitflow_analytics/tui/screens/results_screen.py +219 -206
- gitflow_analytics/ui/__init__.py +21 -0
- gitflow_analytics/ui/progress_display.py +1477 -0
- gitflow_analytics/verify_activity.py +697 -0
- {gitflow_analytics-1.3.6.dist-info → gitflow_analytics-3.3.0.dist-info}/METADATA +2 -1
- {gitflow_analytics-1.3.6.dist-info → gitflow_analytics-3.3.0.dist-info}/RECORD +47 -31
- gitflow_analytics/cli_rich.py +0 -503
- {gitflow_analytics-1.3.6.dist-info → gitflow_analytics-3.3.0.dist-info}/WHEEL +0 -0
- {gitflow_analytics-1.3.6.dist-info → gitflow_analytics-3.3.0.dist-info}/entry_points.txt +0 -0
- {gitflow_analytics-1.3.6.dist-info → gitflow_analytics-3.3.0.dist-info}/licenses/LICENSE +0 -0
- {gitflow_analytics-1.3.6.dist-info → gitflow_analytics-3.3.0.dist-info}/top_level.txt +0 -0
gitflow_analytics/qualitative/core/llm_fallback.py:

```diff
@@ -113,6 +113,11 @@ class LLMFallback:
         # Batch processing cache
         self.batch_cache = {}

+        # Circuit breaker state
+        self.consecutive_failures = 0
+        self.circuit_breaker_open = False
+        self.max_consecutive_failures = 3  # Open circuit after 3 failures
+
         # Token encoder for cost estimation
         try:
             self.encoding = tiktoken.get_encoding("cl100k_base")  # GPT-4 encoding
@@ -142,6 +147,7 @@ class LLMFallback:
         return openai.OpenAI(
             base_url=self.config.base_url,
             api_key=api_key,
+            timeout=30.0,  # 30 second timeout to prevent hanging
             default_headers={
                 "HTTP-Referer": "https://github.com/bobmatnyc/gitflow-analytics",
                 "X-Title": "GitFlow Analytics - Qualitative Analysis",
@@ -213,6 +219,14 @@ class LLMFallback:
         if not commits:
             return []

+        # Check circuit breaker state
+        if self.circuit_breaker_open:
+            self.logger.warning(
+                f"Circuit breaker open ({self.consecutive_failures} consecutive failures), "
+                "using fallback classification"
+            )
+            return self._create_fallback_results(commits)
+
         start_time = time.time()

         # Check cache first
@@ -263,6 +277,15 @@ class LLMFallback:
             if results:
                 self.batch_cache[cache_key] = self._create_template_from_results(results)

+                # Reset circuit breaker on success
+                if self.consecutive_failures > 0:
+                    self.logger.info(
+                        f"API call succeeded, resetting circuit breaker "
+                        f"(was {self.consecutive_failures} failures)"
+                    )
+                self.consecutive_failures = 0
+                self.circuit_breaker_open = False
+
                 # Update processing time in results
                 for result in results:
                     result.processing_time_ms = (processing_time * 1000) / len(results)
@@ -272,6 +295,15 @@ class LLMFallback:
         except Exception as e:
             self.logger.error(f"OpenRouter processing failed: {e}")

+            # Increment failure counter and check circuit breaker
+            self.consecutive_failures += 1
+            if self.consecutive_failures >= self.max_consecutive_failures:
+                self.circuit_breaker_open = True
+                self.logger.error(
+                    f"Circuit breaker opened after {self.consecutive_failures} consecutive failures. "
+                    "All future LLM calls will use fallback classification until manual reset."
+                )
+
             # Record failed call
             self.cost_tracker.record_call(
                 model=selected_model,
@@ -283,8 +315,8 @@ class LLMFallback:
                 error_message=str(e),
             )

-            # Try fallback model if primary failed
-            if selected_model != self.config.fallback_model:
+            # Try fallback model if primary failed AND circuit breaker not open
+            if selected_model != self.config.fallback_model and not self.circuit_breaker_open:
                 return self._retry_with_fallback_model(commits, prompt)
             else:
                 return self._create_fallback_results(commits)
```
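The change set above wires a client-side circuit breaker into `LLMFallback`: three consecutive OpenRouter failures trip the breaker, after which every batch is answered by the local fallback classifier until the process restarts. A minimal standalone sketch of the same pattern (class and function names here are illustrative, not the package's actual API):

```python
from typing import Callable, List


class CircuitBreaker:
    """Trips after N consecutive failures; any success closes it again."""

    def __init__(self, max_failures: int = 3) -> None:
        self.consecutive_failures = 0
        self.max_failures = max_failures
        self.open = False

    def record_success(self) -> None:
        self.consecutive_failures = 0
        self.open = False

    def record_failure(self) -> None:
        self.consecutive_failures += 1
        if self.consecutive_failures >= self.max_failures:
            self.open = True  # trip: skip the remote call from now on


def classify_with_breaker(
    breaker: CircuitBreaker,
    commits: List[str],
    remote: Callable[[List[str]], List[str]],
    fallback: Callable[[List[str]], List[str]],
) -> List[str]:
    # Open circuit: don't even attempt the remote call.
    if breaker.open:
        return fallback(commits)
    try:
        results = remote(commits)
        breaker.record_success()
        return results
    except Exception:
        breaker.record_failure()
        return fallback(commits)
```

Note that the diff also gates the fallback-model retry on the breaker state, so a tripped breaker short-circuits both the primary and the fallback model.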
gitflow_analytics/reports/narrative_writer.py:

```diff
@@ -7,6 +7,7 @@ from pathlib import Path
 from typing import Any

 from ..metrics.activity_scoring import ActivityScorer
+from ..core.progress import get_progress_service

 # Get logger for this module
 logger = logging.getLogger(__name__)
@@ -174,110 +175,153 @@ class NarrativeReportGenerator:
         # Store analysis period for use in weekly trends calculation
         self._analysis_start_date = analysis_start_date
         self._analysis_end_date = analysis_end_date
-
+
         logger.debug(f"DEBUG NARRATIVE: Starting report generation with exclude_authors: {exclude_authors}")
         logger.debug(f"DEBUG NARRATIVE: Analysis period: {analysis_start_date} to {analysis_end_date}")
         logger.debug(f"DEBUG NARRATIVE: Input data sizes - commits: {len(commits)}, developer_stats: {len(developer_stats)}, "
                      f"activity_dist: {len(activity_dist)}, focus_data: {len(focus_data)}")
-
+
         # Sample some developer_stats to see their structure
         if developer_stats:
             for i, dev in enumerate(developer_stats[:3]):
                 logger.debug(f"DEBUG NARRATIVE: Sample developer_stats[{i}]: canonical_id='{dev.get('canonical_id', '')}', "
                              f"primary_name='{dev.get('primary_name', '')}', name='{dev.get('name', '')}', "
                              f"primary_email='{dev.get('primary_email', '')}'")
-
+
         # Filter out excluded authors in Phase 2 using canonical_id
         if exclude_authors:
             logger.debug(f"DEBUG NARRATIVE: Applying exclusion filter with {len(exclude_authors)} excluded authors")
-
+
             original_commits = len(commits)
             commits = self._filter_excluded_authors(commits, exclude_authors)
             filtered_commits = original_commits - len(commits)
-
+
             # Filter other data structures too
             logger.debug(f"DEBUG NARRATIVE: Filtering developer_stats (original: {len(developer_stats)})")
-            developer_stats = self._filter_excluded_authors(developer_stats, exclude_authors)
+            developer_stats = self._filter_excluded_authors(developer_stats, exclude_authors)
             logger.debug(f"DEBUG NARRATIVE: After filtering developer_stats: {len(developer_stats)}")
-
+
             activity_dist = self._filter_excluded_authors(activity_dist, exclude_authors)
             focus_data = self._filter_excluded_authors(focus_data, exclude_authors)
-
+
             if filtered_commits > 0:
                 logger.info(f"Filtered out {filtered_commits} commits from {len(exclude_authors)} excluded authors in narrative report")
-
+
             # Log remaining developers after filtering
             if developer_stats:
                 remaining_devs = [dev.get('primary_name', dev.get('name', 'Unknown')) for dev in developer_stats]
                 logger.debug(f"DEBUG NARRATIVE: Remaining developers after filtering: {remaining_devs}")
         else:
             logger.debug("DEBUG NARRATIVE: No exclusion filter applied")
-
-        report = StringIO()
-
-        # Header
-        report.write("# GitFlow Analytics Report\n\n")

-        # … old lines 221-280 (the rest of the pre-progress report body) were
-        # deleted here; their content is truncated in the source diff view …
+        # Initialize progress tracking for narrative report generation
+        progress_service = get_progress_service()
+
+        # Count all sections to be generated (including conditional ones)
+        sections = []
+        sections.append(("Executive Summary", True))
+        sections.append(("Qualitative Analysis", bool(chatgpt_summary)))
+        sections.append(("Team Composition", True))
+        sections.append(("Project Activity", True))
+        sections.append(("Development Patterns", True))
+        sections.append(("Commit Classification Analysis", ticket_analysis.get("ml_analysis", {}).get("enabled", False)))
+        sections.append(("Pull Request Analysis", bool(pr_metrics and pr_metrics.get("total_prs", 0) > 0)))
+        sections.append(("Issue Tracking", True))
+        sections.append(("PM Platform Integration", bool(pm_data and "metrics" in pm_data)))
+        sections.append(("Recommendations", True))
+
+        # Filter to only included sections
+        active_sections = [name for name, include in sections if include]
+        total_sections = len(active_sections)
+
+        logger.debug(f"Generating narrative report with {total_sections} sections: {', '.join(active_sections)}")
+
+        # Create progress context for narrative report generation
+        with progress_service.progress(total_sections, "Generating narrative report sections", unit="sections") as progress_ctx:
+            report = StringIO()
+
+            # Header
+            report.write("# GitFlow Analytics Report\n\n")
+
+            # Log datetime formatting
+            now = datetime.now()
+            logger.debug(
+                f"Formatting current datetime for report header: {now} (tzinfo: {getattr(now, 'tzinfo', 'N/A')})"
+            )
+            formatted_time = now.strftime("%Y-%m-%d %H:%M:%S")
+            logger.debug(f"  Formatted time: {formatted_time}")
+
+            report.write(f"**Generated**: {formatted_time}\n")
+            report.write(f"**Analysis Period**: Last {weeks} weeks\n\n")
+
+            # Executive Summary
+            progress_service.set_description(progress_ctx, "Generating Executive Summary")
+            report.write("## Executive Summary\n\n")
+            self._write_executive_summary(report, commits, developer_stats, ticket_analysis, prs, branch_health_metrics, pm_data)
+            progress_service.update(progress_ctx)
+
+            # Add ChatGPT qualitative insights if available
+            if chatgpt_summary:
+                progress_service.set_description(progress_ctx, "Generating Qualitative Analysis")
+                report.write("\n## Qualitative Analysis\n\n")
+                report.write(chatgpt_summary)
+                report.write("\n")
+                progress_service.update(progress_ctx)
+
+            # Team Composition
+            progress_service.set_description(progress_ctx, "Generating Team Composition")
+            report.write("\n## Team Composition\n\n")
+            self._write_team_composition(report, developer_stats, focus_data, commits, prs, ticket_analysis, weeks)
+            progress_service.update(progress_ctx)
+
+            # Project Activity
+            progress_service.set_description(progress_ctx, "Generating Project Activity")
+            report.write("\n## Project Activity\n\n")
+            self._write_project_activity(report, activity_dist, commits, branch_health_metrics, ticket_analysis, weeks)
+            progress_service.update(progress_ctx)
+
+            # Development Patterns
+            progress_service.set_description(progress_ctx, "Generating Development Patterns")
+            report.write("\n## Development Patterns\n\n")
+            self._write_development_patterns(report, insights, focus_data)
+            progress_service.update(progress_ctx)
+
+            # Commit Classification Analysis (if ML analysis is available)
+            if ticket_analysis.get("ml_analysis", {}).get("enabled", False):
+                progress_service.set_description(progress_ctx, "Generating Commit Classification Analysis")
+                report.write("\n## Commit Classification Analysis\n\n")
+                self._write_commit_classification_analysis(report, ticket_analysis)
+                progress_service.update(progress_ctx)
+
+            # Pull Request Analysis (if available)
+            if pr_metrics and pr_metrics.get("total_prs", 0) > 0:
+                progress_service.set_description(progress_ctx, "Generating Pull Request Analysis")
+                report.write("\n## Pull Request Analysis\n\n")
+                self._write_pr_analysis(report, pr_metrics, prs)
+                progress_service.update(progress_ctx)
+
+            # Issue Tracking (includes Enhanced Untracked Analysis)
+            progress_service.set_description(progress_ctx, "Generating Issue Tracking")
+            report.write("\n## Issue Tracking\n\n")
+            self._write_ticket_tracking(report, ticket_analysis, developer_stats)
+            progress_service.update(progress_ctx)
+
+            # PM Platform Insights
+            if pm_data and "metrics" in pm_data:
+                progress_service.set_description(progress_ctx, "Generating PM Platform Integration")
+                report.write("\n## PM Platform Integration\n\n")
+                self._write_pm_insights(report, pm_data)
+                progress_service.update(progress_ctx)
+
+            # Recommendations
+            progress_service.set_description(progress_ctx, "Generating Recommendations")
+            report.write("\n## Recommendations\n\n")
+            self._write_recommendations(report, insights, ticket_analysis, focus_data)
+            progress_service.update(progress_ctx)
+
+            # Write to file
+            with open(output_path, "w") as f:
+                f.write(report.getvalue())

         return output_path

```
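The report generator now threads a shared progress service through each section: a `progress()` context manager yields a context object, and `set_description()`/`update()` advance it per section. The real service lives in gitflow_analytics/core/progress.py and is not shown in this diff; the toy stand-in below only mirrors the calling convention visible above, so its internals are an assumption:

```python
from contextlib import contextmanager


class StubProgressService:
    """Toy stand-in mirroring the calling convention used in the diff above."""

    @contextmanager
    def progress(self, total: int, description: str, unit: str = "items"):
        # The yielded dict plays the role of the diff's progress_ctx object.
        ctx = {"done": 0, "total": total, "description": description, "unit": unit}
        print(f"start: {description} (0/{total} {unit})")
        try:
            yield ctx
        finally:
            print(f"done: {ctx['done']}/{total} {unit}")

    def set_description(self, ctx: dict, description: str) -> None:
        ctx["description"] = description

    def update(self, ctx: dict, n: int = 1) -> None:
        ctx["done"] += n
        print(f"  {ctx['description']}: {ctx['done']}/{ctx['total']} {ctx['unit']}")


# Usage mirroring the report generator above:
service = StubProgressService()
sections = ["Executive Summary", "Team Composition", "Recommendations"]
with service.progress(len(sections), "Generating narrative report sections", unit="sections") as ctx:
    for name in sections:
        service.set_description(ctx, f"Generating {name}")
        service.update(ctx)
```

Counting the conditional sections up front (rather than updating an unbounded spinner) lets the total be exact even when Qualitative Analysis, PR Analysis, or PM Integration are skipped.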
gitflow_analytics/security/__init__.py (new file):

```diff
@@ -0,0 +1,11 @@
+"""Security analysis module for GitFlow Analytics.
+
+This module provides comprehensive security analysis of git commits using a hybrid approach:
+1. Specialized security tools (Semgrep, Bandit, etc.) for known patterns
+2. LLM analysis for novel vulnerabilities and context-aware security review
+"""
+
+from .config import SecurityConfig
+from .security_analyzer import SecurityAnalyzer
+
+__all__ = ["SecurityAnalyzer", "SecurityConfig"]
```
gitflow_analytics/security/config.py (new file):

```diff
@@ -0,0 +1,189 @@
+"""Security configuration module."""
+
+from dataclasses import dataclass, field
+from typing import Dict, List, Optional
+
+
+@dataclass
+class SecretScanningConfig:
+    """Configuration for secret detection."""
+
+    enabled: bool = True
+    patterns: Dict[str, str] = field(
+        default_factory=lambda: {
+            # AWS
+            "aws_access_key": r"AKIA[0-9A-Z]{16}",
+            "aws_secret_key": r"aws['\"][0-9a-zA-Z/+=]{40}['\"]",
+            # GitHub
+            "github_token": r"gh[ps]_[a-zA-Z0-9]{36}",
+            "github_oauth": r"gho_[a-zA-Z0-9]{36}",
+            "github_app_token": r"ghs_[a-zA-Z0-9]{36}",
+            # Generic API Keys
+            "api_key": r"(api[_-]?key|apikey)(.{0,20})?['\"]([0-9a-zA-Z]{32,45})['\"]",
+            "secret": r"(secret|password|passwd|pwd)(.{0,20})?['\"]([0-9a-zA-Z]{8,})['\"]",
+            # Private Keys
+            "private_key": r"-----BEGIN (RSA|DSA|EC|OPENSSH) PRIVATE KEY-----",
+            # Database URLs
+            "db_url": r"(postgres|postgresql|mysql|mongodb|redis)://[^:]+:[^@]+@[^/]+",
+            # JWT
+            "jwt": r"eyJ[A-Za-z0-9-_]+\.eyJ[A-Za-z0-9-_]+\.[A-Za-z0-9-_]+",
+            # Slack
+            "slack_token": r"xox[baprs]-[0-9a-zA-Z]{10,48}",
+            # Google
+            "google_api": r"AIza[0-9A-Za-z\\-_]{35}",
+            # Stripe
+            "stripe_key": r"(sk|pk)_(test|live)_[0-9a-zA-Z]{24,}",
+        }
+    )
+    entropy_threshold: float = 4.5
+    exclude_paths: List[str] = field(
+        default_factory=lambda: [
+            "*.test.*",
+            "*.spec.*",
+            "*_test.go",
+            "test_*.py",
+            "*/tests/*",
+            "*/test/*",
+            "*.md",
+            "*.txt",
+        ]
+    )
+
+
+@dataclass
+class VulnerabilityScanningConfig:
+    """Configuration for vulnerability scanning."""
+
+    enabled: bool = True
+
+    # Tool-specific configurations
+    enable_semgrep: bool = True
+    semgrep_rules: List[str] = field(
+        default_factory=lambda: [
+            "auto",  # Use Semgrep's auto configuration
+            "p/security-audit",
+            "p/owasp-top-ten",
+        ]
+    )
+
+    enable_bandit: bool = True  # Python
+    bandit_severity: str = "medium"  # low, medium, high
+
+    enable_gosec: bool = True  # Go
+    enable_eslint_security: bool = True  # JavaScript/TypeScript
+    enable_brakeman: bool = False  # Ruby on Rails
+
+    # Custom patterns for quick checks
+    vulnerability_patterns: Dict[str, str] = field(
+        default_factory=lambda: {
+            "sql_injection": r"(SELECT|DELETE|INSERT|UPDATE|DROP).*\+.*(?:request|params|input)",
+            "command_injection": r"(exec|eval|system|popen|subprocess).*\+.*(?:request|params|input)",
+            "xss": r"innerHTML\s*=.*(?:request|params|input)",
+            "path_traversal": r"\.\./.*(?:request|params|input)",
+            "weak_crypto": r"(md5|sha1|des|rc4)\s*\(",
+            "hardcoded_sql": r"(SELECT|DELETE|INSERT|UPDATE).*FROM.*WHERE.*=\s*['\"]",
+        }
+    )
+
+
+@dataclass
+class DependencyScanningConfig:
+    """Configuration for dependency vulnerability scanning."""
+
+    enabled: bool = True
+    check_npm: bool = True
+    check_pip: bool = True
+    check_go: bool = True
+    check_ruby: bool = True
+    vulnerability_db: str = "ghsa"  # GitHub Security Advisory Database
+    severity_threshold: str = "medium"  # low, medium, high, critical
+
+
+@dataclass
+class LLMSecurityConfig:
+    """Configuration for LLM-based security analysis."""
+
+    enabled: bool = True
+    model: str = "claude-3-haiku-20240307"  # Fast and cost-effective
+    api_key: Optional[str] = None
+
+    # LLM analysis prompts
+    code_review_prompt: str = """Analyze this code change for security vulnerabilities:
+
+Files changed: {files_changed}
+Lines added:
+{lines_added}
+
+Focus on:
+1. Authentication/authorization issues
+2. Input validation problems
+3. Data exposure risks
+4. Injection vulnerabilities
+5. Cryptographic weaknesses
+6. Any other security concerns
+
+Provide a brief, specific analysis. If no issues found, state "No security issues detected."
+"""
+
+    commit_review_prompt: str = """Review this git commit for security implications:
+
+Message: {message}
+Files: {files}
+Category: {category}
+
+Identify any security-relevant changes or potential risks. Be concise and specific.
+"""
+
+    max_lines_for_llm: int = 500  # Limit lines sent to LLM for cost control
+    confidence_threshold: float = 0.7
+
+
+@dataclass
+class SecurityConfig:
+    """Main security configuration."""
+
+    enabled: bool = False  # Disabled by default, opt-in
+
+    secret_scanning: SecretScanningConfig = field(default_factory=SecretScanningConfig)
+    vulnerability_scanning: VulnerabilityScanningConfig = field(
+        default_factory=VulnerabilityScanningConfig
+    )
+    dependency_scanning: DependencyScanningConfig = field(default_factory=DependencyScanningConfig)
+    llm_security: LLMSecurityConfig = field(default_factory=LLMSecurityConfig)
+
+    # Output configuration
+    generate_sarif: bool = False  # Generate SARIF format for GitHub Security
+    fail_on_critical: bool = False  # Fail analysis if critical issues found
+
+    # Performance
+    max_concurrent_scans: int = 4
+    scan_timeout_seconds: int = 30
+
+    @classmethod
+    def from_dict(cls, data: Dict) -> "SecurityConfig":
+        """Create SecurityConfig from dictionary."""
+        if not data:
+            return cls()
+
+        config = cls(enabled=data.get("enabled", False))
+
+        if "secret_scanning" in data:
+            config.secret_scanning = SecretScanningConfig(**data["secret_scanning"])
+
+        if "vulnerability_scanning" in data:
+            config.vulnerability_scanning = VulnerabilityScanningConfig(
+                **data["vulnerability_scanning"]
+            )
+
+        if "dependency_scanning" in data:
+            config.dependency_scanning = DependencyScanningConfig(**data["dependency_scanning"])
+
+        if "llm_security" in data:
+            config.llm_security = LLMSecurityConfig(**data["llm_security"])
+
+        config.generate_sarif = data.get("generate_sarif", False)
+        config.fail_on_critical = data.get("fail_on_critical", False)
+        config.max_concurrent_scans = data.get("max_concurrent_scans", 4)
+        config.scan_timeout_seconds = data.get("scan_timeout_seconds", 30)
+
+        return config
```
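`SecurityConfig.from_dict` maps a plain dictionary (e.g. a parsed YAML section) onto the nested dataclasses above, leaving defaults in place for anything omitted. A usage sketch — the surrounding key layout is an assumption, but every key consumed below appears in `from_dict`:

```python
from gitflow_analytics.security import SecurityConfig

# Hypothetical parsed "security" section of a config file; the keys here
# match what from_dict() reads, but the overall layout is an assumption.
raw = {
    "enabled": True,
    "secret_scanning": {"enabled": True, "entropy_threshold": 4.0},
    "llm_security": {"enabled": False},
    "fail_on_critical": True,
}

config = SecurityConfig.from_dict(raw)
assert config.enabled
assert config.secret_scanning.entropy_threshold == 4.0
assert not config.llm_security.enabled
assert config.fail_on_critical
# Sub-configs absent from the dict keep their dataclass defaults:
assert config.dependency_scanning.check_npm
```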
gitflow_analytics/security/extractors/__init__.py (new file):

```diff
@@ -0,0 +1,7 @@
+"""Security extractors for analyzing code changes."""
+
+from .dependency_checker import DependencyChecker
+from .secret_detector import SecretDetector
+from .vulnerability_scanner import VulnerabilityScanner
+
+__all__ = ["SecretDetector", "VulnerabilityScanner", "DependencyChecker"]
```
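The `SecretDetector` implementation itself is not included in this diff, but the `patterns` and `entropy_threshold` fields in `SecretScanningConfig` suggest a two-stage scan: regex matching for known token formats, plus a Shannon-entropy screen for generic high-randomness strings. A minimal sketch under that assumption — the detector's real API may differ, and the pattern subset below is taken from the config above:

```python
import math
import re

# Subset of the regexes from SecretScanningConfig above; names illustrative.
PATTERNS = {
    "aws_access_key": r"AKIA[0-9A-Z]{16}",
    "github_token": r"gh[ps]_[a-zA-Z0-9]{36}",
    "slack_token": r"xox[baprs]-[0-9a-zA-Z]{10,48}",
}


def shannon_entropy(s: str) -> float:
    """Bits per character; random keys score high, prose scores low."""
    if not s:
        return 0.0
    length = len(s)
    return -sum(
        (s.count(c) / length) * math.log2(s.count(c) / length) for c in set(s)
    )


def scan_line(line: str, entropy_threshold: float = 4.5) -> list[tuple[str, str]]:
    findings = []
    # Stage 1: known token formats match directly.
    for name, pattern in PATTERNS.items():
        for match in re.finditer(pattern, line):
            findings.append((name, match.group(0)))
    # Stage 2: generic long strings are flagged only above the entropy threshold.
    for token in re.findall(r"[A-Za-z0-9+/=_\-]{20,}", line):
        if shannon_entropy(token) >= entropy_threshold:
            findings.append(("high_entropy_string", token))
    return findings


# AWS's documented example key; matches stage 1 without tripping stage 2.
print(scan_line("key = 'AKIAIOSFODNN7EXAMPLE'"))
```

The `exclude_paths` globs in the config would sit in front of a scan like this, skipping test fixtures and docs where placeholder credentials are common false positives.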