codesecure-core 1.0.0b10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codesecure/__init__.py +19 -0
- codesecure/ai_providers/__init__.py +0 -0
- codesecure/ai_providers/anthropic_provider.py +144 -0
- codesecure/ai_providers/azure_cli.py +428 -0
- codesecure/ai_providers/base.py +89 -0
- codesecure/ai_providers/google_cli.py +195 -0
- codesecure/ai_providers/guides.py +51 -0
- codesecure/ai_providers/kiro_cli.py +421 -0
- codesecure/ai_providers/manager.py +525 -0
- codesecure/ai_providers/openai_provider.py +143 -0
- codesecure/ai_providers/prompts.py +276 -0
- codesecure/common/__init__.py +64 -0
- codesecure/common/cloud_provider.py +208 -0
- codesecure/common/config.py +78 -0
- codesecure/common/config_manager.py +80 -0
- codesecure/common/logging.py +205 -0
- codesecure/common/models.py +247 -0
- codesecure/common/performance.py +111 -0
- codesecure/jobs/__init__.py +18 -0
- codesecure/jobs/manager.py +421 -0
- codesecure/reports/__init__.py +32 -0
- codesecure/reports/generator.py +202 -0
- codesecure/reports/html.py +766 -0
- codesecure/reports/json.py +148 -0
- codesecure/reports/markdown.py +293 -0
- codesecure/reports/sarif.py +229 -0
- codesecure/scanners/__init__.py +61 -0
- codesecure/scanners/bandit.py +165 -0
- codesecure/scanners/base.py +344 -0
- codesecure/scanners/checkov.py +198 -0
- codesecure/scanners/data_merger.py +270 -0
- codesecure/scanners/deduplication_engine.py +282 -0
- codesecure/scanners/detect_secrets.py +109 -0
- codesecure/scanners/engine.py +351 -0
- codesecure/scanners/framework_mapper.py +158 -0
- codesecure/scanners/grype.py +124 -0
- codesecure/scanners/npm_audit.py +134 -0
- codesecure/scanners/pip_audit.py +149 -0
- codesecure/scanners/pip_licenses.py +130 -0
- codesecure/scanners/semgrep.py +123 -0
- codesecure/scanners/syft.py +111 -0
- codesecure/security/__init__.py +45 -0
- codesecure/security/output_sanitizer.py +202 -0
- codesecure/security/security_validator.py +300 -0
- codesecure/security/subprocess_runner.py +366 -0
- codesecure/security/validators.py +300 -0
- codesecure/security_matrix/__init__.py +0 -0
- codesecure/security_matrix/file_filter.py +0 -0
- codesecure/security_matrix/neolifter.py +0 -0
- codesecure/security_validate/__init__.py +0 -0
- codesecure/security_validate/cdk.py +0 -0
- codesecure/security_validate/cloudformation.py +0 -0
- codesecure/security_validate/severity.py +0 -0
- codesecure/security_validate/terraform.py +0 -0
- codesecure/telemetry/__init__.py +3 -0
- codesecure/telemetry/client.py +90 -0
- codesecure/threat_model/__init__.py +0 -0
- codesecure/threat_model/action_plan.py +0 -0
- codesecure/threat_model/stride_engine.py +0 -0
- codesecure_core-1.0.0b10.dist-info/METADATA +101 -0
- codesecure_core-1.0.0b10.dist-info/RECORD +64 -0
- codesecure_core-1.0.0b10.dist-info/WHEEL +5 -0
- codesecure_core-1.0.0b10.dist-info/licenses/LICENSE +21 -0
- codesecure_core-1.0.0b10.dist-info/top_level.txt +1 -0
codesecure/__init__.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""
CodeSecure - Enterprise Security Analysis Platform.

Exposes ``__version__``, resolved from the installed ``codesecure-core``
distribution metadata when available.
"""

try:
    from importlib.metadata import version, PackageNotFoundError
except ImportError:
    # Fallback for Python < 3.8
    try:
        from importlib_metadata import version, PackageNotFoundError
    except ImportError:
        def version(_):
            """Stand-in used when no metadata backend can be imported."""
            return "unknown"

        PackageNotFoundError = Exception

try:
    __version__ = version("codesecure-core")
except (PackageNotFoundError, NameError):
    # Fallback for development where the package is not installed.
    # Kept in step with the released distribution version (1.0.0b10).
    __version__ = "1.0.0b10"
|
|
File without changes
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from typing import List, Dict, Any, Optional
|
|
3
|
+
|
|
4
|
+
from codesecure.common.models import Finding, EnhancedFinding
|
|
5
|
+
from codesecure.common.logging import get_logger
|
|
6
|
+
from codesecure.ai_providers.base import (
|
|
7
|
+
BaseAIProvider,
|
|
8
|
+
AIProviderType,
|
|
9
|
+
AIProviderStatus,
|
|
10
|
+
)
|
|
11
|
+
from codesecure.ai_providers.prompts import GenericPromptBuilder, GenericMarkdownParser
|
|
12
|
+
from codesecure.common.config import get_env
|
|
13
|
+
|
|
14
|
+
logger = get_logger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class AnthropicWrapper(BaseAIProvider):
    """Anthropic provider implementation built on the async ``anthropic`` SDK.

    The API key and model name are read through ``get_env`` when the wrapper
    is constructed and can be overridden later with ``set_api_key`` and
    ``set_model``. The SDK is imported lazily inside the methods that need
    it, so importing this module does not require ``anthropic``.
    """

    PROVIDER_TYPE = AIProviderType.AWS  # Currently aliased/planned for AWS Bedrock or direct Anthropic

    def __init__(self):
        self.prompt_builder = GenericPromptBuilder()
        self.parser = GenericMarkdownParser()
        self._api_key = get_env("CODESECURE_ANTHROPIC_API_KEY", "")
        self._model = get_env("CODESECURE_ANTHROPIC_MODEL", "claude-3-5-sonnet-20241022")

    def _get_client(self):
        """Return a new ``anthropic.AsyncAnthropic`` client for the current key.

        Raises:
            RuntimeError: if the ``anthropic`` SDK cannot be imported.
        """
        try:
            import anthropic
            return anthropic.AsyncAnthropic(api_key=self._api_key)
        except ImportError as exc:
            # Chain the cause so the underlying import failure stays visible.
            raise RuntimeError("anthropic SDK not installed") from exc

    def set_api_key(self, api_key: str):
        """Override the API key used for subsequently created clients."""
        self._api_key = api_key

    def set_model(self, model: str):
        """Override the model name sent with subsequent requests."""
        self._model = model

    @property
    def is_available(self) -> bool:
        """True when the SDK is importable and a non-empty API key is set."""
        try:
            import anthropic  # noqa: F401 - imported only to probe availability
        except ImportError:
            return False
        return bool(self._api_key)

    async def check_availability(self) -> tuple[AIProviderStatus, str]:
        """Report whether the SDK and API key are present (no network call)."""
        try:
            import anthropic  # noqa: F401 - imported only to probe availability
        except ImportError:
            return AIProviderStatus.UNAVAILABLE, "Anthropic SDK not found (install with 'pip install anthropic')"

        if not self._api_key:
            return AIProviderStatus.UNAVAILABLE, "CODESECURE_ANTHROPIC_API_KEY is not set"

        return AIProviderStatus.AVAILABLE, "Ready"

    def _classify_error(self, err_msg: str, status_code: Optional[int] = None) -> str:
        """Classify an error as ``"auth"``, ``"rate_limit"`` or ``"other"``.

        The decision uses the HTTP status code when given, otherwise
        case-insensitive keyword matches in ``err_msg``.
        """
        err_lower = err_msg.lower()
        if status_code in (401, 403) or any(w in err_lower for w in ("invalid x-api-key", "authentication")):
            return "auth"
        if status_code == 429 or any(w in err_lower for w in ("rate_limit", "too many requests")):
            return "rate_limit"
        return "other"

    async def test_connection(self) -> tuple[AIProviderStatus, str]:
        """Send a minimal request to validate the key and model.

        Returns the result of ``check_availability`` unchanged when the
        provider is not available; otherwise ``(AVAILABLE, "Ready")`` on
        success or ``(ERROR, <message>)`` describing the failure.
        """
        status, msg = await self.check_availability()
        if status != AIProviderStatus.AVAILABLE:
            return status, msg

        try:
            client = self._get_client()
            logger.info("Validating Anthropic API key with model %s...", self._model)
            await client.messages.create(
                model=self._model,
                messages=[{"role": "user", "content": "Reply with 'ok'"}],
                max_tokens=5,
            )
            return AIProviderStatus.AVAILABLE, "Ready"
        except Exception as e:
            err_type = self._classify_error(str(e), getattr(e, "status_code", None))
            if err_type == "auth":
                return AIProviderStatus.ERROR, f"Anthropic Auth Error: {e}"
            if err_type == "rate_limit":
                return AIProviderStatus.ERROR, f"Anthropic Rate Limit Error: {e}"
            return AIProviderStatus.ERROR, f"Anthropic Connection Failed: {e}"

    async def analyze_findings_batch(
        self,
        findings: List[Finding],
        code_contexts: Dict[str, str],
        batch_size: int = 10,
        app_context: Optional[Dict[str, Any]] = None,
        is_workspace_scan: bool = False
    ) -> List[EnhancedFinding]:
        """Send all ``findings`` to Anthropic in one request and parse the reply.

        ``code_contexts`` and ``batch_size`` are accepted for interface
        compatibility but are not used by this implementation.

        Returns:
            The parser's output for the model response; an empty list when
            ``findings`` is empty.

        Raises:
            RuntimeError: when the failure is classified as a rate limit
                (chained to the original exception) or the SDK is missing.
            Exception: any other SDK/parsing error is logged and re-raised.
        """
        if not findings:
            return []

        client = self._get_client()
        prompt = self.prompt_builder.build_batch_prompt(findings, app_context=app_context, is_workspace_scan=is_workspace_scan)

        system_prompt = (
            "You are CodeSecure, an expert AI security assistant. "
            "Analyze security findings and provide remediation in Markdown format. "
            "Follow the ZERO-CHATTER RULE: remediation code blocks must contain "
            "ONLY pure source code, no explanations."
        )

        try:
            logger.info("Sending batch of %d findings to Anthropic (%s)...", len(findings), self._model)

            response = await client.messages.create(
                model=self._model,
                system=system_prompt,
                messages=[
                    {"role": "user", "content": prompt}
                ],
                temperature=0.2,
                max_tokens=4096,
            )

            # Only the first content block is read; an empty list yields "".
            text = response.content[0].text if response.content else ""
            logger.debug("Received AI response length: %d characters", len(text))

            return self.parser.parse(text, findings)

        except Exception as e:
            err_type = self._classify_error(str(e), getattr(e, "status_code", None))
            if err_type == "rate_limit":
                raise RuntimeError(f"Anthropic Rate Limit Exceeded: {e}") from e
            logger.exception("Anthropic Analysis failed: %s", e)
            # Bare raise keeps the original traceback intact.
            raise

    def enhance_finding(self, finding: Finding, analysis: Any) -> EnhancedFinding:
        """Placeholder required by the base class; currently returns ``None``."""
        pass

    async def generate_stride_analysis(self, repo_path: Any, category: str) -> Dict[str, Any]:
        """STRIDE analysis is not implemented; returns an error marker dict."""
        return {"error": "Not implemented"}
|
|
142
|
+
|
|
143
|
+
def get_anthropic_wrapper() -> AnthropicWrapper:
    """Factory: build and return a fresh ``AnthropicWrapper``."""
    wrapper = AnthropicWrapper()
    return wrapper
|
|
@@ -0,0 +1,428 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import re
|
|
3
|
+
import shutil
|
|
4
|
+
import os
|
|
5
|
+
from typing import List, Dict, Any, Optional, AsyncIterator
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from codesecure.common.models import Finding, AzureEnhancedFinding
|
|
9
|
+
from codesecure.common.logging import get_logger
|
|
10
|
+
from codesecure.ai_providers.base import (
|
|
11
|
+
BaseAIProvider,
|
|
12
|
+
AIProviderType,
|
|
13
|
+
AIProviderStatus,
|
|
14
|
+
PromptBuilder,
|
|
15
|
+
CLIExecutor,
|
|
16
|
+
MarkdownParser
|
|
17
|
+
)
|
|
18
|
+
from codesecure.common.config import AI_BATCH_TIMEOUT
|
|
19
|
+
|
|
20
|
+
from codesecure.ai_providers.guides import get_guide
|
|
21
|
+
|
|
22
|
+
logger = get_logger(__name__)
|
|
23
|
+
|
|
24
|
+
# --- 1. Prompt Builder ---
|
|
25
|
+
class AzurePromptBuilder(PromptBuilder):
    """Assemble the Markdown prompt sent to Azure for a batch of findings."""

    SYSTEM_CONTEXT_TEMPLATE = """You are an Azure security expert.

**Application context:**
- **Application name:** {app_name}
- **Cloud services detected:** {cloud_services}

Analyze the following security findings and provide enhanced details in Markdown format.
Strictly adhere to the **Critical Requirements** and the specific cloud guide below.

{enhancement_guide}

For each finding, provide the analysis in the exact structure specified. Failure to follow these requirements will result in invalid analysis."""

    def build_batch_prompt(self, findings: List[Finding], app_context: Optional[Dict[str, Any]] = None) -> str:
        """Return the full prompt: system context, one section per finding,
        then the required response format.

        ``app_context`` may supply ``name``, ``services`` and
        ``cloud_provider``; defaults are used for any that are missing.
        """
        context = app_context or {}

        # The guide is chosen by cloud provider, defaulting to azure.
        header = self.SYSTEM_CONTEXT_TEMPLATE.format(
            app_name=context.get("name", "Unknown Application"),
            cloud_services=", ".join(context.get("services", ["Azure Core Services"])),
            enhancement_guide=get_guide(context.get("cloud_provider", "azure")),
        )

        parts = [f"{header}\n\n", "## Security Findings to Analyze\n\n"]

        # Findings are numbered from 1 so the reply can refer to them by index.
        for number, item in enumerate(findings, 1):
            parts.append(
                f"### Finding {number}\n"
                f"- **Scanner**: {item.scanner}\n"
                f"- **Severity**: {item.severity}\n"
                f"- **Title**: {item.title}\n"
                f"- **File**: {item.file_path}\n"
                f"- **Lines**: {item.line_start}-{item.line_end}\n"
                "- **Code**:\n"
                "```\n"
                f"{item.code_snippet}\n"
                "```\n"
                "\n"
            )

        parts.append(self._get_response_template())
        return "".join(parts)

    def _get_response_template(self) -> str:
        """Return the fixed Markdown layout the model is asked to follow."""
        template = """
## Required Response Format (for each finding)

### Finding N Analysis

#### Detailed Description
[Comprehensive explanation of the vulnerability]

#### Attack Scenario
[Step-by-step attack scenario showing how this could be exploited]

#### Business Impact
[Impact on business operations, data, reputation with realistic numeric estimates]

#### False Positive Analysis
- **Is False Positive**: [true|false]
- **Confidence**: [0.0-1.0]
- **Explanation**: [Required]

#### Code Snippets
- **Before**:
```
[Original vulnerable code snippet]
```
Lines: [start]-[end]

- **After**:
```
[Fixed code snippet showing remediation]
```

#### Azure Recommendation
[Azure-specific security recommendation aligned with Azure Security Benchmark]

#### Azure Security Benchmark Pillar
[Network Security|Identity Management|Privileged Access|Data Protection|Asset Management|Logging and Threat Detection|Incident Response|Posture and Vulnerability Management|Endpoint Security|Backup and Recovery]

#### Implementation Steps
1. [Step 1]
2. [Step 2]
3. [Step 3]

#### Verification Test Cases
- **Test Case 1**: [Description] -> **Expected Output**: [Output]

#### Rollback Procedure
[Steps to rollback if remediation causes issues]

#### Azure Documentation
- [Link 1](https://learn.microsoft.com/azure/...)
- [Link 2](https://learn.microsoft.com/azure/...)
"""
        return template
|
|
130
|
+
|
|
131
|
+
# --- 2. CLI Executor ---
|
|
132
|
+
class AzureCLIExecutor(CLIExecutor):
    """Execute Azure OpenAI via 'az rest' - no extensions required"""

    COMMAND = "az"
    TIMEOUT = AI_BATCH_TIMEOUT
    # Azure OpenAI API version
    API_VERSION = "2024-08-01-preview"

    def _get_azure_openai_config(self) -> tuple:
        """Get Azure OpenAI endpoint and deployment from environment.

        Returns:
            ``(endpoint, deployment)`` with any trailing slash removed from
            the endpoint. The deployment defaults to ``"gpt-4o"``.

        Raises:
            RuntimeError: if ``AZURE_OPENAI_ENDPOINT`` is unset or empty.
        """
        endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT", "")
        deployment = os.environ.get("AZURE_OPENAI_DEPLOYMENT", "gpt-4o")

        if not endpoint:
            raise RuntimeError(
                "AZURE_OPENAI_ENDPOINT environment variable not set. "
                "Set it to your Azure OpenAI resource endpoint, e.g.: "
                "https://<your-resource>.openai.azure.com"
            )

        # Ensure endpoint doesn't have trailing slash
        endpoint = endpoint.rstrip("/")
        return endpoint, deployment

    @staticmethod
    def _build_command(full_path: str, url: str, body_arg: str) -> List[str]:
        """Build the ``az rest`` argument vector for a POST to ``url``.

        On Windows, a ``.cmd`` launcher is run through ``cmd.exe /c``.
        """
        az_args = [
            full_path,
            "rest",
            "--method", "POST",
            "--url", url,
            "--body", body_arg,
            "--resource", "https://cognitiveservices.azure.com",
        ]
        if os.name == 'nt' and full_path.lower().endswith('.cmd'):
            return ["cmd.exe", "/c", *az_args]
        return az_args

    async def execute(self, prompt: str) -> str:
        """Execute Azure OpenAI via 'az rest' and return markdown response.

        The JSON request body is written to a private temporary file and
        passed as ``--body @<file>``. Passing it inline would put the whole
        prompt (which embeds scanned source code) on the command line, where
        it is subject to OS argument-length limits and, on the ``cmd.exe /c``
        path, to shell metacharacter interpretation.

        Raises:
            FileNotFoundError: if the ``az`` executable is not on PATH.
            RuntimeError: on missing configuration, non-zero exit status,
                quota/rate-limit errors, timeout, or an empty ``choices``
                list in the response.
        """
        import json as _json
        import tempfile

        full_path = shutil.which(self.COMMAND)
        if not full_path:
            raise FileNotFoundError(f"CLI command '{self.COMMAND}' not found.")

        endpoint, deployment = self._get_azure_openai_config()

        # Build the Azure OpenAI Chat Completions REST URL
        url = (
            f"{endpoint}/openai/deployments/{deployment}"
            f"/chat/completions?api-version={self.API_VERSION}"
        )

        # Build the request body
        body = _json.dumps({
            "messages": [
                {"role": "system", "content": "You are an Azure security expert. Respond in Markdown format."},
                {"role": "user", "content": prompt}
            ],
            "temperature": 0.3,
            "max_tokens": 4096
        })

        # mkstemp creates the file readable/writable only by the current user.
        fd, body_path = tempfile.mkstemp(prefix="codesecure_az_", suffix=".json")
        try:
            with os.fdopen(fd, "w", encoding="utf-8") as body_file:
                body_file.write(body)

            # Use 'az rest' which handles authentication automatically
            # SEC-006: shell=False enforced
            cmd = self._build_command(full_path, url, f"@{body_path}")

            env = os.environ.copy()

            process = await asyncio.create_subprocess_exec(
                *cmd,
                stdin=asyncio.subprocess.PIPE,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
                env=env
            )

            try:
                stdout, stderr = await asyncio.wait_for(
                    process.communicate(),
                    timeout=self.TIMEOUT
                )

                if process.returncode != 0:
                    error_msg = stderr.decode(errors="replace").strip()
                    if any(kw in error_msg.lower() for kw in ["429", "quota", "rate limit"]):
                        raise RuntimeError(f"Azure AI Quota Exceeded: {error_msg}")

                    logger.error("Azure CLI failed (Exit %d): %s", process.returncode, error_msg)
                    raise RuntimeError(f"Azure CLI Error: {error_msg}")

                # Parse the JSON response and extract the assistant's message content
                response_data = _json.loads(stdout.decode(errors="replace"))
                choices = response_data.get("choices", [])
                if not choices:
                    raise RuntimeError("Azure OpenAI returned empty response")

                # "content" may be present but null; normalise to an empty string.
                content = choices[0].get("message", {}).get("content", "") or ""
                return self.clean_output(content)

            except asyncio.TimeoutError as exc:
                logger.error("Azure CLI timed out after %d seconds", self.TIMEOUT)
                try:
                    process.kill()
                except OSError:
                    # The process already exited between the timeout and kill().
                    pass
                else:
                    # Reap the killed child so it does not linger.
                    await process.wait()
                raise RuntimeError(f"Azure CLI Timed Out ({self.TIMEOUT}s)") from exc
            except RuntimeError:
                raise
            except Exception as e:
                logger.exception("Azure CLI execution failed: %s", e)
                raise
        finally:
            # Best-effort cleanup: the body contains source snippets.
            try:
                os.unlink(body_path)
            except OSError:
                logger.debug("Could not remove temporary request body %s", body_path)
|
|
248
|
+
|
|
249
|
+
# --- 3. Markdown Parser ---
|
|
250
|
+
class AzureMarkdownParser(MarkdownParser):
    """Parse Azure CLI markdown response into AzureEnhancedFindings"""

    # Heading that starts each per-finding section; group 1 is the 1-based index.
    FINDING_PATTERN = r"### \s*(?:Analysis\s+for\s+)?Finding\s*(\d+)(?::)?(?:\s*Analysis)?.*?(?:\n|$)"
    # Each pattern captures the body of one "####" subsection in group 1.
    SECTION_PATTERNS = {
        "detailed_description": r"#### (?:Detailed\s+)?Description(?::)?\s*(.+?)(?=####|$)",
        "attack_scenario": r"#### Attack Scenario(?::)?\s*(.+?)(?=####|$)",
        "business_impact": r"#### Business Impact(?::)?\s*(.+?)(?=####|$)",
        "remediation_code": r"#### (?:Remediation Code|Code Snippets).*?After\*\*:(?::)?\s*```(?:\w+)?[\s\n]+(.*?)```",
        "code_after": r"#### (?:Remediation Code|Code Snippets).*?After\*\*:(?::)?\s*```(?:\w+)?[\s\n]+(.*?)```",
        "azure_recommendation": r"#### Azure Recommendation(?::)?\s*(.+?)(?=####|$)",
        "azure_benchmark_pillar": r"#### Azure Security Benchmark Pillar(?::)?\s*(.+?)(?=####|$)",
        "rollback_procedure": r"#### Rollback Procedure(?::)?\s*(.+?)(?=####|$)",
        "test_cases": r"#### Verification Test Cases(?::)?\s*(.+?)(?=####|$)",
    }

    def parse(self, markdown_text: str, original_findings: List[Finding]) -> List[AzureEnhancedFinding]:
        """Parse markdown response and merge with original findings.

        Sections whose index is outside ``original_findings`` are skipped.
        A section that fails to parse is logged and skipped, so the result
        may be shorter than ``original_findings``.
        """
        if not markdown_text:
            return []

        # re.split with one capture group yields
        # [preamble, idx1, body1, idx2, body2, ...].
        finding_sections = re.split(self.FINDING_PATTERN, markdown_text)
        enhanced = []

        for i in range(1, len(finding_sections), 2):
            try:
                finding_idx = int(finding_sections[i]) - 1
                section = finding_sections[i + 1]

                if finding_idx < 0 or finding_idx >= len(original_findings):
                    continue

                original = original_findings[finding_idx]

                # Fall back to the scanner's description when the model gave none.
                detailed_description = (
                    self._extract("detailed_description", section) or original.description
                )

                enhanced.append(AzureEnhancedFinding(
                    id=original.id,
                    scanner=original.scanner,
                    severity=original.severity,
                    title=original.title,
                    description=original.description,
                    file_path=original.file_path,
                    detailed_description=detailed_description,
                    attack_scenario=self._extract("attack_scenario", section),
                    business_impact=self._extract("business_impact", section),
                    exploitability="unknown",
                    code_snippet=original.code_snippet,
                    code_snippet_before=original.code_snippet,
                    code_snippet_after=self._extract("code_after", section),
                    line_start=original.line_start,
                    line_end=original.line_end,
                    remediation_code=self._extract("remediation_code", section),
                    implementation_steps=self._extract_list("Implementation Steps", section),
                    test_cases=self._extract_list_items("test_cases", section),
                    rollback_procedure=self._extract("rollback_procedure", section),
                    azure_recommendation=self._extract("azure_recommendation", section),
                    azure_security_benchmark_pillar=self._extract("azure_benchmark_pillar", section),
                    azure_doc_links=self._extract_links(section),
                    cwe_ids=original.cwe_ids,
                    owasp_category=original.owasp_category,
                    false_positive_confidence=self._extract_fp_confidence(section),
                    is_false_positive=self._extract_fp_status(section),
                    fp_explanation=self._extract_fp_explanation(section),
                ))
            except Exception as e:
                logger.error("Error parsing Azure finding section: %s", e)

        return enhanced

    def _extract(self, field: str, section: str) -> str:
        """Return the stripped capture for ``field``, or "" if it is absent."""
        pattern = self.SECTION_PATTERNS[field]
        match = re.search(pattern, section, re.DOTALL | re.IGNORECASE)
        return match.group(1).strip() if match else ""

    def _extract_list(self, title: str, section: str) -> List[str]:
        """Return the numbered-list items under the ``#### {title}`` heading."""
        pattern = fr"#### {title}\s+(.+?)(?=####|$)"
        match = re.search(pattern, section, re.DOTALL)
        if not match:
            return []
        return re.findall(r"^\d+\.\s+(.+)$", match.group(1), re.MULTILINE)

    def _extract_list_items(self, field: str, section: str) -> List[str]:
        """Return bullet or numbered items found in the ``field`` subsection."""
        content = self._extract(field, section)
        if not content:
            return []
        return re.findall(r"(?:-|\*|\d+\.)\s+(.+)$", content, re.MULTILINE)

    def _extract_links(self, section: str) -> List[str]:
        """Return every learn.microsoft.com URL that appears in ``section``."""
        return re.findall(r'http[s]?://learn\.microsoft\.com/[^\s\)]+', section)

    def _extract_fp_confidence(self, section: str) -> float:
        """Return the numeric value after ``**Confidence**:``, or 0.0.

        Only a well-formed decimal is captured, so trailing punctuation such
        as "0.9." cannot make ``float`` raise and discard the whole finding.
        """
        match = re.search(r'\*\*Confidence\*\*:\s*(\d+(?:\.\d+)?|\.\d+)', section)
        return float(match.group(1)) if match else 0.0

    def _extract_fp_status(self, section: str) -> bool:
        """Return True only when ``**Is False Positive**:`` is followed by "true"."""
        match = re.search(r'\*\*Is False Positive\*\*:\s*(true|false)', section, re.I)
        return match.group(1).lower() == 'true' if match else False

    def _extract_fp_explanation(self, section: str) -> str:
        """Return the rest of the line after ``**Explanation**:``, or ""."""
        match = re.search(r'\*\*Explanation\*\*:\s*(.+)', section)
        return match.group(1).strip() if match else ""
|
|
352
|
+
|
|
353
|
+
# --- 4. Wrapper ---
|
|
354
|
+
class AzureCLIWrapper(BaseAIProvider):
    """Azure AI CLI Provider Implementation.

    Combines the Azure prompt builder, CLI executor and markdown parser.
    Availability is determined once, at construction, by whether ``az`` is
    found on PATH.
    """

    PROVIDER_TYPE = AIProviderType.AZURE

    def __init__(self):
        self.prompt_builder = AzurePromptBuilder()
        self.executor = AzureCLIExecutor()
        self.parser = AzureMarkdownParser()
        self._is_available = shutil.which("az") is not None

    @property
    def is_available(self) -> bool:
        """True when ``az`` was found on PATH at construction time."""
        return self._is_available

    async def check_availability(self) -> tuple[AIProviderStatus, str]:
        """Run ``az --version`` to confirm the CLI is installed and working.

        The executable is resolved with ``shutil.which`` and, on Windows,
        a ``.cmd`` launcher is run through ``cmd.exe /c`` - the same handling
        the executor uses - because a bare ``"az"`` cannot be executed
        directly when the CLI is installed as ``az.cmd``.
        """
        not_found = (
            AIProviderStatus.UNAVAILABLE,
            "Azure CLI not found (install from https://aka.ms/installazurecliwindows)",
        )
        if not self._is_available:
            return not_found

        az_path = shutil.which("az")
        if not az_path:
            # The CLI was removed from PATH after this wrapper was created.
            return not_found

        cmd = [az_path, "--version"]
        if os.name == 'nt' and az_path.lower().endswith('.cmd'):
            cmd = ["cmd.exe", "/c", *cmd]

        # Check for AI extension or similar
        try:
            process = await asyncio.create_subprocess_exec(
                *cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE
            )
            # Output is drained but not inspected; only the exit code matters.
            await process.communicate()
            if process.returncode != 0:
                return AIProviderStatus.UNAVAILABLE, "Azure CLI is installed but not functioning correctly"
        except Exception as e:
            return AIProviderStatus.UNAVAILABLE, f"Azure CLI check failed: {str(e)}"

        return AIProviderStatus.AVAILABLE, "Ready"

    async def analyze_findings_batch(
        self,
        findings: List[Finding],
        code_contexts: Dict[str, str],
        batch_size: int = 5,
        app_context: Optional[Dict[str, Any]] = None
    ) -> List[AzureEnhancedFinding]:
        """Build one prompt for ``findings``, execute it and parse the reply.

        ``code_contexts`` and ``batch_size`` are accepted for interface
        compatibility but are not used by this implementation. Returns an
        empty list when ``findings`` is empty; executor or parser errors are
        logged and re-raised.
        """
        if not findings:
            return []

        prompt = self.prompt_builder.build_batch_prompt(findings, app_context=app_context)

        try:
            markdown_response = await self.executor.execute(prompt)
            return self.parser.parse(markdown_response, findings)
        except Exception:
            logger.exception("Azure analysis failed for batch")
            # Bare raise keeps the original traceback intact.
            raise

    def enhance_finding(self, finding: Finding, analysis: Any) -> AzureEnhancedFinding:
        """Placeholder required by the base class; currently returns ``None``."""
        pass

    async def generate_stride_analysis(self, repo_path: Any, category: str) -> Dict[str, Any]:
        """STRIDE analysis is not implemented; returns an error marker dict."""
        return {"error": "Not implemented"}

    async def test_connection(self) -> tuple[AIProviderStatus, str]:
        """Perform a minimal AI request to verify connectivity and quota."""
        try:
            # Minimal prompt to check connectivity and quota
            await self.executor.execute("hi")
            return AIProviderStatus.AVAILABLE, "Ready"
        except RuntimeError as e:
            err_msg = str(e)
            if any(kw in err_msg.lower() for kw in ["429", "quota", "rate limit", "exhausted"]):
                return AIProviderStatus.ERROR, "AI Analysis Failed: Quota Exceeded (429)"
            return AIProviderStatus.ERROR, f"AI Connection Failed: {err_msg}"
        except Exception as e:
            return AIProviderStatus.ERROR, f"AI Connection Failed: {str(e)}"
|
|
426
|
+
|
|
427
|
+
def get_azure_wrapper() -> AzureCLIWrapper:
    """Factory: build and return a fresh ``AzureCLIWrapper``."""
    wrapper = AzureCLIWrapper()
    return wrapper
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
from typing import List, Any, Optional, AsyncIterator, Dict
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from enum import Enum
|
|
5
|
+
|
|
6
|
+
from codesecure.common.models import Finding, EnhancedFinding
|
|
7
|
+
from codesecure.common.logging import get_logger
|
|
8
|
+
|
|
9
|
+
logger = get_logger(__name__)
|
|
10
|
+
|
|
11
|
+
class AIProviderType(str, Enum):
    """Identifiers for the supported AI provider backends.

    Members are also ``str`` instances, so they compare equal to their
    lowercase string values.
    """

    GOOGLE = "google"
    AWS = "aws"
    AZURE = "azure"
    OPENAI = "openai"
|
|
16
|
+
|
|
17
|
+
class AIProviderStatus(str, Enum):
    """Outcome of a provider availability or connectivity check.

    Members are also ``str`` instances, so they compare equal to their
    lowercase string values.
    """

    AVAILABLE = "available"
    UNAVAILABLE = "unavailable"
    ERROR = "error"
|
|
21
|
+
|
|
22
|
+
class PromptBuilder(ABC):
    """Interface for objects that turn a batch of findings into an AI prompt."""

    @abstractmethod
    def build_batch_prompt(self, findings: List[Finding], app_context: Optional[Dict[str, Any]] = None) -> str:
        """Return the prompt text for ``findings``.

        ``app_context`` is an optional dict of application details that
        implementations may use when building the prompt.
        """
|
|
27
|
+
|
|
28
|
+
class CLIExecutor(ABC):
    """Interface for components that obtain AI output by running a CLI tool."""

    @abstractmethod
    async def execute(self, prompt: str) -> str:
        """Run the CLI with ``prompt`` and return the text it produced."""

    @staticmethod
    def clean_output(text: str) -> str:
        """Return ``text`` with ANSI escape sequences and SGR codes stripped."""
        import re

        # ESC followed by a single control character, or a CSI sequence
        # ("[" + parameter bytes + intermediate bytes + final byte).
        return re.sub(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])', '', text)
|
|
41
|
+
|
|
42
|
+
class MarkdownParser(ABC):
    """Interface for objects that convert a markdown AI reply into findings."""

    @abstractmethod
    def parse(self, markdown: str, original_findings: List[Finding]) -> List[EnhancedFinding]:
        """Return enhanced findings built from ``markdown`` and ``original_findings``."""
|
|
47
|
+
|
|
48
|
+
@dataclass
class AIAnalysisResult:
    """Container for the raw output of one AI analysis call."""

    # Text returned by the AI.
    content: str
    # Additional key/value data attached to the result.
    metadata: Dict[str, Any]
|
|
53
|
+
|
|
54
|
+
class BaseAIProvider(ABC):
    """Abstract contract that every AI provider integration implements."""

    # Concrete providers set this to the backend they represent.
    PROVIDER_TYPE: AIProviderType

    @property
    @abstractmethod
    def is_available(self) -> bool:
        """Whether the provider can be used."""

    @abstractmethod
    async def check_availability(self) -> tuple[AIProviderStatus, str]:
        """Return a ``(status, message)`` pair describing availability."""

    @abstractmethod
    async def analyze_findings_batch(
        self,
        findings: List[Finding],
        code_contexts: Dict[str, str],
        batch_size: int = 5,
        app_context: Optional[Dict[str, Any]] = None
    ) -> List[Any]:  # Returns raw analysis objects/dicts
        """Analyze ``findings`` and return one analysis result per finding handled."""

    @abstractmethod
    def enhance_finding(self, finding: Finding, analysis: Any) -> EnhancedFinding:
        """Combine ``finding`` with ``analysis`` into an ``EnhancedFinding``."""

    @abstractmethod
    async def generate_stride_analysis(self, repo_path: Any, category: str) -> Dict[str, Any]:
        """Return a STRIDE analysis dict for ``category`` of the repo at ``repo_path``."""

    @abstractmethod
    async def test_connection(self) -> tuple[AIProviderStatus, str]:
        """Perform a minimal AI request to verify connectivity and quota."""
|