codesecure-core 1.0.0b10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. codesecure/__init__.py +19 -0
  2. codesecure/ai_providers/__init__.py +0 -0
  3. codesecure/ai_providers/anthropic_provider.py +144 -0
  4. codesecure/ai_providers/azure_cli.py +428 -0
  5. codesecure/ai_providers/base.py +89 -0
  6. codesecure/ai_providers/google_cli.py +195 -0
  7. codesecure/ai_providers/guides.py +51 -0
  8. codesecure/ai_providers/kiro_cli.py +421 -0
  9. codesecure/ai_providers/manager.py +525 -0
  10. codesecure/ai_providers/openai_provider.py +143 -0
  11. codesecure/ai_providers/prompts.py +276 -0
  12. codesecure/common/__init__.py +64 -0
  13. codesecure/common/cloud_provider.py +208 -0
  14. codesecure/common/config.py +78 -0
  15. codesecure/common/config_manager.py +80 -0
  16. codesecure/common/logging.py +205 -0
  17. codesecure/common/models.py +247 -0
  18. codesecure/common/performance.py +111 -0
  19. codesecure/jobs/__init__.py +18 -0
  20. codesecure/jobs/manager.py +421 -0
  21. codesecure/reports/__init__.py +32 -0
  22. codesecure/reports/generator.py +202 -0
  23. codesecure/reports/html.py +766 -0
  24. codesecure/reports/json.py +148 -0
  25. codesecure/reports/markdown.py +293 -0
  26. codesecure/reports/sarif.py +229 -0
  27. codesecure/scanners/__init__.py +61 -0
  28. codesecure/scanners/bandit.py +165 -0
  29. codesecure/scanners/base.py +344 -0
  30. codesecure/scanners/checkov.py +198 -0
  31. codesecure/scanners/data_merger.py +270 -0
  32. codesecure/scanners/deduplication_engine.py +282 -0
  33. codesecure/scanners/detect_secrets.py +109 -0
  34. codesecure/scanners/engine.py +351 -0
  35. codesecure/scanners/framework_mapper.py +158 -0
  36. codesecure/scanners/grype.py +124 -0
  37. codesecure/scanners/npm_audit.py +134 -0
  38. codesecure/scanners/pip_audit.py +149 -0
  39. codesecure/scanners/pip_licenses.py +130 -0
  40. codesecure/scanners/semgrep.py +123 -0
  41. codesecure/scanners/syft.py +111 -0
  42. codesecure/security/__init__.py +45 -0
  43. codesecure/security/output_sanitizer.py +202 -0
  44. codesecure/security/security_validator.py +300 -0
  45. codesecure/security/subprocess_runner.py +366 -0
  46. codesecure/security/validators.py +300 -0
  47. codesecure/security_matrix/__init__.py +0 -0
  48. codesecure/security_matrix/file_filter.py +0 -0
  49. codesecure/security_matrix/neolifter.py +0 -0
  50. codesecure/security_validate/__init__.py +0 -0
  51. codesecure/security_validate/cdk.py +0 -0
  52. codesecure/security_validate/cloudformation.py +0 -0
  53. codesecure/security_validate/severity.py +0 -0
  54. codesecure/security_validate/terraform.py +0 -0
  55. codesecure/telemetry/__init__.py +3 -0
  56. codesecure/telemetry/client.py +90 -0
  57. codesecure/threat_model/__init__.py +0 -0
  58. codesecure/threat_model/action_plan.py +0 -0
  59. codesecure/threat_model/stride_engine.py +0 -0
  60. codesecure_core-1.0.0b10.dist-info/METADATA +101 -0
  61. codesecure_core-1.0.0b10.dist-info/RECORD +64 -0
  62. codesecure_core-1.0.0b10.dist-info/WHEEL +5 -0
  63. codesecure_core-1.0.0b10.dist-info/licenses/LICENSE +21 -0
  64. codesecure_core-1.0.0b10.dist-info/top_level.txt +1 -0
codesecure/__init__.py ADDED
@@ -0,0 +1,19 @@
1
"""
CodeSecure - Enterprise Security Analysis Platform.
"""

try:
    from importlib.metadata import version, PackageNotFoundError
except ImportError:
    # Fallback for Python < 3.8
    try:
        from importlib_metadata import version, PackageNotFoundError
    except ImportError:
        def version(_distribution_name):
            """Stand-in used when no metadata backend can be imported."""
            return "unknown"

        PackageNotFoundError = Exception

try:
    __version__ = version("codesecure-core")
except (PackageNotFoundError, NameError):
    # Fallback for development where the package is not installed.
    # Keep this in sync with the released distribution version.
    __version__ = "1.0.0b10"
File without changes
@@ -0,0 +1,144 @@
1
+ import asyncio
2
+ from typing import List, Dict, Any, Optional
3
+
4
+ from codesecure.common.models import Finding, EnhancedFinding
5
+ from codesecure.common.logging import get_logger
6
+ from codesecure.ai_providers.base import (
7
+ BaseAIProvider,
8
+ AIProviderType,
9
+ AIProviderStatus,
10
+ )
11
+ from codesecure.ai_providers.prompts import GenericPromptBuilder, GenericMarkdownParser
12
+ from codesecure.common.config import get_env
13
+
14
+ logger = get_logger(__name__)
15
+
16
+
17
class AnthropicWrapper(BaseAIProvider):
    """Anthropic Provider Implementation.

    Sends batches of findings to the Anthropic Messages API through the
    optional ``anthropic`` SDK. The SDK is imported lazily inside the methods
    that need it, so this class can be instantiated without it installed.
    """

    PROVIDER_TYPE = AIProviderType.AWS  # Currently aliased/planned for AWS Bedrock or direct Anthropic

    def __init__(self):
        self.prompt_builder = GenericPromptBuilder()
        self.parser = GenericMarkdownParser()
        # Both values come from get_env with the defaults shown here.
        self._api_key = get_env("CODESECURE_ANTHROPIC_API_KEY", "")
        self._model = get_env("CODESECURE_ANTHROPIC_MODEL", "claude-3-5-sonnet-20241022")

    def _get_client(self):
        """Return a new ``anthropic.AsyncAnthropic`` client for the current key.

        Raises:
            RuntimeError: If the ``anthropic`` SDK cannot be imported. The
                original ImportError is attached as the cause.
        """
        try:
            import anthropic
        except ImportError as exc:
            raise RuntimeError("anthropic SDK not installed") from exc
        return anthropic.AsyncAnthropic(api_key=self._api_key)

    def set_api_key(self, api_key: str):
        """Replace the API key used for subsequently created clients."""
        self._api_key = api_key

    def set_model(self, model: str):
        """Replace the model name used for subsequent requests."""
        self._model = model

    @property
    def is_available(self) -> bool:
        """True when the SDK is importable and a non-empty API key is set."""
        try:
            import anthropic  # noqa: F401 - imported only to probe availability
        except ImportError:
            return False
        return bool(self._api_key)

    async def check_availability(self) -> tuple[AIProviderStatus, str]:
        """Report whether the SDK and API key are present, without any network call."""
        try:
            import anthropic  # noqa: F401 - imported only to probe availability
        except ImportError:
            return AIProviderStatus.UNAVAILABLE, "Anthropic SDK not found (install with 'pip install anthropic')"

        if not self._api_key:
            return AIProviderStatus.UNAVAILABLE, "CODESECURE_ANTHROPIC_API_KEY is not set"

        return AIProviderStatus.AVAILABLE, "Ready"

    def _classify_error(self, err_msg: str, status_code: Optional[int] = None) -> str:
        """Map an error message / HTTP status to ``"auth"``, ``"rate_limit"`` or ``"other"``."""
        err_lower = err_msg.lower()
        if status_code in (401, 403) or any(w in err_lower for w in ("invalid x-api-key", "authentication")):
            return "auth"
        if status_code == 429 or any(w in err_lower for w in ("rate_limit", "too many requests")):
            return "rate_limit"
        return "other"

    async def test_connection(self) -> tuple[AIProviderStatus, str]:
        """Validate the key and model with a tiny request.

        Returns the result of ``check_availability`` unchanged when the
        provider is not available; otherwise issues a 5-token request and
        converts any failure into an ``ERROR`` status with a description.
        """
        status, msg = await self.check_availability()
        if status != AIProviderStatus.AVAILABLE:
            return status, msg

        try:
            client = self._get_client()
            logger.info("Validating Anthropic API key with model %s...", self._model)
            await client.messages.create(
                model=self._model,
                messages=[{"role": "user", "content": "Reply with 'ok'"}],
                max_tokens=5,
            )
            return AIProviderStatus.AVAILABLE, "Ready"
        except Exception as e:
            err_type = self._classify_error(str(e), getattr(e, "status_code", None))
            if err_type == "auth":
                return AIProviderStatus.ERROR, f"Anthropic Auth Error: {e}"
            if err_type == "rate_limit":
                return AIProviderStatus.ERROR, f"Anthropic Rate Limit Error: {e}"
            return AIProviderStatus.ERROR, f"Anthropic Connection Failed: {str(e)}"

    async def analyze_findings_batch(
        self,
        findings: List[Finding],
        code_contexts: Dict[str, str],
        batch_size: int = 10,
        app_context: Optional[Dict[str, Any]] = None,
        is_workspace_scan: bool = False
    ) -> List[EnhancedFinding]:
        """Send ``findings`` to Anthropic in one request and parse the reply.

        ``code_contexts`` and ``batch_size`` are accepted for interface
        compatibility but are not used by this method.

        Returns:
            The parser's output for the response text, or ``[]`` when
            ``findings`` is empty.

        Raises:
            RuntimeError: If the SDK is missing, or if the failure is
                classified as a rate limit (original error chained as cause).
            Exception: Any other error from the request or parser is logged
                and re-raised unchanged.
        """
        if not findings:
            return []

        client = self._get_client()
        prompt = self.prompt_builder.build_batch_prompt(findings, app_context=app_context, is_workspace_scan=is_workspace_scan)

        system_prompt = (
            "You are CodeSecure, an expert AI security assistant. "
            "Analyze security findings and provide remediation in Markdown format. "
            "Follow the ZERO-CHATTER RULE: remediation code blocks must contain "
            "ONLY pure source code, no explanations."
        )

        try:
            logger.info("Sending batch of %d findings to Anthropic (%s)...", len(findings), self._model)

            response = await client.messages.create(
                model=self._model,
                system=system_prompt,
                messages=[
                    {"role": "user", "content": prompt}
                ],
                temperature=0.2,
                max_tokens=4096,
            )

            # Only the first content block is read; an empty list yields "".
            text = response.content[0].text if response.content else ""
            logger.debug("Received AI response length: %d characters", len(text))

            return self.parser.parse(text, findings)

        except Exception as e:
            err_type = self._classify_error(str(e), getattr(e, "status_code", None))
            if err_type == "rate_limit":
                # Chain the SDK error so its traceback is preserved.
                raise RuntimeError(f"Anthropic Rate Limit Exceeded: {e}") from e
            logger.exception("Anthropic Analysis failed: %s", e)
            raise

    def enhance_finding(self, finding: Finding, analysis: Any) -> EnhancedFinding:
        """Placeholder: currently does nothing and returns ``None``."""
        pass

    async def generate_stride_analysis(self, repo_path: Any, category: str) -> Dict[str, Any]:
        """Placeholder: always returns ``{"error": "Not implemented"}``."""
        return {"error": "Not implemented"}
142
+
143
def get_anthropic_wrapper() -> AnthropicWrapper:
    """Create and return a fresh ``AnthropicWrapper`` instance."""
    wrapper = AnthropicWrapper()
    return wrapper
@@ -0,0 +1,428 @@
1
+ import asyncio
2
+ import re
3
+ import shutil
4
+ import os
5
+ from typing import List, Dict, Any, Optional, AsyncIterator
6
+ from pathlib import Path
7
+
8
+ from codesecure.common.models import Finding, AzureEnhancedFinding
9
+ from codesecure.common.logging import get_logger
10
+ from codesecure.ai_providers.base import (
11
+ BaseAIProvider,
12
+ AIProviderType,
13
+ AIProviderStatus,
14
+ PromptBuilder,
15
+ CLIExecutor,
16
+ MarkdownParser
17
+ )
18
+ from codesecure.common.config import AI_BATCH_TIMEOUT
19
+
20
+ from codesecure.ai_providers.guides import get_guide
21
+
22
+ logger = get_logger(__name__)
23
+
24
+ # --- 1. Prompt Builder ---
25
class AzurePromptBuilder(PromptBuilder):
    """Assemble the Markdown prompt sent to Azure for a batch of findings."""

    SYSTEM_CONTEXT_TEMPLATE = """You are an Azure security expert.

**Application context:**
- **Application name:** {app_name}
- **Cloud services detected:** {cloud_services}

Analyze the following security findings and provide enhanced details in Markdown format.
Strictly adhere to the **Critical Requirements** and the specific cloud guide below.

{enhancement_guide}

For each finding, provide the analysis in the exact structure specified. Failure to follow these requirements will result in invalid analysis."""

    def build_batch_prompt(self, findings: List[Finding], app_context: Optional[Dict[str, Any]] = None) -> str:
        """Return the full prompt: system context, numbered findings, response format."""
        context = app_context or {}
        application = context.get("name", "Unknown Application")
        service_list = ", ".join(context.get("services", ["Azure Core Services"]))

        # The guide is chosen from the app context, defaulting to azure.
        provider_key = context.get("cloud_provider", "azure")

        header = self.SYSTEM_CONTEXT_TEMPLATE.format(
            app_name=application,
            cloud_services=service_list,
            enhancement_guide=get_guide(provider_key),
        )

        # Collect every piece in order and join once at the end.
        pieces = [f"{header}\n\n", "## Security Findings to Analyze\n\n"]
        for number, item in enumerate(findings, 1):
            pieces.append(f"""### Finding {number}
- **Scanner**: {item.scanner}
- **Severity**: {item.severity}
- **Title**: {item.title}
- **File**: {item.file_path}
- **Lines**: {item.line_start}-{item.line_end}
- **Code**:
```
{item.code_snippet}
```

""")

        pieces.append(self._get_response_template())
        return "".join(pieces)

    def _get_response_template(self) -> str:
        """Return the fixed Markdown skeleton the model is asked to fill per finding."""
        template = """
## Required Response Format (for each finding)

### Finding N Analysis

#### Detailed Description
[Comprehensive explanation of the vulnerability]

#### Attack Scenario
[Step-by-step attack scenario showing how this could be exploited]

#### Business Impact
[Impact on business operations, data, reputation with realistic numeric estimates]

#### False Positive Analysis
- **Is False Positive**: [true|false]
- **Confidence**: [0.0-1.0]
- **Explanation**: [Required]

#### Code Snippets
- **Before**:
```
[Original vulnerable code snippet]
```
Lines: [start]-[end]

- **After**:
```
[Fixed code snippet showing remediation]
```

#### Azure Recommendation
[Azure-specific security recommendation aligned with Azure Security Benchmark]

#### Azure Security Benchmark Pillar
[Network Security|Identity Management|Privileged Access|Data Protection|Asset Management|Logging and Threat Detection|Incident Response|Posture and Vulnerability Management|Endpoint Security|Backup and Recovery]

#### Implementation Steps
1. [Step 1]
2. [Step 2]
3. [Step 3]

#### Verification Test Cases
- **Test Case 1**: [Description] -> **Expected Output**: [Output]

#### Rollback Procedure
[Steps to rollback if remediation causes issues]

#### Azure Documentation
- [Link 1](https://learn.microsoft.com/azure/...)
- [Link 2](https://learn.microsoft.com/azure/...)
"""
        return template
130
+
131
+ # --- 2. CLI Executor ---
132
class AzureCLIExecutor(CLIExecutor):
    """Execute Azure OpenAI via 'az rest' - no extensions required.

    The request body is handed to the CLI through a temporary file
    (``--body @<file>``) rather than inline on the command line. The prompt
    embeds code from the scanned project, and on Windows the CLI is launched
    through ``cmd.exe /c``; keeping the body out of the argument list avoids
    shell metacharacter interpretation, argument-length limits and exposure
    of the prompt in process listings.
    """

    COMMAND = "az"
    TIMEOUT = AI_BATCH_TIMEOUT
    # Azure OpenAI API version
    API_VERSION = "2024-08-01-preview"

    def _get_azure_openai_config(self) -> tuple:
        """Get Azure OpenAI endpoint and deployment from environment.

        Returns:
            ``(endpoint, deployment)`` where ``endpoint`` has no trailing
            slash and ``deployment`` defaults to ``"gpt-4o"``.

        Raises:
            RuntimeError: If ``AZURE_OPENAI_ENDPOINT`` is unset or empty.
        """
        endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT", "")
        deployment = os.environ.get("AZURE_OPENAI_DEPLOYMENT", "gpt-4o")

        if not endpoint:
            raise RuntimeError(
                "AZURE_OPENAI_ENDPOINT environment variable not set. "
                "Set it to your Azure OpenAI resource endpoint, e.g.: "
                "https://<your-resource>.openai.azure.com"
            )

        # Ensure endpoint doesn't have trailing slash
        return endpoint.rstrip("/"), deployment

    @staticmethod
    def _build_command(full_path: str, url: str, body_arg: str) -> List[str]:
        """Return the argv for ``az rest``, wrapped in ``cmd.exe /c`` for a Windows ``.cmd`` shim."""
        argv = [
            full_path,
            "rest",
            "--method", "POST",
            "--url", url,
            "--body", body_arg,
            "--resource", "https://cognitiveservices.azure.com",
        ]
        # SEC-006: shell=False enforced; a .cmd shim still has to be run by cmd.exe.
        if os.name == 'nt' and full_path.lower().endswith('.cmd'):
            return ["cmd.exe", "/c", *argv]
        return argv

    async def execute(self, prompt: str) -> str:
        """Execute Azure OpenAI via 'az rest' and return markdown response.

        Raises:
            FileNotFoundError: If the ``az`` executable is not on PATH.
            RuntimeError: On missing configuration, non-zero CLI exit
                (including quota errors), timeout, or an empty response.
        """
        import json as _json
        import tempfile

        full_path = shutil.which(self.COMMAND)
        if not full_path:
            raise FileNotFoundError(f"CLI command '{self.COMMAND}' not found.")

        endpoint, deployment = self._get_azure_openai_config()

        # Build the Azure OpenAI Chat Completions REST URL
        url = (
            f"{endpoint}/openai/deployments/{deployment}"
            f"/chat/completions?api-version={self.API_VERSION}"
        )

        # Build the request body
        body = _json.dumps({
            "messages": [
                {"role": "system", "content": "You are an Azure security expert. Respond in Markdown format."},
                {"role": "user", "content": prompt}
            ],
            "temperature": 0.3,
            "max_tokens": 4096
        })

        # delete=False so the file can be closed before 'az' opens it
        # (required on Windows); it is removed in the finally block below.
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".json", delete=False, encoding="utf-8"
        ) as body_file:
            body_file.write(body)
            body_path = body_file.name

        try:
            # Use 'az rest' which handles authentication automatically
            return await self._run(self._build_command(full_path, url, f"@{body_path}"))
        finally:
            try:
                os.unlink(body_path)
            except OSError:
                logger.debug("Could not remove temporary request body %s", body_path)

    async def _run(self, cmd: List[str]) -> str:
        """Run ``cmd``, enforce ``TIMEOUT`` and extract the assistant message text."""
        import json as _json

        process = await asyncio.create_subprocess_exec(
            *cmd,
            stdin=asyncio.subprocess.DEVNULL,  # nothing is ever written to the child
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            env=os.environ.copy(),
        )

        try:
            stdout, stderr = await asyncio.wait_for(
                process.communicate(),
                timeout=self.TIMEOUT
            )

            if process.returncode != 0:
                # CLI output may not be valid UTF-8 on every platform.
                error_msg = stderr.decode(errors="replace").strip()
                if any(kw in error_msg.lower() for kw in ["429", "quota", "rate limit"]):
                    raise RuntimeError(f"Azure AI Quota Exceeded: {error_msg}")

                logger.error("Azure CLI failed (Exit %d): %s", process.returncode, error_msg)
                raise RuntimeError(f"Azure CLI Error: {error_msg}")

            # Parse the JSON response and extract the assistant's message content
            response_data = _json.loads(stdout.decode())
            choices = response_data.get("choices", [])
            if not choices:
                raise RuntimeError("Azure OpenAI returned empty response")

            # 'content' may be present but null; treat that as empty text.
            content = choices[0].get("message", {}).get("content") or ""
            return self.clean_output(content)

        except asyncio.TimeoutError:
            logger.error("Azure CLI timed out after %d seconds", self.TIMEOUT)
            try:
                process.kill()
            except ProcessLookupError:
                # The process exited between the timeout and the kill.
                pass
            # Reap the child so it does not linger as a zombie.
            await process.wait()
            raise RuntimeError(f"Azure CLI Timed Out ({self.TIMEOUT}s)")
        except RuntimeError:
            raise
        except Exception as e:
            logger.exception("Azure CLI execution failed: %s", e)
            raise
248
+
249
+ # --- 3. Markdown Parser ---
250
class AzureMarkdownParser(MarkdownParser):
    """Parse Azure CLI markdown response into AzureEnhancedFindings.

    The response is split on ``### Finding N`` headings; each resulting
    section is mined for ``####`` sub-sections with the patterns below and
    merged with the original finding at index ``N - 1``.
    """

    FINDING_PATTERN = r"### \s*(?:Analysis\s+for\s+)?Finding\s*(\d+)(?::)?(?:\s*Analysis)?.*?(?:\n|$)"
    SECTION_PATTERNS = {
        "detailed_description": r"#### (?:Detailed\s+)?Description(?::)?\s*(.+?)(?=####|$)",
        "attack_scenario": r"#### Attack Scenario(?::)?\s*(.+?)(?=####|$)",
        "business_impact": r"#### Business Impact(?::)?\s*(.+?)(?=####|$)",
        "remediation_code": r"#### (?:Remediation Code|Code Snippets).*?After\*\*:(?::)?\s*```(?:\w+)?[\s\n]+(.*?)```",
        "code_after": r"#### (?:Remediation Code|Code Snippets).*?After\*\*:(?::)?\s*```(?:\w+)?[\s\n]+(.*?)```",
        "azure_recommendation": r"#### Azure Recommendation(?::)?\s*(.+?)(?=####|$)",
        "azure_benchmark_pillar": r"#### Azure Security Benchmark Pillar(?::)?\s*(.+?)(?=####|$)",
        "rollback_procedure": r"#### Rollback Procedure(?::)?\s*(.+?)(?=####|$)",
        "test_cases": r"#### Verification Test Cases(?::)?\s*(.+?)(?=####|$)",
    }

    def parse(self, markdown_text: str, original_findings: List[Finding]) -> List[AzureEnhancedFinding]:
        """Parse markdown response and merge with original findings.

        Sections whose number is outside ``1..len(original_findings)`` are
        skipped. A section that raises while being parsed is logged and
        skipped so the remaining sections are still returned.
        """
        if not markdown_text:
            return []

        # With one capture group, re.split yields
        # [preamble, number, body, number, body, ...].
        finding_sections = re.split(self.FINDING_PATTERN, markdown_text)
        enhanced = []

        for number, section in zip(finding_sections[1::2], finding_sections[2::2]):
            try:
                finding_idx = int(number) - 1

                if finding_idx < 0 or finding_idx >= len(original_findings):
                    continue

                original = original_findings[finding_idx]

                # Fall back to the scanner's description when the model gave none.
                detailed_description = self._extract("detailed_description", section)
                if not detailed_description:
                    detailed_description = original.description

                enhanced.append(AzureEnhancedFinding(
                    id=original.id,
                    scanner=original.scanner,
                    severity=original.severity,
                    title=original.title,
                    description=original.description,
                    file_path=original.file_path,
                    detailed_description=detailed_description,
                    attack_scenario=self._extract("attack_scenario", section),
                    business_impact=self._extract("business_impact", section),
                    exploitability="unknown",
                    code_snippet=original.code_snippet,
                    code_snippet_before=original.code_snippet,
                    code_snippet_after=self._extract("code_after", section),
                    line_start=original.line_start,
                    line_end=original.line_end,
                    remediation_code=self._extract("remediation_code", section),
                    implementation_steps=self._extract_list("Implementation Steps", section),
                    test_cases=self._extract_list_items("test_cases", section),
                    rollback_procedure=self._extract("rollback_procedure", section),
                    azure_recommendation=self._extract("azure_recommendation", section),
                    azure_security_benchmark_pillar=self._extract("azure_benchmark_pillar", section),
                    azure_doc_links=self._extract_links(section),
                    cwe_ids=original.cwe_ids,
                    owasp_category=original.owasp_category,
                    false_positive_confidence=self._extract_fp_confidence(section),
                    is_false_positive=self._extract_fp_status(section),
                    fp_explanation=self._extract_fp_explanation(section),
                ))
            except Exception as e:
                logger.error("Error parsing Azure finding section: %s", e)

        return enhanced

    def _extract(self, field: str, section: str) -> str:
        """Return the stripped text captured by ``SECTION_PATTERNS[field]``, or ``""``."""
        pattern = self.SECTION_PATTERNS[field]
        match = re.search(pattern, section, re.DOTALL | re.IGNORECASE)
        return match.group(1).strip() if match else ""

    def _extract_list(self, title: str, section: str) -> List[str]:
        """Return the numbered items under the ``#### <title>`` heading.

        The heading may carry a trailing colon and any letter case, matching
        what the ``SECTION_PATTERNS`` entries accept.
        """
        pattern = fr"#### {re.escape(title)}(?::)?\s+(.+?)(?=####|$)"
        match = re.search(pattern, section, re.DOTALL | re.IGNORECASE)
        if not match:
            return []
        items = re.findall(r"^[ \t]*\d+\.\s+(.+)$", match.group(1), re.MULTILINE)
        return [item.strip() for item in items]

    def _extract_list_items(self, field: str, section: str) -> List[str]:
        """Return bullet or numbered items from the section named by ``field``.

        Only markers at the start of a line count, so a hyphen or asterisk
        in the middle of a sentence is not mistaken for a list item.
        """
        content = self._extract(field, section)
        if not content:
            return []
        items = re.findall(r"^[ \t]*(?:-|\*|\d+\.)\s+(.+)$", content, re.MULTILINE)
        return [item.strip() for item in items]

    def _extract_links(self, section: str) -> List[str]:
        """Return every learn.microsoft.com URL found in ``section``."""
        return re.findall(r'http[s]?://learn\.microsoft\.com/[^\s\)]+', section)

    def _extract_fp_confidence(self, section: str) -> float:
        """Return the number after ``**Confidence**:``, or ``0.0`` if absent.

        The pattern accepts only a well-formed decimal, so trailing
        punctuation such as ``0.9.`` no longer makes ``float()`` raise.
        """
        match = re.search(r'\*\*Confidence\*\*:\s*(\d+(?:\.\d+)?|\.\d+)', section)
        return float(match.group(1)) if match else 0.0

    def _extract_fp_status(self, section: str) -> bool:
        """Return True only when ``**Is False Positive**:`` is followed by ``true``."""
        match = re.search(r'\*\*Is False Positive\*\*:\s*(true|false)', section, re.I)
        return match.group(1).lower() == 'true' if match else False

    def _extract_fp_explanation(self, section: str) -> str:
        """Return the rest of the line after ``**Explanation**:``, or ``""``."""
        match = re.search(r'\*\*Explanation\*\*:\s*(.+)', section)
        return match.group(1).strip() if match else ""
352
+
353
+ # --- 4. Wrapper ---
354
class AzureCLIWrapper(BaseAIProvider):
    """Azure AI CLI Provider Implementation.

    Wires together the Azure prompt builder, CLI executor and markdown
    parser. Availability is decided once, at construction time, by whether
    ``az`` can be found on PATH.
    """

    PROVIDER_TYPE = AIProviderType.AZURE

    def __init__(self):
        self.prompt_builder = AzurePromptBuilder()
        self.executor = AzureCLIExecutor()
        self.parser = AzureMarkdownParser()
        self._is_available = shutil.which("az") is not None

    @property
    def is_available(self) -> bool:
        """True when ``az`` was found on PATH when this object was created."""
        return self._is_available

    async def check_availability(self) -> tuple[AIProviderStatus, str]:
        """Verify that the Azure CLI is installed and that ``az --version`` succeeds."""
        if not self._is_available:
            return AIProviderStatus.UNAVAILABLE, "Azure CLI not found (install from https://aka.ms/installazurecliwindows)"

        # Resolve the real executable. On Windows the CLI is an 'az.cmd'
        # shim that cannot be spawned directly and must go through cmd.exe,
        # the same way AzureCLIExecutor launches it.
        az_path = shutil.which("az") or "az"
        cmd = [az_path, "--version"]
        if os.name == 'nt' and az_path.lower().endswith('.cmd'):
            cmd = ["cmd.exe", "/c", *cmd]

        try:
            process = await asyncio.create_subprocess_exec(
                *cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE
            )
            await process.communicate()
            if process.returncode != 0:
                return AIProviderStatus.UNAVAILABLE, "Azure CLI is installed but not functioning correctly"
        except Exception as e:
            return AIProviderStatus.UNAVAILABLE, f"Azure CLI check failed: {str(e)}"

        return AIProviderStatus.AVAILABLE, "Ready"

    async def analyze_findings_batch(
        self,
        findings: List[Finding],
        code_contexts: Dict[str, str],
        batch_size: int = 5,
        app_context: Optional[Dict[str, Any]] = None
    ) -> List[AzureEnhancedFinding]:
        """Build one prompt for ``findings``, run it and parse the reply.

        ``code_contexts`` and ``batch_size`` are accepted for interface
        compatibility but are not used here. Any failure is logged with its
        traceback and re-raised unchanged.
        """
        if not findings:
            return []

        prompt = self.prompt_builder.build_batch_prompt(findings, app_context=app_context)

        try:
            markdown_response = await self.executor.execute(prompt)
            return self.parser.parse(markdown_response, findings)
        except Exception:
            logger.exception("Azure analysis failed for batch")
            raise

    def enhance_finding(self, finding: Finding, analysis: Any) -> AzureEnhancedFinding:
        """Placeholder: currently does nothing and returns ``None``."""
        pass

    async def generate_stride_analysis(self, repo_path: Any, category: str) -> Dict[str, Any]:
        """Placeholder: always returns ``{"error": "Not implemented"}``."""
        return {"error": "Not implemented"}

    async def test_connection(self) -> tuple[AIProviderStatus, str]:
        """Perform a minimal AI request to verify connectivity and quota."""
        try:
            # Minimal prompt to check connectivity and quota
            await self.executor.execute("hi")
            return AIProviderStatus.AVAILABLE, "Ready"
        except RuntimeError as e:
            err_msg = str(e)
            if any(kw in err_msg.lower() for kw in ["429", "quota", "rate limit", "exhausted"]):
                return AIProviderStatus.ERROR, "AI Analysis Failed: Quota Exceeded (429)"
            return AIProviderStatus.ERROR, f"AI Connection Failed: {err_msg}"
        except Exception as e:
            return AIProviderStatus.ERROR, f"AI Connection Failed: {str(e)}"
426
+
427
def get_azure_wrapper() -> AzureCLIWrapper:
    """Create and return a fresh ``AzureCLIWrapper`` instance."""
    wrapper = AzureCLIWrapper()
    return wrapper
@@ -0,0 +1,89 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import List, Any, Optional, AsyncIterator, Dict
3
+ from dataclasses import dataclass
4
+ from enum import Enum
5
+
6
+ from codesecure.common.models import Finding, EnhancedFinding
7
+ from codesecure.common.logging import get_logger
8
+
9
+ logger = get_logger(__name__)
10
+
11
class AIProviderType(str, Enum):
    """Identifiers of the supported AI provider back ends.

    Members are also ``str`` instances, so they compare equal to their
    lowercase string values.
    """

    GOOGLE = "google"
    AWS = "aws"
    AZURE = "azure"
    OPENAI = "openai"
16
+
17
class AIProviderStatus(str, Enum):
    """Outcome of a provider availability or connection check.

    Members are also ``str`` instances, so they compare equal to their
    lowercase string values.
    """

    AVAILABLE = "available"
    UNAVAILABLE = "unavailable"
    ERROR = "error"
21
+
22
class PromptBuilder(ABC):
    """Interface for objects that turn findings into an AI prompt."""

    @abstractmethod
    def build_batch_prompt(self, findings: List[Finding], app_context: Optional[Dict[str, Any]] = None) -> str:
        """Return the prompt text for ``findings``; ``app_context`` is optional."""
27
+
28
class CLIExecutor(ABC):
    """Interface for objects that hand a prompt to a command-line tool."""

    @abstractmethod
    async def execute(self, prompt: str) -> str:
        """Run the CLI command with ``prompt`` and return its output."""

    @staticmethod
    def clean_output(text: str) -> str:
        """Return ``text`` with ANSI escape sequences and SGR codes stripped."""
        import re

        # ESC followed by either a single-character command or a
        # '['-introduced control sequence.
        escape_sequence = r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])'
        return re.sub(escape_sequence, '', text)
41
+
42
class MarkdownParser(ABC):
    """Interface for objects that turn a markdown reply into enhanced findings."""

    @abstractmethod
    def parse(self, markdown: str, original_findings: List[Finding]) -> List[EnhancedFinding]:
        """Return enhanced findings built from ``markdown`` and ``original_findings``."""
47
+
48
@dataclass
class AIAnalysisResult:
    """Raw result from AI analysis"""

    # Text content of the result.
    content: str
    # Additional key/value data attached to the result.
    metadata: Dict[str, Any]
53
+
54
class BaseAIProvider(ABC):
    """Common interface every AI provider integration must implement."""

    # Declared here; each concrete provider assigns its own value.
    PROVIDER_TYPE: AIProviderType

    @property
    @abstractmethod
    def is_available(self) -> bool:
        """Whether the provider can be used."""

    @abstractmethod
    async def check_availability(self) -> tuple[AIProviderStatus, str]:
        """Return an availability status together with a message."""

    @abstractmethod
    async def analyze_findings_batch(
        self,
        findings: List[Finding],
        code_contexts: Dict[str, str],
        batch_size: int = 5,
        app_context: Optional[Dict[str, Any]] = None
    ) -> List[Any]:  # Returns raw analysis objects/dicts
        """Analyze ``findings`` and return one analysis object per result."""

    @abstractmethod
    def enhance_finding(self, finding: Finding, analysis: Any) -> EnhancedFinding:
        """Combine ``finding`` with ``analysis`` into an enhanced finding."""

    @abstractmethod
    async def generate_stride_analysis(self, repo_path: Any, category: str) -> Dict[str, Any]:
        """Return a STRIDE analysis for ``repo_path`` and ``category`` as a dict."""

    @abstractmethod
    async def test_connection(self) -> tuple[AIProviderStatus, str]:
        """Perform a minimal AI request to verify connectivity and quota."""