ship-safe 4.3.0 → 5.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,261 @@
1
+ /**
2
+ * Agentic Security Agent
3
+ * ========================
4
+ *
5
+ * Detects security vulnerabilities in AI agent implementations.
6
+ * Covers the OWASP Top 10 for Agentic Applications (2026):
7
+ * ASI01 — Agent Goal Hijacking
8
+ * ASI02 — Tool Misuse
9
+ * ASI03 — Identity & Privilege Abuse
10
+ * ASI04 — Memory Poisoning
11
+ * ASI05 — Cascading Hallucination
12
+ * ASI06 — Supply Chain Vulnerabilities
13
+ *
14
+ * 48% of cybersecurity professionals identify agentic AI as
15
+ * the top attack vector for 2026.
16
+ */
17
+
18
+ import path from 'path';
19
+ import { BaseAgent } from './base-agent.js';
20
+
21
+ // =============================================================================
22
+ // AGENTIC SECURITY PATTERNS
23
+ // =============================================================================
24
+
25
/**
 * Regex rule table scanned by AgenticSecurityAgent.
 *
 * Each entry carries:
 *   rule        — unique rule identifier
 *   title       — short human-readable headline
 *   regex       — global RegExp run against file contents
 *   severity    — 'critical' | 'high' | 'medium'
 *   cwe, owasp  — classification tags
 *   confidence  — optional; present only on heuristic rules ('medium' | 'low')
 *   description — what the match means
 *   fix         — suggested remediation
 *
 * Every regex carries the `g` flag, so `lastIndex` is stateful: reset it (or
 * use String.prototype.matchAll) before calling `.test()` / `.exec()` directly.
 */
const PATTERNS = [
  // ── Goal Hijacking (ASI01) ───────────────────────────────────────────────
  {
    rule: 'AGENT_USER_INPUT_IN_SYSTEM_PROMPT',
    title: 'Agent: User Input in System Prompt / Goal',
    regex: /(?:system|instructions|goal|objective|persona)\s*[:=]\s*(?:`[^`]*\$\{|.*\+\s*(?:req\.|request\.|user|input|message|query|body))/g,
    severity: 'critical',
    cwe: 'CWE-74',
    owasp: 'A03:2021',
    description: 'User input concatenated into agent system prompt or goal definition. Enables agent goal hijacking — the attacker can rewrite the agent\'s objectives.',
    fix: 'Separate system instructions from user input. Use structured message roles (system vs user). Never interpolate user input into system prompts.',
  },
  {
    rule: 'AGENT_NO_GOAL_BOUNDARY',
    title: 'Agent: Missing Goal Boundary Enforcement',
    regex: /(?:agent|assistant|bot)[\s\S]{0,200}(?:system|instructions)\s*[:=]\s*(?:req\.|request\.|input|body|query|params)/g,
    severity: 'critical',
    cwe: 'CWE-284',
    owasp: 'A01:2021',
    description: 'Agent goal or system instructions set directly from external input without boundary enforcement.',
    fix: 'Hardcode agent goals. If customization is needed, validate against an allowlist of approved goal templates.',
  },

  // ── Tool Misuse (ASI02) ──────────────────────────────────────────────────
  {
    rule: 'AGENT_UNRESTRICTED_TOOLS',
    title: 'Agent: Unrestricted Tool Access',
    regex: /(?:tools|actions|capabilities|functions)\s*[:=]\s*(?:\[\s*\.{3}|"all"|'all'|"\*"|'\*'|Object\.keys|getAll|listAll)/g,
    severity: 'critical',
    cwe: 'CWE-269',
    owasp: 'A01:2021',
    description: 'Agent given wildcard or unbounded tool access. Prompt injection can trigger any available tool.',
    fix: 'Restrict agent tools to minimum required set. Use explicit allowlists, not wildcard access.',
  },
  {
    rule: 'AGENT_TOOL_NO_CONFIRMATION',
    title: 'Agent: Destructive Tools Without Human Confirmation',
    regex: /(?:auto_approve|auto_execute|requireConfirmation\s*[:=]\s*false|confirm\s*[:=]\s*false|human_in_loop\s*[:=]\s*false|humanInTheLoop\s*[:=]\s*false|approval\s*[:=]\s*false)/gi,
    severity: 'high',
    cwe: 'CWE-862',
    owasp: 'A01:2021',
    description: 'Agent configured to auto-execute tools without human confirmation. Prompt injection can trigger destructive actions.',
    fix: 'Require human-in-the-loop confirmation for destructive operations (write, delete, send, pay).',
  },
  {
    rule: 'AGENT_TOOL_SHELL_ACCESS',
    title: 'Agent: Tool With Shell/Command Execution',
    regex: /(?:tools|functions)[\s\S]{0,500}(?:exec\s*\(|execSync|spawn|child_process|subprocess|os\.system|shell\s*[:=]\s*true)/g,
    severity: 'critical',
    cwe: 'CWE-78',
    owasp: 'A03:2021',
    description: 'Agent has access to a tool that executes shell commands. Prompt injection achieves RCE.',
    fix: 'Remove shell execution tools from agent capabilities. If needed, use strict command allowlists.',
  },
  {
    rule: 'AGENT_UNVALIDATED_TOOL_OUTPUT',
    title: 'Agent: Tool Output Used Without Validation',
    regex: /(?:tool_result|toolResult|function_result|tool_output)[\s\S]{0,200}(?:eval\s*\(|exec\s*\(|innerHTML|dangerouslySetInnerHTML|\.query\s*\(|\.execute\s*\()/g,
    severity: 'critical',
    cwe: 'CWE-94',
    owasp: 'A03:2021',
    description: 'Tool output passed directly to dangerous sinks (eval, SQL, HTML). Poisoned tool results can achieve code execution.',
    fix: 'Validate and sanitize all tool outputs before using them in code execution, SQL queries, or HTML rendering.',
  },

  // ── Identity & Privilege Abuse (ASI03) ───────────────────────────────────
  {
    rule: 'AGENT_ESCALATED_PERMISSIONS',
    title: 'Agent: Runs With Elevated Permissions',
    regex: /(?:agent|bot|assistant)[\s\S]{0,300}(?:admin|sudo|root|superuser|service.?role|elevated|full.?access|all.?permissions)/gi,
    severity: 'high',
    cwe: 'CWE-269',
    owasp: 'A04:2021',
    confidence: 'medium',
    description: 'Agent configured with elevated permissions (admin, root, service-role). Prompt injection inherits these privileges.',
    fix: 'Apply principle of least privilege. Agents should have minimal permissions required for their specific task.',
  },
  {
    rule: 'AGENT_CREDENTIAL_FORWARDING',
    title: 'Agent: Credentials Passed Between Tools',
    regex: /(?:tool|function|action)[\s\S]{0,300}(?:credential|password|secret|token|apiKey|api_key)[\s\S]{0,100}(?:forward|pass|send|share|propagate|next)/gi,
    severity: 'high',
    cwe: 'CWE-522',
    owasp: 'A07:2021',
    confidence: 'medium',
    description: 'Agent forwards credentials between tools or to external services. Compromised tools can steal credentials.',
    fix: 'Scope credentials per-tool. Never forward authentication tokens between tool invocations.',
  },

  // ── Memory Poisoning (ASI04) ─────────────────────────────────────────────
  {
    rule: 'AGENT_MEMORY_USER_WRITE',
    title: 'Agent: User Input Written to Persistent Memory',
    regex: /(?:memory|context|history|state|knowledge)[\s\S]{0,100}(?:\.append|\.push|\.add|\.set|\.save|\.store|\.write|\.update)\s*\(\s*(?:user|input|message|query|req\.|request\.)/g,
    severity: 'high',
    cwe: 'CWE-472',
    owasp: 'A03:2021',
    description: 'User-controlled content written directly to agent persistent memory. Enables memory poisoning — attacker instructions persist across sessions.',
    fix: 'Sanitize and validate content before writing to agent memory. Separate user messages from system state.',
  },
  {
    rule: 'AGENT_MEMORY_NO_EXPIRY',
    title: 'Agent: Persistent Memory Without Expiration',
    regex: /(?:memory|longTermMemory|persistentState)[\s\S]{0,200}(?:save|store|persist|write)(?![\s\S]{0,200}(?:ttl|expir|maxAge|retention|cleanup|prune))/g,
    severity: 'medium',
    cwe: 'CWE-404',
    owasp: 'A04:2021',
    confidence: 'low',
    description: 'Agent memory persists without expiration policy. Poisoned memories remain indefinitely.',
    fix: 'Set TTL or retention policies on agent memory. Implement periodic cleanup of stale entries.',
  },

  // ── Unbounded Execution ──────────────────────────────────────────────────
  {
    rule: 'AGENT_NO_ITERATION_LIMIT',
    title: 'Agent: Execution Loop Without Iteration Limit',
    regex: /(?:while\s*\(\s*true|for\s*\(\s*;\s*;\s*\)|loop\s*\{)[\s\S]{0,500}(?:agent|llm|completion|chat|generate|invoke)/g,
    severity: 'high',
    cwe: 'CWE-835',
    owasp: 'A04:2021',
    description: 'Agent runs in an unbounded loop without iteration limits. Enables denial of wallet and runaway costs.',
    fix: 'Set maxIterations or maxSteps limit on agent execution loops. Add timeout enforcement.',
  },
  {
    rule: 'AGENT_NO_TIMEOUT',
    title: 'Agent: No Timeout on Execution',
    regex: /(?:agent|AgentExecutor|runAgent|createAgent)\s*\(\s*\{(?:(?!timeout|maxTime|deadline|abort|signal).)*\}\s*\)/gs,
    severity: 'medium',
    cwe: 'CWE-400',
    owasp: 'A04:2021',
    confidence: 'low',
    description: 'Agent execution without timeout configuration. Runaway agents can consume unlimited resources.',
    fix: 'Set explicit timeout on agent execution. Use AbortController or equivalent mechanism.',
  },
  {
    rule: 'AGENT_NO_COST_LIMIT',
    title: 'Agent: No Spending/Token Limit',
    regex: /(?:agent|completion|chat)[\s\S]{0,300}(?:model|engine)\s*[:=](?![\s\S]{0,300}(?:max_tokens|maxTokens|budget|cost|limit|cap))/g,
    severity: 'medium',
    cwe: 'CWE-770',
    owasp: 'A04:2021',
    confidence: 'low',
    description: 'Agent makes LLM calls without token or cost limits. Enables denial of wallet attacks.',
    fix: 'Set max_tokens on all LLM calls. Implement per-session cost budgets.',
  },

  // ── Multi-Agent Risks ────────────────────────────────────────────────────
  {
    rule: 'AGENT_RECURSIVE_INVOCATION',
    title: 'Agent: Recursive Self-Invocation',
    regex: /(?:agent|assistant)[\s\S]{0,200}(?:call|invoke|run|execute)[\s\S]{0,100}(?:self|this|agent|itself)/g,
    severity: 'high',
    cwe: 'CWE-674',
    owasp: 'A04:2021',
    confidence: 'medium',
    description: 'Agent can recursively invoke itself or spawn sub-agents without depth limits. Enables infinite loops.',
    fix: 'Set max recursion depth for agent self-invocation. Track and limit sub-agent spawn depth.',
  },
  {
    rule: 'AGENT_CHAIN_NO_ISOLATION',
    title: 'Agent: Multi-Agent Chain Without Privilege Isolation',
    regex: /(?:pipe|chain|sequence|workflow)[\s\S]{0,300}(?:agent|step|task)[\s\S]{0,200}(?:agent|step|task)(?![\s\S]{0,200}(?:permission|scope|restrict|isolat))/g,
    severity: 'medium',
    cwe: 'CWE-269',
    owasp: 'A04:2021',
    confidence: 'low',
    description: 'Multi-agent pipeline without privilege isolation between steps. A compromised agent can escalate through the chain.',
    fix: 'Apply privilege isolation between agents in a chain. Each agent should have scoped permissions.',
  },

  // ── Output Safety ────────────────────────────────────────────────────────
  {
    rule: 'AGENT_OUTPUT_TO_ACTION',
    title: 'Agent: LLM Output Directly Triggers Actions',
    regex: /(?:completion|response|output|result|generated)[\s\S]{0,100}(?:\.execute|\.run|\.send|\.post|\.delete|\.pay|\.transfer|\.deploy)/g,
    severity: 'high',
    cwe: 'CWE-862',
    owasp: 'A01:2021',
    confidence: 'medium',
    description: 'LLM output directly triggers side-effect actions without validation. Hallucinated or injected outputs can cause unintended actions.',
    fix: 'Validate LLM output against expected schemas before executing side effects. Add human confirmation for irreversible actions.',
  },
  {
    rule: 'AGENT_NO_OUTPUT_SCHEMA',
    title: 'Agent: No Schema Validation on LLM Output',
    regex: /(?:JSON\.parse|json\.loads)\s*\(\s*(?:completion|response|output|result|generated|llm|ai|gpt|claude)(?![\s\S]{0,200}(?:schema|validate|zod|yup|joi|ajv|parse|safeParse|type_adapter))/g,
    severity: 'medium',
    cwe: 'CWE-20',
    owasp: 'A03:2021',
    description: 'LLM JSON output parsed without schema validation. Malformed or malicious output can cause unexpected behavior.',
    fix: 'Validate LLM structured output against a schema (Zod, Joi, Pydantic) before processing.',
  },

  // ── Audit & Observability ────────────────────────────────────────────────
  {
    rule: 'AGENT_NO_AUDIT_LOG',
    title: 'Agent: Tool Invocations Not Logged',
    // The negative lookahead is anchored directly after the invocation keyword.
    // A free-running `[\s\S]{0,300}` in front of it would let the engine slide
    // past any nearby log/audit keyword and match anyway, so the rule would
    // fire even when logging immediately follows the tool call.
    regex: /(?:tool_call|function_call|executeTool|callTool|tool\.run)(?![\s\S]{0,300}(?:log|audit|record|track|monitor|trace|emit|publish))/g,
    severity: 'medium',
    cwe: 'CWE-778',
    owasp: 'A09:2021',
    confidence: 'low',
    description: 'Agent tool invocations are not being logged or audited. Makes incident response and forensics impossible.',
    fix: 'Log all tool invocations including: tool name, arguments, caller identity, timestamp, and result status.',
  },
];
231
+
232
+ // =============================================================================
233
+ // AGENTIC SECURITY AGENT
234
+ // =============================================================================
235
+
236
/**
 * Scanning agent that applies the agentic-security PATTERNS rule table to
 * source files in a handful of supported languages.
 */
export class AgenticSecurityAgent extends BaseAgent {
  constructor() {
    const agentName = 'AgenticSecurityAgent';
    const summary =
      'Detect AI agent security vulnerabilities — goal hijacking, tool misuse, memory poisoning, unbounded execution';
    const category = 'llm';
    super(agentName, summary, category);
  }

  /**
   * Run every PATTERNS rule against each supported source file.
   *
   * @param {object} context — scan context; only `context.files` is read here
   * @returns {Promise<object[]>} findings from all scanned files, in file order
   */
  async analyze(context) {
    // Only these extensions are scanned; the comparison is case-insensitive.
    const scannable = new Set([
      '.js', '.jsx', '.ts', '.tsx', '.mjs', '.cjs', '.py', '.rb', '.go',
    ]);

    const targets = context.files.filter((candidate) =>
      scannable.has(path.extname(candidate).toLowerCase())
    );

    return targets.flatMap((target) => this.scanFileWithPatterns(target, PATTERNS));
  }
}
260
+
261
+ export default AgenticSecurityAgent;
@@ -18,7 +18,7 @@
18
18
  import fs from 'fs';
19
19
  import path from 'path';
20
20
  import fg from 'fast-glob';
21
- import { SKIP_DIRS, SKIP_EXTENSIONS, MAX_FILE_SIZE, loadGitignorePatterns } from '../utils/patterns.js';
21
+ import { SKIP_DIRS, SKIP_EXTENSIONS, SKIP_FILENAMES, MAX_FILE_SIZE, loadGitignorePatterns } from '../utils/patterns.js';
22
22
 
23
23
  // =============================================================================
24
24
  // FINDING FACTORY
@@ -86,6 +86,15 @@ export class BaseAgent {
86
86
  throw new Error(`${this.name}.analyze() not implemented`);
87
87
  }
88
88
 
89
+ /**
90
+ * Decide whether this agent should run for the given recon results.
91
+ * Subclasses can override this to opt out of scans that are irrelevant to the project.
92
+ * This base implementation ignores `recon` and always returns true.
+ *
+ * @param {object} recon — recon results (unused by the default implementation)
+ * @returns {boolean} true when the agent should run
93
+ */
94
+ shouldRun(recon) {
95
+ return true;
96
+ }
97
+
89
98
  // ── Helpers available to all agents ─────────────────────────────────────────
90
99
 
91
100
  /**
@@ -122,6 +131,7 @@ export class BaseAgent {
122
131
  const ext = path.extname(file).toLowerCase();
123
132
  if (SKIP_EXTENSIONS.has(ext)) return false;
124
133
  const basename = path.basename(file);
134
+ if (SKIP_FILENAMES.has(basename)) return false;
125
135
  if (basename.endsWith('.min.js') || basename.endsWith('.min.css')) return false;
126
136
  try {
127
137
  const stats = fs.statSync(file);
@@ -0,0 +1,333 @@
1
+ /**
2
+ * DeepAnalyzer — LLM-Powered Taint Analysis
3
+ * ============================================
4
+ *
5
+ * Takes critical/high findings nominated by regex scan and sends them
6
+ * to an LLM for deeper analysis: taint reachability, sanitization
7
+ * verification, and exploitability assessment.
8
+ *
9
+ * Supports:
10
+ * - Anthropic API (ANTHROPIC_API_KEY)
11
+ * - OpenAI API (OPENAI_API_KEY)
12
+ * - Google Gemini (GOOGLE_API_KEY)
13
+ * - Ollama local models (--local flag)
14
+ *
15
+ * USAGE:
16
+ * const analyzer = new DeepAnalyzer({ provider, budgetCents: 50 });
17
+ * const enrichedFindings = await analyzer.analyze(findings, context);
18
+ */
19
+
20
+ import fs from 'fs';
21
+ import path from 'path';
22
+ import { createProvider, autoDetectProvider } from '../providers/llm-provider.js';
23
+
24
+ // =============================================================================
25
+ // CONSTANTS
26
+ // =============================================================================
27
+
28
+ /** Character cap on the code-context window attached to each finding; longer windows are truncated in _getFileContext. */
29
+ const MAX_FILE_CHARS = 4000;
30
+
31
+ /** Upper bound on how many critical/high findings a single analyze() call forwards to the LLM. */
32
+ const MAX_FINDINGS = 30;
33
+
34
+ /** Approximate price, in cents, per 1K input / output tokens (Haiku pricing); used for the up-front estimate and for spend tracking. */
35
+ const COST_PER_1K_INPUT = 0.08; // cents per 1K input tokens
36
+ const COST_PER_1K_OUTPUT = 0.4; // cents per 1K output tokens
37
+
38
+ /** Assumed token usage per analyzed finding; read only by _estimateCost for the pre-run budget check. */
39
+ const EST_INPUT_TOKENS_PER_FINDING = 1500;
40
+ const EST_OUTPUT_TOKENS_PER_FINDING = 300;
41
+
42
+ // =============================================================================
43
+ // SYSTEM PROMPT
44
+ // =============================================================================
45
+
46
+ /** System instructions passed as the first argument of provider.complete() for every batch; the JSON shape requested here is what _parseResponse validates. */
+ const SYSTEM_PROMPT = `You are a security code auditor performing taint analysis. For each finding, determine:
47
+
48
+ 1. **Tainted**: Is the flagged value controllable by an external user (via HTTP request, file upload, CLI args, env vars, database read, etc.)?
49
+ 2. **Sanitized**: Is there sanitization, validation, or encoding between the source and sink that neutralizes the risk?
50
+ 3. **Exploitability**: Rate as "confirmed", "likely", "unlikely", or "false_positive".
51
+ 4. **Reasoning**: One sentence explaining your verdict.
52
+
53
+ Respond with a JSON array ONLY. No markdown, no explanation outside JSON.
54
+
55
+ [{
56
+ "findingId": "<id>",
57
+ "tainted": true|false,
58
+ "sanitized": true|false,
59
+ "exploitability": "confirmed"|"likely"|"unlikely"|"false_positive",
60
+ "reasoning": "<one sentence>"
61
+ }]
62
+
63
+ Rules:
64
+ - If the value is a hardcoded string literal with no user input path, it is NOT tainted.
65
+ - If there is a validation library (zod, joi, yup, ajv) or sanitization function between input and sink, mark sanitized=true.
66
+ - If the code is in a test file, example, or documentation, mark as false_positive.
67
+ - If you cannot determine taint flow from the provided context, mark exploitability as "unlikely" rather than guessing.
68
+ - Be conservative: only mark "confirmed" when there is a clear, unsanitized path from user input to dangerous sink.`;
69
+
70
+ // =============================================================================
71
+ // DEEP ANALYZER
72
+ // =============================================================================
73
+
74
/**
 * LLM-backed second pass over regex findings.
 *
 * Sends critical/high findings to a provider in small batches, attaches the
 * returned verdict to each finding as `deepAnalysis`, and adjusts the
 * finding's `confidence` accordingly. Spend is tracked against a budget
 * expressed in cents.
 */
export class DeepAnalyzer {
  /**
   * @param {object} [options]
   * @param {object} [options.provider] — LLM provider instance exposing `complete(system, prompt, opts)`
   * @param {number} [options.budgetCents] — max spend in cents (default: 50)
   * @param {boolean} [options.verbose] — log analysis progress to the console
   */
  constructor(options = {}) {
    this.provider = options.provider || null;
    this.budgetCents = options.budgetCents ?? 50;
    this.verbose = options.verbose || false;
    this.spentCents = 0;
    this.analyzedCount = 0;
  }

  /**
   * Create a DeepAnalyzer with a resolved provider.
   *
   * With `options.local` an Ollama provider is built; otherwise the provider
   * is auto-detected for `rootPath`. Returns null when auto-detection finds
   * no provider.
   *
   * @param {string} rootPath — project root handed to provider auto-detection
   * @param {object} [options] — constructor options plus `local`, `model`, `ollamaUrl`
   * @returns {DeepAnalyzer|null}
   */
  static create(rootPath, options = {}) {
    let resolved;
    if (options.local) {
      // --local flag: use Ollama
      resolved = createProvider('ollama', null, {
        model: options.model || 'llama3.2',
        baseUrl: options.ollamaUrl || 'http://localhost:11434/api/chat',
      });
    } else {
      // Auto-detect from env
      resolved = autoDetectProvider(rootPath);
      if (!resolved) return null;
    }

    // `options` may carry its own `provider` key (e.g. a provider *name*
    // string). Spreading it after the resolved instance would clobber the
    // instance and make every batch fail, so an explicit value is honoured
    // only when it actually looks like a provider object.
    const explicit = options.provider;
    const provider =
      explicit && typeof explicit.complete === 'function' ? explicit : resolved;

    return new DeepAnalyzer({ ...options, provider });
  }

  /**
   * Analyze findings with LLM-powered taint analysis.
   * Only critical/high findings are sent, capped by MAX_FINDINGS and by the
   * remaining budget. The input objects are mutated in place.
   *
   * @param {object[]} findings — all findings from agents
   * @param {object} [context] — { rootPath, recon }
   * @returns {Promise<object[]>} the same `findings` array, with `deepAnalysis` attached where available
   */
  async analyze(findings, context = {}) {
    if (!this.provider) return findings;

    // Filter to critical/high only
    const candidates = findings.filter(
      (f) => f.severity === 'critical' || f.severity === 'high'
    );
    if (candidates.length === 0) return findings;

    // Cap at MAX_FINDINGS
    let toAnalyze = candidates.slice(0, MAX_FINDINGS);

    // Trim to what the budget is estimated to afford (always at least one;
    // the per-batch check below still stops a zero budget from spending).
    const estimatedCost = this._estimateCost(toAnalyze.length);
    if (estimatedCost > this.budgetCents) {
      const perFinding = estimatedCost / toAnalyze.length;
      const affordable = Math.floor(this.budgetCents / perFinding);
      toAnalyze = toAnalyze.slice(0, Math.max(1, affordable));
    }

    // Batch findings (5 per request to balance cost vs. context)
    const batchSize = 5;
    const results = new Map();

    for (let i = 0; i < toAnalyze.length; i += batchSize) {
      // Budget check before each batch
      if (this.spentCents >= this.budgetCents) {
        if (this.verbose) {
          console.log(` Deep analysis: budget exhausted (${this.spentCents}c / ${this.budgetCents}c)`);
        }
        break;
      }

      const batch = toAnalyze.slice(i, i + batchSize);
      const prompt = this._buildPrompt(batch, context);

      try {
        const response = await this.provider.complete(
          SYSTEM_PROMPT,
          prompt,
          { maxTokens: 1500 }
        );

        // A non-string response would turn the spend counter into NaN and
        // silently disable every later budget check; treat it as a failed batch.
        if (typeof response !== 'string') {
          throw new TypeError('provider returned a non-string response');
        }

        // Track cost (rough heuristic: ~4 characters per token)
        const inputTokens = Math.ceil(prompt.length / 4);
        const outputTokens = Math.ceil(response.length / 4);
        this.spentCents += (inputTokens / 1000) * COST_PER_1K_INPUT
          + (outputTokens / 1000) * COST_PER_1K_OUTPUT;

        // Parse response
        for (const analysis of this._parseResponse(response)) {
          results.set(String(analysis.findingId), analysis);
        }

        this.analyzedCount += batch.length;
      } catch (err) {
        if (this.verbose) {
          console.log(` Deep analysis batch failed: ${err.message}`);
        }
        // Continue with remaining batches
      }
    }

    // Attach verdicts only to findings that were actually sent, so an ID
    // echoed by the model can never land on an unrelated finding.
    for (const finding of toAnalyze) {
      const analysis = results.get(this._findingId(finding, context.rootPath));
      if (!analysis) continue;

      finding.deepAnalysis = {
        tainted: analysis.tainted,
        sanitized: analysis.sanitized,
        exploitability: analysis.exploitability,
        reasoning: analysis.reasoning,
      };

      // Adjust confidence based on deep analysis
      if (analysis.exploitability === 'false_positive') {
        finding.confidence = 'low';
      } else if (analysis.exploitability === 'unlikely') {
        if (finding.confidence === 'high') finding.confidence = 'medium';
      } else if (analysis.exploitability === 'confirmed') {
        finding.confidence = 'high';
      }
    }

    return findings;
  }

  /**
   * Build the analysis prompt for a batch of findings.
   *
   * @param {object[]} findings — batch to describe
   * @param {object} [context] — { rootPath, recon }; recon adds a project-context section
   * @returns {string} user prompt containing the findings as pretty-printed JSON
   */
  _buildPrompt(findings, context = {}) {
    const items = findings.map((f) => ({
      findingId: this._findingId(f, context.rootPath),
      rule: f.rule,
      severity: f.severity,
      title: f.title,
      description: f.description,
      file: f.file ? path.basename(f.file) : 'unknown',
      line: f.line,
      matched: (f.matched || '').slice(0, 200),
      codeContext: this._getFileContext(f),
    }));

    // Add project context if available
    let projectContext = '';
    if (context.recon) {
      const r = context.recon;
      const parts = [];
      if (r.frameworks?.length) parts.push(`Frameworks: ${r.frameworks.join(', ')}`);
      if (r.databases?.length) parts.push(`Databases: ${r.databases.join(', ')}`);
      if (r.authPatterns?.length) parts.push(`Auth: ${r.authPatterns.join(', ')}`);
      if (parts.length) projectContext = `\nProject context:\n${parts.join('\n')}\n`;
    }

    return `Analyze these ${items.length} security findings for taint reachability and exploitability.
${projectContext}
Findings:
${JSON.stringify(items, null, 2)}`;
  }

  /**
   * Get file content around the finding for LLM context.
   *
   * @param {object} finding — reads `finding.file` and `finding.line`
   * @returns {string} numbered source lines around the finding, or '' when the file cannot be read
   */
  _getFileContext(finding) {
    if (!finding.file) return '';

    try {
      const content = fs.readFileSync(finding.file, 'utf-8');
      const lines = content.split('\n');
      const lineNum = finding.line || 1;

      // Get a window of ~40 lines around the finding
      const start = Math.max(0, lineNum - 21);
      const end = Math.min(lines.length, lineNum + 20);
      let context = lines.slice(start, end)
        .map((l, i) => `${start + i + 1}: ${l}`)
        .join('\n');

      // Truncate if too long
      if (context.length > MAX_FILE_CHARS) {
        context = context.slice(0, MAX_FILE_CHARS) + '\n... (truncated)';
      }

      return context;
    } catch {
      // Best effort: an unreadable file just means no code context.
      return '';
    }
  }

  /**
   * Generate a stable ID for a finding.
   *
   * When `rootPath` is given the ID uses the file path relative to it, so
   * same-named files in different directories (e.g. two `index.js`) no longer
   * share an ID. Without `rootPath` the basename is used.
   *
   * @param {object} finding — reads `file`, `line`, `rule`
   * @param {string} [rootPath] — project root used to relativize the file path
   * @returns {string} `<file>:<line>:<rule>`
   */
  _findingId(finding, rootPath) {
    let file = 'unknown';
    if (finding.file) {
      file = rootPath
        ? path.relative(rootPath, finding.file).split(path.sep).join('/')
        : path.basename(finding.file);
      // path.relative yields '' when the file path equals rootPath
      if (!file) file = path.basename(finding.file);
    }
    return `${file}:${finding.line}:${finding.rule}`;
  }

  /**
   * Parse LLM response into analysis objects.
   *
   * Tolerates a Markdown code fence and prose around the JSON array, and
   * drops malformed entries individually instead of discarding the batch.
   *
   * @param {string} text — raw provider response
   * @returns {object[]} validated analysis entries (possibly empty)
   */
  _parseResponse(text) {
    if (typeof text !== 'string') return [];

    const cleaned = text
      .replace(/^```(?:json)?\s*/i, '')
      .replace(/\s*```\s*$/i, '')
      .trim();

    let parsed;
    try {
      parsed = JSON.parse(cleaned);
    } catch {
      // Fall back to the outermost [...] span when the model added prose.
      const open = cleaned.indexOf('[');
      const close = cleaned.lastIndexOf(']');
      if (open === -1 || close <= open) return [];
      try {
        parsed = JSON.parse(cleaned.slice(open, close + 1));
      } catch {
        return [];
      }
    }

    if (!Array.isArray(parsed)) return [];

    const verdicts = ['confirmed', 'likely', 'unlikely', 'false_positive'];

    // Validate each entry; `null` is explicitly guarded so one bad element
    // cannot throw and wipe out the valid ones.
    return parsed.filter((item) =>
      item !== null &&
      typeof item === 'object' &&
      item.findingId &&
      typeof item.tainted === 'boolean' &&
      typeof item.sanitized === 'boolean' &&
      verdicts.includes(item.exploitability)
    );
  }

  /**
   * Estimate cost for analyzing N findings (in cents).
   *
   * @param {number} count — number of findings
   * @returns {number} estimated spend in cents
   */
  _estimateCost(count) {
    const inputCost = (count * EST_INPUT_TOKENS_PER_FINDING / 1000) * COST_PER_1K_INPUT;
    const outputCost = (count * EST_OUTPUT_TOKENS_PER_FINDING / 1000) * COST_PER_1K_OUTPUT;
    return inputCost + outputCost;
  }

  /**
   * Get analysis stats.
   *
   * @returns {{analyzedCount: number, spentCents: number, budgetCents: number, provider: string}}
   *   `spentCents` is rounded to two decimals; `provider` is 'none' when unset or unnamed
   */
  getStats() {
    return {
      analyzedCount: this.analyzedCount,
      spentCents: Math.round(this.spentCents * 100) / 100,
      budgetCents: this.budgetCents,
      provider: this.provider?.name || 'none',
    };
  }
}
332
+
333
+ export default DeepAnalyzer;
@@ -19,13 +19,20 @@ export { GitHistoryScanner } from './git-history-scanner.js';
19
19
  export { CICDScanner } from './cicd-scanner.js';
20
20
  export { APIFuzzer } from './api-fuzzer.js';
21
21
  export { SupabaseRLSAgent } from './supabase-rls-agent.js';
22
+ export { MCPSecurityAgent } from './mcp-security-agent.js';
23
+ export { AgenticSecurityAgent } from './agentic-security-agent.js';
24
+ export { RAGSecurityAgent } from './rag-security-agent.js';
25
+ export { PIIComplianceAgent } from './pii-compliance-agent.js';
26
+ export { VerifierAgent } from './verifier-agent.js';
27
+ export { DeepAnalyzer } from './deep-analyzer.js';
22
28
  export { ScoringEngine, GRADES, CATEGORIES } from './scoring-engine.js';
23
29
  export { SBOMGenerator } from './sbom-generator.js';
24
30
  export { PolicyEngine } from './policy-engine.js';
25
31
  export { HTMLReporter } from './html-reporter.js';
26
32
 
27
33
  /**
28
- * Create a fully configured orchestrator with all 12 agents.
34
+ * Create a fully configured orchestrator with all 15 scanning agents.
35
+ * (VerifierAgent and DeepAnalyzer run as post-processors, not in the agent pool.)
29
36
  */
30
37
  import { Orchestrator as OrchestratorClass } from './orchestrator.js';
31
38
  import { InjectionTester as InjectionTesterClass } from './injection-tester.js';
@@ -39,6 +46,10 @@ import { GitHistoryScanner as GitHistoryScannerClass } from './git-history-scann
39
46
  import { CICDScanner as CICDScannerClass } from './cicd-scanner.js';
40
47
  import { APIFuzzer as APIFuzzerClass } from './api-fuzzer.js';
41
48
  import { SupabaseRLSAgent as SupabaseRLSAgentClass } from './supabase-rls-agent.js';
49
+ import { MCPSecurityAgent as MCPSecurityAgentClass } from './mcp-security-agent.js';
50
+ import { AgenticSecurityAgent as AgenticSecurityAgentClass } from './agentic-security-agent.js';
51
+ import { RAGSecurityAgent as RAGSecurityAgentClass } from './rag-security-agent.js';
52
+ import { PIIComplianceAgent as PIIComplianceAgentClass } from './pii-compliance-agent.js';
42
53
 
43
54
  export function buildOrchestrator() {
44
55
  const orchestrator = new OrchestratorClass();
@@ -54,6 +65,10 @@ export function buildOrchestrator() {
54
65
  new CICDScannerClass(),
55
66
  new APIFuzzerClass(),
56
67
  new SupabaseRLSAgentClass(),
68
+ new MCPSecurityAgentClass(),
69
+ new AgenticSecurityAgentClass(),
70
+ new RAGSecurityAgentClass(),
71
+ new PIIComplianceAgentClass(),
57
72
  ]);
58
73
  return orchestrator;
59
74
  }