ship-safe 7.0.0 → 9.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +80 -21
- package/cli/agents/agent-attestation-agent.js +318 -0
- package/cli/agents/agentic-security-agent.js +35 -0
- package/cli/agents/cicd-scanner.js +22 -0
- package/cli/agents/config-auditor.js +235 -0
- package/cli/agents/deep-analyzer.js +473 -133
- package/cli/agents/hermes-security-agent.js +536 -0
- package/cli/agents/index.js +63 -22
- package/cli/agents/managed-agent-scanner.js +333 -0
- package/cli/agents/orchestrator.js +13 -3
- package/cli/agents/supply-chain-agent.js +1 -1
- package/cli/bin/ship-safe.js +129 -5
- package/cli/commands/audit.js +149 -3
- package/cli/commands/autofix.js +383 -0
- package/cli/commands/env-audit.js +349 -0
- package/cli/commands/init.js +104 -0
- package/cli/commands/mcp.js +270 -0
- package/cli/commands/red-team.js +2 -2
- package/cli/commands/scan-mcp.js +78 -0
- package/cli/commands/scan-skill.js +248 -5
- package/cli/commands/watch.js +142 -5
- package/cli/index.js +5 -0
- package/cli/providers/llm-provider.js +50 -2
- package/cli/utils/hermes-tool-registry.js +252 -0
- package/cli/utils/patterns.js +1 -0
- package/cli/utils/plugin-loader.js +276 -0
- package/cli/utils/scan-playbook.js +312 -0
- package/cli/utils/security-memory.js +296 -0
- package/package.json +2 -2
|
@@ -1,16 +1,33 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* DeepAnalyzer — LLM-Powered Taint Analysis
|
|
3
|
-
*
|
|
2
|
+
* DeepAnalyzer — Multi-Tier LLM-Powered Taint Analysis
|
|
3
|
+
* ======================================================
|
|
4
4
|
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
5
|
+
* Hermes-inspired three-tier analysis pipeline:
|
|
6
|
+
*
|
|
7
|
+
* Tier 1 (Haiku / cheap model) — Fast triage of all critical+high findings.
|
|
8
|
+
* Labels each finding: "skip" | "review" | "escalate".
|
|
9
|
+
* Skips obvious false-positives early and cheaply.
|
|
10
|
+
*
|
|
11
|
+
* Tier 2 (Sonnet / mid model) — Deep taint analysis of "review" findings.
|
|
12
|
+
* Full file context, sanitization checking, exploitability rating.
|
|
13
|
+
*
|
|
14
|
+
* Tier 3 (Opus / frontier model) — Full exploit-chain reasoning for "escalate"
|
|
15
|
+
* findings (confirmed critical severity with untrusted input path).
|
|
16
|
+
* Returns attack vector, business impact, and exact fix.
|
|
17
|
+
*
|
|
18
|
+
* When only one provider/model is configured (non-Anthropic), the pipeline falls
|
|
19
|
+
* back gracefully to a single-tier analysis identical to the previous behavior.
|
|
20
|
+
*
|
|
21
|
+
* Structured output:
|
|
22
|
+
* When the provider is AnthropicProvider, all LLM calls use the tool-use API
|
|
23
|
+
* (tool_choice: forced) which guarantees JSON matching the schema — no regex
|
|
24
|
+
* cleanup, no silent dropped findings.
|
|
8
25
|
*
|
|
9
26
|
* Supports:
|
|
10
|
-
* - Anthropic API (ANTHROPIC_API_KEY)
|
|
11
|
-
* - OpenAI API (OPENAI_API_KEY)
|
|
12
|
-
* - Google Gemini (GOOGLE_API_KEY)
|
|
13
|
-
* - Ollama
|
|
27
|
+
* - Anthropic API (ANTHROPIC_API_KEY) — full multi-tier + structured output
|
|
28
|
+
* - OpenAI API (OPENAI_API_KEY) — single-tier, text parsing
|
|
29
|
+
* - Google Gemini (GOOGLE_API_KEY) — single-tier, text parsing
|
|
30
|
+
* - Ollama / Gemma4 (--local) — large context, schema-enforced output
|
|
14
31
|
*
|
|
15
32
|
* USAGE:
|
|
16
33
|
* const analyzer = new DeepAnalyzer({ provider, budgetCents: 50 });
|
|
@@ -21,36 +38,151 @@ import fs from 'fs';
|
|
|
21
38
|
import path from 'path';
|
|
22
39
|
import { createProvider, autoDetectProvider } from '../providers/llm-provider.js';
|
|
23
40
|
|
|
41
|
+
// Lazy-import ScanPlaybook to avoid circular dep; only used when rootPath is known
|
|
42
|
+
let _ScanPlaybook = null;
|
|
43
|
+
async function getScanPlaybook() {
|
|
44
|
+
if (!_ScanPlaybook) {
|
|
45
|
+
const mod = await import('../utils/scan-playbook.js');
|
|
46
|
+
_ScanPlaybook = mod.ScanPlaybook;
|
|
47
|
+
}
|
|
48
|
+
return _ScanPlaybook;
|
|
49
|
+
}
|
|
50
|
+
|
|
24
51
|
// =============================================================================
|
|
25
52
|
// CONSTANTS
|
|
26
53
|
// =============================================================================
|
|
27
54
|
|
|
28
|
-
/** Max file content per finding for standard providers
|
|
55
|
+
/** Max file content per finding for standard providers */
|
|
29
56
|
const MAX_FILE_CHARS_DEFAULT = 4000;
|
|
30
57
|
|
|
31
|
-
/**
|
|
32
|
-
* Max file content per finding for large-context providers (Gemma 4 128K–256K).
|
|
33
|
-
* Sending the full file enables cross-function taint tracing that a 40-line
|
|
34
|
-
* window cannot catch.
|
|
35
|
-
*/
|
|
58
|
+
/** Max file content per finding for large-context providers (Gemma 4 128K–256K) */
|
|
36
59
|
const MAX_FILE_CHARS_LARGE_CTX = 80000;
|
|
37
60
|
|
|
38
61
|
/** Max findings to analyze per run (cost control) */
|
|
39
62
|
const MAX_FINDINGS = 30;
|
|
40
63
|
|
|
41
|
-
|
|
42
|
-
const COST_PER_1K_INPUT
|
|
43
|
-
const COST_PER_1K_OUTPUT = 0.4;
|
|
64
|
+
// Approximate cost per 1K tokens (Haiku pricing used as baseline)
|
|
65
|
+
const COST_PER_1K_INPUT = 0.08; // cents
|
|
66
|
+
const COST_PER_1K_OUTPUT = 0.4; // cents
|
|
44
67
|
|
|
45
|
-
|
|
46
|
-
const EST_INPUT_TOKENS_PER_FINDING = 1500;
|
|
68
|
+
const EST_INPUT_TOKENS_PER_FINDING = 1500;
|
|
47
69
|
const EST_OUTPUT_TOKENS_PER_FINDING = 300;
|
|
48
70
|
|
|
71
|
+
// Multi-tier Anthropic model IDs
|
|
72
|
+
const TIER1_MODEL = 'claude-haiku-4-5-20251001'; // fast triage
|
|
73
|
+
const TIER2_MODEL = 'claude-sonnet-4-6'; // deep analysis
|
|
74
|
+
const TIER3_MODEL = 'claude-opus-4-6'; // exploit chain
|
|
75
|
+
|
|
49
76
|
// =============================================================================
|
|
50
|
-
//
|
|
77
|
+
// JSON SCHEMAS — used with Anthropic tool-use for guaranteed output
|
|
51
78
|
// =============================================================================
|
|
52
79
|
|
|
53
|
-
|
|
80
|
+
/** Tier 1: quick triage schema */
|
|
81
|
+
const TRIAGE_SCHEMA = {
|
|
82
|
+
type: 'object',
|
|
83
|
+
properties: {
|
|
84
|
+
results: {
|
|
85
|
+
type: 'array',
|
|
86
|
+
items: {
|
|
87
|
+
type: 'object',
|
|
88
|
+
properties: {
|
|
89
|
+
findingId: { type: 'string' },
|
|
90
|
+
tier: { type: 'string', enum: ['skip', 'review', 'escalate'] },
|
|
91
|
+
reason: { type: 'string' },
|
|
92
|
+
},
|
|
93
|
+
required: ['findingId', 'tier', 'reason'],
|
|
94
|
+
additionalProperties: false,
|
|
95
|
+
},
|
|
96
|
+
},
|
|
97
|
+
},
|
|
98
|
+
required: ['results'],
|
|
99
|
+
};
|
|
100
|
+
|
|
101
|
+
/** Tier 2: deep analysis schema */
|
|
102
|
+
const DEEP_ANALYSIS_SCHEMA = {
|
|
103
|
+
type: 'object',
|
|
104
|
+
properties: {
|
|
105
|
+
results: {
|
|
106
|
+
type: 'array',
|
|
107
|
+
items: {
|
|
108
|
+
type: 'object',
|
|
109
|
+
properties: {
|
|
110
|
+
findingId: { type: 'string' },
|
|
111
|
+
tainted: { type: 'boolean' },
|
|
112
|
+
sanitized: { type: 'boolean' },
|
|
113
|
+
exploitability: { type: 'string', enum: ['confirmed', 'likely', 'unlikely', 'false_positive'] },
|
|
114
|
+
reasoning: { type: 'string' },
|
|
115
|
+
},
|
|
116
|
+
required: ['findingId', 'tainted', 'sanitized', 'exploitability', 'reasoning'],
|
|
117
|
+
additionalProperties: false,
|
|
118
|
+
},
|
|
119
|
+
},
|
|
120
|
+
},
|
|
121
|
+
required: ['results'],
|
|
122
|
+
};
|
|
123
|
+
|
|
124
|
+
/** Tier 3: exploit-chain schema */
|
|
125
|
+
const EXPLOIT_SCHEMA = {
|
|
126
|
+
type: 'object',
|
|
127
|
+
properties: {
|
|
128
|
+
results: {
|
|
129
|
+
type: 'array',
|
|
130
|
+
items: {
|
|
131
|
+
type: 'object',
|
|
132
|
+
properties: {
|
|
133
|
+
findingId: { type: 'string' },
|
|
134
|
+
tainted: { type: 'boolean' },
|
|
135
|
+
sanitized: { type: 'boolean' },
|
|
136
|
+
exploitability: { type: 'string', enum: ['confirmed', 'likely', 'unlikely', 'false_positive'] },
|
|
137
|
+
reasoning: { type: 'string' },
|
|
138
|
+
attackVector: { type: 'string' },
|
|
139
|
+
businessImpact: { type: 'string' },
|
|
140
|
+
fix: { type: 'string' },
|
|
141
|
+
},
|
|
142
|
+
required: ['findingId', 'tainted', 'sanitized', 'exploitability', 'reasoning', 'attackVector', 'businessImpact', 'fix'],
|
|
143
|
+
additionalProperties: false,
|
|
144
|
+
},
|
|
145
|
+
},
|
|
146
|
+
},
|
|
147
|
+
required: ['results'],
|
|
148
|
+
};
|
|
149
|
+
|
|
150
|
+
// =============================================================================
|
|
151
|
+
// SYSTEM PROMPTS
|
|
152
|
+
// =============================================================================
|
|
153
|
+
|
|
154
|
+
const TRIAGE_SYSTEM = `You are a fast security triage assistant. For each finding, quickly decide:
|
|
155
|
+
- "skip" — Obvious false positive (hardcoded literal, test file, sanitized value, documentation).
|
|
156
|
+
- "review" — Needs deeper analysis. Possibly tainted user input reaching a dangerous sink.
|
|
157
|
+
- "escalate" — Clear, unsanitized path from user-controlled input to a critical sink (SQL query, shell exec, file write, deserialization). Escalate only when confident.
|
|
158
|
+
|
|
159
|
+
Be conservative: prefer "review" over "escalate" when unsure.`;
|
|
160
|
+
|
|
161
|
+
const DEEP_SYSTEM = `You are a security code auditor performing taint analysis. For each finding, determine:
|
|
162
|
+
|
|
163
|
+
1. Tainted: Is the flagged value controllable by an external user (HTTP request, file upload, CLI args, env vars, DB read)?
|
|
164
|
+
2. Sanitized: Is there sanitization, validation, or encoding between source and sink that neutralizes the risk?
|
|
165
|
+
3. Exploitability: "confirmed" | "likely" | "unlikely" | "false_positive"
|
|
166
|
+
4. Reasoning: One concise sentence explaining your verdict.
|
|
167
|
+
|
|
168
|
+
Rules:
|
|
169
|
+
- Hardcoded string literals with no user input path → NOT tainted.
|
|
170
|
+
- Validation library (zod, joi, yup, ajv) or sanitize function between input and sink → sanitized=true.
|
|
171
|
+
- Test/example/documentation file → false_positive.
|
|
172
|
+
- Cannot determine taint flow from provided context → "unlikely".
|
|
173
|
+
- Only "confirmed" when there is a clear, unsanitized path from user input to dangerous sink.`;
|
|
174
|
+
|
|
175
|
+
const EXPLOIT_SYSTEM = `You are an expert security researcher performing full exploit-chain analysis. For each confirmed critical finding:
|
|
176
|
+
|
|
177
|
+
1. Trace the complete attack vector from attacker-controlled input to dangerous sink.
|
|
178
|
+
2. Assess the real-world business impact (data breach, account takeover, RCE, etc.).
|
|
179
|
+
3. Write a precise, actionable fix (code change, library call, or config update).
|
|
180
|
+
4. Rate exploitability as "confirmed" only if the path is fully unsanitized; otherwise "likely".
|
|
181
|
+
|
|
182
|
+
Be specific. Code references, line numbers, and exact fix suggestions are expected.`;
|
|
183
|
+
|
|
184
|
+
// Fallback system prompt for non-tiered (single provider) analysis
|
|
185
|
+
const SINGLE_TIER_SYSTEM = `You are a security code auditor performing taint analysis. For each finding, determine:
|
|
54
186
|
|
|
55
187
|
1. **Tainted**: Is the flagged value controllable by an external user (via HTTP request, file upload, CLI args, env vars, database read, etc.)?
|
|
56
188
|
2. **Sanitized**: Is there sanitization, validation, or encoding between the source and sink that neutralizes the risk?
|
|
@@ -81,9 +213,9 @@ Rules:
|
|
|
81
213
|
export class DeepAnalyzer {
|
|
82
214
|
/**
|
|
83
215
|
* @param {object} options
|
|
84
|
-
* @param {object}
|
|
85
|
-
* @param {number}
|
|
86
|
-
* @param {boolean} options.verbose
|
|
216
|
+
* @param {object} options.provider — LLM provider instance (from createProvider)
|
|
217
|
+
* @param {number} options.budgetCents — Max spend in cents (default: 50)
|
|
218
|
+
* @param {boolean} options.verbose — Log analysis progress
|
|
87
219
|
*/
|
|
88
220
|
constructor(options = {}) {
|
|
89
221
|
this.provider = options.provider || null;
|
|
@@ -91,14 +223,18 @@ export class DeepAnalyzer {
|
|
|
91
223
|
this.verbose = options.verbose || false;
|
|
92
224
|
this.spentCents = 0;
|
|
93
225
|
this.analyzedCount = 0;
|
|
226
|
+
this._tier2Count = 0;
|
|
227
|
+
this._tier3Count = 0;
|
|
228
|
+
this._skippedCount = 0;
|
|
94
229
|
|
|
95
|
-
//
|
|
96
|
-
// increase file context and batch size to take full advantage.
|
|
230
|
+
// Large-context mode for local models (Gemma 4, etc.)
|
|
97
231
|
const ctxWindow = this.provider?.contextWindow ?? 0;
|
|
98
232
|
this.largeContext = ctxWindow >= 65536;
|
|
99
233
|
this.maxFileChars = this.largeContext ? MAX_FILE_CHARS_LARGE_CTX : MAX_FILE_CHARS_DEFAULT;
|
|
100
|
-
// Larger batches for local large-context models (no per-token cost)
|
|
101
234
|
this.batchSize = this.largeContext ? 15 : 5;
|
|
235
|
+
|
|
236
|
+
// Whether we can use multi-tier Anthropic routing
|
|
237
|
+
this._isAnthropic = this.provider?.name === 'Anthropic';
|
|
102
238
|
}
|
|
103
239
|
|
|
104
240
|
/**
|
|
@@ -106,7 +242,6 @@ export class DeepAnalyzer {
|
|
|
106
242
|
* Returns null if no provider is available.
|
|
107
243
|
*/
|
|
108
244
|
static create(rootPath, options = {}) {
|
|
109
|
-
// --local flag: use Gemma 4 via Ollama (structured output, large context)
|
|
110
245
|
if (options.local) {
|
|
111
246
|
const provider = createProvider('gemma4', null, {
|
|
112
247
|
model: options.model,
|
|
@@ -115,7 +250,6 @@ export class DeepAnalyzer {
|
|
|
115
250
|
return new DeepAnalyzer({ provider, ...options });
|
|
116
251
|
}
|
|
117
252
|
|
|
118
|
-
// Auto-detect from env, honouring explicit --provider / --base-url / --model
|
|
119
253
|
const provider = autoDetectProvider(rootPath, {
|
|
120
254
|
provider: options.provider,
|
|
121
255
|
baseUrl: options.baseUrl,
|
|
@@ -128,7 +262,7 @@ export class DeepAnalyzer {
|
|
|
128
262
|
|
|
129
263
|
/**
|
|
130
264
|
* Analyze findings with LLM-powered taint analysis.
|
|
131
|
-
*
|
|
265
|
+
* Uses multi-tier pipeline when Anthropic is detected; single-tier otherwise.
|
|
132
266
|
*
|
|
133
267
|
* @param {object[]} findings — All findings from agents
|
|
134
268
|
* @param {object} context — { rootPath, recon }
|
|
@@ -137,82 +271,48 @@ export class DeepAnalyzer {
|
|
|
137
271
|
async analyze(findings, context = {}) {
|
|
138
272
|
if (!this.provider) return findings;
|
|
139
273
|
|
|
140
|
-
//
|
|
274
|
+
// Load playbook context once — injected into all LLM calls for this run
|
|
275
|
+
if (context.rootPath) {
|
|
276
|
+
try {
|
|
277
|
+
const PlaybookClass = await getScanPlaybook();
|
|
278
|
+
const playbook = new PlaybookClass(context.rootPath);
|
|
279
|
+
this._playbookContext = playbook.getPromptContext();
|
|
280
|
+
} catch { this._playbookContext = ''; }
|
|
281
|
+
}
|
|
282
|
+
|
|
283
|
+
// Only analyze critical/high findings
|
|
141
284
|
const candidates = findings.filter(
|
|
142
285
|
f => f.severity === 'critical' || f.severity === 'high'
|
|
143
286
|
);
|
|
144
|
-
|
|
145
287
|
if (candidates.length === 0) return findings;
|
|
146
288
|
|
|
147
|
-
// Cap at MAX_FINDINGS
|
|
289
|
+
// Cap at MAX_FINDINGS with budget scaling
|
|
148
290
|
const toAnalyze = candidates.slice(0, MAX_FINDINGS);
|
|
149
|
-
|
|
150
|
-
// Check budget before starting
|
|
151
291
|
const estimatedCost = this._estimateCost(toAnalyze.length);
|
|
152
292
|
if (estimatedCost > this.budgetCents) {
|
|
153
|
-
const affordable = Math.floor(
|
|
154
|
-
this.budgetCents / (estimatedCost / toAnalyze.length)
|
|
155
|
-
);
|
|
293
|
+
const affordable = Math.floor(this.budgetCents / (estimatedCost / toAnalyze.length));
|
|
156
294
|
toAnalyze.length = Math.max(1, affordable);
|
|
157
295
|
}
|
|
158
296
|
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
for (let i = 0; i < toAnalyze.length; i += this.batchSize) {
|
|
163
|
-
// Budget check before each batch
|
|
164
|
-
if (this.spentCents >= this.budgetCents) {
|
|
165
|
-
if (this.verbose) {
|
|
166
|
-
console.log(` Deep analysis: budget exhausted (${this.spentCents}c / ${this.budgetCents}c)`);
|
|
167
|
-
}
|
|
168
|
-
break;
|
|
169
|
-
}
|
|
170
|
-
|
|
171
|
-
const batch = toAnalyze.slice(i, i + this.batchSize);
|
|
172
|
-
const prompt = this._buildPrompt(batch, context);
|
|
173
|
-
|
|
174
|
-
try {
|
|
175
|
-
const response = await this.provider.complete(
|
|
176
|
-
SYSTEM_PROMPT,
|
|
177
|
-
prompt,
|
|
178
|
-
{ maxTokens: 1500 }
|
|
179
|
-
);
|
|
180
|
-
|
|
181
|
-
// Track cost
|
|
182
|
-
const inputTokens = Math.ceil(prompt.length / 4);
|
|
183
|
-
const outputTokens = Math.ceil(response.length / 4);
|
|
184
|
-
this.spentCents += (inputTokens / 1000) * COST_PER_1K_INPUT
|
|
185
|
-
+ (outputTokens / 1000) * COST_PER_1K_OUTPUT;
|
|
186
|
-
|
|
187
|
-
// Parse response
|
|
188
|
-
const analyses = this._parseResponse(response);
|
|
189
|
-
for (const analysis of analyses) {
|
|
190
|
-
results.set(analysis.findingId, analysis);
|
|
191
|
-
}
|
|
192
|
-
|
|
193
|
-
this.analyzedCount += batch.length;
|
|
194
|
-
} catch (err) {
|
|
195
|
-
if (this.verbose) {
|
|
196
|
-
console.log(` Deep analysis batch failed: ${err.message}`);
|
|
197
|
-
}
|
|
198
|
-
// Continue with remaining batches
|
|
199
|
-
}
|
|
200
|
-
}
|
|
297
|
+
const results = this._isAnthropic
|
|
298
|
+
? await this._analyzeTiered(toAnalyze, context)
|
|
299
|
+
: await this._analyzeSingleTier(toAnalyze, context);
|
|
201
300
|
|
|
202
301
|
// Attach deep analysis to findings
|
|
203
302
|
for (const finding of findings) {
|
|
204
303
|
const id = this._findingId(finding);
|
|
205
304
|
const analysis = results.get(id);
|
|
206
|
-
|
|
207
305
|
if (analysis) {
|
|
208
306
|
finding.deepAnalysis = {
|
|
209
|
-
tainted:
|
|
210
|
-
sanitized:
|
|
307
|
+
tainted: analysis.tainted,
|
|
308
|
+
sanitized: analysis.sanitized,
|
|
211
309
|
exploitability: analysis.exploitability,
|
|
212
|
-
reasoning:
|
|
310
|
+
reasoning: analysis.reasoning,
|
|
311
|
+
...(analysis.attackVector ? { attackVector: analysis.attackVector } : {}),
|
|
312
|
+
...(analysis.businessImpact ? { businessImpact: analysis.businessImpact } : {}),
|
|
313
|
+
...(analysis.fix ? { fix: analysis.fix } : {}),
|
|
213
314
|
};
|
|
214
315
|
|
|
215
|
-
// Adjust confidence based on deep analysis
|
|
216
316
|
if (analysis.exploitability === 'false_positive') {
|
|
217
317
|
finding.confidence = 'low';
|
|
218
318
|
} else if (analysis.exploitability === 'unlikely') {
|
|
@@ -226,16 +326,106 @@ export class DeepAnalyzer {
|
|
|
226
326
|
return findings;
|
|
227
327
|
}
|
|
228
328
|
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
329
|
+
// ===========================================================================
|
|
330
|
+
// MULTI-TIER PIPELINE (Anthropic only)
|
|
331
|
+
// ===========================================================================
|
|
332
|
+
|
|
333
|
+
async _analyzeTiered(findings, context) {
|
|
334
|
+
const results = new Map();
|
|
335
|
+
|
|
336
|
+
// ── Tier 1: Haiku triage ────────────────────────────────────────────────
|
|
337
|
+
if (this.verbose) console.log(` [Tier 1] Triaging ${findings.length} findings with Haiku...`);
|
|
338
|
+
|
|
339
|
+
const triageMap = await this._runTriage(findings, context);
|
|
340
|
+
|
|
341
|
+
const toReview = findings.filter(f => triageMap.get(this._findingId(f)) === 'review');
|
|
342
|
+
const toEscalate = findings.filter(f => triageMap.get(this._findingId(f)) === 'escalate');
|
|
343
|
+
const skipped = findings.length - toReview.length - toEscalate.length;
|
|
344
|
+
|
|
345
|
+
this._skippedCount += skipped;
|
|
346
|
+
|
|
347
|
+
if (this.verbose) {
|
|
348
|
+
console.log(` [Tier 1] Results: ${toEscalate.length} escalate, ${toReview.length} review, ${skipped} skip`);
|
|
349
|
+
}
|
|
350
|
+
|
|
351
|
+
// ── Tier 2: Sonnet deep analysis ────────────────────────────────────────
|
|
352
|
+
if (toReview.length > 0 && this.spentCents < this.budgetCents) {
|
|
353
|
+
if (this.verbose) console.log(` [Tier 2] Deep-analyzing ${toReview.length} findings with Sonnet...`);
|
|
354
|
+
const tier2Results = await this._runDeepAnalysis(toReview, context, TIER2_MODEL);
|
|
355
|
+
for (const [id, analysis] of tier2Results) results.set(id, analysis);
|
|
356
|
+
this._tier2Count += toReview.length;
|
|
357
|
+
}
|
|
236
358
|
|
|
237
|
-
|
|
238
|
-
|
|
359
|
+
// ── Tier 3: Opus exploit chain ──────────────────────────────────────────
|
|
360
|
+
if (toEscalate.length > 0 && this.spentCents < this.budgetCents) {
|
|
361
|
+
if (this.verbose) console.log(` [Tier 3] Running exploit-chain analysis on ${toEscalate.length} findings with Opus...`);
|
|
362
|
+
const tier3Results = await this._runExploitChain(toEscalate, context);
|
|
363
|
+
for (const [id, analysis] of tier3Results) results.set(id, analysis);
|
|
364
|
+
this._tier3Count += toEscalate.length;
|
|
365
|
+
}
|
|
366
|
+
|
|
367
|
+
this.analyzedCount += findings.length - skipped;
|
|
368
|
+
return results;
|
|
369
|
+
}
|
|
370
|
+
|
|
371
|
+
/** Tier 1: quick triage — returns Map<findingId, 'skip'|'review'|'escalate'> */
|
|
372
|
+
async _runTriage(findings, context) {
|
|
373
|
+
const triageMap = new Map();
|
|
374
|
+
// Default everything to 'review' so nothing is silently dropped on error
|
|
375
|
+
for (const f of findings) triageMap.set(this._findingId(f), 'review');
|
|
376
|
+
|
|
377
|
+
const batchSize = 10; // Haiku can handle larger batches
|
|
378
|
+
for (let i = 0; i < findings.length; i += batchSize) {
|
|
379
|
+
if (this.spentCents >= this.budgetCents) break;
|
|
380
|
+
const batch = findings.slice(i, i + batchSize);
|
|
381
|
+
// Tier 1 is about cheap, fast signal — no file context, metadata only.
|
|
382
|
+
// File context is fetched only in Tier 2+ where it's worth the token cost.
|
|
383
|
+
const items = batch.map(f => ({
|
|
384
|
+
findingId: this._findingId(f),
|
|
385
|
+
rule: f.rule,
|
|
386
|
+
severity: f.severity,
|
|
387
|
+
title: f.title,
|
|
388
|
+
file: f.file ? path.basename(f.file) : 'unknown',
|
|
389
|
+
line: f.line,
|
|
390
|
+
matched: (f.matched || '').slice(0, 200),
|
|
391
|
+
description: (f.description || '').slice(0, 120),
|
|
392
|
+
}));
|
|
393
|
+
|
|
394
|
+
const prompt = `Triage these ${items.length} security findings. For each, decide: "skip" (obvious false-positive), "review" (needs deeper analysis), or "escalate" (confirmed critical, clear user-input-to-dangerous-sink path).\n\nFindings:\n${JSON.stringify(items, null, 2)}`;
|
|
395
|
+
|
|
396
|
+
try {
|
|
397
|
+
const result = await this.provider.completeWithTools(
|
|
398
|
+
TRIAGE_SYSTEM,
|
|
399
|
+
prompt,
|
|
400
|
+
'triage_findings',
|
|
401
|
+
TRIAGE_SCHEMA,
|
|
402
|
+
{ maxTokens: 1024, model: TIER1_MODEL }
|
|
403
|
+
);
|
|
404
|
+
|
|
405
|
+
this._trackCost(prompt.length, JSON.stringify(result || '').length);
|
|
406
|
+
|
|
407
|
+
for (const item of (result?.results ?? [])) {
|
|
408
|
+
if (triageMap.has(item.findingId)) {
|
|
409
|
+
triageMap.set(item.findingId, item.tier);
|
|
410
|
+
}
|
|
411
|
+
}
|
|
412
|
+
} catch (err) {
|
|
413
|
+
if (this.verbose) console.log(` [Tier 1] Batch failed: ${err.message}`);
|
|
414
|
+
}
|
|
415
|
+
}
|
|
416
|
+
|
|
417
|
+
return triageMap;
|
|
418
|
+
}
|
|
419
|
+
|
|
420
|
+
/** Tier 2: deep taint analysis — returns Map<findingId, analysis> */
|
|
421
|
+
async _runDeepAnalysis(findings, context, model = TIER2_MODEL) {
|
|
422
|
+
const results = new Map();
|
|
423
|
+
|
|
424
|
+
for (let i = 0; i < findings.length; i += this.batchSize) {
|
|
425
|
+
if (this.spentCents >= this.budgetCents) break;
|
|
426
|
+
const batch = findings.slice(i, i + this.batchSize);
|
|
427
|
+
const items = batch.map(f => ({
|
|
428
|
+
findingId: this._findingId(f),
|
|
239
429
|
rule: f.rule,
|
|
240
430
|
severity: f.severity,
|
|
241
431
|
title: f.title,
|
|
@@ -243,31 +433,187 @@ export class DeepAnalyzer {
|
|
|
243
433
|
file: f.file ? path.basename(f.file) : 'unknown',
|
|
244
434
|
line: f.line,
|
|
245
435
|
matched: (f.matched || '').slice(0, 200),
|
|
246
|
-
codeContext:
|
|
436
|
+
codeContext: this._getFileContext(f),
|
|
437
|
+
}));
|
|
438
|
+
|
|
439
|
+
let projectContext = this._buildProjectContext(context);
|
|
440
|
+
const prompt = `Analyze these ${items.length} security findings for taint reachability and exploitability.${projectContext}\n\nFindings:\n${JSON.stringify(items, null, 2)}`;
|
|
441
|
+
|
|
442
|
+
try {
|
|
443
|
+
const result = await this.provider.completeWithTools(
|
|
444
|
+
DEEP_SYSTEM,
|
|
445
|
+
prompt,
|
|
446
|
+
'report_analysis',
|
|
447
|
+
DEEP_ANALYSIS_SCHEMA,
|
|
448
|
+
{ maxTokens: 1500, model }
|
|
449
|
+
);
|
|
450
|
+
|
|
451
|
+
this._trackCost(prompt.length, JSON.stringify(result || '').length);
|
|
452
|
+
|
|
453
|
+
for (const item of (result?.results ?? [])) {
|
|
454
|
+
results.set(item.findingId, item);
|
|
455
|
+
}
|
|
456
|
+
} catch (err) {
|
|
457
|
+
if (this.verbose) console.log(` [Tier 2] Batch failed: ${err.message}`);
|
|
458
|
+
// Fallback: try plain text completion + parse
|
|
459
|
+
try {
|
|
460
|
+
const fallbackResult = await this._runSingleTierBatch(batch, context, model);
|
|
461
|
+
for (const [id, analysis] of fallbackResult) results.set(id, analysis);
|
|
462
|
+
} catch { /* ignore */ }
|
|
463
|
+
}
|
|
464
|
+
}
|
|
465
|
+
|
|
466
|
+
return results;
|
|
467
|
+
}
|
|
468
|
+
|
|
469
|
+
/** Tier 3: exploit-chain analysis — returns Map<findingId, analysis> */
|
|
470
|
+
async _runExploitChain(findings, context) {
|
|
471
|
+
const results = new Map();
|
|
472
|
+
|
|
473
|
+
// Single findings per call for maximum depth
|
|
474
|
+
for (const finding of findings) {
|
|
475
|
+
if (this.spentCents >= this.budgetCents) break;
|
|
476
|
+
|
|
477
|
+
const item = {
|
|
478
|
+
findingId: this._findingId(finding),
|
|
479
|
+
rule: finding.rule,
|
|
480
|
+
severity: finding.severity,
|
|
481
|
+
title: finding.title,
|
|
482
|
+
description: finding.description,
|
|
483
|
+
file: finding.file ? path.basename(finding.file) : 'unknown',
|
|
484
|
+
line: finding.line,
|
|
485
|
+
matched: (finding.matched || '').slice(0, 400),
|
|
486
|
+
codeContext: this._getFileContext(finding), // Full context window
|
|
247
487
|
};
|
|
248
|
-
});
|
|
249
488
|
|
|
250
|
-
|
|
251
|
-
|
|
489
|
+
const prompt = `Perform full exploit-chain analysis on this security finding.\n\nFinding:\n${JSON.stringify(item, null, 2)}`;
|
|
490
|
+
|
|
491
|
+
try {
|
|
492
|
+
const result = await this.provider.completeWithTools(
|
|
493
|
+
EXPLOIT_SYSTEM,
|
|
494
|
+
prompt,
|
|
495
|
+
'report_exploit_chain',
|
|
496
|
+
EXPLOIT_SCHEMA,
|
|
497
|
+
{ maxTokens: 2048, model: TIER3_MODEL }
|
|
498
|
+
);
|
|
499
|
+
|
|
500
|
+
this._trackCost(prompt.length, JSON.stringify(result || '').length);
|
|
501
|
+
|
|
502
|
+
for (const analysis of (result?.results ?? [])) {
|
|
503
|
+
results.set(analysis.findingId, analysis);
|
|
504
|
+
}
|
|
505
|
+
} catch (err) {
|
|
506
|
+
if (this.verbose) console.log(` [Tier 3] Failed for ${item.findingId}: ${err.message}`);
|
|
507
|
+
// Fallback to Tier 2 analysis on error
|
|
508
|
+
try {
|
|
509
|
+
const fallback = await this._runDeepAnalysis([finding], context, TIER2_MODEL);
|
|
510
|
+
for (const [id, analysis] of fallback) results.set(id, analysis);
|
|
511
|
+
} catch { /* ignore */ }
|
|
512
|
+
}
|
|
513
|
+
}
|
|
514
|
+
|
|
515
|
+
return results;
|
|
516
|
+
}
|
|
517
|
+
|
|
518
|
+
// ===========================================================================
|
|
519
|
+
// SINGLE-TIER PIPELINE (non-Anthropic providers)
|
|
520
|
+
// ===========================================================================
|
|
521
|
+
|
|
522
|
+
async _analyzeSingleTier(findings, context) {
|
|
523
|
+
const results = new Map();
|
|
524
|
+
|
|
525
|
+
for (let i = 0; i < findings.length; i += this.batchSize) {
|
|
526
|
+
if (this.spentCents >= this.budgetCents) {
|
|
527
|
+
if (this.verbose) console.log(` Deep analysis: budget exhausted (${this.spentCents}c / ${this.budgetCents}c)`);
|
|
528
|
+
break;
|
|
529
|
+
}
|
|
530
|
+
|
|
531
|
+
const batch = findings.slice(i, i + this.batchSize);
|
|
532
|
+
try {
|
|
533
|
+
const batchResults = await this._runSingleTierBatch(batch, context);
|
|
534
|
+
for (const [id, analysis] of batchResults) results.set(id, analysis);
|
|
535
|
+
this.analyzedCount += batch.length;
|
|
536
|
+
} catch (err) {
|
|
537
|
+
if (this.verbose) console.log(` Deep analysis batch failed: ${err.message}`);
|
|
538
|
+
// Continue with remaining batches
|
|
539
|
+
}
|
|
540
|
+
}
|
|
541
|
+
|
|
542
|
+
return results;
|
|
543
|
+
}
|
|
544
|
+
|
|
545
|
+
async _runSingleTierBatch(batch, context, model = null) {
|
|
546
|
+
const results = new Map();
|
|
547
|
+
const prompt = this._buildSingleTierPrompt(batch, context);
|
|
548
|
+
|
|
549
|
+
const response = await this.provider.complete(
|
|
550
|
+
SINGLE_TIER_SYSTEM,
|
|
551
|
+
prompt,
|
|
552
|
+
{ maxTokens: 1500, ...(model ? { model } : {}) }
|
|
553
|
+
);
|
|
554
|
+
|
|
555
|
+
this._trackCost(prompt.length, response.length);
|
|
556
|
+
|
|
557
|
+
const analyses = this._parseTextResponse(response);
|
|
558
|
+
for (const analysis of analyses) {
|
|
559
|
+
results.set(analysis.findingId, analysis);
|
|
560
|
+
}
|
|
561
|
+
return results;
|
|
562
|
+
}
|
|
563
|
+
|
|
564
|
+
_buildSingleTierPrompt(findings, context) {
|
|
565
|
+
const items = findings.map(f => ({
|
|
566
|
+
findingId: this._findingId(f),
|
|
567
|
+
rule: f.rule,
|
|
568
|
+
severity: f.severity,
|
|
569
|
+
title: f.title,
|
|
570
|
+
description: f.description,
|
|
571
|
+
file: f.file ? path.basename(f.file) : 'unknown',
|
|
572
|
+
line: f.line,
|
|
573
|
+
matched: (f.matched || '').slice(0, 200),
|
|
574
|
+
codeContext: this._getFileContext(f),
|
|
575
|
+
}));
|
|
576
|
+
|
|
577
|
+
const projectContext = this._buildProjectContext(context);
|
|
578
|
+
return `Analyze these ${items.length} security findings for taint reachability and exploitability.${projectContext}\n\nFindings:\n${JSON.stringify(items, null, 2)}`;
|
|
579
|
+
}
|
|
580
|
+
|
|
581
|
+
// ===========================================================================
|
|
582
|
+
// HELPERS
|
|
583
|
+
// ===========================================================================
|
|
584
|
+
|
|
585
|
+
_buildProjectContext(context) {
|
|
586
|
+
const parts = [];
|
|
587
|
+
|
|
588
|
+
// Playbook context (accumulated across scans — richer than single-run recon)
|
|
589
|
+
if (context.rootPath) {
|
|
590
|
+
try {
|
|
591
|
+
// Use cached playbook context if available (set by analyze())
|
|
592
|
+
if (this._playbookContext) {
|
|
593
|
+
parts.push(`Repo playbook:\n${this._playbookContext}`);
|
|
594
|
+
}
|
|
595
|
+
} catch { /* ignore */ }
|
|
596
|
+
}
|
|
597
|
+
|
|
598
|
+
// Single-run recon context
|
|
252
599
|
if (context.recon) {
|
|
253
600
|
const r = context.recon;
|
|
254
|
-
const
|
|
255
|
-
if (r.frameworks?.length)
|
|
256
|
-
if (r.databases?.length)
|
|
257
|
-
if (r.authPatterns?.length)
|
|
258
|
-
if (
|
|
601
|
+
const reconParts = [];
|
|
602
|
+
if (r.frameworks?.length) reconParts.push(`Frameworks: ${r.frameworks.join(', ')}`);
|
|
603
|
+
if (r.databases?.length) reconParts.push(`Databases: ${r.databases.join(', ')}`);
|
|
604
|
+
if (r.authPatterns?.length) reconParts.push(`Auth: ${r.authPatterns.join(', ')}`);
|
|
605
|
+
if (reconParts.length) parts.push(reconParts.join('\n'));
|
|
259
606
|
}
|
|
260
607
|
|
|
261
|
-
return
|
|
262
|
-
${projectContext}
|
|
263
|
-
Findings:
|
|
264
|
-
${JSON.stringify(items, null, 2)}`;
|
|
608
|
+
return parts.length ? `\n\nProject context:\n${parts.join('\n\n')}` : '';
|
|
265
609
|
}
|
|
266
610
|
|
|
267
611
|
/**
|
|
268
612
|
* Get file content around the finding for LLM context.
|
|
613
|
+
* @param {object} finding
|
|
614
|
+
* @param {number} windowLines — Lines before/after (default: 20 = 40 line window)
|
|
269
615
|
*/
|
|
270
|
-
_getFileContext(finding) {
|
|
616
|
+
_getFileContext(finding, windowLines = 20) {
|
|
271
617
|
if (!finding.file) return '';
|
|
272
618
|
|
|
273
619
|
try {
|
|
@@ -277,13 +623,10 @@ ${JSON.stringify(items, null, 2)}`;
|
|
|
277
623
|
|
|
278
624
|
let context;
|
|
279
625
|
if (this.largeContext) {
|
|
280
|
-
// Large-context providers (Gemma 4): send the entire file so the model
|
|
281
|
-
// can trace taint flows across functions, not just the immediate window.
|
|
282
626
|
context = lines.map((l, i) => `${i + 1}: ${l}`).join('\n');
|
|
283
627
|
} else {
|
|
284
|
-
|
|
285
|
-
const
|
|
286
|
-
const end = Math.min(lines.length, lineNum + 20);
|
|
628
|
+
const start = Math.max(0, lineNum - windowLines - 1);
|
|
629
|
+
const end = Math.min(lines.length, lineNum + windowLines);
|
|
287
630
|
context = lines.slice(start, end)
|
|
288
631
|
.map((l, i) => `${start + i + 1}: ${l}`)
|
|
289
632
|
.join('\n');
|
|
@@ -299,18 +642,19 @@ ${JSON.stringify(items, null, 2)}`;
|
|
|
299
642
|
}
|
|
300
643
|
}
|
|
301
644
|
|
|
302
|
-
/**
|
|
303
|
-
* Generate a stable ID for a finding.
|
|
304
|
-
*/
|
|
305
645
|
_findingId(finding) {
|
|
306
646
|
const file = finding.file ? path.basename(finding.file) : 'unknown';
|
|
307
647
|
return `${file}:${finding.line}:${finding.rule}`;
|
|
308
648
|
}
|
|
309
649
|
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
650
|
+
_trackCost(promptChars, responseChars) {
|
|
651
|
+
const inputTokens = Math.ceil(promptChars / 4);
|
|
652
|
+
const outputTokens = Math.ceil(responseChars / 4);
|
|
653
|
+
this.spentCents += (inputTokens / 1000) * COST_PER_1K_INPUT
|
|
654
|
+
+ (outputTokens / 1000) * COST_PER_1K_OUTPUT;
|
|
655
|
+
}
|
|
656
|
+
|
|
657
|
+
_parseTextResponse(text) {
|
|
314
658
|
const cleaned = text
|
|
315
659
|
.replace(/^```(?:json)?\s*/i, '')
|
|
316
660
|
.replace(/\s*```\s*$/i, '')
|
|
@@ -319,8 +663,6 @@ ${JSON.stringify(items, null, 2)}`;
|
|
|
319
663
|
try {
|
|
320
664
|
const parsed = JSON.parse(cleaned);
|
|
321
665
|
if (!Array.isArray(parsed)) return [];
|
|
322
|
-
|
|
323
|
-
// Validate each entry
|
|
324
666
|
return parsed.filter(item =>
|
|
325
667
|
item.findingId &&
|
|
326
668
|
typeof item.tainted === 'boolean' &&
|
|
@@ -332,24 +674,22 @@ ${JSON.stringify(items, null, 2)}`;
|
|
|
332
674
|
}
|
|
333
675
|
}
|
|
334
676
|
|
|
335
|
-
/**
|
|
336
|
-
* Estimate cost for analyzing N findings (in cents).
|
|
337
|
-
*/
|
|
338
677
|
_estimateCost(count) {
|
|
339
|
-
const inputCost
|
|
678
|
+
const inputCost = (count * EST_INPUT_TOKENS_PER_FINDING / 1000) * COST_PER_1K_INPUT;
|
|
340
679
|
const outputCost = (count * EST_OUTPUT_TOKENS_PER_FINDING / 1000) * COST_PER_1K_OUTPUT;
|
|
341
680
|
return inputCost + outputCost;
|
|
342
681
|
}
|
|
343
682
|
|
|
344
|
-
/**
|
|
345
|
-
* Get analysis stats.
|
|
346
|
-
*/
|
|
347
683
|
getStats() {
|
|
348
684
|
return {
|
|
349
685
|
analyzedCount: this.analyzedCount,
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
686
|
+
skippedCount: this._skippedCount,
|
|
687
|
+
tier2Count: this._tier2Count,
|
|
688
|
+
tier3Count: this._tier3Count,
|
|
689
|
+
spentCents: Math.round(this.spentCents * 100) / 100,
|
|
690
|
+
budgetCents: this.budgetCents,
|
|
691
|
+
provider: this.provider?.name || 'none',
|
|
692
|
+
multiTier: this._isAnthropic,
|
|
353
693
|
};
|
|
354
694
|
}
|
|
355
695
|
}
|