ship-safe 8.0.0 → 9.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,16 +1,33 @@
1
1
  /**
2
- * DeepAnalyzer — LLM-Powered Taint Analysis
3
- * ============================================
2
+ * DeepAnalyzer — Multi-Tier LLM-Powered Taint Analysis
3
+ * ======================================================
4
4
  *
5
- * Takes critical/high findings nominated by regex scan and sends them
6
- * to an LLM for deeper analysis: taint reachability, sanitization
7
- * verification, and exploitability assessment.
5
+ * Hermes-inspired three-tier analysis pipeline:
6
+ *
7
+ * Tier 1 (Haiku / cheap model) — Fast triage of all critical+high findings.
8
+ * Labels each finding: "skip" | "review" | "escalate".
9
+ * Skips obvious false-positives early and cheaply.
10
+ *
11
+ * Tier 2 (Sonnet / mid model) — Deep taint analysis of "review" findings.
12
+ * Full file context, sanitization checking, exploitability rating.
13
+ *
14
+ * Tier 3 (Opus / frontier model) — Full exploit-chain reasoning for "escalate"
15
+ * findings (confirmed critical severity with untrusted input path).
16
+ * Returns attack vector, business impact, and exact fix.
17
+ *
18
+ * When only one provider/model is configured (non-Anthropic), the pipeline falls
19
+ * back gracefully to a single-tier analysis identical to the previous behavior.
20
+ *
21
+ * Structured output:
22
+ * When the provider is AnthropicProvider, all LLM calls use the tool-use API
23
+ * (tool_choice: forced) which guarantees JSON matching the schema — no regex
24
+ * cleanup, no silent dropped findings.
8
25
  *
9
26
  * Supports:
10
- * - Anthropic API (ANTHROPIC_API_KEY)
11
- * - OpenAI API (OPENAI_API_KEY)
12
- * - Google Gemini (GOOGLE_API_KEY)
13
- * - Ollama local models (--local flag)
27
+ * - Anthropic API (ANTHROPIC_API_KEY) — full multi-tier + structured output
28
+ * - OpenAI API (OPENAI_API_KEY) — single-tier, text parsing
29
+ * - Google Gemini (GOOGLE_API_KEY) — single-tier, text parsing
30
+ * - Ollama / Gemma4 (--local) — large context, schema-enforced output
14
31
  *
15
32
  * USAGE:
16
33
  * const analyzer = new DeepAnalyzer({ provider, budgetCents: 50 });
@@ -21,36 +38,151 @@ import fs from 'fs';
21
38
  import path from 'path';
22
39
  import { createProvider, autoDetectProvider } from '../providers/llm-provider.js';
23
40
 
41
// ScanPlaybook is imported on first use instead of at module load: this avoids
// a circular dependency, and the class is only needed once a rootPath is known.
let _ScanPlaybook = null;

/**
 * Resolve the ScanPlaybook class, importing its module on the first call and
 * returning the cached reference on later calls.
 *
 * @returns {Promise<*>} the `ScanPlaybook` export of ../utils/scan-playbook.js
 */
async function getScanPlaybook() {
  if (_ScanPlaybook) return _ScanPlaybook;

  const { ScanPlaybook } = await import('../utils/scan-playbook.js');
  _ScanPlaybook = ScanPlaybook;
  return _ScanPlaybook;
}
50
+
24
51
  // =============================================================================
25
52
  // CONSTANTS
26
53
  // =============================================================================
27
54
 
28
- /** Max file content per finding for standard providers (tokens cost money) */
55
+ /** Max file content per finding for standard providers */
29
56
  const MAX_FILE_CHARS_DEFAULT = 4000;
30
57
 
31
- /**
32
- * Max file content per finding for large-context providers (Gemma 4 128K–256K).
33
- * Sending the full file enables cross-function taint tracing that a 40-line
34
- * window cannot catch.
35
- */
58
+ /** Max file content per finding for large-context providers (Gemma 4 128K–256K) */
36
59
  const MAX_FILE_CHARS_LARGE_CTX = 80000;
37
60
 
38
61
  /** Max findings to analyze per run (cost control) */
39
62
  const MAX_FINDINGS = 30;
40
63
 
41
- /** Approximate cost per 1K input tokens (Haiku pricing) */
42
- const COST_PER_1K_INPUT = 0.08; // cents
43
- const COST_PER_1K_OUTPUT = 0.4; // cents
64
+ // Approximate cost per 1K tokens (Haiku pricing used as baseline)
65
+ const COST_PER_1K_INPUT = 0.08; // cents
66
+ const COST_PER_1K_OUTPUT = 0.4; // cents
44
67
 
45
- /** Estimated tokens per finding analysis */
46
- const EST_INPUT_TOKENS_PER_FINDING = 1500;
68
+ const EST_INPUT_TOKENS_PER_FINDING = 1500;
47
69
  const EST_OUTPUT_TOKENS_PER_FINDING = 300;
48
70
 
71
+ // Multi-tier Anthropic model IDs
72
+ const TIER1_MODEL = 'claude-haiku-4-5-20251001'; // fast triage
73
+ const TIER2_MODEL = 'claude-sonnet-4-6'; // deep analysis
74
+ const TIER3_MODEL = 'claude-opus-4-6'; // exploit chain
75
+
49
76
  // =============================================================================
50
- // SYSTEM PROMPT
77
+ // JSON SCHEMAS — used with Anthropic tool-use for guaranteed output
51
78
  // =============================================================================
52
79
 
53
- const SYSTEM_PROMPT = `You are a security code auditor performing taint analysis. For each finding, determine:
80
/**
 * Wrap a per-finding property map in the `{ results: [...] }` envelope shared
 * by every tier. Each listed property is required (in declaration order) and
 * no additional properties are allowed on an item.
 */
function buildResultsSchema(itemProperties) {
  return {
    type: 'object',
    properties: {
      results: {
        type: 'array',
        items: {
          type: 'object',
          properties: itemProperties,
          required: Object.keys(itemProperties),
          additionalProperties: false,
        },
      },
    },
    required: ['results'],
  };
}

/** Verdict fields common to Tier 2 and Tier 3 (fresh objects on every call). */
function verdictProperties() {
  return {
    findingId: { type: 'string' },
    tainted: { type: 'boolean' },
    sanitized: { type: 'boolean' },
    exploitability: { type: 'string', enum: ['confirmed', 'likely', 'unlikely', 'false_positive'] },
    reasoning: { type: 'string' },
  };
}

/** Tier 1: quick triage schema */
const TRIAGE_SCHEMA = buildResultsSchema({
  findingId: { type: 'string' },
  tier: { type: 'string', enum: ['skip', 'review', 'escalate'] },
  reason: { type: 'string' },
});

/** Tier 2: deep analysis schema */
const DEEP_ANALYSIS_SCHEMA = buildResultsSchema(verdictProperties());

/** Tier 3: exploit-chain schema — the Tier 2 verdict plus attack vector, impact and fix */
const EXPLOIT_SCHEMA = buildResultsSchema({
  ...verdictProperties(),
  attackVector: { type: 'string' },
  businessImpact: { type: 'string' },
  fix: { type: 'string' },
});
149
+
150
+ // =============================================================================
151
+ // SYSTEM PROMPTS
152
+ // =============================================================================
153
+
154
+ const TRIAGE_SYSTEM = `You are a fast security triage assistant. For each finding, quickly decide:
155
+ - "skip" — Obvious false positive (hardcoded literal, test file, sanitized value, documentation).
156
+ - "review" — Needs deeper analysis. Possibly tainted user input reaching a dangerous sink.
157
+ - "escalate" — Clear, unsanitized path from user-controlled input to a critical sink (SQL query, shell exec, file write, deserialization). Escalate only when confident.
158
+
159
+ Be conservative: prefer "review" over "escalate" when unsure.`;
160
+
161
+ const DEEP_SYSTEM = `You are a security code auditor performing taint analysis. For each finding, determine:
162
+
163
+ 1. Tainted: Is the flagged value controllable by an external user (HTTP request, file upload, CLI args, env vars, DB read)?
164
+ 2. Sanitized: Is there sanitization, validation, or encoding between source and sink that neutralizes the risk?
165
+ 3. Exploitability: "confirmed" | "likely" | "unlikely" | "false_positive"
166
+ 4. Reasoning: One concise sentence explaining your verdict.
167
+
168
+ Rules:
169
+ - Hardcoded string literals with no user input path → NOT tainted.
170
+ - Validation library (zod, joi, yup, ajv) or sanitize function between input and sink → sanitized=true.
171
+ - Test/example/documentation file → false_positive.
172
+ - Cannot determine taint flow from provided context → "unlikely".
173
+ - Only "confirmed" when there is a clear, unsanitized path from user input to dangerous sink.`;
174
+
175
+ const EXPLOIT_SYSTEM = `You are an expert security researcher performing full exploit-chain analysis. For each confirmed critical finding:
176
+
177
+ 1. Trace the complete attack vector from attacker-controlled input to dangerous sink.
178
+ 2. Assess the real-world business impact (data breach, account takeover, RCE, etc.).
179
+ 3. Write a precise, actionable fix (code change, library call, or config update).
180
+ 4. Rate exploitability as "confirmed" only if the path is fully unsanitized; otherwise "likely".
181
+
182
+ Be specific. Code references, line numbers, and exact fix suggestions are expected.`;
183
+
184
+ // Fallback system prompt for non-tiered (single provider) analysis
185
+ const SINGLE_TIER_SYSTEM = `You are a security code auditor performing taint analysis. For each finding, determine:
54
186
 
55
187
  1. **Tainted**: Is the flagged value controllable by an external user (via HTTP request, file upload, CLI args, env vars, database read, etc.)?
56
188
  2. **Sanitized**: Is there sanitization, validation, or encoding between the source and sink that neutralizes the risk?
@@ -81,9 +213,9 @@ Rules:
81
213
  export class DeepAnalyzer {
82
214
  /**
83
215
  * @param {object} options
84
- * @param {object} options.provider — LLM provider instance (from createProvider)
85
- * @param {number} options.budgetCents — Max spend in cents (default: 50)
86
- * @param {boolean} options.verbose — Log analysis progress
216
+ * @param {object} options.provider — LLM provider instance (from createProvider)
217
+ * @param {number} options.budgetCents — Max spend in cents (default: 50)
218
+ * @param {boolean} options.verbose — Log analysis progress
87
219
  */
88
220
  constructor(options = {}) {
89
221
  this.provider = options.provider || null;
@@ -91,14 +223,18 @@ export class DeepAnalyzer {
91
223
  this.verbose = options.verbose || false;
92
224
  this.spentCents = 0;
93
225
  this.analyzedCount = 0;
226
+ this._tier2Count = 0;
227
+ this._tier3Count = 0;
228
+ this._skippedCount = 0;
94
229
 
95
- // If the provider advertises a large context window (Gemma 4, etc.),
96
- // increase file context and batch size to take full advantage.
230
+ // Large-context mode for local models (Gemma 4, etc.)
97
231
  const ctxWindow = this.provider?.contextWindow ?? 0;
98
232
  this.largeContext = ctxWindow >= 65536;
99
233
  this.maxFileChars = this.largeContext ? MAX_FILE_CHARS_LARGE_CTX : MAX_FILE_CHARS_DEFAULT;
100
- // Larger batches for local large-context models (no per-token cost)
101
234
  this.batchSize = this.largeContext ? 15 : 5;
235
+
236
+ // Whether we can use multi-tier Anthropic routing
237
+ this._isAnthropic = this.provider?.name === 'Anthropic';
102
238
  }
103
239
 
104
240
  /**
@@ -106,7 +242,6 @@ export class DeepAnalyzer {
106
242
  * Returns null if no provider is available.
107
243
  */
108
244
  static create(rootPath, options = {}) {
109
- // --local flag: use Gemma 4 via Ollama (structured output, large context)
110
245
  if (options.local) {
111
246
  const provider = createProvider('gemma4', null, {
112
247
  model: options.model,
@@ -115,7 +250,6 @@ export class DeepAnalyzer {
115
250
  return new DeepAnalyzer({ provider, ...options });
116
251
  }
117
252
 
118
- // Auto-detect from env, honouring explicit --provider / --base-url / --model
119
253
  const provider = autoDetectProvider(rootPath, {
120
254
  provider: options.provider,
121
255
  baseUrl: options.baseUrl,
@@ -128,7 +262,7 @@ export class DeepAnalyzer {
128
262
 
129
263
  /**
130
264
  * Analyze findings with LLM-powered taint analysis.
131
- * Only processes critical/high findings to optimize cost.
265
+ * Uses multi-tier pipeline when Anthropic is detected; single-tier otherwise.
132
266
  *
133
267
  * @param {object[]} findings — All findings from agents
134
268
  * @param {object} context — { rootPath, recon }
@@ -137,82 +271,48 @@ export class DeepAnalyzer {
137
271
  async analyze(findings, context = {}) {
138
272
  if (!this.provider) return findings;
139
273
 
140
- // Filter to critical/high only
274
+ // Load playbook context once — injected into all LLM calls for this run
275
+ if (context.rootPath) {
276
+ try {
277
+ const PlaybookClass = await getScanPlaybook();
278
+ const playbook = new PlaybookClass(context.rootPath);
279
+ this._playbookContext = playbook.getPromptContext();
280
+ } catch { this._playbookContext = ''; }
281
+ }
282
+
283
+ // Only analyze critical/high findings
141
284
  const candidates = findings.filter(
142
285
  f => f.severity === 'critical' || f.severity === 'high'
143
286
  );
144
-
145
287
  if (candidates.length === 0) return findings;
146
288
 
147
- // Cap at MAX_FINDINGS
289
+ // Cap at MAX_FINDINGS with budget scaling
148
290
  const toAnalyze = candidates.slice(0, MAX_FINDINGS);
149
-
150
- // Check budget before starting
151
291
  const estimatedCost = this._estimateCost(toAnalyze.length);
152
292
  if (estimatedCost > this.budgetCents) {
153
- const affordable = Math.floor(
154
- this.budgetCents / (estimatedCost / toAnalyze.length)
155
- );
293
+ const affordable = Math.floor(this.budgetCents / (estimatedCost / toAnalyze.length));
156
294
  toAnalyze.length = Math.max(1, affordable);
157
295
  }
158
296
 
159
- // Batch findings — larger batches for large-context providers (Gemma 4 etc.)
160
- const results = new Map();
161
-
162
- for (let i = 0; i < toAnalyze.length; i += this.batchSize) {
163
- // Budget check before each batch
164
- if (this.spentCents >= this.budgetCents) {
165
- if (this.verbose) {
166
- console.log(` Deep analysis: budget exhausted (${this.spentCents}c / ${this.budgetCents}c)`);
167
- }
168
- break;
169
- }
170
-
171
- const batch = toAnalyze.slice(i, i + this.batchSize);
172
- const prompt = this._buildPrompt(batch, context);
173
-
174
- try {
175
- const response = await this.provider.complete(
176
- SYSTEM_PROMPT,
177
- prompt,
178
- { maxTokens: 1500 }
179
- );
180
-
181
- // Track cost
182
- const inputTokens = Math.ceil(prompt.length / 4);
183
- const outputTokens = Math.ceil(response.length / 4);
184
- this.spentCents += (inputTokens / 1000) * COST_PER_1K_INPUT
185
- + (outputTokens / 1000) * COST_PER_1K_OUTPUT;
186
-
187
- // Parse response
188
- const analyses = this._parseResponse(response);
189
- for (const analysis of analyses) {
190
- results.set(analysis.findingId, analysis);
191
- }
192
-
193
- this.analyzedCount += batch.length;
194
- } catch (err) {
195
- if (this.verbose) {
196
- console.log(` Deep analysis batch failed: ${err.message}`);
197
- }
198
- // Continue with remaining batches
199
- }
200
- }
297
+ const results = this._isAnthropic
298
+ ? await this._analyzeTiered(toAnalyze, context)
299
+ : await this._analyzeSingleTier(toAnalyze, context);
201
300
 
202
301
  // Attach deep analysis to findings
203
302
  for (const finding of findings) {
204
303
  const id = this._findingId(finding);
205
304
  const analysis = results.get(id);
206
-
207
305
  if (analysis) {
208
306
  finding.deepAnalysis = {
209
- tainted: analysis.tainted,
210
- sanitized: analysis.sanitized,
307
+ tainted: analysis.tainted,
308
+ sanitized: analysis.sanitized,
211
309
  exploitability: analysis.exploitability,
212
- reasoning: analysis.reasoning,
310
+ reasoning: analysis.reasoning,
311
+ ...(analysis.attackVector ? { attackVector: analysis.attackVector } : {}),
312
+ ...(analysis.businessImpact ? { businessImpact: analysis.businessImpact } : {}),
313
+ ...(analysis.fix ? { fix: analysis.fix } : {}),
213
314
  };
214
315
 
215
- // Adjust confidence based on deep analysis
216
316
  if (analysis.exploitability === 'false_positive') {
217
317
  finding.confidence = 'low';
218
318
  } else if (analysis.exploitability === 'unlikely') {
@@ -226,16 +326,106 @@ export class DeepAnalyzer {
226
326
  return findings;
227
327
  }
228
328
 
229
- /**
230
- * Build the analysis prompt for a batch of findings.
231
- */
232
- _buildPrompt(findings, context) {
233
- const items = findings.map(f => {
234
- const id = this._findingId(f);
235
- const fileContent = this._getFileContext(f);
329
+ // ===========================================================================
330
+ // MULTI-TIER PIPELINE (Anthropic only)
331
+ // ===========================================================================
332
+
333
+ async _analyzeTiered(findings, context) {
334
+ const results = new Map();
335
+
336
+ // ── Tier 1: Haiku triage ────────────────────────────────────────────────
337
+ if (this.verbose) console.log(` [Tier 1] Triaging ${findings.length} findings with Haiku...`);
338
+
339
+ const triageMap = await this._runTriage(findings, context);
340
+
341
+ const toReview = findings.filter(f => triageMap.get(this._findingId(f)) === 'review');
342
+ const toEscalate = findings.filter(f => triageMap.get(this._findingId(f)) === 'escalate');
343
+ const skipped = findings.length - toReview.length - toEscalate.length;
344
+
345
+ this._skippedCount += skipped;
346
+
347
+ if (this.verbose) {
348
+ console.log(` [Tier 1] Results: ${toEscalate.length} escalate, ${toReview.length} review, ${skipped} skip`);
349
+ }
350
+
351
+ // ── Tier 2: Sonnet deep analysis ────────────────────────────────────────
352
+ if (toReview.length > 0 && this.spentCents < this.budgetCents) {
353
+ if (this.verbose) console.log(` [Tier 2] Deep-analyzing ${toReview.length} findings with Sonnet...`);
354
+ const tier2Results = await this._runDeepAnalysis(toReview, context, TIER2_MODEL);
355
+ for (const [id, analysis] of tier2Results) results.set(id, analysis);
356
+ this._tier2Count += toReview.length;
357
+ }
236
358
 
237
- return {
238
- findingId: id,
359
+ // ── Tier 3: Opus exploit chain ──────────────────────────────────────────
360
+ if (toEscalate.length > 0 && this.spentCents < this.budgetCents) {
361
+ if (this.verbose) console.log(` [Tier 3] Running exploit-chain analysis on ${toEscalate.length} findings with Opus...`);
362
+ const tier3Results = await this._runExploitChain(toEscalate, context);
363
+ for (const [id, analysis] of tier3Results) results.set(id, analysis);
364
+ this._tier3Count += toEscalate.length;
365
+ }
366
+
367
+ this.analyzedCount += findings.length - skipped;
368
+ return results;
369
+ }
370
+
371
+ /** Tier 1: quick triage — returns Map<findingId, 'skip'|'review'|'escalate'> */
372
+ async _runTriage(findings, context) {
373
+ const triageMap = new Map();
374
+ // Default everything to 'review' so nothing is silently dropped on error
375
+ for (const f of findings) triageMap.set(this._findingId(f), 'review');
376
+
377
+ const batchSize = 10; // Haiku can handle larger batches
378
+ for (let i = 0; i < findings.length; i += batchSize) {
379
+ if (this.spentCents >= this.budgetCents) break;
380
+ const batch = findings.slice(i, i + batchSize);
381
+ // Tier 1 is about cheap, fast signal — no file context, metadata only.
382
+ // File context is fetched only in Tier 2+ where it's worth the token cost.
383
+ const items = batch.map(f => ({
384
+ findingId: this._findingId(f),
385
+ rule: f.rule,
386
+ severity: f.severity,
387
+ title: f.title,
388
+ file: f.file ? path.basename(f.file) : 'unknown',
389
+ line: f.line,
390
+ matched: (f.matched || '').slice(0, 200),
391
+ description: (f.description || '').slice(0, 120),
392
+ }));
393
+
394
+ const prompt = `Triage these ${items.length} security findings. For each, decide: "skip" (obvious false-positive), "review" (needs deeper analysis), or "escalate" (confirmed critical, clear user-input-to-dangerous-sink path).\n\nFindings:\n${JSON.stringify(items, null, 2)}`;
395
+
396
+ try {
397
+ const result = await this.provider.completeWithTools(
398
+ TRIAGE_SYSTEM,
399
+ prompt,
400
+ 'triage_findings',
401
+ TRIAGE_SCHEMA,
402
+ { maxTokens: 1024, model: TIER1_MODEL }
403
+ );
404
+
405
+ this._trackCost(prompt.length, JSON.stringify(result || '').length);
406
+
407
+ for (const item of (result?.results ?? [])) {
408
+ if (triageMap.has(item.findingId)) {
409
+ triageMap.set(item.findingId, item.tier);
410
+ }
411
+ }
412
+ } catch (err) {
413
+ if (this.verbose) console.log(` [Tier 1] Batch failed: ${err.message}`);
414
+ }
415
+ }
416
+
417
+ return triageMap;
418
+ }
419
+
420
+ /** Tier 2: deep taint analysis — returns Map<findingId, analysis> */
421
+ async _runDeepAnalysis(findings, context, model = TIER2_MODEL) {
422
+ const results = new Map();
423
+
424
+ for (let i = 0; i < findings.length; i += this.batchSize) {
425
+ if (this.spentCents >= this.budgetCents) break;
426
+ const batch = findings.slice(i, i + this.batchSize);
427
+ const items = batch.map(f => ({
428
+ findingId: this._findingId(f),
239
429
  rule: f.rule,
240
430
  severity: f.severity,
241
431
  title: f.title,
@@ -243,31 +433,187 @@ export class DeepAnalyzer {
243
433
  file: f.file ? path.basename(f.file) : 'unknown',
244
434
  line: f.line,
245
435
  matched: (f.matched || '').slice(0, 200),
246
- codeContext: fileContent,
436
+ codeContext: this._getFileContext(f),
437
+ }));
438
+
439
+ let projectContext = this._buildProjectContext(context);
440
+ const prompt = `Analyze these ${items.length} security findings for taint reachability and exploitability.${projectContext}\n\nFindings:\n${JSON.stringify(items, null, 2)}`;
441
+
442
+ try {
443
+ const result = await this.provider.completeWithTools(
444
+ DEEP_SYSTEM,
445
+ prompt,
446
+ 'report_analysis',
447
+ DEEP_ANALYSIS_SCHEMA,
448
+ { maxTokens: 1500, model }
449
+ );
450
+
451
+ this._trackCost(prompt.length, JSON.stringify(result || '').length);
452
+
453
+ for (const item of (result?.results ?? [])) {
454
+ results.set(item.findingId, item);
455
+ }
456
+ } catch (err) {
457
+ if (this.verbose) console.log(` [Tier 2] Batch failed: ${err.message}`);
458
+ // Fallback: try plain text completion + parse
459
+ try {
460
+ const fallbackResult = await this._runSingleTierBatch(batch, context, model);
461
+ for (const [id, analysis] of fallbackResult) results.set(id, analysis);
462
+ } catch { /* ignore */ }
463
+ }
464
+ }
465
+
466
+ return results;
467
+ }
468
+
469
+ /** Tier 3: exploit-chain analysis — returns Map<findingId, analysis> */
470
+ async _runExploitChain(findings, context) {
471
+ const results = new Map();
472
+
473
+ // Single findings per call for maximum depth
474
+ for (const finding of findings) {
475
+ if (this.spentCents >= this.budgetCents) break;
476
+
477
+ const item = {
478
+ findingId: this._findingId(finding),
479
+ rule: finding.rule,
480
+ severity: finding.severity,
481
+ title: finding.title,
482
+ description: finding.description,
483
+ file: finding.file ? path.basename(finding.file) : 'unknown',
484
+ line: finding.line,
485
+ matched: (finding.matched || '').slice(0, 400),
486
+ codeContext: this._getFileContext(finding), // Full context window
247
487
  };
248
- });
249
488
 
250
- // Add project context if available
251
- let projectContext = '';
489
+ const prompt = `Perform full exploit-chain analysis on this security finding.\n\nFinding:\n${JSON.stringify(item, null, 2)}`;
490
+
491
+ try {
492
+ const result = await this.provider.completeWithTools(
493
+ EXPLOIT_SYSTEM,
494
+ prompt,
495
+ 'report_exploit_chain',
496
+ EXPLOIT_SCHEMA,
497
+ { maxTokens: 2048, model: TIER3_MODEL }
498
+ );
499
+
500
+ this._trackCost(prompt.length, JSON.stringify(result || '').length);
501
+
502
+ for (const analysis of (result?.results ?? [])) {
503
+ results.set(analysis.findingId, analysis);
504
+ }
505
+ } catch (err) {
506
+ if (this.verbose) console.log(` [Tier 3] Failed for ${item.findingId}: ${err.message}`);
507
+ // Fallback to Tier 2 analysis on error
508
+ try {
509
+ const fallback = await this._runDeepAnalysis([finding], context, TIER2_MODEL);
510
+ for (const [id, analysis] of fallback) results.set(id, analysis);
511
+ } catch { /* ignore */ }
512
+ }
513
+ }
514
+
515
+ return results;
516
+ }
517
+
518
+ // ===========================================================================
519
+ // SINGLE-TIER PIPELINE (non-Anthropic providers)
520
+ // ===========================================================================
521
+
522
+ async _analyzeSingleTier(findings, context) {
523
+ const results = new Map();
524
+
525
+ for (let i = 0; i < findings.length; i += this.batchSize) {
526
+ if (this.spentCents >= this.budgetCents) {
527
+ if (this.verbose) console.log(` Deep analysis: budget exhausted (${this.spentCents}c / ${this.budgetCents}c)`);
528
+ break;
529
+ }
530
+
531
+ const batch = findings.slice(i, i + this.batchSize);
532
+ try {
533
+ const batchResults = await this._runSingleTierBatch(batch, context);
534
+ for (const [id, analysis] of batchResults) results.set(id, analysis);
535
+ this.analyzedCount += batch.length;
536
+ } catch (err) {
537
+ if (this.verbose) console.log(` Deep analysis batch failed: ${err.message}`);
538
+ // Continue with remaining batches
539
+ }
540
+ }
541
+
542
+ return results;
543
+ }
544
+
545
+ async _runSingleTierBatch(batch, context, model = null) {
546
+ const results = new Map();
547
+ const prompt = this._buildSingleTierPrompt(batch, context);
548
+
549
+ const response = await this.provider.complete(
550
+ SINGLE_TIER_SYSTEM,
551
+ prompt,
552
+ { maxTokens: 1500, ...(model ? { model } : {}) }
553
+ );
554
+
555
+ this._trackCost(prompt.length, response.length);
556
+
557
+ const analyses = this._parseTextResponse(response);
558
+ for (const analysis of analyses) {
559
+ results.set(analysis.findingId, analysis);
560
+ }
561
+ return results;
562
+ }
563
+
564
+ _buildSingleTierPrompt(findings, context) {
565
+ const items = findings.map(f => ({
566
+ findingId: this._findingId(f),
567
+ rule: f.rule,
568
+ severity: f.severity,
569
+ title: f.title,
570
+ description: f.description,
571
+ file: f.file ? path.basename(f.file) : 'unknown',
572
+ line: f.line,
573
+ matched: (f.matched || '').slice(0, 200),
574
+ codeContext: this._getFileContext(f),
575
+ }));
576
+
577
+ const projectContext = this._buildProjectContext(context);
578
+ return `Analyze these ${items.length} security findings for taint reachability and exploitability.${projectContext}\n\nFindings:\n${JSON.stringify(items, null, 2)}`;
579
+ }
580
+
581
+ // ===========================================================================
582
+ // HELPERS
583
+ // ===========================================================================
584
+
585
+ _buildProjectContext(context) {
586
+ const parts = [];
587
+
588
+ // Playbook context (accumulated across scans — richer than single-run recon)
589
+ if (context.rootPath) {
590
+ try {
591
+ // Use cached playbook context if available (set by analyze())
592
+ if (this._playbookContext) {
593
+ parts.push(`Repo playbook:\n${this._playbookContext}`);
594
+ }
595
+ } catch { /* ignore */ }
596
+ }
597
+
598
+ // Single-run recon context
252
599
  if (context.recon) {
253
600
  const r = context.recon;
254
- const parts = [];
255
- if (r.frameworks?.length) parts.push(`Frameworks: ${r.frameworks.join(', ')}`);
256
- if (r.databases?.length) parts.push(`Databases: ${r.databases.join(', ')}`);
257
- if (r.authPatterns?.length) parts.push(`Auth: ${r.authPatterns.join(', ')}`);
258
- if (parts.length) projectContext = `\nProject context:\n${parts.join('\n')}\n`;
601
+ const reconParts = [];
602
+ if (r.frameworks?.length) reconParts.push(`Frameworks: ${r.frameworks.join(', ')}`);
603
+ if (r.databases?.length) reconParts.push(`Databases: ${r.databases.join(', ')}`);
604
+ if (r.authPatterns?.length) reconParts.push(`Auth: ${r.authPatterns.join(', ')}`);
605
+ if (reconParts.length) parts.push(reconParts.join('\n'));
259
606
  }
260
607
 
261
- return `Analyze these ${items.length} security findings for taint reachability and exploitability.
262
- ${projectContext}
263
- Findings:
264
- ${JSON.stringify(items, null, 2)}`;
608
+ return parts.length ? `\n\nProject context:\n${parts.join('\n\n')}` : '';
265
609
  }
266
610
 
267
611
  /**
268
612
  * Get file content around the finding for LLM context.
613
+ * @param {object} finding
614
+ * @param {number} windowLines — Lines before/after (default: 20 = 40 line window)
269
615
  */
270
- _getFileContext(finding) {
616
+ _getFileContext(finding, windowLines = 20) {
271
617
  if (!finding.file) return '';
272
618
 
273
619
  try {
@@ -277,13 +623,10 @@ ${JSON.stringify(items, null, 2)}`;
277
623
 
278
624
  let context;
279
625
  if (this.largeContext) {
280
- // Large-context providers (Gemma 4): send the entire file so the model
281
- // can trace taint flows across functions, not just the immediate window.
282
626
  context = lines.map((l, i) => `${i + 1}: ${l}`).join('\n');
283
627
  } else {
284
- // Standard providers: 40-line window around the finding
285
- const start = Math.max(0, lineNum - 21);
286
- const end = Math.min(lines.length, lineNum + 20);
628
+ const start = Math.max(0, lineNum - windowLines - 1);
629
+ const end = Math.min(lines.length, lineNum + windowLines);
287
630
  context = lines.slice(start, end)
288
631
  .map((l, i) => `${start + i + 1}: ${l}`)
289
632
  .join('\n');
@@ -299,18 +642,19 @@ ${JSON.stringify(items, null, 2)}`;
299
642
  }
300
643
  }
301
644
 
302
- /**
303
- * Generate a stable ID for a finding.
304
- */
305
645
  _findingId(finding) {
306
646
  const file = finding.file ? path.basename(finding.file) : 'unknown';
307
647
  return `${file}:${finding.line}:${finding.rule}`;
308
648
  }
309
649
 
310
- /**
311
- * Parse LLM response into analysis objects.
312
- */
313
- _parseResponse(text) {
650
+ _trackCost(promptChars, responseChars) {
651
+ const inputTokens = Math.ceil(promptChars / 4);
652
+ const outputTokens = Math.ceil(responseChars / 4);
653
+ this.spentCents += (inputTokens / 1000) * COST_PER_1K_INPUT
654
+ + (outputTokens / 1000) * COST_PER_1K_OUTPUT;
655
+ }
656
+
657
+ _parseTextResponse(text) {
314
658
  const cleaned = text
315
659
  .replace(/^```(?:json)?\s*/i, '')
316
660
  .replace(/\s*```\s*$/i, '')
@@ -319,8 +663,6 @@ ${JSON.stringify(items, null, 2)}`;
319
663
  try {
320
664
  const parsed = JSON.parse(cleaned);
321
665
  if (!Array.isArray(parsed)) return [];
322
-
323
- // Validate each entry
324
666
  return parsed.filter(item =>
325
667
  item.findingId &&
326
668
  typeof item.tainted === 'boolean' &&
@@ -332,24 +674,22 @@ ${JSON.stringify(items, null, 2)}`;
332
674
  }
333
675
  }
334
676
 
335
- /**
336
- * Estimate cost for analyzing N findings (in cents).
337
- */
338
677
  _estimateCost(count) {
339
- const inputCost = (count * EST_INPUT_TOKENS_PER_FINDING / 1000) * COST_PER_1K_INPUT;
678
+ const inputCost = (count * EST_INPUT_TOKENS_PER_FINDING / 1000) * COST_PER_1K_INPUT;
340
679
  const outputCost = (count * EST_OUTPUT_TOKENS_PER_FINDING / 1000) * COST_PER_1K_OUTPUT;
341
680
  return inputCost + outputCost;
342
681
  }
343
682
 
344
- /**
345
- * Get analysis stats.
346
- */
347
683
  getStats() {
348
684
  return {
349
685
  analyzedCount: this.analyzedCount,
350
- spentCents: Math.round(this.spentCents * 100) / 100,
351
- budgetCents: this.budgetCents,
352
- provider: this.provider?.name || 'none',
686
+ skippedCount: this._skippedCount,
687
+ tier2Count: this._tier2Count,
688
+ tier3Count: this._tier3Count,
689
+ spentCents: Math.round(this.spentCents * 100) / 100,
690
+ budgetCents: this.budgetCents,
691
+ provider: this.provider?.name || 'none',
692
+ multiTier: this._isAnthropic,
353
693
  };
354
694
  }
355
695
  }