ship-safe 4.2.0 → 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +134 -25
- package/cli/__tests__/agents.test.js +805 -0
- package/cli/agents/agentic-security-agent.js +261 -0
- package/cli/agents/api-fuzzer.js +111 -0
- package/cli/agents/base-agent.js +271 -253
- package/cli/agents/config-auditor.js +71 -0
- package/cli/agents/deep-analyzer.js +333 -0
- package/cli/agents/html-reporter.js +370 -363
- package/cli/agents/index.js +74 -56
- package/cli/agents/injection-tester.js +45 -0
- package/cli/agents/mcp-security-agent.js +358 -0
- package/cli/agents/mobile-scanner.js +6 -0
- package/cli/agents/orchestrator.js +109 -7
- package/cli/agents/pii-compliance-agent.js +301 -0
- package/cli/agents/rag-security-agent.js +204 -0
- package/cli/agents/sbom-generator.js +100 -11
- package/cli/agents/scoring-engine.js +4 -0
- package/cli/agents/supabase-rls-agent.js +154 -0
- package/cli/agents/supply-chain-agent.js +507 -274
- package/cli/agents/verifier-agent.js +292 -0
- package/cli/bin/ship-safe.js +46 -6
- package/cli/commands/audit.js +59 -1
- package/cli/commands/baseline.js +192 -0
- package/cli/commands/ci.js +260 -0
- package/cli/commands/red-team.js +8 -2
- package/cli/index.js +4 -0
- package/cli/utils/autofix-rules.js +74 -0
- package/cli/utils/pdf-generator.js +94 -0
- package/cli/utils/secrets-verifier.js +247 -0
- package/package.json +2 -2
|
@@ -0,0 +1,333 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DeepAnalyzer — LLM-Powered Taint Analysis
|
|
3
|
+
* ============================================
|
|
4
|
+
*
|
|
5
|
+
* Takes critical/high findings nominated by regex scan and sends them
|
|
6
|
+
* to an LLM for deeper analysis: taint reachability, sanitization
|
|
7
|
+
* verification, and exploitability assessment.
|
|
8
|
+
*
|
|
9
|
+
* Supports:
|
|
10
|
+
* - Anthropic API (ANTHROPIC_API_KEY)
|
|
11
|
+
* - OpenAI API (OPENAI_API_KEY)
|
|
12
|
+
* - Google Gemini (GOOGLE_API_KEY)
|
|
13
|
+
* - Ollama local models (--local flag)
|
|
14
|
+
*
|
|
15
|
+
* USAGE:
|
|
16
|
+
* const analyzer = new DeepAnalyzer({ provider, budgetCents: 50 });
|
|
17
|
+
* const enrichedFindings = await analyzer.analyze(findings, context);
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
import fs from 'fs';
|
|
21
|
+
import path from 'path';
|
|
22
|
+
import { createProvider, autoDetectProvider } from '../providers/llm-provider.js';
|
|
23
|
+
|
|
24
|
+
// =============================================================================
// CONSTANTS
// =============================================================================

/** Max file content to send per finding (tokens are expensive) */
const MAX_FILE_CHARS = 4000;

/** Max findings to analyze per run (cost control) */
const MAX_FINDINGS = 30;

/** Approximate cost per 1K input tokens (Haiku pricing) */
// NOTE: expressed in CENTS, not dollars, so all budget arithmetic in
// DeepAnalyzer (budgetCents / spentCents) stays in a single unit.
const COST_PER_1K_INPUT = 0.08; // cents
const COST_PER_1K_OUTPUT = 0.4; // cents

/** Estimated tokens per finding analysis */
// Used only for the up-front budget estimate; actual spend is tracked from
// the real prompt/response lengths at a ~4 chars-per-token heuristic.
const EST_INPUT_TOKENS_PER_FINDING = 1500;
const EST_OUTPUT_TOKENS_PER_FINDING = 300;
|
|
41
|
+
|
|
42
|
+
// =============================================================================
// SYSTEM PROMPT
// =============================================================================

// Sent verbatim as the system message for every analysis batch. The model is
// told to reply with a bare JSON array matching this exact schema;
// _parseResponse() validates each entry against it and drops anything
// malformed, so schema changes here must be mirrored there.
const SYSTEM_PROMPT = `You are a security code auditor performing taint analysis. For each finding, determine:

1. **Tainted**: Is the flagged value controllable by an external user (via HTTP request, file upload, CLI args, env vars, database read, etc.)?
2. **Sanitized**: Is there sanitization, validation, or encoding between the source and sink that neutralizes the risk?
3. **Exploitability**: Rate as "confirmed", "likely", "unlikely", or "false_positive".
4. **Reasoning**: One sentence explaining your verdict.

Respond with a JSON array ONLY. No markdown, no explanation outside JSON.

[{
  "findingId": "<id>",
  "tainted": true|false,
  "sanitized": true|false,
  "exploitability": "confirmed"|"likely"|"unlikely"|"false_positive",
  "reasoning": "<one sentence>"
}]

Rules:
- If the value is a hardcoded string literal with no user input path, it is NOT tainted.
- If there is a validation library (zod, joi, yup, ajv) or sanitization function between input and sink, mark sanitized=true.
- If the code is in a test file, example, or documentation, mark as false_positive.
- If you cannot determine taint flow from the provided context, mark exploitability as "unlikely" rather than guessing.
- Be conservative: only mark "confirmed" when there is a clear, unsanitized path from user input to dangerous sink.`;
|
|
69
|
+
|
|
70
|
+
// =============================================================================
// DEEP ANALYZER
// =============================================================================

/**
 * LLM-powered taint analysis over critical/high findings.
 *
 * Mutates the findings it is given: findings the model returned a verdict for
 * get a `deepAnalysis` object attached and may have their `confidence`
 * adjusted based on the exploitability verdict.
 */
export class DeepAnalyzer {
  /**
   * @param {object} options
   * @param {object} options.provider — LLM provider instance (from createProvider);
   *   must expose `complete(system, prompt, opts)` resolving to response text
   * @param {number} options.budgetCents — Max spend in cents (default: 50)
   * @param {boolean} options.verbose — Log analysis progress
   */
  constructor(options = {}) {
    this.provider = options.provider || null;
    // ?? (not ||) so an explicit budget of 0 is honored instead of
    // silently falling back to the default.
    this.budgetCents = options.budgetCents ?? 50;
    this.verbose = options.verbose || false;
    this.spentCents = 0; // running cost estimate in cents
    this.analyzedCount = 0; // findings actually submitted to the LLM
  }

  /**
   * Create a DeepAnalyzer with auto-detected provider.
   * Returns null if no provider is available.
   *
   * @param {string} rootPath — project root, passed to autoDetectProvider
   * @param {object} options — constructor options, plus `local`, `model`,
   *   and `ollamaUrl` for the local-model path
   * @returns {DeepAnalyzer|null}
   */
  static create(rootPath, options = {}) {
    // --local flag: use Ollama (no API key required)
    if (options.local) {
      const provider = createProvider('ollama', null, {
        model: options.model || 'llama3.2',
        baseUrl: options.ollamaUrl || 'http://localhost:11434/api/chat',
      });
      return new DeepAnalyzer({ provider, ...options });
    }

    // Auto-detect from env (API keys)
    const provider = autoDetectProvider(rootPath);
    if (!provider) return null;

    return new DeepAnalyzer({ provider, ...options });
  }

  /**
   * Analyze findings with LLM-powered taint analysis.
   * Only processes critical/high findings to optimize cost.
   *
   * @param {object[]} findings — All findings from agents
   * @param {object} context — { rootPath, recon }
   * @returns {Promise<object[]>} — The same findings array, with deepAnalysis
   *   attached where the model produced a valid verdict
   */
  async analyze(findings, context = {}) {
    if (!this.provider) return findings;

    // Filter to critical/high only — lower severities aren't worth tokens.
    const candidates = findings.filter(
      f => f.severity === 'critical' || f.severity === 'high'
    );

    if (candidates.length === 0) return findings;

    // Cap at MAX_FINDINGS
    const toAnalyze = candidates.slice(0, MAX_FINDINGS);

    // Check budget before starting; if the estimate exceeds it, trim to what
    // we can afford (minimum 1 — the per-batch guard below still prevents
    // any request once the budget is already exhausted).
    const estimatedCost = this._estimateCost(toAnalyze.length);
    if (estimatedCost > this.budgetCents) {
      const affordable = Math.floor(
        this.budgetCents / (estimatedCost / toAnalyze.length)
      );
      toAnalyze.length = Math.max(1, affordable);
    }

    // Batch findings (5 per request to balance cost vs. context)
    const batchSize = 5;
    const results = new Map();

    for (let i = 0; i < toAnalyze.length; i += batchSize) {
      // Budget check before each batch
      if (this.spentCents >= this.budgetCents) {
        if (this.verbose) {
          console.log(` Deep analysis: budget exhausted (${this.spentCents}c / ${this.budgetCents}c)`);
        }
        break;
      }

      const batch = toAnalyze.slice(i, i + batchSize);
      const prompt = this._buildPrompt(batch, context);

      try {
        const response = await this.provider.complete(
          SYSTEM_PROMPT,
          prompt,
          { maxTokens: 1500 }
        );

        // Track cost with the ~4 chars/token heuristic (matches the
        // estimate constants above, which are also in cents).
        const inputTokens = Math.ceil(prompt.length / 4);
        const outputTokens = Math.ceil(response.length / 4);
        this.spentCents += (inputTokens / 1000) * COST_PER_1K_INPUT
          + (outputTokens / 1000) * COST_PER_1K_OUTPUT;

        // Parse response and index verdicts by finding ID for the
        // attachment pass below.
        const analyses = this._parseResponse(response);
        for (const analysis of analyses) {
          results.set(analysis.findingId, analysis);
        }

        this.analyzedCount += batch.length;
      } catch (err) {
        if (this.verbose) {
          console.log(` Deep analysis batch failed: ${err.message}`);
        }
        // Continue with remaining batches — one failed request should not
        // abort the whole analysis (best-effort by design).
      }
    }

    // Attach deep analysis to findings
    for (const finding of findings) {
      const id = this._findingId(finding);
      const analysis = results.get(id);

      if (analysis) {
        finding.deepAnalysis = {
          tainted: analysis.tainted,
          sanitized: analysis.sanitized,
          exploitability: analysis.exploitability,
          reasoning: analysis.reasoning,
        };

        // Adjust confidence based on deep analysis:
        // false_positive -> low, unlikely -> demote high to medium,
        // confirmed -> high. "likely" leaves confidence untouched.
        if (analysis.exploitability === 'false_positive') {
          finding.confidence = 'low';
        } else if (analysis.exploitability === 'unlikely') {
          if (finding.confidence === 'high') finding.confidence = 'medium';
        } else if (analysis.exploitability === 'confirmed') {
          finding.confidence = 'high';
        }
      }
    }

    return findings;
  }

  /**
   * Build the analysis prompt for a batch of findings.
   *
   * @param {object[]} findings — batch of findings to describe
   * @param {object} context — optional { recon } project metadata
   * @returns {string} user-message text for the LLM
   */
  _buildPrompt(findings, context) {
    const items = findings.map(f => {
      const id = this._findingId(f);
      const fileContent = this._getFileContext(f);

      return {
        findingId: id,
        rule: f.rule,
        severity: f.severity,
        title: f.title,
        description: f.description,
        // basename only — keeps the prompt small and avoids leaking
        // absolute local paths to the provider
        file: f.file ? path.basename(f.file) : 'unknown',
        line: f.line,
        matched: (f.matched || '').slice(0, 200),
        codeContext: fileContent,
      };
    });

    // Add project context if available (helps the model judge taint paths)
    let projectContext = '';
    if (context.recon) {
      const r = context.recon;
      const parts = [];
      if (r.frameworks?.length) parts.push(`Frameworks: ${r.frameworks.join(', ')}`);
      if (r.databases?.length) parts.push(`Databases: ${r.databases.join(', ')}`);
      if (r.authPatterns?.length) parts.push(`Auth: ${r.authPatterns.join(', ')}`);
      if (parts.length) projectContext = `\nProject context:\n${parts.join('\n')}\n`;
    }

    return `Analyze these ${items.length} security findings for taint reachability and exploitability.
${projectContext}
Findings:
${JSON.stringify(items, null, 2)}`;
  }

  /**
   * Get file content around the finding for LLM context.
   * Returns '' when the file is missing or unreadable (best-effort).
   *
   * @param {object} finding — must have `file`; `line` defaults to 1
   * @returns {string} numbered ~40-line window, truncated to MAX_FILE_CHARS
   */
  _getFileContext(finding) {
    if (!finding.file) return '';

    try {
      const content = fs.readFileSync(finding.file, 'utf-8');
      const lines = content.split('\n');
      const lineNum = finding.line || 1;

      // Get a window of ~40 lines around the finding
      const start = Math.max(0, lineNum - 21);
      const end = Math.min(lines.length, lineNum + 20);
      let context = lines.slice(start, end)
        .map((l, i) => `${start + i + 1}: ${l}`) // 1-based line numbers
        .join('\n');

      // Truncate if too long (tokens are expensive)
      if (context.length > MAX_FILE_CHARS) {
        context = context.slice(0, MAX_FILE_CHARS) + '\n... (truncated)';
      }

      return context;
    } catch {
      return '';
    }
  }

  /**
   * Generate a stable ID for a finding.
   * NOTE(review): basename-based, so two findings with the same basename,
   * line and rule in different directories would collide — acceptable here
   * since the same function is used on both the prompt and attachment sides.
   *
   * @param {object} finding
   * @returns {string} "<basename>:<line>:<rule>"
   */
  _findingId(finding) {
    const file = finding.file ? path.basename(finding.file) : 'unknown';
    return `${file}:${finding.line}:${finding.rule}`;
  }

  /**
   * Parse LLM response into validated analysis objects.
   *
   * Handles markdown code fences around the array, and — because models
   * routinely wrap output in prose despite the "JSON only" instruction —
   * falls back to extracting the outermost [ ... ] span before giving up.
   * Entries that don't match the expected schema are dropped.
   *
   * @param {string} text — raw model output
   * @returns {object[]} valid analyses (empty array on any parse failure)
   */
  _parseResponse(text) {
    // Strip a surrounding markdown code fence, if any.
    const cleaned = text
      .replace(/^```(?:json)?\s*/i, '')
      .replace(/\s*```\s*$/i, '')
      .trim();

    const candidates = [cleaned];

    // Fallback: extract the outermost JSON-array span from prose-wrapped
    // responses (e.g. "Here are the results: [...] Let me know.").
    const arrStart = cleaned.indexOf('[');
    const arrEnd = cleaned.lastIndexOf(']');
    if (arrStart !== -1 && arrEnd > arrStart) {
      candidates.push(cleaned.slice(arrStart, arrEnd + 1));
    }

    for (const candidate of candidates) {
      try {
        const parsed = JSON.parse(candidate);
        if (!Array.isArray(parsed)) continue;

        // Validate each entry against the schema promised by SYSTEM_PROMPT.
        return parsed.filter(item =>
          item.findingId &&
          typeof item.tainted === 'boolean' &&
          typeof item.sanitized === 'boolean' &&
          ['confirmed', 'likely', 'unlikely', 'false_positive'].includes(item.exploitability)
        );
      } catch {
        // Malformed candidate — try the next one.
      }
    }

    return [];
  }

  /**
   * Estimate cost for analyzing N findings (in cents).
   *
   * @param {number} count — number of findings
   * @returns {number} estimated cost in cents
   */
  _estimateCost(count) {
    const inputCost = (count * EST_INPUT_TOKENS_PER_FINDING / 1000) * COST_PER_1K_INPUT;
    const outputCost = (count * EST_OUTPUT_TOKENS_PER_FINDING / 1000) * COST_PER_1K_OUTPUT;
    return inputCost + outputCost;
  }

  /**
   * Get analysis stats.
   *
   * @returns {{analyzedCount: number, spentCents: number, budgetCents: number, provider: string}}
   */
  getStats() {
    return {
      analyzedCount: this.analyzedCount,
      spentCents: Math.round(this.spentCents * 100) / 100, // 2-decimal cents
      budgetCents: this.budgetCents,
      provider: this.provider?.name || 'none',
    };
  }
}

export default DeepAnalyzer;
|