ship-safe 9.1.0 → 9.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli/agents/deep-analyzer.js +26 -16
- package/cli/agents/orchestrator.js +6 -4
- package/cli/agents/stateful-watcher.js +238 -0
- package/cli/agents/swarm-orchestrator.js +200 -0
- package/cli/bin/ship-safe.js +18 -2
- package/cli/commands/audit.js +2 -5
- package/cli/commands/red-team.js +66 -19
- package/cli/commands/rotate.js +200 -3
- package/cli/commands/team-report.js +415 -0
- package/cli/commands/watch.js +134 -0
- package/cli/providers/llm-provider.js +91 -2
- package/cli/utils/output.js +21 -0
- package/package.json +1 -1
|
@@ -233,8 +233,9 @@ export class DeepAnalyzer {
|
|
|
233
233
|
this.maxFileChars = this.largeContext ? MAX_FILE_CHARS_LARGE_CTX : MAX_FILE_CHARS_DEFAULT;
|
|
234
234
|
this.batchSize = this.largeContext ? 15 : 5;
|
|
235
235
|
|
|
236
|
-
// Whether we can use multi-tier
|
|
236
|
+
// Whether we can use multi-tier structured output routing
|
|
237
237
|
this._isAnthropic = this.provider?.name === 'Anthropic';
|
|
238
|
+
this._supportsTools = this._isAnthropic || this.provider?.supportsStructuredOutput === true;
|
|
238
239
|
}
|
|
239
240
|
|
|
240
241
|
/**
|
|
@@ -294,7 +295,7 @@ export class DeepAnalyzer {
|
|
|
294
295
|
toAnalyze.length = Math.max(1, affordable);
|
|
295
296
|
}
|
|
296
297
|
|
|
297
|
-
const results = this.
|
|
298
|
+
const results = this._supportsTools
|
|
298
299
|
? await this._analyzeTiered(toAnalyze, context)
|
|
299
300
|
: await this._analyzeSingleTier(toAnalyze, context);
|
|
300
301
|
|
|
@@ -333,10 +334,16 @@ export class DeepAnalyzer {
|
|
|
333
334
|
async _analyzeTiered(findings, context) {
|
|
334
335
|
const results = new Map();
|
|
335
336
|
|
|
337
|
+
// Model selection: Anthropic uses tier-specific models; others use provider's default
|
|
338
|
+
const tier1Model = this._isAnthropic ? TIER1_MODEL : null;
|
|
339
|
+
const tier2Model = this._isAnthropic ? TIER2_MODEL : null;
|
|
340
|
+
const tier3Model = this._isAnthropic ? TIER3_MODEL : null;
|
|
341
|
+
const providerLabel = this._isAnthropic ? 'Haiku' : this.provider.name;
|
|
342
|
+
|
|
336
343
|
// ── Tier 1: Haiku triage ────────────────────────────────────────────────
|
|
337
|
-
if (this.verbose) console.log(` [Tier 1] Triaging ${findings.length} findings with
|
|
344
|
+
if (this.verbose) console.log(` [Tier 1] Triaging ${findings.length} findings with ${providerLabel}...`);
|
|
338
345
|
|
|
339
|
-
const triageMap = await this._runTriage(findings, context);
|
|
346
|
+
const triageMap = await this._runTriage(findings, context, tier1Model);
|
|
340
347
|
|
|
341
348
|
const toReview = findings.filter(f => triageMap.get(this._findingId(f)) === 'review');
|
|
342
349
|
const toEscalate = findings.filter(f => triageMap.get(this._findingId(f)) === 'escalate');
|
|
@@ -350,16 +357,18 @@ export class DeepAnalyzer {
|
|
|
350
357
|
|
|
351
358
|
// ── Tier 2: Sonnet deep analysis ────────────────────────────────────────
|
|
352
359
|
if (toReview.length > 0 && this.spentCents < this.budgetCents) {
|
|
353
|
-
|
|
354
|
-
|
|
360
|
+
const tier2Label = this._isAnthropic ? 'Sonnet' : this.provider.name;
|
|
361
|
+
if (this.verbose) console.log(` [Tier 2] Deep-analyzing ${toReview.length} findings with ${tier2Label}...`);
|
|
362
|
+
const tier2Results = await this._runDeepAnalysis(toReview, context, tier2Model);
|
|
355
363
|
for (const [id, analysis] of tier2Results) results.set(id, analysis);
|
|
356
364
|
this._tier2Count += toReview.length;
|
|
357
365
|
}
|
|
358
366
|
|
|
359
367
|
// ── Tier 3: Opus exploit chain ──────────────────────────────────────────
|
|
360
368
|
if (toEscalate.length > 0 && this.spentCents < this.budgetCents) {
|
|
361
|
-
|
|
362
|
-
|
|
369
|
+
const tier3Label = this._isAnthropic ? 'Opus' : this.provider.name;
|
|
370
|
+
if (this.verbose) console.log(` [Tier 3] Running exploit-chain analysis on ${toEscalate.length} findings with ${tier3Label}...`);
|
|
371
|
+
const tier3Results = await this._runExploitChain(toEscalate, context, tier3Model);
|
|
363
372
|
for (const [id, analysis] of tier3Results) results.set(id, analysis);
|
|
364
373
|
this._tier3Count += toEscalate.length;
|
|
365
374
|
}
|
|
@@ -369,7 +378,7 @@ export class DeepAnalyzer {
|
|
|
369
378
|
}
|
|
370
379
|
|
|
371
380
|
/** Tier 1: quick triage — returns Map<findingId, 'skip'|'review'|'escalate'> */
|
|
372
|
-
async _runTriage(findings, context) {
|
|
381
|
+
async _runTriage(findings, context, model = null) {
|
|
373
382
|
const triageMap = new Map();
|
|
374
383
|
// Default everything to 'review' so nothing is silently dropped on error
|
|
375
384
|
for (const f of findings) triageMap.set(this._findingId(f), 'review');
|
|
@@ -399,7 +408,7 @@ export class DeepAnalyzer {
|
|
|
399
408
|
prompt,
|
|
400
409
|
'triage_findings',
|
|
401
410
|
TRIAGE_SCHEMA,
|
|
402
|
-
{ maxTokens: 1024, model:
|
|
411
|
+
{ maxTokens: 1024, ...(model ? { model } : {}) }
|
|
403
412
|
);
|
|
404
413
|
|
|
405
414
|
this._trackCost(prompt.length, JSON.stringify(result || '').length);
|
|
@@ -418,7 +427,7 @@ export class DeepAnalyzer {
|
|
|
418
427
|
}
|
|
419
428
|
|
|
420
429
|
/** Tier 2: deep taint analysis — returns Map<findingId, analysis> */
|
|
421
|
-
async _runDeepAnalysis(findings, context, model =
|
|
430
|
+
async _runDeepAnalysis(findings, context, model = null) {
|
|
422
431
|
const results = new Map();
|
|
423
432
|
|
|
424
433
|
for (let i = 0; i < findings.length; i += this.batchSize) {
|
|
@@ -445,7 +454,7 @@ export class DeepAnalyzer {
|
|
|
445
454
|
prompt,
|
|
446
455
|
'report_analysis',
|
|
447
456
|
DEEP_ANALYSIS_SCHEMA,
|
|
448
|
-
{ maxTokens: 1500, model }
|
|
457
|
+
{ maxTokens: 1500, ...(model ? { model } : {}) }
|
|
449
458
|
);
|
|
450
459
|
|
|
451
460
|
this._trackCost(prompt.length, JSON.stringify(result || '').length);
|
|
@@ -467,7 +476,7 @@ export class DeepAnalyzer {
|
|
|
467
476
|
}
|
|
468
477
|
|
|
469
478
|
/** Tier 3: exploit-chain analysis — returns Map<findingId, analysis> */
|
|
470
|
-
async _runExploitChain(findings, context) {
|
|
479
|
+
async _runExploitChain(findings, context, model = null) {
|
|
471
480
|
const results = new Map();
|
|
472
481
|
|
|
473
482
|
// Single findings per call for maximum depth
|
|
@@ -494,7 +503,7 @@ export class DeepAnalyzer {
|
|
|
494
503
|
prompt,
|
|
495
504
|
'report_exploit_chain',
|
|
496
505
|
EXPLOIT_SCHEMA,
|
|
497
|
-
{ maxTokens: 2048, model:
|
|
506
|
+
{ maxTokens: 2048, ...(model ? { model } : {}) }
|
|
498
507
|
);
|
|
499
508
|
|
|
500
509
|
this._trackCost(prompt.length, JSON.stringify(result || '').length);
|
|
@@ -506,7 +515,7 @@ export class DeepAnalyzer {
|
|
|
506
515
|
if (this.verbose) console.log(` [Tier 3] Failed for ${item.findingId}: ${err.message}`);
|
|
507
516
|
// Fallback to Tier 2 analysis on error
|
|
508
517
|
try {
|
|
509
|
-
const fallback = await this._runDeepAnalysis([finding], context, TIER2_MODEL);
|
|
518
|
+
const fallback = await this._runDeepAnalysis([finding], context, this._isAnthropic ? TIER2_MODEL : null);
|
|
510
519
|
for (const [id, analysis] of fallback) results.set(id, analysis);
|
|
511
520
|
} catch { /* ignore */ }
|
|
512
521
|
}
|
|
@@ -689,7 +698,8 @@ export class DeepAnalyzer {
|
|
|
689
698
|
spentCents: Math.round(this.spentCents * 100) / 100,
|
|
690
699
|
budgetCents: this.budgetCents,
|
|
691
700
|
provider: this.provider?.name || 'none',
|
|
692
|
-
multiTier: this.
|
|
701
|
+
multiTier: this._supportsTools,
|
|
702
|
+
isAnthropic: this._isAnthropic,
|
|
693
703
|
};
|
|
694
704
|
}
|
|
695
705
|
}
|
|
@@ -235,12 +235,14 @@ export class Orchestrator {
|
|
|
235
235
|
const stats = analyzer.getStats();
|
|
236
236
|
if (deepSpinner) {
|
|
237
237
|
if (stats.multiTier) {
|
|
238
|
+
const providerName = analyzer.provider?.name || 'unknown';
|
|
239
|
+
const cascade = stats.isAnthropic !== false ? 'Haiku→Sonnet→Opus' : `${providerName} (3-tier)`;
|
|
238
240
|
const tierNote = stats.tier3Count > 0
|
|
239
|
-
? `, ${stats.tier3Count} escalated to
|
|
240
|
-
: stats.tier2Count > 0 ? `, ${stats.tier2Count} via
|
|
241
|
+
? `, ${stats.tier3Count} escalated to tier-3`
|
|
242
|
+
: stats.tier2Count > 0 ? `, ${stats.tier2Count} via tier-2` : '';
|
|
241
243
|
const skipNote = stats.skippedCount > 0 ? `, ${stats.skippedCount} triaged away` : '';
|
|
242
244
|
deepSpinner.succeed(chalk.green(
|
|
243
|
-
`Deep analysis (
|
|
245
|
+
`Deep analysis (${cascade}): ${stats.analyzedCount} analyzed${tierNote}${skipNote} (${stats.spentCents}¢)`
|
|
244
246
|
));
|
|
245
247
|
} else {
|
|
246
248
|
deepSpinner.succeed(chalk.green(
|
|
@@ -252,7 +254,7 @@ export class Orchestrator {
|
|
|
252
254
|
if (deepSpinner) deepSpinner.fail(chalk.yellow(`Deep analysis failed: ${err.message}`));
|
|
253
255
|
}
|
|
254
256
|
} else if (!quiet) {
|
|
255
|
-
console.log(chalk.gray(' Deep analysis: no LLM provider found (set ANTHROPIC_API_KEY or use --local)'));
|
|
257
|
+
console.log(chalk.gray(' Deep analysis: no LLM provider found (set ANTHROPIC_API_KEY, MOONSHOT_API_KEY, or use --local)'));
|
|
256
258
|
}
|
|
257
259
|
}
|
|
258
260
|
|
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* StatefulWatcher — Persistent K2.6 Security Session
|
|
3
|
+
* ====================================================
|
|
4
|
+
*
|
|
5
|
+
* Keeps a Kimi K2.6 conversation thread open across file-change events.
|
|
6
|
+
* Each scan sends only the changed files' contents (truncated) — not the full codebase — so the model
|
|
7
|
+
* builds understanding incrementally rather than restarting from scratch.
|
|
8
|
+
*
|
|
9
|
+
* Advantages over stateless watch:
|
|
10
|
+
* - No duplicate findings on repeated scans of unchanged files
|
|
11
|
+
* - Model understands which files are already clean vs. risky
|
|
12
|
+
* - Diffs are small → faster, cheaper per event
|
|
13
|
+
* - K2.6's 12h+ session length handles full work sessions without reset
|
|
14
|
+
*
|
|
15
|
+
* USAGE (via watch command):
|
|
16
|
+
* npx ship-safe watch . --deep --stateful
|
|
17
|
+
* npx ship-safe watch . --deep --stateful --provider kimi
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
import fs from 'fs';
|
|
21
|
+
import path from 'path';
|
|
22
|
+
import { autoDetectProvider } from '../providers/llm-provider.js';
|
|
23
|
+
import { createFinding } from './base-agent.js';
|
|
24
|
+
|
|
25
|
+
// Max chars of diff content per event
|
|
26
|
+
const MAX_DIFF_CHARS = 20_000;
|
|
27
|
+
|
|
28
|
+
// =============================================================================
|
|
29
|
+
// STATEFUL WATCHER
|
|
30
|
+
// =============================================================================
|
|
31
|
+
|
|
32
|
+
export class StatefulWatcher {
  /**
   * Keeps one LLM conversation thread alive across file-change events.
   *
   * @param {object}  options
   * @param {object}  options.provider — LLM provider (Kimi preferred)
   * @param {string}  options.rootPath — project root; used to relativize/resolve file paths
   * @param {boolean} options.verbose  — log progress and failures to stdout
   */
  constructor(options = {}) {
    this.provider = options.provider;
    this.rootPath = options.rootPath;
    this.verbose = options.verbose || false;

    // Persistent conversation thread (alternating user/assistant turns).
    // NOTE(review): the thread is never trimmed, so a long session keeps growing
    // the request payload — consider capping history if providers start rejecting it.
    this._messages = [];
    this._scanCount = 0;
    this._baselineSet = false;
  }

  /**
   * Factory: resolve a provider and build a watcher.
   *
   * @param {string} rootPath
   * @param {object} [options]
   * @param {string} [options.provider] — provider name; defaults to 'kimi'
   * @param {string} [options.model]    — explicit model override
   * @param {boolean} [options.verbose]
   * @returns {StatefulWatcher|null} null when no provider could be resolved
   */
  static create(rootPath, options = {}) {
    const providerName = typeof options.provider === 'string' ? options.provider : 'kimi';
    // Only default to the Kimi model when the provider actually is Kimi; forcing
    // 'kimi-k2.6' onto another explicitly requested provider would send it an
    // unknown model id. Other providers fall back to their own default model.
    const model = options.model || (providerName === 'kimi' ? 'kimi-k2.6' : undefined);
    const provider = autoDetectProvider(rootPath, { provider: providerName, model });
    if (!provider) return null;
    return new StatefulWatcher({ provider, rootPath, verbose: options.verbose });
  }

  /**
   * Set the initial baseline — called once on watcher start.
   * Sends a project summary plus (up to 200) file names and records the
   * model's acknowledgement as the first assistant turn.
   *
   * On failure the baseline message is rolled back so the thread is left
   * unchanged and `analyzeChanges` keeps returning [] until a baseline succeeds.
   *
   * @param {object}   recon — Output from ReconAgent
   * @param {string[]} files — All scannable files
   */
  async setBaseline(recon, files) {
    const summary = this._buildReconSummary(recon);
    const fileList = files
      .slice(0, 200)
      .map(f => path.relative(this.rootPath, f))
      .join('\n');

    const baselineMsg = `You are a persistent security monitor for this codebase. I will send you file changes as they happen. For each change, identify new security issues introduced by that specific change.

Project context:
${summary}

File inventory (${files.length} total):
${fileList}

Respond to each update with a JSON array of findings. Use this format:
[{"file":"<relative path>","line":<number>,"severity":"critical|high|medium|low","rule":"<rule-id>","title":"<title>","description":"<description>","remediation":"<fix>"}]

If no new issues are introduced by the change, respond with an empty array: []
Never include issues you already reported in previous messages.`;

    const mark = this._messages.length;
    this._messages.push({ role: 'user', content: baselineMsg });

    try {
      const ack = await this._callProvider('You are a security expert. Acknowledge you understand the codebase context.', this._messages);
      this._messages.push({ role: 'assistant', content: ack });
      this._baselineSet = true;
      if (this.verbose) console.log(`  [Stateful] Baseline set. Provider: ${this.provider.name}`);
    } catch (err) {
      // Drop the unanswered baseline turn so a retry does not duplicate it.
      this._messages.length = mark;
      if (this.verbose) console.log(`  [Stateful] Baseline failed: ${err.message}`);
    }
  }

  /**
   * Analyze a set of changed files. Sends the (truncated) current contents of
   * the changed files to the persistent session.
   *
   * @param {string[]} changedFiles — Absolute paths of changed files
   * @returns {Promise<object[]>} — New findings introduced by this change;
   *   [] when no baseline is set, nothing could be read, or the call failed
   */
  async analyzeChanges(changedFiles) {
    if (!this._baselineSet) return [];
    this._scanCount++;

    const diffs = this._readChanges(changedFiles);
    if (!diffs) return [];

    const updateMsg = `Files changed (scan #${this._scanCount}):\n\n${diffs}\n\nWhat NEW security issues does this change introduce? Reply with the JSON findings array only.`;

    const mark = this._messages.length;
    this._messages.push({ role: 'user', content: updateMsg });

    try {
      const response = await this._callProvider(
        'You are a persistent security monitor. Report only NEW issues from the latest change.',
        this._messages
      );

      this._messages.push({ role: 'assistant', content: response });

      const findings = this._parseFindings(response, changedFiles[0]);
      if (this.verbose && findings.length > 0) {
        console.log(`  [Stateful] Scan #${this._scanCount}: ${findings.length} new finding(s)`);
      }
      return findings;
    } catch (err) {
      // If the provider never answered, remove the dangling user turn; otherwise
      // every later request would carry consecutive unanswered user messages.
      if (this._messages.length === mark + 1) this._messages.length = mark;
      if (this.verbose) console.log(`  [Stateful] Scan failed: ${err.message}`);
      return [];
    }
  }

  /**
   * Read the changed files into one markdown-style bundle.
   * Each file contributes at most 5000 chars and the bundle stops once
   * MAX_DIFF_CHARS of content has been collected. Unreadable files are skipped.
   *
   * @param {string[]} changedFiles — Absolute paths
   * @returns {string|null} bundle text, or null when nothing could be read
   */
  _readChanges(changedFiles) {
    const parts = [];
    let totalChars = 0;

    for (const filePath of changedFiles) {
      if (totalChars >= MAX_DIFF_CHARS) break;
      try {
        const relPath = path.relative(this.rootPath, filePath);
        const content = fs.readFileSync(filePath, 'utf-8');
        const snippet = content.slice(0, Math.min(5000, MAX_DIFF_CHARS - totalChars));
        parts.push(`### ${relPath}\n\`\`\`\n${snippet}\n\`\`\``);
        totalChars += snippet.length;
      } catch { /* skip — file may have been deleted or be unreadable */ }
    }

    return parts.length ? parts.join('\n\n') : null;
  }

  /**
   * Condense recon output into a few "Label: a, b" lines for the baseline prompt.
   *
   * @param {object|null} recon
   * @returns {string}
   */
  _buildReconSummary(recon) {
    if (!recon) return 'No recon data.';
    const parts = [];
    if (recon.frameworks?.length) parts.push(`Frameworks: ${recon.frameworks.join(', ')}`);
    if (recon.databases?.length) parts.push(`Databases: ${recon.databases.join(', ')}`);
    if (recon.authPatterns?.length) parts.push(`Auth: ${recon.authPatterns.join(', ')}`);
    if (recon.languages?.length) parts.push(`Languages: ${recon.languages.join(', ')}`);
    return parts.join('\n') || 'General codebase.';
  }

  /**
   * Send the conversation to the provider and return the reply text.
   *
   * @param {string}   systemPrompt
   * @param {object[]} messages — full thread ({ role, content } entries)
   * @returns {Promise<string>}
   * @throws {Error} on a non-2xx HTTP response (multi-turn path) or whatever
   *   the provider's `complete` throws (fallback path)
   */
  async _callProvider(systemPrompt, messages) {
    // Use multi-turn messages if provider supports it (OpenAI format)
    if (this.provider.baseUrl && typeof this.provider.complete === 'function') {
      const headers = { 'Content-Type': 'application/json' };
      // Avoid sending a literal "Bearer undefined" for key-less providers.
      if (this.provider.apiKey) headers.Authorization = `Bearer ${this.provider.apiKey}`;

      const response = await fetch(this.provider.baseUrl, {
        method: 'POST',
        headers,
        body: JSON.stringify({
          model: this.provider.model,
          max_tokens: 2048,
          messages: [
            { role: 'system', content: systemPrompt },
            ...messages,
          ],
        }),
      });

      if (!response.ok) {
        throw new Error(`${this.provider.name} API error: HTTP ${response.status}`);
      }

      const data = await response.json();
      return data.choices?.[0]?.message?.content || '';
    }

    // Fallback: single-turn (for providers without persistent context)
    const lastMsg = messages[messages.length - 1];
    return this.provider.complete(systemPrompt, lastMsg?.content || '', { maxTokens: 2048 });
  }

  /**
   * Parse the model's reply into findings.
   *
   * Tolerates surrounding whitespace and a ```json fence, ignores entries that
   * are not objects or lack a title/severity, and maps unknown severities to
   * 'medium'. Any parse failure yields [].
   *
   * @param {string} text    — raw model reply
   * @param {string} refFile — file to attribute a finding to when it names none
   * @returns {object[]} findings built with createFinding
   */
  _parseFindings(text, refFile) {
    if (typeof text !== 'string') return [];

    const severities = ['critical', 'high', 'medium', 'low', 'info'];
    // Trim first so the ^/$ anchored fence patterns still match padded replies.
    const cleaned = text
      .trim()
      .replace(/^```(?:json)?\s*/i, '')
      .replace(/\s*```\s*$/i, '')
      .trim();

    try {
      const raw = JSON.parse(cleaned);
      if (!Array.isArray(raw)) return [];

      return raw
        // One malformed entry (e.g. null) must not discard the whole batch.
        .filter(r => r && typeof r === 'object' && r.title && r.severity)
        .map(r => {
          const filePath = r.file
            ? path.resolve(this.rootPath, r.file)
            : refFile || null;
          const severity = typeof r.severity === 'string' ? r.severity.toLowerCase() : '';

          return createFinding({
            file: filePath,
            line: r.line || 0,
            severity: severities.includes(severity) ? severity : 'medium',
            confidence: 'medium',
            rule: r.rule || 'stateful:monitor',
            title: r.title,
            description: r.description || r.title,
            matched: '',
            remediation: r.remediation || '',
            category: 'Stateful Monitor',
          });
        });
    } catch {
      return [];
    }
  }

  /**
   * @returns {{scanCount: number, provider: string, model: string, messageCount: number}}
   */
  getStats() {
    return {
      scanCount: this._scanCount,
      provider: this.provider?.name || 'none',
      model: this.provider?.model || 'unknown',
      messageCount: this._messages.length,
    };
  }
}
|
|
237
|
+
|
|
238
|
+
export default StatefulWatcher;
|
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SwarmOrchestrator — K2.6-Powered Parallel Security Swarm
|
|
3
|
+
* ==========================================================
|
|
4
|
+
*
|
|
5
|
+
* Instead of running 23 agents locally in Node.js (chunks of 6),
|
|
6
|
+
* --swarm sends the entire task to Kimi K2.6 and lets its native
|
|
7
|
+
* 300-agent swarm handle parallel analysis.
|
|
8
|
+
*
|
|
9
|
+
* Each of Ship Safe's 23 attack classes is assigned as an explicit
|
|
10
|
+
* sub-agent role. K2.6 fans out, each sub-agent scans for its class,
|
|
11
|
+
* and results are returned as a consolidated findings array.
|
|
12
|
+
*
|
|
13
|
+
* Output is mapped back to Ship Safe's Finding format so SARIF,
|
|
14
|
+
* HTML reports, and CI exit codes work unchanged.
|
|
15
|
+
*
|
|
16
|
+
* USAGE:
|
|
17
|
+
* npx ship-safe red-team . --swarm
|
|
18
|
+
* npx ship-safe red-team . --swarm --provider kimi
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
import fs from 'fs';
|
|
22
|
+
import path from 'path';
|
|
23
|
+
import { createProvider, autoDetectProvider } from '../providers/llm-provider.js';
|
|
24
|
+
import { ReconAgent } from './recon-agent.js';
|
|
25
|
+
import { createFinding } from './base-agent.js';
|
|
26
|
+
|
|
27
|
+
// =============================================================================
|
|
28
|
+
// AGENT ROLE DEFINITIONS — maps Ship Safe's 23 attack classes to swarm roles
|
|
29
|
+
// =============================================================================
|
|
30
|
+
|
|
31
|
+
const SWARM_ROLES = [
|
|
32
|
+
{ id: 'injection', name: 'Injection Tester', desc: 'SQL injection, command injection, LDAP injection, XPath injection, template injection' },
|
|
33
|
+
{ id: 'auth-bypass', name: 'Auth Bypass Agent', desc: 'Authentication bypass, authorization flaws, privilege escalation, JWT weaknesses' },
|
|
34
|
+
{ id: 'ssrf', name: 'SSRF Prober', desc: 'Server-side request forgery, SSRF via redirects, internal service exposure' },
|
|
35
|
+
{ id: 'supply-chain', name: 'Supply Chain Auditor', desc: 'Dependency confusion, typosquatting, malicious packages, outdated deps with CVEs' },
|
|
36
|
+
{ id: 'config', name: 'Config Auditor', desc: 'Hardcoded secrets, insecure defaults, exposed debug endpoints, misconfigured CORS' },
|
|
37
|
+
{ id: 'llm-redteam', name: 'LLM Red Team', desc: 'Prompt injection, jailbreaks, unsafe LLM output rendering, model inversion' },
|
|
38
|
+
{ id: 'mobile', name: 'Mobile Scanner', desc: 'Insecure data storage, weak crypto, insecure communication, exported components' },
|
|
39
|
+
{ id: 'git-history', name: 'Git History Scanner', desc: 'Secrets committed in git history, deleted files with sensitive data' },
|
|
40
|
+
{ id: 'cicd', name: 'CI/CD Scanner', desc: 'Insecure GitHub Actions, exposed secrets in workflows, artifact poisoning' },
|
|
41
|
+
{ id: 'api-fuzzer', name: 'API Fuzzer', desc: 'Missing input validation, mass assignment, insecure direct object references (IDOR)' },
|
|
42
|
+
{ id: 'supabase-rls', name: 'Supabase RLS Agent', desc: 'Missing row-level security, exposed Supabase service keys, insecure RLS policies' },
|
|
43
|
+
{ id: 'mcp-security', name: 'MCP Security Agent', desc: 'Tool poisoning, MCP server misconfiguration, unsafe tool definitions' },
|
|
44
|
+
{ id: 'agentic-security', name: 'Agentic Security Agent', desc: 'Agentic loop vulnerabilities, unsafe tool use, context window attacks' },
|
|
45
|
+
{ id: 'rag-security', name: 'RAG Security Agent', desc: 'Prompt injection via retrieved documents, data poisoning, retrieval manipulation' },
|
|
46
|
+
{ id: 'pii-compliance', name: 'PII Compliance Agent', desc: 'PII exposure, GDPR/CCPA violations, unencrypted personal data' },
|
|
47
|
+
{ id: 'vibe-coding', name: 'Vibe Coding Agent', desc: 'AI-generated code security issues, hardcoded values from iterative prompting' },
|
|
48
|
+
{ id: 'exception-handler', name: 'Exception Handler Agent', desc: 'Stack traces in responses, error information disclosure, unhandled exceptions' },
|
|
49
|
+
{ id: 'agent-config', name: 'Agent Config Scanner', desc: 'Insecure agent config files (.cursorrules, CLAUDE.md, MCP configs)' },
|
|
50
|
+
{ id: 'memory-poisoning', name: 'Memory Poisoning Agent', desc: 'Malicious content in AI memory stores, embedding poisoning' },
|
|
51
|
+
{ id: 'managed-agent', name: 'Managed Agent Scanner', desc: 'Insecure managed agent platforms, overprivileged agents' },
|
|
52
|
+
{ id: 'hermes-security', name: 'Hermes Security Agent', desc: 'Hermes CLI security, agent tool permissions, orchestrator misconfiguration' },
|
|
53
|
+
{ id: 'agent-attestation', name: 'Agent Attestation Agent', desc: 'Missing agent identity verification, unauthenticated agent-to-agent calls' },
|
|
54
|
+
{ id: 'agentic-supply-chain', name: 'Agentic Supply Chain Agent', desc: 'Compromised AI integrations, OAuth scope creep, MCP server supply chain' },
|
|
55
|
+
];
|
|
56
|
+
|
|
57
|
+
// Max file content to include in the swarm prompt (cost control)
|
|
58
|
+
const MAX_FILE_CHARS = 200_000;
|
|
59
|
+
const MAX_FILES = 100;
|
|
60
|
+
|
|
61
|
+
// =============================================================================
|
|
62
|
+
// SWARM ORCHESTRATOR
|
|
63
|
+
// =============================================================================
|
|
64
|
+
|
|
65
|
+
export class SwarmOrchestrator {
  /**
   * Runs every attack-class role in SWARM_ROLES as a single LLM request and
   * maps the consolidated reply back to Ship Safe findings.
   *
   * @param {object}  options
   * @param {object}  options.provider — LLM provider exposing `complete(system, user, opts)`
   * @param {boolean} options.verbose
   * @param {number}  options.budgetCents — defaults to 200 when omitted
   */
  constructor(options = {}) {
    this.provider = options.provider;
    this.verbose = options.verbose || false;
    this.budgetCents = options.budgetCents ?? 200;
  }

  /**
   * Factory: resolve a provider and build an orchestrator.
   *
   * @param {string} rootPath
   * @param {object} [options] — provider (name), model, verbose, budgetCents
   * @returns {SwarmOrchestrator|null} null when no provider could be resolved
   */
  static create(rootPath, options = {}) {
    const { verbose, budgetCents } = options;

    if (typeof options.provider === 'string') {
      // Explicit provider requested
      const provider = autoDetectProvider(rootPath, { provider: options.provider, model: options.model });
      if (!provider) return null;
      return new SwarmOrchestrator({ provider, verbose, budgetCents });
    }

    // Auto-select: prefer deepseek-flash (1M ctx, cheap) then kimi as fallback
    for (const [providerName, swarmModel] of [
      ['deepseek-flash', 'deepseek-v4-flash'],
      ['kimi', 'moonshot-v1-128k'],
    ]) {
      const provider = autoDetectProvider(rootPath, { provider: providerName, model: swarmModel });
      if (provider) return new SwarmOrchestrator({ provider, verbose, budgetCents });
    }

    return null;
  }

  /**
   * Run the swarm scan against a codebase.
   *
   * @param {string}   rootPath
   * @param {object}   reconData — Output from ReconAgent
   * @param {string[]} files — All scannable files
   * @returns {Promise<object[]>} — findings[]; [] when the reply is not usable JSON
   */
  async run(rootPath, reconData, files) {
    const codeBundle = this._bundleCode(rootPath, files);
    const prompt = this._buildSwarmPrompt(reconData, codeBundle, rootPath);

    const systemPrompt = `You are a security swarm coordinator. You MUST respond with ONLY a valid JSON object — no prose, no markdown, no explanation, no code fences. Your response must start with { and end with }. Deploy all ${SWARM_ROLES.length} sub-agents, each scanning for their attack class, then output the consolidated JSON findings.`;

    // Schema lists `matched` and `info` so it agrees with the prompt's
    // instructions (matched snippet, five-level severity scale).
    const jsonInstruction = '\n\nOutput a JSON object with exactly these keys: {"findings":[{"agentId":"<agent-id>","file":"<relative-path>","line":<number>,"severity":"critical|high|medium|low|info","rule":"<rule-id>","title":"<title>","description":"<description>","matched":"<matched snippet>","remediation":"<fix>"}],"agentSummary":[{"agentId":"<agent-id>","findingCount":<number>,"status":"clean|findings"}]}';

    const text = await this.provider.complete(systemPrompt, prompt + jsonInstruction, { maxTokens: 8192, jsonMode: true });

    let raw = null;
    try {
      // Models sometimes wrap JSON in a fence despite instructions; unwrap it
      // rather than silently reporting zero findings.
      const cleaned = typeof text === 'string'
        ? text.trim().replace(/^```(?:json)?\s*/i, '').replace(/\s*```\s*$/i, '').trim()
        : '';
      raw = JSON.parse(cleaned || '{}');
    } catch {
      if (this.verbose) console.log('  [Swarm] JSON parse failed. Preview:', text?.slice(0, 200));
      raw = null;
    }

    return this._mapFindings(raw?.findings ?? [], rootPath);
  }

  /**
   * Concatenate file contents into one prompt section.
   * Takes the first MAX_FILES files, at most 8000 chars each, and stops once
   * MAX_FILE_CHARS of content has been collected. Unreadable files are skipped.
   *
   * @param {string}   rootPath
   * @param {string[]} files
   * @returns {string}
   */
  _bundleCode(rootPath, files) {
    let bundle = '';
    let totalChars = 0;
    const selected = files.slice(0, MAX_FILES);

    for (const filePath of selected) {
      if (totalChars >= MAX_FILE_CHARS) break;
      try {
        const relPath = path.relative(rootPath, filePath);
        const content = fs.readFileSync(filePath, 'utf-8');
        const snippet = content.slice(0, Math.min(8000, MAX_FILE_CHARS - totalChars));
        bundle += `\n\n### ${relPath}\n\`\`\`\n${snippet}\n\`\`\``;
        totalChars += snippet.length;
      } catch { /* skip unreadable */ }
    }

    return bundle;
  }

  /**
   * Build the user prompt: project context, one line per swarm role,
   * reporting instructions, then the code bundle.
   *
   * @param {object|null} recon
   * @param {string}      codeBundle
   * @param {string}      rootPath — its basename is used as the project name
   * @returns {string}
   */
  _buildSwarmPrompt(recon, codeBundle, rootPath) {
    const projectName = path.basename(rootPath);
    const reconSummary = recon
      ? [
          recon.frameworks?.length ? `Frameworks: ${recon.frameworks.join(', ')}` : '',
          recon.databases?.length ? `Databases: ${recon.databases.join(', ')}` : '',
          recon.authPatterns?.length ? `Auth patterns: ${recon.authPatterns.join(', ')}` : '',
          recon.languages?.length ? `Languages: ${recon.languages.join(', ')}` : '',
        ].filter(Boolean).join('\n')
      : '';

    const agentList = SWARM_ROLES.map((r, i) =>
      `  Sub-agent ${String(i + 1).padStart(2, '0')} [${r.id}] — ${r.name}: ${r.desc}`
    ).join('\n');

    return `# Security Swarm Task: ${projectName}

## Project Context
${reconSummary || 'No recon data available.'}

## Sub-Agent Assignments
Deploy all ${SWARM_ROLES.length} sub-agents in parallel. Each scans for exactly their assigned attack class:

${agentList}

## Instructions
1. Each sub-agent independently analyzes the full codebase for its attack class.
2. For each finding, record: agentId (the sub-agent's id), file path, line number, severity, a rule identifier, title, description, the matched snippet, and remediation advice.
3. Severity scale: critical (exploitable now), high (likely exploitable), medium (potential issue), low (best practice), info (note).
4. Report all findings from all sub-agents in the JSON response, even if the list is long.
5. If a sub-agent finds nothing, include it in agentSummary with status "clean" and findingCount 0.

## Codebase
${codeBundle}`;
  }

  /**
   * Convert raw model findings into Ship Safe findings.
   *
   * Anything that is not an array yields []. Entries that are not objects or
   * have no title are dropped; unknown severities become 'medium'; findings
   * from an unrecognized agentId are categorized as 'SwarmAgent'.
   *
   * @param {*}      rawFindings — value of `findings` from the model's reply
   * @param {string} rootPath    — base for resolving relative file paths
   * @returns {object[]}
   */
  _mapFindings(rawFindings, rootPath) {
    // The model controls this value; never assume it is an array.
    if (!Array.isArray(rawFindings)) return [];

    const severities = ['critical', 'high', 'medium', 'low', 'info'];

    return rawFindings
      .filter(r => r && typeof r === 'object' && r.title)
      .map(r => {
        const role = SWARM_ROLES.find(a => a.id === r.agentId) || { name: 'SwarmAgent', id: r.agentId || 'unknown' };
        const severity = typeof r.severity === 'string' ? r.severity.toLowerCase() : '';

        return createFinding({
          file: r.file ? path.resolve(rootPath, r.file) : null,
          line: r.line || 0,
          severity: severities.includes(severity) ? severity : 'medium',
          confidence: 'medium',
          rule: r.rule || `swarm:${role.id}`,
          title: r.title,
          description: r.description || r.title,
          matched: r.matched || '',
          remediation: r.remediation || '',
          category: role.name,
        });
      });
  }
}
|
|
199
|
+
|
|
200
|
+
export default SwarmOrchestrator;
|
package/cli/bin/ship-safe.js
CHANGED
|
@@ -50,6 +50,7 @@ import { legalCommand } from '../commands/legal.js';
|
|
|
50
50
|
import { runLiveAdvisories } from '../commands/live-advisories.js';
|
|
51
51
|
import { envAuditCommand } from '../commands/env-audit.js';
|
|
52
52
|
import { autofixCommand } from '../commands/autofix.js';
|
|
53
|
+
import { teamReportCommand } from '../commands/team-report.js';
|
|
53
54
|
import { memoryCommand } from '../utils/security-memory.js';
|
|
54
55
|
import { playbookCommand } from '../utils/scan-playbook.js';
|
|
55
56
|
import { listPluginFiles, scaffoldPlugin } from '../utils/plugin-loader.js';
|
|
@@ -175,6 +176,7 @@ program
|
|
|
175
176
|
.command('rotate [path]')
|
|
176
177
|
.description('Revoke and rotate exposed secrets — opens provider dashboards with step-by-step guide')
|
|
177
178
|
.option('--provider <name>', 'Only rotate secrets for a specific provider (e.g. github, stripe, openai)')
|
|
179
|
+
.option('--plan <file>', 'Execute a rotation plan downloaded from shipsafecli.com/rotate')
|
|
178
180
|
.action(rotateCommand);
|
|
179
181
|
|
|
180
182
|
// -----------------------------------------------------------------------------
|
|
@@ -226,7 +228,7 @@ program
|
|
|
226
228
|
.option('--deep', 'LLM-powered taint analysis for critical/high findings')
|
|
227
229
|
.option('--local', 'Use local Ollama model for deep analysis (default: llama3.2)')
|
|
228
230
|
.option('--model <model>', 'LLM model to use for deep/AI analysis')
|
|
229
|
-
.option('--provider <name>', 'LLM provider: anthropic, openai, google, ollama, groq, together, mistral, cohere, deepseek, xai, lmstudio')
|
|
231
|
+
.option('--provider <name>', 'LLM provider: anthropic, openai, google, ollama, groq, together, mistral, cohere, deepseek, xai, kimi, lmstudio')
|
|
230
232
|
.option('--base-url <url>', 'Custom OpenAI-compatible endpoint (e.g. http://localhost:1234/v1/chat/completions)')
|
|
231
233
|
.option('--budget <cents>', 'Max spend in cents for deep analysis (default: 50)', parseInt)
|
|
232
234
|
.option('--verify', 'Check if leaked secrets are still active (probes provider APIs)')
|
|
@@ -264,14 +266,25 @@ program
|
|
|
264
266
|
.option('--no-deps', 'Skip dependency audit')
|
|
265
267
|
.option('--no-ai', 'Skip AI classification')
|
|
266
268
|
.option('--deep', 'LLM-powered taint analysis for critical/high findings')
|
|
269
|
+
.option('--swarm', 'Use Kimi K2.6 native 300-agent swarm instead of local agent execution (requires MOONSHOT_API_KEY)')
|
|
267
270
|
.option('--local', 'Use local Ollama model for deep analysis (default: llama3.2)')
|
|
268
271
|
.option('--model <model>', 'LLM model for deep analysis')
|
|
269
|
-
.option('--provider <name>', 'LLM provider: anthropic, openai, google, ollama, groq, together, mistral, cohere, deepseek, xai, lmstudio')
|
|
272
|
+
.option('--provider <name>', 'LLM provider: anthropic, openai, google, ollama, groq, together, mistral, cohere, deepseek, xai, kimi, lmstudio')
|
|
270
273
|
.option('--base-url <url>', 'Custom OpenAI-compatible endpoint (e.g. http://localhost:1234/v1/chat/completions)')
|
|
271
274
|
.option('--budget <cents>', 'Max spend in cents for deep analysis (default: 50)', parseInt)
|
|
272
275
|
.option('-v, --verbose', 'Verbose output')
|
|
273
276
|
.action(redTeamCommand);
|
|
274
277
|
|
|
278
|
+
// -----------------------------------------------------------------------------
|
|
279
|
+
// TEAM REPORT COMMAND
|
|
280
|
+
// -----------------------------------------------------------------------------
|
|
281
|
+
{
  // Register `team-report [file]`; the handler is teamReportCommand.
  // Block scope keeps the local binding from leaking into the module.
  const teamReport = program.command('team-report [file]');
  teamReport.description('Convert Hermes Agent team output into a professional Ship Safe report');
  teamReport.option('--html [path]', 'Save as HTML report (default: team-report.html)');
  teamReport.option('--json', 'JSON output');
  teamReport.action(teamReportCommand);
}
|
|
287
|
+
|
|
275
288
|
// -----------------------------------------------------------------------------
|
|
276
289
|
// WATCH COMMAND
|
|
277
290
|
// -----------------------------------------------------------------------------
|
|
@@ -281,6 +294,9 @@ program
|
|
|
281
294
|
.option('--poll', 'Use polling mode (for network drives)')
|
|
282
295
|
.option('--configs', 'Watch only agent config files (openclaw.json, .cursorrules, mcp.json, etc.)')
|
|
283
296
|
.option('--deep', 'Run full agent scanning on changes (not just pattern matching)')
|
|
297
|
+
.option('--stateful', 'Keep Kimi K2.6 conversation context between scans for incremental analysis (requires MOONSHOT_API_KEY)')
|
|
298
|
+
.option('--model <model>', 'LLM model for stateful watch (default: kimi-k2.6)')
|
|
299
|
+
.option('--provider <name>', 'LLM provider for stateful watch (default: kimi)')
|
|
284
300
|
.option('--status', 'Show current watch status and exit')
|
|
285
301
|
.option('--threshold <score>', 'Alert when score drops below threshold', parseInt)
|
|
286
302
|
.option('--debounce <ms>', 'Debounce interval in ms (default: 1500)', parseInt)
|