ship-safe 9.1.1 → 9.2.0
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/cli/agents/llm-redteam.js +24 -2
- package/cli/agents/stateful-watcher.js +4 -7
- package/cli/agents/swarm-orchestrator.js +27 -65
- package/cli/bin/ship-safe.js +62 -7
- package/cli/commands/agent-fix.js +960 -0
- package/cli/commands/audit.js +24 -11
- package/cli/commands/red-team.js +10 -6
- package/cli/commands/shell.js +415 -0
- package/cli/commands/team-report.js +415 -0
- package/cli/commands/undo.js +143 -0
- package/cli/providers/llm-provider.js +149 -18
- package/cli/utils/output.js +21 -0
- package/package.json +1 -1
|
@@ -142,6 +142,11 @@ const PATTERNS = [
|
|
|
142
142
|
confidence: 'medium',
|
|
143
143
|
description: 'System prompt hardcoded in code. If client-side, users can extract it.',
|
|
144
144
|
fix: 'Keep system prompts server-side only. Load from environment variables or config.',
|
|
145
|
+
// Skip clearly server-side files where defining a system prompt is correct.
|
|
146
|
+
// The rule is only meaningful for code that ships to a browser/client.
|
|
147
|
+
skipFile: (f) => /(?:^|\/)(?:cli|server|backend|api|lib|services|workers|jobs|scripts)\//.test(f.replace(/\\/g, '/'))
|
|
148
|
+
|| /\.(?:server|api)\.(?:js|ts|mjs|cjs|tsx)$/.test(f)
|
|
149
|
+
|| /\/api\//.test(f.replace(/\\/g, '/')),
|
|
145
150
|
},
|
|
146
151
|
|
|
147
152
|
// ── LLM10: Unbounded Consumption ───────────────────────────────────────────
|
|
@@ -219,12 +224,24 @@ const PATTERNS = [
|
|
|
219
224
|
{
|
|
220
225
|
rule: 'PROMPT_INJECTION_PATTERN',
|
|
221
226
|
title: 'Known Prompt Injection Pattern',
|
|
222
|
-
|
|
227
|
+
// The phrase "system prompt" is *not* an injection attack — it's how every
|
|
228
|
+
// LLM developer talks about prompts. Match the actual jailbreak verbs instead.
|
|
229
|
+
regex: /(?:ignore\s+(?:all\s+)?previous\s+instructions|disregard\s+(?:all\s+)?(?:previous|prior)|you\s+are\s+now\s+DAN|jailbreak\s+(?:the|this)|bypass\s+(?:your|the)\s+(?:rules|instructions|guidelines)|reveal\s+your\s+system\s+prompt)/gi,
|
|
223
230
|
severity: 'high',
|
|
224
231
|
cwe: 'CWE-77',
|
|
225
232
|
owasp: 'LLM01',
|
|
226
233
|
description: 'Known prompt injection pattern detected in code. Ensure this is for testing only.',
|
|
227
234
|
fix: 'If in test data, add # ship-safe-ignore. If in user-facing code, add input filtering.',
|
|
235
|
+
// Skip files where the pattern appears intentionally: tests, red-team rules,
|
|
236
|
+
// detection-rule definitions, and security tool source code.
|
|
237
|
+
skipFile: (f) => {
|
|
238
|
+
const p = f.replace(/\\/g, '/');
|
|
239
|
+
return /__tests__\//.test(p)
|
|
240
|
+
|| /\.(?:test|spec)\.(?:js|ts|mjs|cjs|tsx|jsx)$/.test(p)
|
|
241
|
+
|| /(?:^|\/)(?:red-?team|llm-?redteam|prompt-?injection|memory-?poisoning|jailbreak)/.test(p)
|
|
242
|
+
|| /\/agents\/[^/]*(?:redteam|injection|llm)/i.test(p)
|
|
243
|
+
|| /(?:scan-playbook|threat-intel|patterns)\.(?:js|ts)$/.test(p);
|
|
244
|
+
},
|
|
228
245
|
},
|
|
229
246
|
];
|
|
230
247
|
|
|
@@ -242,7 +259,12 @@ export class LLMRedTeam extends BaseAgent {
|
|
|
242
259
|
|
|
243
260
|
let findings = [];
|
|
244
261
|
for (const file of codeFiles) {
|
|
245
|
-
|
|
262
|
+
// Honor per-pattern skipFile predicates so rules that are clearly false
|
|
263
|
+
// positives in known contexts (server-side prompts, redteam test data)
|
|
264
|
+
// never get sent to the agent for "fixing".
|
|
265
|
+
const applicable = PATTERNS.filter(p => !p.skipFile || !p.skipFile(file));
|
|
266
|
+
if (applicable.length === 0) continue;
|
|
267
|
+
findings = findings.concat(this.scanFileWithPatterns(file, applicable));
|
|
246
268
|
}
|
|
247
269
|
return findings;
|
|
248
270
|
}
|
|
@@ -19,7 +19,7 @@
|
|
|
19
19
|
|
|
20
20
|
import fs from 'fs';
|
|
21
21
|
import path from 'path';
|
|
22
|
-
import {
|
|
22
|
+
import { autoDetectProvider } from '../providers/llm-provider.js';
|
|
23
23
|
import { createFinding } from './base-agent.js';
|
|
24
24
|
|
|
25
25
|
// Max chars of diff content per event
|
|
@@ -48,13 +48,10 @@ export class StatefulWatcher {
|
|
|
48
48
|
}
|
|
49
49
|
|
|
50
50
|
static create(rootPath, options = {}) {
|
|
51
|
-
const
|
|
52
|
-
|
|
53
|
-
model: options.model || 'kimi-k2.6',
|
|
54
|
-
});
|
|
55
|
-
|
|
51
|
+
const providerName = typeof options.provider === 'string' ? options.provider : 'kimi';
|
|
52
|
+
const provider = autoDetectProvider(rootPath, { provider: providerName, model: options.model || 'kimi-k2.6' });
|
|
56
53
|
if (!provider) return null;
|
|
57
|
-
return new StatefulWatcher({ provider, rootPath,
|
|
54
|
+
return new StatefulWatcher({ provider, rootPath, verbose: options.verbose });
|
|
58
55
|
}
|
|
59
56
|
|
|
60
57
|
/**
|
|
@@ -76,13 +76,23 @@ export class SwarmOrchestrator {
|
|
|
76
76
|
}
|
|
77
77
|
|
|
78
78
|
static create(rootPath, options = {}) {
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
79
|
+
if (typeof options.provider === 'string') {
|
|
80
|
+
// Explicit provider requested
|
|
81
|
+
const provider = autoDetectProvider(rootPath, { provider: options.provider, model: options.model });
|
|
82
|
+
if (!provider) return null;
|
|
83
|
+
return new SwarmOrchestrator({ provider, verbose: options.verbose, budgetCents: options.budgetCents });
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
// Auto-select: prefer deepseek-flash (1M ctx, cheap) then kimi as fallback
|
|
87
|
+
for (const [providerName, swarmModel] of [
|
|
88
|
+
['deepseek-flash', 'deepseek-v4-flash'],
|
|
89
|
+
['kimi', 'moonshot-v1-128k'],
|
|
90
|
+
]) {
|
|
91
|
+
const provider = autoDetectProvider(rootPath, { provider: providerName, model: swarmModel });
|
|
92
|
+
if (provider) return new SwarmOrchestrator({ provider, verbose: options.verbose, budgetCents: options.budgetCents });
|
|
93
|
+
}
|
|
83
94
|
|
|
84
|
-
|
|
85
|
-
return new SwarmOrchestrator({ provider, ...options });
|
|
95
|
+
return null;
|
|
86
96
|
}
|
|
87
97
|
|
|
88
98
|
/**
|
|
@@ -97,65 +107,17 @@ export class SwarmOrchestrator {
|
|
|
97
107
|
const codeBundle = this._bundleCode(rootPath, files);
|
|
98
108
|
const prompt = this._buildSwarmPrompt(reconData, codeBundle, rootPath);
|
|
99
109
|
|
|
100
|
-
const systemPrompt = `You are a security swarm coordinator. You
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
const
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
properties: {
|
|
112
|
-
agentId: { type: 'string' },
|
|
113
|
-
file: { type: 'string' },
|
|
114
|
-
line: { type: 'integer' },
|
|
115
|
-
severity: { type: 'string', enum: ['critical', 'high', 'medium', 'low', 'info'] },
|
|
116
|
-
rule: { type: 'string' },
|
|
117
|
-
title: { type: 'string' },
|
|
118
|
-
description: { type: 'string' },
|
|
119
|
-
matched: { type: 'string' },
|
|
120
|
-
remediation: { type: 'string' },
|
|
121
|
-
},
|
|
122
|
-
required: ['agentId', 'severity', 'rule', 'title', 'description'],
|
|
123
|
-
additionalProperties: false,
|
|
124
|
-
},
|
|
125
|
-
},
|
|
126
|
-
agentSummary: {
|
|
127
|
-
type: 'array',
|
|
128
|
-
items: {
|
|
129
|
-
type: 'object',
|
|
130
|
-
properties: {
|
|
131
|
-
agentId: { type: 'string' },
|
|
132
|
-
findingCount: { type: 'integer' },
|
|
133
|
-
status: { type: 'string', enum: ['clean', 'findings', 'error'] },
|
|
134
|
-
},
|
|
135
|
-
required: ['agentId', 'findingCount', 'status'],
|
|
136
|
-
additionalProperties: false,
|
|
137
|
-
},
|
|
138
|
-
},
|
|
139
|
-
},
|
|
140
|
-
required: ['findings', 'agentSummary'],
|
|
141
|
-
};
|
|
142
|
-
|
|
143
|
-
let raw;
|
|
144
|
-
if (this.provider.completeWithTools) {
|
|
145
|
-
raw = await this.provider.completeWithTools(
|
|
146
|
-
systemPrompt,
|
|
147
|
-
prompt,
|
|
148
|
-
'report_swarm_findings',
|
|
149
|
-
schema,
|
|
150
|
-
{ maxTokens: 8192 }
|
|
151
|
-
);
|
|
152
|
-
} else {
|
|
153
|
-
const text = await this.provider.complete(systemPrompt, prompt + '\n\nRespond with JSON only matching the schema.', { maxTokens: 8192 });
|
|
154
|
-
try {
|
|
155
|
-
raw = JSON.parse(text.replace(/^```(?:json)?\s*/i, '').replace(/\s*```\s*$/i, '').trim());
|
|
156
|
-
} catch {
|
|
157
|
-
raw = null;
|
|
158
|
-
}
|
|
110
|
+
const systemPrompt = `You are a security swarm coordinator. You MUST respond with ONLY a valid JSON object — no prose, no markdown, no explanation, no code fences. Your response must start with { and end with }. Deploy all ${SWARM_ROLES.length} sub-agents, each scanning for their attack class, then output the consolidated JSON findings.`;
|
|
111
|
+
|
|
112
|
+
const jsonInstruction = '\n\nOutput a JSON object with exactly these keys: {"findings":[{"agentId":"<agent-id>","file":"<relative-path>","line":<number>,"severity":"critical|high|medium|low","rule":"<rule-id>","title":"<title>","description":"<description>","remediation":"<fix>"}],"agentSummary":[{"agentId":"<agent-id>","findingCount":<number>,"status":"clean|findings"}]}';
|
|
113
|
+
|
|
114
|
+
const text = await this.provider.complete(systemPrompt, prompt + jsonInstruction, { maxTokens: 8192, jsonMode: true });
|
|
115
|
+
let raw = null;
|
|
116
|
+
try {
|
|
117
|
+
raw = JSON.parse(text || '{}');
|
|
118
|
+
} catch {
|
|
119
|
+
if (this.verbose) console.log(' [Swarm] JSON parse failed. Preview:', text?.slice(0, 200));
|
|
120
|
+
raw = null;
|
|
159
121
|
}
|
|
160
122
|
|
|
161
123
|
return this._mapFindings(raw?.findings ?? [], rootPath);
|
package/cli/bin/ship-safe.js
CHANGED
|
@@ -29,6 +29,9 @@ import { mcpCommand } from '../commands/mcp.js';
|
|
|
29
29
|
import { remediateCommand } from '../commands/remediate.js';
|
|
30
30
|
import { rotateCommand } from '../commands/rotate.js';
|
|
31
31
|
import { agentCommand } from '../commands/agent.js';
|
|
32
|
+
import { agentFixCommand } from '../commands/agent-fix.js';
|
|
33
|
+
import { undoCommand } from '../commands/undo.js';
|
|
34
|
+
import { shellCommand } from '../commands/shell.js';
|
|
32
35
|
import { depsCommand } from '../commands/deps.js';
|
|
33
36
|
import { scoreCommand } from '../commands/score.js';
|
|
34
37
|
import { redTeamCommand } from '../commands/red-team.js';
|
|
@@ -50,6 +53,7 @@ import { legalCommand } from '../commands/legal.js';
|
|
|
50
53
|
import { runLiveAdvisories } from '../commands/live-advisories.js';
|
|
51
54
|
import { envAuditCommand } from '../commands/env-audit.js';
|
|
52
55
|
import { autofixCommand } from '../commands/autofix.js';
|
|
56
|
+
import { teamReportCommand } from '../commands/team-report.js';
|
|
53
57
|
import { memoryCommand } from '../utils/security-memory.js';
|
|
54
58
|
import { playbookCommand } from '../utils/scan-playbook.js';
|
|
55
59
|
import { listPluginFiles, scaffoldPlugin } from '../utils/plugin-loader.js';
|
|
@@ -183,10 +187,46 @@ program
|
|
|
183
187
|
// -----------------------------------------------------------------------------
|
|
184
188
|
program
|
|
185
189
|
.command('agent [path]')
|
|
186
|
-
.description('
|
|
187
|
-
.option('--
|
|
188
|
-
.option('--
|
|
189
|
-
.
|
|
190
|
+
.description('Interactive security agent: scan, plan each fix, ask before changing, verify the fix worked')
|
|
191
|
+
.option('--plan-only', 'Generate plans for review but never write changes')
|
|
192
|
+
.option('--severity <level>', 'Minimum severity to fix (critical|high|medium|low)', 'low')
|
|
193
|
+
.option('--provider <name>', 'LLM provider: deepseek-flash | deepseek | openai | kimi | anthropic')
|
|
194
|
+
.option('--model <model>', 'Specific model name to use')
|
|
195
|
+
.option('--think', 'Enable extended thinking (GPT-5.5 reasoning_effort:high, Claude extended thinking)')
|
|
196
|
+
.option('--allow-dirty', 'Allow running with uncommitted changes in the working tree')
|
|
197
|
+
.option('--branch [name]', 'Create a branch and commit one fix per file (default name: ship-safe/fixes-<timestamp>)')
|
|
198
|
+
.option('--pr', 'After fixing, push the branch and open a pull request via gh CLI (requires --branch)')
|
|
199
|
+
.option('--yolo', 'Auto-accept every plan without prompting (use with caution; pairs well with --branch)')
|
|
200
|
+
.option('--auto-low', 'Auto-accept plans marked risk:low; prompt for medium/high')
|
|
201
|
+
.option('--sandbox', 'Verify each fix in a Docker sandbox (not yet implemented)')
|
|
202
|
+
.option('--legacy', 'Use the legacy non-interactive Claude-only agent')
|
|
203
|
+
.action((targetPath, options) => {
|
|
204
|
+
if (options.legacy) {
|
|
205
|
+
return agentCommand(targetPath, options);
|
|
206
|
+
}
|
|
207
|
+
return agentFixCommand(targetPath, options);
|
|
208
|
+
});
|
|
209
|
+
|
|
210
|
+
// -----------------------------------------------------------------------------
|
|
211
|
+
// UNDO COMMAND
|
|
212
|
+
// -----------------------------------------------------------------------------
|
|
213
|
+
program
|
|
214
|
+
.command('undo [path]')
|
|
215
|
+
.description('Revert the last fix applied by `ship-safe agent` (or all fixes with --all)')
|
|
216
|
+
.option('--all', 'Revert every fix in the log instead of just the last one')
|
|
217
|
+
.option('--dry-run', 'Show what would be reverted without writing anything')
|
|
218
|
+
.action(undoCommand);
|
|
219
|
+
|
|
220
|
+
// -----------------------------------------------------------------------------
|
|
221
|
+
// SHELL COMMAND
|
|
222
|
+
// -----------------------------------------------------------------------------
|
|
223
|
+
program
|
|
224
|
+
.command('shell [path]')
|
|
225
|
+
.description('Interactive REPL: scan, fix, ask questions — all in one session')
|
|
226
|
+
.option('--provider <name>', 'LLM provider: deepseek-flash | deepseek | openai | kimi | anthropic')
|
|
227
|
+
.option('--model <model>', 'Specific model name to use')
|
|
228
|
+
.option('--think', 'Enable extended thinking mode')
|
|
229
|
+
.action(shellCommand);
|
|
190
230
|
|
|
191
231
|
// -----------------------------------------------------------------------------
|
|
192
232
|
// DEPS COMMAND
|
|
@@ -225,6 +265,7 @@ program
|
|
|
225
265
|
.option('--baseline', 'Only show findings not in the baseline')
|
|
226
266
|
.option('--pdf [file]', 'Generate PDF report (requires Chrome/Chromium)')
|
|
227
267
|
.option('--deep', 'LLM-powered taint analysis for critical/high findings')
|
|
268
|
+
.option('--think', 'Enable extended thinking mode (GPT-5.5 reasoning_effort:high, Claude extended thinking)')
|
|
228
269
|
.option('--local', 'Use local Ollama model for deep analysis (default: llama3.2)')
|
|
229
270
|
.option('--model <model>', 'LLM model to use for deep/AI analysis')
|
|
230
271
|
.option('--provider <name>', 'LLM provider: anthropic, openai, google, ollama, groq, together, mistral, cohere, deepseek, xai, kimi, lmstudio')
|
|
@@ -265,7 +306,8 @@ program
|
|
|
265
306
|
.option('--no-deps', 'Skip dependency audit')
|
|
266
307
|
.option('--no-ai', 'Skip AI classification')
|
|
267
308
|
.option('--deep', 'LLM-powered taint analysis for critical/high findings')
|
|
268
|
-
.option('--swarm', 'Use
|
|
309
|
+
.option('--swarm', 'Use AI swarm mode — 23 parallel agents via DeepSeek V4 Flash or Kimi K2.6 (requires DEEPSEEK_API_KEY or MOONSHOT_API_KEY)')
|
|
310
|
+
.option('--think', 'Enable extended thinking mode (GPT-5.5 reasoning_effort:high, Claude extended thinking)')
|
|
269
311
|
.option('--local', 'Use local Ollama model for deep analysis (default: llama3.2)')
|
|
270
312
|
.option('--model <model>', 'LLM model for deep analysis')
|
|
271
313
|
.option('--provider <name>', 'LLM provider: anthropic, openai, google, ollama, groq, together, mistral, cohere, deepseek, xai, kimi, lmstudio')
|
|
@@ -274,6 +316,16 @@ program
|
|
|
274
316
|
.option('-v, --verbose', 'Verbose output')
|
|
275
317
|
.action(redTeamCommand);
|
|
276
318
|
|
|
319
|
+
// -----------------------------------------------------------------------------
|
|
320
|
+
// TEAM REPORT COMMAND
|
|
321
|
+
// -----------------------------------------------------------------------------
|
|
322
|
+
program
|
|
323
|
+
.command('team-report [file]')
|
|
324
|
+
.description('Convert Hermes Agent team output into a professional Ship Safe report')
|
|
325
|
+
.option('--html [path]', 'Save as HTML report (default: team-report.html)')
|
|
326
|
+
.option('--json', 'JSON output')
|
|
327
|
+
.action(teamReportCommand);
|
|
328
|
+
|
|
277
329
|
// -----------------------------------------------------------------------------
|
|
278
330
|
// WATCH COMMAND
|
|
279
331
|
// -----------------------------------------------------------------------------
|
|
@@ -608,8 +660,11 @@ How it works:
|
|
|
608
660
|
// PARSE AND RUN
|
|
609
661
|
// -----------------------------------------------------------------------------
|
|
610
662
|
|
|
611
|
-
//
|
|
612
|
-
|
|
663
|
+
// No command + interactive TTY → drop into the REPL.
|
|
664
|
+
// Help banner is still available via `--help` and shown when stdin is piped.
|
|
665
|
+
if (process.argv.length === 2 && process.stdin.isTTY) {
|
|
666
|
+
shellCommand('.', {});
|
|
667
|
+
} else if (process.argv.length === 2) {
|
|
613
668
|
console.log(banner);
|
|
614
669
|
console.log(chalk.yellow('\nQuick start:\n'));
|
|
615
670
|
console.log(chalk.cyan.bold(' v9.0 — Agent Studio, Teams & Findings'));
|