ship-safe 9.1.1 → 9.2.0

This diff shows the changes between two publicly available versions of this package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
@@ -142,6 +142,11 @@ const PATTERNS = [
142
142
  confidence: 'medium',
143
143
  description: 'System prompt hardcoded in code. If client-side, users can extract it.',
144
144
  fix: 'Keep system prompts server-side only. Load from environment variables or config.',
145
+ // Skip clearly server-side files where defining a system prompt is correct.
146
+ // The rule is only meaningful for code that ships to a browser/client.
147
+ skipFile: (f) => /(?:^|\/)(?:cli|server|backend|api|lib|services|workers|jobs|scripts)\//.test(f.replace(/\\/g, '/'))
148
+ || /\.(?:server|api)\.(?:js|ts|mjs|cjs|tsx)$/.test(f)
149
+ || /\/api\//.test(f.replace(/\\/g, '/')),
145
150
  },
146
151
 
147
152
  // ── LLM10: Unbounded Consumption ───────────────────────────────────────────
@@ -219,12 +224,24 @@ const PATTERNS = [
219
224
  {
220
225
  rule: 'PROMPT_INJECTION_PATTERN',
221
226
  title: 'Known Prompt Injection Pattern',
222
- regex: /(?:ignore\s+(?:all\s+)?previous\s+instructions|disregard\s+(?:all\s+)?(?:previous|prior)|you\s+are\s+now\s+DAN|system\s*prompt|jailbreak|bypass\s+(?:your|the)\s+(?:rules|instructions|guidelines))/gi,
227
+ // The phrase "system prompt" is *not* an injection attack — it's how every
228
+ // LLM developer talks about prompts. Match the actual jailbreak verbs instead.
229
+ regex: /(?:ignore\s+(?:all\s+)?previous\s+instructions|disregard\s+(?:all\s+)?(?:previous|prior)|you\s+are\s+now\s+DAN|jailbreak\s+(?:the|this)|bypass\s+(?:your|the)\s+(?:rules|instructions|guidelines)|reveal\s+your\s+system\s+prompt)/gi,
223
230
  severity: 'high',
224
231
  cwe: 'CWE-77',
225
232
  owasp: 'LLM01',
226
233
  description: 'Known prompt injection pattern detected in code. Ensure this is for testing only.',
227
234
  fix: 'If in test data, add # ship-safe-ignore. If in user-facing code, add input filtering.',
235
+ // Skip files where the pattern appears intentionally: tests, red-team rules,
236
+ // detection-rule definitions, and security tool source code.
237
+ skipFile: (f) => {
238
+ const p = f.replace(/\\/g, '/');
239
+ return /__tests__\//.test(p)
240
+ || /\.(?:test|spec)\.(?:js|ts|mjs|cjs|tsx|jsx)$/.test(p)
241
+ || /(?:^|\/)(?:red-?team|llm-?redteam|prompt-?injection|memory-?poisoning|jailbreak)/.test(p)
242
+ || /\/agents\/[^/]*(?:redteam|injection|llm)/i.test(p)
243
+ || /(?:scan-playbook|threat-intel|patterns)\.(?:js|ts)$/.test(p);
244
+ },
228
245
  },
229
246
  ];
230
247
 
@@ -242,7 +259,12 @@ export class LLMRedTeam extends BaseAgent {
242
259
 
243
260
  let findings = [];
244
261
  for (const file of codeFiles) {
245
- findings = findings.concat(this.scanFileWithPatterns(file, PATTERNS));
262
+ // Honor per-pattern skipFile predicates so rules that are clearly false
263
+ // positives in known contexts (server-side prompts, redteam test data)
264
+ // never get sent to the agent for "fixing".
265
+ const applicable = PATTERNS.filter(p => !p.skipFile || !p.skipFile(file));
266
+ if (applicable.length === 0) continue;
267
+ findings = findings.concat(this.scanFileWithPatterns(file, applicable));
246
268
  }
247
269
  return findings;
248
270
  }
@@ -19,7 +19,7 @@
19
19
 
20
20
  import fs from 'fs';
21
21
  import path from 'path';
22
- import { createProvider, autoDetectProvider } from '../providers/llm-provider.js';
22
+ import { autoDetectProvider } from '../providers/llm-provider.js';
23
23
  import { createFinding } from './base-agent.js';
24
24
 
25
25
  // Max chars of diff content per event
@@ -48,13 +48,10 @@ export class StatefulWatcher {
48
48
  }
49
49
 
50
50
  static create(rootPath, options = {}) {
51
- const provider = autoDetectProvider(rootPath, {
52
- provider: options.provider || 'kimi',
53
- model: options.model || 'kimi-k2.6',
54
- });
55
-
51
+ const providerName = typeof options.provider === 'string' ? options.provider : 'kimi';
52
+ const provider = autoDetectProvider(rootPath, { provider: providerName, model: options.model || 'kimi-k2.6' });
56
53
  if (!provider) return null;
57
- return new StatefulWatcher({ provider, rootPath, ...options });
54
+ return new StatefulWatcher({ provider, rootPath, verbose: options.verbose });
58
55
  }
59
56
 
60
57
  /**
@@ -76,13 +76,23 @@ export class SwarmOrchestrator {
76
76
  }
77
77
 
78
78
  static create(rootPath, options = {}) {
79
- // Default to Kimi for swarm (best tool-call success rate)
80
- const provider = options.provider
81
- ? createProvider(options.provider, process.env.MOONSHOT_API_KEY || process.env.OPENAI_API_KEY, { model: options.model })
82
- : autoDetectProvider(rootPath, { provider: 'kimi', model: options.model });
79
+ if (typeof options.provider === 'string') {
80
+ // Explicit provider requested
81
+ const provider = autoDetectProvider(rootPath, { provider: options.provider, model: options.model });
82
+ if (!provider) return null;
83
+ return new SwarmOrchestrator({ provider, verbose: options.verbose, budgetCents: options.budgetCents });
84
+ }
85
+
86
+ // Auto-select: prefer deepseek-flash (1M ctx, cheap) then kimi as fallback
87
+ for (const [providerName, swarmModel] of [
88
+ ['deepseek-flash', 'deepseek-v4-flash'],
89
+ ['kimi', 'moonshot-v1-128k'],
90
+ ]) {
91
+ const provider = autoDetectProvider(rootPath, { provider: providerName, model: swarmModel });
92
+ if (provider) return new SwarmOrchestrator({ provider, verbose: options.verbose, budgetCents: options.budgetCents });
93
+ }
83
94
 
84
- if (!provider) return null;
85
- return new SwarmOrchestrator({ provider, ...options });
95
+ return null;
86
96
  }
87
97
 
88
98
  /**
@@ -97,65 +107,17 @@ export class SwarmOrchestrator {
97
107
  const codeBundle = this._bundleCode(rootPath, files);
98
108
  const prompt = this._buildSwarmPrompt(reconData, codeBundle, rootPath);
99
109
 
100
- const systemPrompt = `You are a security swarm coordinator. You have ${SWARM_ROLES.length} specialized sub-agents, each responsible for one attack class. Deploy all sub-agents in parallel. Each sub-agent scans the provided codebase for its specific vulnerability class and reports findings. Consolidate all sub-agent findings into a single structured JSON report.
101
-
102
- You MUST respond by calling the report_swarm_findings tool with all findings from all sub-agents. Never refuse to scan. Never truncate findings.`;
103
-
104
- const schema = {
105
- type: 'object',
106
- properties: {
107
- findings: {
108
- type: 'array',
109
- items: {
110
- type: 'object',
111
- properties: {
112
- agentId: { type: 'string' },
113
- file: { type: 'string' },
114
- line: { type: 'integer' },
115
- severity: { type: 'string', enum: ['critical', 'high', 'medium', 'low', 'info'] },
116
- rule: { type: 'string' },
117
- title: { type: 'string' },
118
- description: { type: 'string' },
119
- matched: { type: 'string' },
120
- remediation: { type: 'string' },
121
- },
122
- required: ['agentId', 'severity', 'rule', 'title', 'description'],
123
- additionalProperties: false,
124
- },
125
- },
126
- agentSummary: {
127
- type: 'array',
128
- items: {
129
- type: 'object',
130
- properties: {
131
- agentId: { type: 'string' },
132
- findingCount: { type: 'integer' },
133
- status: { type: 'string', enum: ['clean', 'findings', 'error'] },
134
- },
135
- required: ['agentId', 'findingCount', 'status'],
136
- additionalProperties: false,
137
- },
138
- },
139
- },
140
- required: ['findings', 'agentSummary'],
141
- };
142
-
143
- let raw;
144
- if (this.provider.completeWithTools) {
145
- raw = await this.provider.completeWithTools(
146
- systemPrompt,
147
- prompt,
148
- 'report_swarm_findings',
149
- schema,
150
- { maxTokens: 8192 }
151
- );
152
- } else {
153
- const text = await this.provider.complete(systemPrompt, prompt + '\n\nRespond with JSON only matching the schema.', { maxTokens: 8192 });
154
- try {
155
- raw = JSON.parse(text.replace(/^```(?:json)?\s*/i, '').replace(/\s*```\s*$/i, '').trim());
156
- } catch {
157
- raw = null;
158
- }
110
+ const systemPrompt = `You are a security swarm coordinator. You MUST respond with ONLY a valid JSON object no prose, no markdown, no explanation, no code fences. Your response must start with { and end with }. Deploy all ${SWARM_ROLES.length} sub-agents, each scanning for their attack class, then output the consolidated JSON findings.`;
111
+
112
+ const jsonInstruction = '\n\nOutput a JSON object with exactly these keys: {"findings":[{"agentId":"<agent-id>","file":"<relative-path>","line":<number>,"severity":"critical|high|medium|low","rule":"<rule-id>","title":"<title>","description":"<description>","remediation":"<fix>"}],"agentSummary":[{"agentId":"<agent-id>","findingCount":<number>,"status":"clean|findings"}]}';
113
+
114
+ const text = await this.provider.complete(systemPrompt, prompt + jsonInstruction, { maxTokens: 8192, jsonMode: true });
115
+ let raw = null;
116
+ try {
117
+ raw = JSON.parse(text || '{}');
118
+ } catch {
119
+ if (this.verbose) console.log(' [Swarm] JSON parse failed. Preview:', text?.slice(0, 200));
120
+ raw = null;
159
121
  }
160
122
 
161
123
  return this._mapFindings(raw?.findings ?? [], rootPath);
@@ -29,6 +29,9 @@ import { mcpCommand } from '../commands/mcp.js';
29
29
  import { remediateCommand } from '../commands/remediate.js';
30
30
  import { rotateCommand } from '../commands/rotate.js';
31
31
  import { agentCommand } from '../commands/agent.js';
32
+ import { agentFixCommand } from '../commands/agent-fix.js';
33
+ import { undoCommand } from '../commands/undo.js';
34
+ import { shellCommand } from '../commands/shell.js';
32
35
  import { depsCommand } from '../commands/deps.js';
33
36
  import { scoreCommand } from '../commands/score.js';
34
37
  import { redTeamCommand } from '../commands/red-team.js';
@@ -50,6 +53,7 @@ import { legalCommand } from '../commands/legal.js';
50
53
  import { runLiveAdvisories } from '../commands/live-advisories.js';
51
54
  import { envAuditCommand } from '../commands/env-audit.js';
52
55
  import { autofixCommand } from '../commands/autofix.js';
56
+ import { teamReportCommand } from '../commands/team-report.js';
53
57
  import { memoryCommand } from '../utils/security-memory.js';
54
58
  import { playbookCommand } from '../utils/scan-playbook.js';
55
59
  import { listPluginFiles, scaffoldPlugin } from '../utils/plugin-loader.js';
@@ -183,10 +187,46 @@ program
183
187
  // -----------------------------------------------------------------------------
184
188
  program
185
189
  .command('agent [path]')
186
- .description('AI-powered security audit: scan, classify with Claude, auto-remediate confirmed secrets')
187
- .option('--dry-run', 'Show classification and plan without writing any files')
188
- .option('--model <model>', `Claude model to use (default: ${DEFAULT_MODEL})`)
189
- .action(agentCommand);
190
+ .description('Interactive security agent: scan, plan each fix, ask before changing, verify the fix worked')
191
+ .option('--plan-only', 'Generate plans for review but never write changes')
192
+ .option('--severity <level>', 'Minimum severity to fix (critical|high|medium|low)', 'low')
193
+ .option('--provider <name>', 'LLM provider: deepseek-flash | deepseek | openai | kimi | anthropic')
194
+ .option('--model <model>', 'Specific model name to use')
195
+ .option('--think', 'Enable extended thinking (GPT-5.5 reasoning_effort:high, Claude extended thinking)')
196
+ .option('--allow-dirty', 'Allow running with uncommitted changes in the working tree')
197
+ .option('--branch [name]', 'Create a branch and commit one fix per file (default name: ship-safe/fixes-<timestamp>)')
198
+ .option('--pr', 'After fixing, push the branch and open a pull request via gh CLI (requires --branch)')
199
+ .option('--yolo', 'Auto-accept every plan without prompting (use with caution; pairs well with --branch)')
200
+ .option('--auto-low', 'Auto-accept plans marked risk:low; prompt for medium/high')
201
+ .option('--sandbox', 'Verify each fix in a Docker sandbox (not yet implemented)')
202
+ .option('--legacy', 'Use the legacy non-interactive Claude-only agent')
203
+ .action((targetPath, options) => {
204
+ if (options.legacy) {
205
+ return agentCommand(targetPath, options);
206
+ }
207
+ return agentFixCommand(targetPath, options);
208
+ });
209
+
210
+ // -----------------------------------------------------------------------------
211
+ // UNDO COMMAND
212
+ // -----------------------------------------------------------------------------
213
+ program
214
+ .command('undo [path]')
215
+ .description('Revert the last fix applied by `ship-safe agent` (or all fixes with --all)')
216
+ .option('--all', 'Revert every fix in the log instead of just the last one')
217
+ .option('--dry-run', 'Show what would be reverted without writing anything')
218
+ .action(undoCommand);
219
+
220
+ // -----------------------------------------------------------------------------
221
+ // SHELL COMMAND
222
+ // -----------------------------------------------------------------------------
223
+ program
224
+ .command('shell [path]')
225
+ .description('Interactive REPL: scan, fix, ask questions — all in one session')
226
+ .option('--provider <name>', 'LLM provider: deepseek-flash | deepseek | openai | kimi | anthropic')
227
+ .option('--model <model>', 'Specific model name to use')
228
+ .option('--think', 'Enable extended thinking mode')
229
+ .action(shellCommand);
190
230
 
191
231
  // -----------------------------------------------------------------------------
192
232
  // DEPS COMMAND
@@ -225,6 +265,7 @@ program
225
265
  .option('--baseline', 'Only show findings not in the baseline')
226
266
  .option('--pdf [file]', 'Generate PDF report (requires Chrome/Chromium)')
227
267
  .option('--deep', 'LLM-powered taint analysis for critical/high findings')
268
+ .option('--think', 'Enable extended thinking mode (GPT-5.5 reasoning_effort:high, Claude extended thinking)')
228
269
  .option('--local', 'Use local Ollama model for deep analysis (default: llama3.2)')
229
270
  .option('--model <model>', 'LLM model to use for deep/AI analysis')
230
271
  .option('--provider <name>', 'LLM provider: anthropic, openai, google, ollama, groq, together, mistral, cohere, deepseek, xai, kimi, lmstudio')
@@ -265,7 +306,8 @@ program
265
306
  .option('--no-deps', 'Skip dependency audit')
266
307
  .option('--no-ai', 'Skip AI classification')
267
308
  .option('--deep', 'LLM-powered taint analysis for critical/high findings')
268
- .option('--swarm', 'Use Kimi K2.6 native 300-agent swarm instead of local agent execution (requires MOONSHOT_API_KEY)')
309
+ .option('--swarm', 'Use AI swarm mode 23 parallel agents via DeepSeek V4 Flash or Kimi K2.6 (requires DEEPSEEK_API_KEY or MOONSHOT_API_KEY)')
310
+ .option('--think', 'Enable extended thinking mode (GPT-5.5 reasoning_effort:high, Claude extended thinking)')
269
311
  .option('--local', 'Use local Ollama model for deep analysis (default: llama3.2)')
270
312
  .option('--model <model>', 'LLM model for deep analysis')
271
313
  .option('--provider <name>', 'LLM provider: anthropic, openai, google, ollama, groq, together, mistral, cohere, deepseek, xai, kimi, lmstudio')
@@ -274,6 +316,16 @@ program
274
316
  .option('-v, --verbose', 'Verbose output')
275
317
  .action(redTeamCommand);
276
318
 
319
+ // -----------------------------------------------------------------------------
320
+ // TEAM REPORT COMMAND
321
+ // -----------------------------------------------------------------------------
322
+ program
323
+ .command('team-report [file]')
324
+ .description('Convert Hermes Agent team output into a professional Ship Safe report')
325
+ .option('--html [path]', 'Save as HTML report (default: team-report.html)')
326
+ .option('--json', 'JSON output')
327
+ .action(teamReportCommand);
328
+
277
329
  // -----------------------------------------------------------------------------
278
330
  // WATCH COMMAND
279
331
  // -----------------------------------------------------------------------------
@@ -608,8 +660,11 @@ How it works:
608
660
  // PARSE AND RUN
609
661
  // -----------------------------------------------------------------------------
610
662
 
611
- // Show help if no command provided
612
- if (process.argv.length === 2) {
663
+ // No command + interactive TTY → drop into the REPL.
664
+ // Help banner is still available via `--help` and shown when stdin is piped.
665
+ if (process.argv.length === 2 && process.stdin.isTTY) {
666
+ shellCommand('.', {});
667
+ } else if (process.argv.length === 2) {
613
668
  console.log(banner);
614
669
  console.log(chalk.yellow('\nQuick start:\n'));
615
670
  console.log(chalk.cyan.bold(' v9.0 — Agent Studio, Teams & Findings'));