npm - ship-safe - Versions diffs - 9.1.1 → 9.2.0 - Mend

ship-safe 9.1.1 → 9.2.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (13)

package/cli/agents/llm-redteam.js +24 -2
package/cli/agents/stateful-watcher.js +4 -7
package/cli/agents/swarm-orchestrator.js +27 -65
package/cli/bin/ship-safe.js +62 -7
package/cli/commands/agent-fix.js +960 -0
package/cli/commands/audit.js +24 -11
package/cli/commands/red-team.js +10 -6
package/cli/commands/shell.js +415 -0
package/cli/commands/team-report.js +415 -0
package/cli/commands/undo.js +143 -0
package/cli/providers/llm-provider.js +149 -18
package/cli/utils/output.js +21 -0
package/package.json +1 -1

package/cli/agents/llm-redteam.js CHANGED Viewed

@@ -142,6 +142,11 @@ const PATTERNS = [
     confidence: 'medium',
     description: 'System prompt hardcoded in code. If client-side, users can extract it.',
     fix: 'Keep system prompts server-side only. Load from environment variables or config.',
+    // Skip clearly server-side files where defining a system prompt is correct.
+    // The rule is only meaningful for code that ships to a browser/client.
+    skipFile: (f) => /(?:^|\/)(?:cli|server|backend|api|lib|services|workers|jobs|scripts)\//.test(f.replace(/\\/g, '/'))
+                  || /\.(?:server|api)\.(?:js|ts|mjs|cjs|tsx)$/.test(f)
+                  || /\/api\//.test(f.replace(/\\/g, '/')),
   },
   // ── LLM10: Unbounded Consumption ───────────────────────────────────────────
@@ -219,12 +224,24 @@ const PATTERNS = [
   {
     rule: 'PROMPT_INJECTION_PATTERN',
     title: 'Known Prompt Injection Pattern',
-    regex: /(?:ignore\s+(?:all\s+)?previous\s+instructions|disregard\s+(?:all\s+)?(?:previous|prior)|you\s+are\s+now\s+DAN|system\s*prompt|jailbreak|bypass\s+(?:your|the)\s+(?:rules|instructions|guidelines))/gi,
+    // The phrase "system prompt" is *not* an injection attack — it's how every
+    // LLM developer talks about prompts. Match the actual jailbreak verbs instead.
+    regex: /(?:ignore\s+(?:all\s+)?previous\s+instructions|disregard\s+(?:all\s+)?(?:previous|prior)|you\s+are\s+now\s+DAN|jailbreak\s+(?:the|this)|bypass\s+(?:your|the)\s+(?:rules|instructions|guidelines)|reveal\s+your\s+system\s+prompt)/gi,
     severity: 'high',
     cwe: 'CWE-77',
     owasp: 'LLM01',
     description: 'Known prompt injection pattern detected in code. Ensure this is for testing only.',
     fix: 'If in test data, add # ship-safe-ignore. If in user-facing code, add input filtering.',
+    // Skip files where the pattern appears intentionally: tests, red-team rules,
+    // detection-rule definitions, and security tool source code.
+    skipFile: (f) => {
+      const p = f.replace(/\\/g, '/');
+      return /__tests__\//.test(p)
+          || /\.(?:test|spec)\.(?:js|ts|mjs|cjs|tsx|jsx)$/.test(p)
+          || /(?:^|\/)(?:red-?team|llm-?redteam|prompt-?injection|memory-?poisoning|jailbreak)/.test(p)
+          || /\/agents\/[^/]*(?:redteam|injection|llm)/i.test(p)
+          || /(?:scan-playbook|threat-intel|patterns)\.(?:js|ts)$/.test(p);
+    },
   },
 ];
@@ -242,7 +259,12 @@ export class LLMRedTeam extends BaseAgent {
     let findings = [];
     for (const file of codeFiles) {
-      findings = findings.concat(this.scanFileWithPatterns(file, PATTERNS));
+      // Honor per-pattern skipFile predicates so rules that are clearly false
+      // positives in known contexts (server-side prompts, redteam test data)
+      // never get sent to the agent for "fixing".
+      const applicable = PATTERNS.filter(p => !p.skipFile || !p.skipFile(file));
+      if (applicable.length === 0) continue;
+      findings = findings.concat(this.scanFileWithPatterns(file, applicable));
     }
     return findings;
   }

package/cli/agents/stateful-watcher.js CHANGED Viewed

@@ -19,7 +19,7 @@
 import fs from 'fs';
 import path from 'path';
-import { createProvider, autoDetectProvider } from '../providers/llm-provider.js';
+import { autoDetectProvider } from '../providers/llm-provider.js';
 import { createFinding } from './base-agent.js';
 // Max chars of diff content per event
@@ -48,13 +48,10 @@ export class StatefulWatcher {
   }
   static create(rootPath, options = {}) {
-    const provider = autoDetectProvider(rootPath, {
-      provider: options.provider || 'kimi',
-      model: options.model || 'kimi-k2.6',
-    });
+    const providerName = typeof options.provider === 'string' ? options.provider : 'kimi';
+    const provider = autoDetectProvider(rootPath, { provider: providerName, model: options.model || 'kimi-k2.6' });
     if (!provider) return null;
-    return new StatefulWatcher({ provider, rootPath, ...options });
+    return new StatefulWatcher({ provider, rootPath, verbose: options.verbose });
   }
   /**

package/cli/agents/swarm-orchestrator.js CHANGED Viewed

@@ -76,13 +76,23 @@ export class SwarmOrchestrator {
   }
   static create(rootPath, options = {}) {
-    // Default to Kimi for swarm (best tool-call success rate)
-    const provider = options.provider
-      ? createProvider(options.provider, process.env.MOONSHOT_API_KEY || process.env.OPENAI_API_KEY, { model: options.model })
-      : autoDetectProvider(rootPath, { provider: 'kimi', model: options.model });
+    if (typeof options.provider === 'string') {
+      // Explicit provider requested
+      const provider = autoDetectProvider(rootPath, { provider: options.provider, model: options.model });
+      if (!provider) return null;
+      return new SwarmOrchestrator({ provider, verbose: options.verbose, budgetCents: options.budgetCents });
+    }
+    // Auto-select: prefer deepseek-flash (1M ctx, cheap) then kimi as fallback
+    for (const [providerName, swarmModel] of [
+      ['deepseek-flash', 'deepseek-v4-flash'],
+      ['kimi',           'moonshot-v1-128k'],
+    ]) {
+      const provider = autoDetectProvider(rootPath, { provider: providerName, model: swarmModel });
+      if (provider) return new SwarmOrchestrator({ provider, verbose: options.verbose, budgetCents: options.budgetCents });
+    }
-    if (!provider) return null;
-    return new SwarmOrchestrator({ provider, ...options });
+    return null;
   }
   /**
@@ -97,65 +107,17 @@ export class SwarmOrchestrator {
     const codeBundle = this._bundleCode(rootPath, files);
     const prompt = this._buildSwarmPrompt(reconData, codeBundle, rootPath);
-    const systemPrompt = `You are a security swarm coordinator. You have ${SWARM_ROLES.length} specialized sub-agents, each responsible for one attack class. Deploy all sub-agents in parallel. Each sub-agent scans the provided codebase for its specific vulnerability class and reports findings. Consolidate all sub-agent findings into a single structured JSON report.
-You MUST respond by calling the report_swarm_findings tool with all findings from all sub-agents. Never refuse to scan. Never truncate findings.`;
-    const schema = {
-      type: 'object',
-      properties: {
-        findings: {
-          type: 'array',
-          items: {
-            type: 'object',
-            properties: {
-              agentId:     { type: 'string' },
-              file:        { type: 'string' },
-              line:        { type: 'integer' },
-              severity:    { type: 'string', enum: ['critical', 'high', 'medium', 'low', 'info'] },
-              rule:        { type: 'string' },
-              title:       { type: 'string' },
-              description: { type: 'string' },
-              matched:     { type: 'string' },
-              remediation: { type: 'string' },
-            },
-            required: ['agentId', 'severity', 'rule', 'title', 'description'],
-            additionalProperties: false,
-          },
-        },
-        agentSummary: {
-          type: 'array',
-          items: {
-            type: 'object',
-            properties: {
-              agentId:      { type: 'string' },
-              findingCount: { type: 'integer' },
-              status:       { type: 'string', enum: ['clean', 'findings', 'error'] },
-            },
-            required: ['agentId', 'findingCount', 'status'],
-            additionalProperties: false,
-          },
-        },
-      },
-      required: ['findings', 'agentSummary'],
-    };
-    let raw;
-    if (this.provider.completeWithTools) {
-      raw = await this.provider.completeWithTools(
-        systemPrompt,
-        prompt,
-        'report_swarm_findings',
-        schema,
-        { maxTokens: 8192 }
-      );
-    } else {
-      const text = await this.provider.complete(systemPrompt, prompt + '\n\nRespond with JSON only matching the schema.', { maxTokens: 8192 });
-      try {
-        raw = JSON.parse(text.replace(/^```(?:json)?\s*/i, '').replace(/\s*```\s*$/i, '').trim());
-      } catch {
-        raw = null;
-      }
+    const systemPrompt = `You are a security swarm coordinator. You MUST respond with ONLY a valid JSON object — no prose, no markdown, no explanation, no code fences. Your response must start with { and end with }. Deploy all ${SWARM_ROLES.length} sub-agents, each scanning for their attack class, then output the consolidated JSON findings.`;
+    const jsonInstruction = '\n\nOutput a JSON object with exactly these keys: {"findings":[{"agentId":"<agent-id>","file":"<relative-path>","line":<number>,"severity":"critical|high|medium|low","rule":"<rule-id>","title":"<title>","description":"<description>","remediation":"<fix>"}],"agentSummary":[{"agentId":"<agent-id>","findingCount":<number>,"status":"clean|findings"}]}';
+    const text = await this.provider.complete(systemPrompt, prompt + jsonInstruction, { maxTokens: 8192, jsonMode: true });
+    let raw = null;
+    try {
+      raw = JSON.parse(text || '{}');
+    } catch {
+      if (this.verbose) console.log('  [Swarm] JSON parse failed. Preview:', text?.slice(0, 200));
+      raw = null;
     }
     return this._mapFindings(raw?.findings ?? [], rootPath);

package/cli/bin/ship-safe.js CHANGED Viewed

@@ -29,6 +29,9 @@ import { mcpCommand } from '../commands/mcp.js';
 import { remediateCommand } from '../commands/remediate.js';
 import { rotateCommand } from '../commands/rotate.js';
 import { agentCommand } from '../commands/agent.js';
+import { agentFixCommand } from '../commands/agent-fix.js';
+import { undoCommand } from '../commands/undo.js';
+import { shellCommand } from '../commands/shell.js';
 import { depsCommand } from '../commands/deps.js';
 import { scoreCommand } from '../commands/score.js';
 import { redTeamCommand } from '../commands/red-team.js';
@@ -50,6 +53,7 @@ import { legalCommand } from '../commands/legal.js';
 import { runLiveAdvisories } from '../commands/live-advisories.js';
 import { envAuditCommand } from '../commands/env-audit.js';
 import { autofixCommand } from '../commands/autofix.js';
+import { teamReportCommand } from '../commands/team-report.js';
 import { memoryCommand } from '../utils/security-memory.js';
 import { playbookCommand } from '../utils/scan-playbook.js';
 import { listPluginFiles, scaffoldPlugin } from '../utils/plugin-loader.js';
@@ -183,10 +187,46 @@ program
 // -----------------------------------------------------------------------------
 program
   .command('agent [path]')
-  .description('AI-powered security audit: scan, classify with Claude, auto-remediate confirmed secrets')
-  .option('--dry-run', 'Show classification and plan without writing any files')
-  .option('--model <model>', `Claude model to use (default: ${DEFAULT_MODEL})`)
-  .action(agentCommand);
+  .description('Interactive security agent: scan, plan each fix, ask before changing, verify the fix worked')
+  .option('--plan-only', 'Generate plans for review but never write changes')
+  .option('--severity <level>', 'Minimum severity to fix (critical|high|medium|low)', 'low')
+  .option('--provider <name>', 'LLM provider: deepseek-flash | deepseek | openai | kimi | anthropic')
+  .option('--model <model>', 'Specific model name to use')
+  .option('--think', 'Enable extended thinking (GPT-5.5 reasoning_effort:high, Claude extended thinking)')
+  .option('--allow-dirty', 'Allow running with uncommitted changes in the working tree')
+  .option('--branch [name]', 'Create a branch and commit one fix per file (default name: ship-safe/fixes-<timestamp>)')
+  .option('--pr', 'After fixing, push the branch and open a pull request via gh CLI (requires --branch)')
+  .option('--yolo', 'Auto-accept every plan without prompting (use with caution; pairs well with --branch)')
+  .option('--auto-low', 'Auto-accept plans marked risk:low; prompt for medium/high')
+  .option('--sandbox', 'Verify each fix in a Docker sandbox (not yet implemented)')
+  .option('--legacy', 'Use the legacy non-interactive Claude-only agent')
+  .action((targetPath, options) => {
+    if (options.legacy) {
+      return agentCommand(targetPath, options);
+    }
+    return agentFixCommand(targetPath, options);
+  });
+// -----------------------------------------------------------------------------
+// UNDO COMMAND
+// -----------------------------------------------------------------------------
+program
+  .command('undo [path]')
+  .description('Revert the last fix applied by `ship-safe agent` (or all fixes with --all)')
+  .option('--all', 'Revert every fix in the log instead of just the last one')
+  .option('--dry-run', 'Show what would be reverted without writing anything')
+  .action(undoCommand);
+// -----------------------------------------------------------------------------
+// SHELL COMMAND
+// -----------------------------------------------------------------------------
+program
+  .command('shell [path]')
+  .description('Interactive REPL: scan, fix, ask questions — all in one session')
+  .option('--provider <name>', 'LLM provider: deepseek-flash | deepseek | openai | kimi | anthropic')
+  .option('--model <model>', 'Specific model name to use')
+  .option('--think', 'Enable extended thinking mode')
+  .action(shellCommand);
 // -----------------------------------------------------------------------------
 // DEPS COMMAND
@@ -225,6 +265,7 @@ program
   .option('--baseline', 'Only show findings not in the baseline')
   .option('--pdf [file]', 'Generate PDF report (requires Chrome/Chromium)')
   .option('--deep', 'LLM-powered taint analysis for critical/high findings')
+  .option('--think', 'Enable extended thinking mode (GPT-5.5 reasoning_effort:high, Claude extended thinking)')
   .option('--local', 'Use local Ollama model for deep analysis (default: llama3.2)')
   .option('--model <model>', 'LLM model to use for deep/AI analysis')
   .option('--provider <name>', 'LLM provider: anthropic, openai, google, ollama, groq, together, mistral, cohere, deepseek, xai, kimi, lmstudio')
@@ -265,7 +306,8 @@ program
   .option('--no-deps', 'Skip dependency audit')
   .option('--no-ai', 'Skip AI classification')
   .option('--deep', 'LLM-powered taint analysis for critical/high findings')
-  .option('--swarm', 'Use Kimi K2.6 native 300-agent swarm instead of local agent execution (requires MOONSHOT_API_KEY)')
+  .option('--swarm', 'Use AI swarm mode — 23 parallel agents via DeepSeek V4 Flash or Kimi K2.6 (requires DEEPSEEK_API_KEY or MOONSHOT_API_KEY)')
+  .option('--think', 'Enable extended thinking mode (GPT-5.5 reasoning_effort:high, Claude extended thinking)')
   .option('--local', 'Use local Ollama model for deep analysis (default: llama3.2)')
   .option('--model <model>', 'LLM model for deep analysis')
   .option('--provider <name>', 'LLM provider: anthropic, openai, google, ollama, groq, together, mistral, cohere, deepseek, xai, kimi, lmstudio')
@@ -274,6 +316,16 @@ program
   .option('-v, --verbose', 'Verbose output')
   .action(redTeamCommand);
+// -----------------------------------------------------------------------------
+// TEAM REPORT COMMAND
+// -----------------------------------------------------------------------------
+program
+  .command('team-report [file]')
+  .description('Convert Hermes Agent team output into a professional Ship Safe report')
+  .option('--html [path]', 'Save as HTML report (default: team-report.html)')
+  .option('--json', 'JSON output')
+  .action(teamReportCommand);
 // -----------------------------------------------------------------------------
 // WATCH COMMAND
 // -----------------------------------------------------------------------------
@@ -608,8 +660,11 @@ How it works:
 // PARSE AND RUN
 // -----------------------------------------------------------------------------
-// Show help if no command provided
-if (process.argv.length === 2) {
+// No command + interactive TTY → drop into the REPL.
+// Help banner is still available via `--help` and shown when stdin is piped.
+if (process.argv.length === 2 && process.stdin.isTTY) {
+  shellCommand('.', {});
+} else if (process.argv.length === 2) {
   console.log(banner);
   console.log(chalk.yellow('\nQuick start:\n'));
   console.log(chalk.cyan.bold('  v9.0 — Agent Studio, Teams & Findings'));