npm - ship-safe - Versions diffs - 6.4.0 → 8.0.0 - Mend

ship-safe 6.4.0 → 8.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31) hide show

package/README.md +80 -23
package/cli/agents/agent-attestation-agent.js +318 -0
package/cli/agents/agent-config-scanner.js +15 -0
package/cli/agents/agentic-security-agent.js +35 -0
package/cli/agents/cicd-scanner.js +22 -0
package/cli/agents/config-auditor.js +235 -0
package/cli/agents/deep-analyzer.js +39 -19
package/cli/agents/hermes-security-agent.js +536 -0
package/cli/agents/index.js +65 -21
package/cli/agents/managed-agent-scanner.js +333 -0
package/cli/agents/memory-poisoning-agent.js +304 -0
package/cli/agents/scoring-engine.js +16 -1
package/cli/agents/supply-chain-agent.js +129 -3
package/cli/bin/ship-safe.js +178 -5
package/cli/commands/audit.js +116 -2
package/cli/commands/autofix.js +383 -0
package/cli/commands/env-audit.js +349 -0
package/cli/commands/live-advisories.js +241 -0
package/cli/commands/red-team.js +2 -2
package/cli/commands/scan-mcp.js +78 -0
package/cli/commands/scan-skill.js +248 -5
package/cli/commands/watch.js +205 -0
package/cli/index.js +5 -0
package/cli/providers/llm-provider.js +89 -1
package/cli/utils/compliance-map.js +66 -0
package/cli/utils/hermes-tool-registry.js +252 -0
package/cli/utils/patterns.js +1 -0
package/cli/utils/plugin-loader.js +276 -0
package/cli/utils/scan-playbook.js +312 -0
package/cli/utils/security-memory.js +296 -0
package/package.json +2 -2

package/cli/commands/scan-skill.js CHANGED Viewed

@@ -19,6 +19,59 @@ import { createHash } from 'crypto';
 import * as output from '../utils/output.js';
 import { ThreatIntel } from '../utils/threat-intel.js';
+// =============================================================================
+// HERMES SKILL FRONTMATTER PATTERNS (Track D — cross-skill/tool binding)
+// =============================================================================
+// Built-in tool registries that skills may reference: tool name → registry label.
+// Ship Safe tools are added lazily in checkHermesFrontmatter() to avoid
+// loading hermes-tool-registry.js (and its crypto import) on every invocation.
+//
+// The map is created with a null prototype because it is indexed with tool names
+// taken from untrusted skill frontmatter. On an ordinary object literal, names
+// such as "constructor" or "toString" would resolve to inherited members and be
+// treated as known tools.
+const KNOWN_TOOL_REGISTRIES = Object.assign(Object.create(null), {
+  // Common Hermes community tools (names only — no handler)
+  'web_search': 'hermes-community',
+  'web_browser': 'hermes-community',
+  'file_read': 'hermes-community',
+  'file_write': 'hermes-community',
+  'code_execute': 'hermes-community',
+  'github_api': 'hermes-community',
+  'memory_store': 'hermes-community',
+  'memory_retrieve': 'hermes-community',
+});
+// Hermes-specific patterns to check in skill markdown/frontmatter
+//
+// Each entry has:
+//   name     — label copied into the finding's `name`
+//   regex    — detection pattern; all carry the `g` flag, so `lastIndex` is
+//              stateful and must be reset to 0 before each test()/exec() call
+//   severity — copied into the finding's `severity`
+//   note     — human-readable rationale (not read by checkHermesBodyPatterns)
+const HERMES_SKILL_PATTERNS = [
+  {
+    name: 'Hermes: XML tool_call injection',
+    // Bounded to 300 characters between the tags; longer blocks do not match.
+    regex: /<tool_call>[\s\S]{0,300}<\/tool_call>/gi,
+    severity: 'critical',
+    note: 'Skill body contains a <tool_call> block — will be executed by Hermes agents that load this skill.',
+  },
+  {
+    name: 'Hermes: function_calls injection',
+    // Same 300-character bound as the <tool_call> pattern.
+    regex: /<function_calls>[\s\S]{0,300}<\/function_calls>/gi,
+    severity: 'critical',
+    note: 'Skill body contains a <function_calls> block — classic Hermes function-call injection.',
+  },
+  {
+    name: 'Hermes: Forced tool invocation instruction',
+    // Matches phrasings like "you must call the tool", "always invoke the function",
+    // "tool MUST be used" (case-insensitive because of the `i` flag).
+    regex: /(?:you\s+must\s+(?:call|invoke|use)\s+(?:the\s+)?tool|always\s+(?:call|invoke|run)\s+(?:the\s+)?(?:tool|function)|tool\s+MUST\s+be\s+(?:called|invoked|used))/gi,
+    severity: 'high',
+    note: 'Skill instructs agent to call a specific tool unconditionally — bypasses agent autonomy.',
+  },
+  {
+    name: 'Hermes: Plan/goal hijacking',
+    // Matches "update your goal to", "change your plan", "your new objective is", etc.
+    regex: /(?:update\s+(?:your\s+)?(?:goal|plan|objective)\s+to|change\s+(?:your\s+)?(?:goal|plan|objective)|your\s+(?:new\s+)?(?:goal|plan|primary\s+objective)\s+(?:is|should\s+be))/gi,
+    severity: 'critical',
+    note: 'Skill attempts to overwrite the agent\'s goal or plan state — ASI-01 Goal Hijacking.',
+  },
+  {
+    name: 'Hermes: Memory layer write instruction',
+    // Matches "write this to ... memory" / "store the following in memory|episodic|semantic".
+    regex: /(?:write\s+(?:this|the\s+following)\s+to\s+(?:memory|episodic|semantic|working)\s+memory|store\s+(?:this|the\s+following)\s+in\s+(?:memory|episodic|semantic))/gi,
+    severity: 'high',
+    note: 'Skill instructs agent to write attacker-controlled data to memory — ASI-06 Memory Poisoning.',
+  },
+];
 // =============================================================================
 // POPULAR SKILL NAMES (for typosquatting detection)
 // =============================================================================
@@ -113,7 +166,7 @@ export async function scanSkillCommand(target, options = {}) {
   console.log(chalk.gray(`  Size: ${content.length} bytes`));
   console.log();
-  const findings = analyzeSkill(content, skillName, source);
+  const findings = await analyzeSkill(content, skillName, source);
   if (options.json) {
     console.log(JSON.stringify({ skill: skillName, source, findings, summary: getSummary(findings) }, null, 2));
@@ -127,7 +180,7 @@ export async function scanSkillCommand(target, options = {}) {
 // SKILL ANALYSIS
 // =============================================================================
-function analyzeSkill(content, skillName, source) {
+async function analyzeSkill(content, skillName, source) {
   const findings = [];
   // 1. Static pattern analysis
@@ -152,10 +205,12 @@ function analyzeSkill(content, skillName, source) {
   try {
     const manifest = JSON.parse(content);
     if (manifest.permissions) {
-      const dangerous = ['shell', 'exec', 'system', 'network', 'filesystem', 'admin', 'root'];
+      const dangerousPerm = [/\bshell\b/i, /\bexec\b/i, /\bsystem\b/i, /\badmin\b/i, /\broot\b/i,
+        /filesystem\s*:\s*(write|read-write)/i, /network\s*:\s*(unrestricted|all)/i,
+        /^filesystem$/i, /^network$/i];
       for (const perm of (Array.isArray(manifest.permissions) ? manifest.permissions : [])) {
         const permStr = typeof perm === 'string' ? perm : perm.name || '';
-        if (dangerous.some(d => permStr.toLowerCase().includes(d))) {
+        if (dangerousPerm.some(p => p.test(permStr))) {
           findings.push({
             check: 'permission-audit',
             name: `Dangerous permission: ${permStr}`,
@@ -216,6 +271,194 @@ function analyzeSkill(content, skillName, source) {
     });
   }
+  // 6. Hermes-specific: frontmatter tool binding + permission drift validation
+  findings.push(...(await checkHermesFrontmatter(content)));
+  // 7. Hermes-specific: function-call injection and goal hijacking in body
+  findings.push(...checkHermesBodyPatterns(content, lines));
+  return findings;
+}
+// =============================================================================
+// HERMES FRONTMATTER VALIDATION (Track D)
+// =============================================================================
+/**
+ * Parse the YAML frontmatter block (between --- delimiters) of a markdown skill.
+ *
+ * This is a small line-based reader, not a YAML implementation. It understands
+ * top-level `key: value` scalars, inline arrays (`key: [a, b]`) and block lists
+ * (`key:` followed by `- item` lines, indented or flush-left). Indented/nested
+ * keys are ignored. Surrounding quotes are stripped from scalars and all quote
+ * characters are stripped from list items.
+ *
+ * @param {string} content - Full text of the skill file.
+ * @returns {Object<string, string|string[]>|null} Plain object of parsed keys,
+ *   or null when the text does not start with a frontmatter block.
+ */
+function parseFrontmatter(content) {
+  const match = content.match(/^---\r?\n([\s\S]*?)\r?\n---/);
+  if (!match) return null;
+  const fm = {};
+  // Normalise CRLF / lone CR to LF before line-based matching. In JavaScript
+  // `.` never matches `\r` and `$` (without the m flag) only matches at end of
+  // input, so a trailing `\r` made every `key: value` line of a CRLF file fail
+  // to parse — which in turn produced bogus "missing permissions/version" findings.
+  const yamlBlock = match[1].replace(/\r\n?/g, '\n');
+  for (const line of yamlBlock.split('\n')) {
+    const kv = line.match(/^(\w[\w-]*):\s*(.*)$/);
+    if (!kv) continue;
+    const [, key, rawVal] = kv;
+    const val = rawVal.trim();
+    if (val.startsWith('[') && val.endsWith(']')) {
+      // Inline array: [a, b, c]
+      fm[key] = val.slice(1, -1).split(',').map(s => s.trim().replace(/['"]/g, '')).filter(Boolean);
+    } else {
+      fm[key] = val.replace(/^['"]|['"]$/g, '');
+    }
+  }
+  // Collect multi-line list values. `\s*` (not `\s+`) before the dash so that
+  // flush-left items (`key:\n- item`, which is valid YAML) are recognised as
+  // well as indented ones. These overwrite the empty scalar stored above.
+  const listRe = /^(\w[\w-]*):\s*\n((?:\s*-\s+.+\n?)+)/gm;
+  let m;
+  while ((m = listRe.exec(yamlBlock)) !== null) {
+    const [, key, block] = m;
+    fm[key] = block.match(/-\s+(.+)/g)?.map(s => s.replace(/^-\s+/, '').replace(/['"]/g, '').trim()) ?? [];
+  }
+  return fm;
+}
+// Set once the lazy registry load has been attempted (successfully or not).
+let _hermesToolsLoaded = false;
+/**
+ * Lazily merge the Ship Safe tool names from hermes-tool-registry.js into
+ * KNOWN_TOOL_REGISTRIES, labelling each as 'ship-safe'.
+ *
+ * The load is attempted at most once per process: the flag is set afterwards
+ * whether or not the import worked, so a missing registry is not retried.
+ *
+ * @returns {Promise<void>}
+ */
+async function ensureHermesToolsLoaded() {
+  if (!_hermesToolsLoaded) {
+    try {
+      const registry = await import('../utils/hermes-tool-registry.js');
+      for (const tool of registry.HERMES_TOOLS) {
+        KNOWN_TOOL_REGISTRIES[tool.name] = 'ship-safe';
+      }
+    } catch {
+      /* non-fatal — registry unavailable */
+    }
+    _hermesToolsLoaded = true;
+  }
+}
+/**
+ * Validate the frontmatter of a markdown skill for Hermes-specific risks.
+ *
+ * Checks performed (each emits a finding with `line: 0`, since frontmatter
+ * fields are not mapped back to line numbers):
+ *   - missing `permissions` field, wildcard permissions, dangerous permissions
+ *   - missing `version` field
+ *   - `tools` entries that are not in KNOWN_TOOL_REGISTRIES
+ *   - `tools` declared without any `permissions`
+ *
+ * @param {string} content - Full text of the skill file.
+ * @returns {Promise<Array<{check: string, name: string, severity: string, line: number, matched: string}>>}
+ *   Findings; empty when the content has no frontmatter block.
+ */
+async function checkHermesFrontmatter(content) {
+  await ensureHermesToolsLoaded();
+  const findings = [];
+  const fm = parseFrontmatter(content);
+  // Not a markdown skill with frontmatter — skip
+  if (!fm) return findings;
+  // ── Check: missing permissions field ──────────────────────────────────────
+  if (!fm.permissions) {
+    findings.push({
+      check: 'hermes-frontmatter',
+      name: 'Hermes: Skill missing permissions field (ASI-02 Excessive Agency)',
+      severity: 'medium',
+      line: 0,
+      matched: 'No permissions: field in frontmatter — skill may be granted more access than intended',
+    });
+  } else {
+    // ── Check: wildcard permissions ──────────────────────────────────────────
+    const perms = Array.isArray(fm.permissions) ? fm.permissions : [fm.permissions];
+    const wildcards = perms.filter(p => /^\*$|^all$|^any$/i.test(String(p)));
+    if (wildcards.length > 0) {
+      findings.push({
+        check: 'hermes-frontmatter',
+        name: 'Hermes: Wildcard permissions (* / all) — excessive agency (ASI-02)',
+        severity: 'high',
+        line: 0,
+        matched: `permissions: [${wildcards.join(', ')}]`,
+      });
+    }
+    // ── Check: dangerous explicit permissions ────────────────────────────────
+    // Match whole-word or exact qualified values — don't fire on "filesystem: read-only"
+    const dangerousPatterns = [
+      /\bshell\b/i, /\bexec\b/i, /\bsystem\b/i, /\badmin\b/i, /\broot\b/i, /\bsudo\b/i,
+      /filesystem\s*:\s*write/i, /filesystem\s*:\s*read-write/i,
+      /network\s*:\s*unrestricted/i, /network\s*:\s*all/i,
+      /^filesystem$/i, /^network$/i,  // bare "filesystem" or "network" without qualifier is ambiguous → flag
+    ];
+    for (const perm of perms) {
+      if (dangerousPatterns.some(p => p.test(String(perm)))) {
+        findings.push({
+          check: 'hermes-frontmatter',
+          name: `Hermes: Dangerous permission declared: ${perm}`,
+          severity: 'high',
+          line: 0,
+          matched: `permissions: [${perm}]`,
+        });
+      }
+    }
+  }
+  // ── Check: missing version pin ────────────────────────────────────────────
+  if (!fm.version) {
+    findings.push({
+      check: 'hermes-frontmatter',
+      name: 'Hermes: Skill missing version field — unpinned skill (ASI-10 Supply Chain)',
+      severity: 'medium',
+      line: 0,
+      matched: 'No version: field in frontmatter — skill version drift cannot be detected',
+    });
+  }
+  // ── Check: cross-skill tool binding validation ────────────────────────────
+  const tools = Array.isArray(fm.tools) ? fm.tools : fm.tools ? [fm.tools] : [];
+  for (const toolName of tools) {
+    // Own-property test, not `KNOWN_TOOL_REGISTRIES[toolName]`: the tool name comes
+    // from untrusted frontmatter, and a bracket lookup also resolves inherited
+    // Object.prototype members ("constructor", "toString", …), which would let
+    // such a name pass as a known tool.
+    if (!Object.prototype.hasOwnProperty.call(KNOWN_TOOL_REGISTRIES, toolName)) {
+      findings.push({
+        check: 'hermes-tool-binding',
+        name: `Hermes: Unresolvable tool reference: "${toolName}"`,
+        severity: 'high',
+        line: 0,
+        matched: `tools: [${toolName}] — not found in any known tool registry. May cause silent failures or late-binding substitution.`,
+      });
+    }
+  }
+  // ── Check: tools declared but no permissions field ────────────────────────
+  if (tools.length > 0 && !fm.permissions) {
+    findings.push({
+      check: 'hermes-tool-binding',
+      name: 'Hermes: Skill declares tools without permissions (permission drift)',
+      severity: 'high',
+      line: 0,
+      matched: `tools: [${tools.join(', ')}] declared but no permissions: field — skill runs with ambient agent permissions`,
+    });
+  }
+  return findings;
+}
+/**
+ * Look for HERMES_SKILL_PATTERNS in the skill text.
+ *
+ * Two passes are made: first each line is tested on its own (findings carry a
+ * 1-based line number), then the whole text is searched once per pattern so
+ * that matches spanning several lines are caught. A whole-text hit is only
+ * reported (with `line: 0`) when that pattern name has not been reported yet.
+ *
+ * @param {string} content - Full text of the skill file.
+ * @param {string[]} lines - The same text, split into lines.
+ * @returns {Array<{check: string, name: string, severity: string, line: number, matched: string}>}
+ */
+function checkHermesBodyPatterns(content, lines) {
+  const findings = [];
+  // Names of patterns that already produced at least one finding.
+  const reported = new Set();
+  const record = (pattern, line, matched) => {
+    findings.push({
+      check: 'hermes-injection',
+      name: pattern.name,
+      severity: pattern.severity,
+      line,
+      matched,
+    });
+    reported.add(pattern.name);
+  };
+  // Pass 1: line by line
+  for (const [idx, text] of lines.entries()) {
+    for (const pattern of HERMES_SKILL_PATTERNS) {
+      // The regexes are global, so lastIndex must be rewound before every use.
+      pattern.regex.lastIndex = 0;
+      if (!pattern.regex.test(text)) continue;
+      record(pattern, idx + 1, text.trim().slice(0, 100));
+    }
+  }
+  // Pass 2: whole text, for blocks that span lines
+  for (const pattern of HERMES_SKILL_PATTERNS) {
+    pattern.regex.lastIndex = 0;
+    const hit = pattern.regex.exec(content);
+    // Skip when pass 1 already reported this pattern, to avoid duplicates.
+    if (hit && !reported.has(pattern.name)) {
+      record(pattern, 0, hit[0].slice(0, 100));
+    }
+  }
   return findings;
 }
@@ -281,7 +524,7 @@ async function scanAllSkills(rootPath) {
           const response = await fetch(url);
           if (!response.ok) throw new Error(`HTTP ${response.status}`);
           const content = await response.text();
-          const findings = analyzeSkill(content, name, url);
+          const findings = await analyzeSkill(content, name, url);
           if (findings.length > 0) {
             printSkillFindings(findings, name);
           } else {

package/cli/commands/watch.js CHANGED Viewed

@@ -16,6 +16,7 @@ import chalk from 'chalk';
 import { SKIP_DIRS, SKIP_EXTENSIONS, SKIP_FILENAMES, SECRET_PATTERNS, SECURITY_PATTERNS } from '../utils/patterns.js';
 import { isHighEntropyMatch, getConfidence } from '../utils/entropy.js';
 import * as output from '../utils/output.js';
+import { ScoringEngine } from '../agents/scoring-engine.js';
 // Agent config files to watch
 const AGENT_CONFIG_PATTERNS = [
@@ -26,6 +27,10 @@ const AGENT_CONFIG_PATTERNS = [
   '.cursor/mcp.json', '.vscode/mcp.json',
 ];
+// Watch state persistence: deep watch mode writes its latest results to
+// <watched root>/.ship-safe/watch.json, and the --status mode reads that file.
+const WATCH_DB_DIR = '.ship-safe';
+const WATCH_DB_FILE = 'watch.json';
 export async function watchCommand(targetPath = '.', options = {}) {
   const absolutePath = path.resolve(targetPath);
@@ -34,15 +39,26 @@ export async function watchCommand(targetPath = '.', options = {}) {
     process.exit(1);
   }
+  // Status mode: print current watch state and exit
+  if (options.status) {
+    return showWatchStatus(absolutePath);
+  }
   // Config-only watch mode
   if (options.configs) {
     return watchConfigs(absolutePath);
   }
+  // Deep mode: run full orchestrator on changes
+  if (options.deep) {
+    return watchDeep(absolutePath, options);
+  }
   console.log();
   output.header('Ship Safe — Watch Mode');
   console.log();
   console.log(chalk.cyan('  Watching for file changes...'));
+  console.log(chalk.gray('  Use --deep for full agent scanning, --status for current findings'));
   console.log(chalk.gray('  Press Ctrl+C to stop'));
   console.log();
@@ -230,6 +246,195 @@ async function watchConfigs(absolutePath) {
   }
 }
+// =============================================================================
+// STATUS MODE
+// =============================================================================
+/**
+ * Print a summary of the persisted watch state for a project and return.
+ *
+ * Reads <rootPath>/.ship-safe/watch.json and prints last scan time, scan
+ * count, score, finding count, the optional agentic summary and a per-severity
+ * breakdown. Prints a hint when the file does not exist, and a "corrupted"
+ * message when reading or parsing it throws.
+ *
+ * @param {string} rootPath - Absolute path of the watched project root.
+ * @returns {void}
+ */
+function showWatchStatus(rootPath) {
+  const dbFile = path.join(rootPath, WATCH_DB_DIR, WATCH_DB_FILE);
+  if (!fs.existsSync(dbFile)) {
+    console.log('\n  No watch data found. Run: ship-safe watch . --deep\n');
+    return;
+  }
+  try {
+    const data = JSON.parse(fs.readFileSync(dbFile, 'utf-8'));
+    console.log(`\n  ${chalk.cyan.bold('Ship Safe Watch — Status')}`);
+    console.log(`  ${'─'.repeat(40)}`);
+    // Every field is optional in the output: missing values fall back to a placeholder.
+    console.log(`  Last scan:  ${data.lastScan || 'never'}`);
+    console.log(`  Scans run:  ${data.scanCount || 0}`);
+    console.log(`  Score:      ${data.score?.score ?? '?'}/100 ${data.score?.grade ?? ''}`);
+    console.log(`  Findings:   ${data.score?.totalFindings ?? 0}`);
+    if (data.agentic) {
+      console.log(`  Agentic:    ${data.agentic.flagged}/${data.agentic.total} OWASP Agentic risks flagged`);
+    }
+    // Severity breakdown
+    // Severities other than the four below are still counted but never printed.
+    const sevCounts = { critical: 0, high: 0, medium: 0, low: 0 };
+    for (const f of (data.findings || [])) {
+      sevCounts[f.severity] = (sevCounts[f.severity] || 0) + 1;
+    }
+    console.log(`    Critical: ${sevCounts.critical}`);
+    console.log(`    High:     ${sevCounts.high}`);
+    console.log(`    Medium:   ${sevCounts.medium}`);
+    console.log(`    Low:      ${sevCounts.low}\n`);
+  } catch {
+    // Any read/parse/shape error ends up here; lines already printed stay printed.
+    console.log('\n  Failed to read watch data. File may be corrupted.\n');
+  }
+}
+// =============================================================================
+// DEEP WATCH MODE (full orchestrator)
+// =============================================================================
+/**
+ * Deep watch mode: re-run the full agent orchestrator whenever files change.
+ *
+ * Changed paths are collected into a pending set and scanned after a debounce
+ * period. Each scan's score and findings are printed and persisted to
+ * <absolutePath>/.ship-safe/watch.json (read back by the --status mode).
+ * The function installs a SIGINT handler and keeps the process alive; it only
+ * exits the process on SIGINT (code 0) or when the watcher cannot be created
+ * (code 1).
+ *
+ * @param {string} absolutePath - Absolute path of the directory to watch.
+ * @param {object} [options]
+ * @param {number} [options.debounce=1500] - Quiet period in ms before scanning.
+ * @param {number} [options.threshold] - Warn when the score drops below this value.
+ * @returns {Promise<void>}
+ */
+async function watchDeep(absolutePath, options = {}) {
+  const { buildOrchestrator } = await import('../agents/index.js');
+  const { ReconAgent } = await import('../agents/recon-agent.js');
+  const debounceMs = options.debounce || 1500;
+  const threshold = options.threshold || null;
+  const scoringEngine = new ScoringEngine();
+  console.log();
+  output.header('Ship Safe — Deep Watch Mode');
+  console.log();
+  console.log(chalk.cyan('  Running full agent scans on file changes'));
+  console.log(chalk.gray(`  Debounce: ${debounceMs}ms`));
+  if (threshold) console.log(chalk.gray(`  Threshold: ${threshold}/100`));
+  console.log(chalk.gray('  Press Ctrl+C to stop'));
+  console.log();
+  // Initial recon
+  const reconAgent = new ReconAgent();
+  console.log(chalk.gray('  Running initial recon...'));
+  let recon;
+  try {
+    const reconResults = await reconAgent.analyze({ rootPath: absolutePath });
+    // An array result is treated as "no recon data"; anything else is passed through.
+    recon = Array.isArray(reconResults) ? {} : reconResults;
+  } catch { recon = {}; }
+  console.log(chalk.gray('  Recon complete. Watching...\n'));
+  const pendingFiles = new Set();
+  let debounceTimer = null;
+  let scanCount = 0;
+  // True while a scan is running, so that a second one is never started in parallel.
+  let scanInFlight = false;
+  const dbDir = path.join(absolutePath, WATCH_DB_DIR);
+  const dbFile = path.join(dbDir, WATCH_DB_FILE);
+  // (Re)arm the debounce timer; the scan runs once changes have been quiet for debounceMs.
+  const scheduleScan = () => {
+    if (debounceTimer) clearTimeout(debounceTimer);
+    debounceTimer = setTimeout(processChanges, debounceMs);
+  };
+  const processChanges = async () => {
+    // A scan can outlast the debounce period. Starting another one now would run
+    // two orchestrators at once and race on the state file, so leave the changes
+    // queued; the running scan reschedules when it finishes.
+    if (scanInFlight) return;
+    const files = [...pendingFiles];
+    pendingFiles.clear();
+    if (files.length === 0) return;
+    scanInFlight = true;
+    scanCount++;
+    const timestamp = new Date().toLocaleTimeString();
+    console.log(chalk.gray(`  [${timestamp}] ${files.length} file(s) changed — deep scanning...`));
+    try {
+      const orchestrator = buildOrchestrator();
+      const context = {
+        rootPath: absolutePath,
+        files,
+        changedFiles: files,
+        recon,
+        options: { incremental: true },
+      };
+      const findings = await orchestrator.run(context);
+      const scoreResult = scoringEngine.compute(findings);
+      // Persist results
+      try {
+        if (!fs.existsSync(dbDir)) fs.mkdirSync(dbDir, { recursive: true });
+        fs.writeFileSync(dbFile, JSON.stringify({
+          lastScan: new Date().toISOString(),
+          scanCount,
+          score: {
+            score: scoreResult.score,
+            grade: scoreResult.grade?.letter,
+            totalFindings: scoreResult.totalFindings,
+          },
+          agentic: scoreResult.agenticSummary
+            ? { flagged: scoreResult.agenticSummary.flagged, total: scoreResult.agenticSummary.total }
+            : null,
+          findings: findings.map(f => ({
+            file: path.relative(absolutePath, f.file || ''),
+            line: f.line,
+            severity: f.severity,
+            rule: f.rule,
+            title: f.title,
+            agenticRisk: f.agenticRisk || null,
+          })),
+        }, null, 2));
+      } catch { /* non-fatal */ }
+      // Output
+      const criticals = findings.filter(f => f.severity === 'critical').length;
+      const highs = findings.filter(f => f.severity === 'high').length;
+      if (findings.length === 0) {
+        console.log(chalk.green(`  [${timestamp}] ✔ Clean — Score: ${scoreResult.score}/100 ${scoreResult.grade?.letter}\n`));
+      } else {
+        const scoreColor = scoreResult.score >= 75 ? chalk.cyan : scoreResult.score >= 50 ? chalk.yellow : chalk.red;
+        console.log(`  [${timestamp}] ${chalk.white(`${findings.length} finding(s)`)}: ${criticals ? chalk.red.bold(`${criticals} critical`) : ''}${criticals && highs ? ', ' : ''}${highs ? chalk.yellow(`${highs} high`) : ''}. Score: ${scoreColor(`${scoreResult.score}/100 ${scoreResult.grade?.letter}`)}`);
+        for (const f of findings.filter(f => f.severity === 'critical' || f.severity === 'high')) {
+          const relFile = path.relative(absolutePath, f.file || '');
+          const sev = f.severity === 'critical' ? chalk.red.bold('!!') : chalk.yellow(' !');
+          const agentic = f.agenticRisk ? chalk.gray(` [${f.agenticRisk.id}]`) : '';
+          console.log(`    ${sev} ${f.title} — ${relFile}:${f.line}${agentic}`);
+        }
+        console.log('');
+      }
+      if (threshold && scoreResult.score < threshold) {
+        console.log(chalk.red.bold(`  ⚠ Score ${scoreResult.score} below threshold ${threshold}\n`));
+      }
+    } catch (err) {
+      console.log(chalk.red(`  [${timestamp}] Scan error: ${err.message}\n`));
+    } finally {
+      scanInFlight = false;
+      // Pick up anything that changed while this scan was running.
+      if (pendingFiles.size > 0) scheduleScan();
+    }
+  };
+  try {
+    const watcher = fs.watch(absolutePath, { recursive: true }, (eventType, filename) => {
+      if (!filename) return;
+      // Skip non-scannable
+      const relPath = filename.replace(/\\/g, '/');
+      // Ignore our own state directory: every scan rewrites watch.json inside the
+      // watched tree, which would otherwise queue a new scan after each scan.
+      if (relPath === WATCH_DB_DIR || relPath.startsWith(`${WATCH_DB_DIR}/`)) return;
+      for (const skipDir of SKIP_DIRS) {
+        if (relPath.includes(`${skipDir}/`)) return;
+      }
+      const ext = path.extname(filename).toLowerCase();
+      if (SKIP_EXTENSIONS.has(ext)) return;
+      if (SKIP_FILENAMES.has(path.basename(filename))) return;
+      if (filename.endsWith('.min.js') || filename.endsWith('.min.css')) return;
+      const fullPath = path.join(absolutePath, filename);
+      // Deleted files also raise events; there is nothing to scan for them.
+      if (!fs.existsSync(fullPath)) return;
+      pendingFiles.add(fullPath);
+      scheduleScan();
+    });
+    process.on('SIGINT', () => {
+      watcher.close();
+      console.log(`\n  Watch stopped. ${scanCount} scan(s) completed.\n`);
+      process.exit(0);
+    });
+    // Long no-op interval kept from the original implementation as a keep-alive.
+    setInterval(() => {}, 1000 * 60 * 60);
+  } catch (err) {
+    output.error(`Watch failed: ${err.message}`);
+    process.exit(1);
+  }
+}
+// =============================================================================
+// CONFIG WATCH — scanConfigFiles
+// =============================================================================
 async function scanConfigFiles(files, rootPath) {
   // Dynamic import to avoid circular dependency
   const { AgentConfigScanner } = await import('../agents/agent-config-scanner.js');

package/cli/index.js CHANGED Viewed

@@ -71,3 +71,8 @@ export { CacheManager } from './utils/cache-manager.js';
 // ── LLM Providers ─────────────────────────────────────────────────────────────
 export { createProvider, autoDetectProvider } from './providers/llm-provider.js';
+// ── v8.0.0 — Ship Safe × Hermes Agent ────────────────────────────────────────
+export { HermesSecurityAgent } from './agents/hermes-security-agent.js';
+export { AgentAttestationAgent } from './agents/agent-attestation-agent.js';
+export { HERMES_TOOLS, registerWithHermes, verifyIntegrity } from './utils/hermes-tool-registry.js';

package/cli/providers/llm-provider.js CHANGED Viewed

@@ -196,7 +196,7 @@ class GoogleProvider extends BaseLLMProvider {
 class OllamaProvider extends BaseLLMProvider {
   constructor(apiKey, options = {}) {
     super('Ollama', null, options);
-    this.model = options.model || 'llama3.2';
+    this.model = options.model || 'gemma4:e4b';
     this.baseUrl = options.baseUrl || 'http://localhost:11434/api/chat';
   }
@@ -223,6 +223,83 @@ class OllamaProvider extends BaseLLMProvider {
   }
 }
+// =============================================================================
+// GEMMA 4 PROVIDER
+// Uses Ollama's structured output (format: schema) for guaranteed JSON —
+// no regex parsing, no silent dropped findings.
+// =============================================================================
+// JSON Schema sent as the `format` field of the Ollama chat request in
+// GemmaProvider.classify(). The reply must be an object with a `results` array;
+// each item needs an id, a REAL/FALSE_POSITIVE classification, a reason and a
+// fix (string, or null when there is none).
+const CLASSIFY_SCHEMA = {
+  type: 'object',
+  properties: {
+    results: {
+      type: 'array',
+      items: {
+        type: 'object',
+        properties: {
+          id:             { type: 'string' },
+          classification: { type: 'string', enum: ['REAL', 'FALSE_POSITIVE'] },
+          reason:         { type: 'string' },
+          fix:            { type: ['string', 'null'] },
+        },
+        // `fix` is required but nullable, so the key is always present.
+        required: ['id', 'classification', 'reason', 'fix'],
+      },
+    },
+  },
+  required: ['results'],
+};
+/**
+ * Gemma 4 provider, served through a local Ollama instance.
+ *
+ * Differs from the parent OllamaProvider only in classify(): the request carries
+ * CLASSIFY_SCHEMA as Ollama's `format` so the reply is expected to be JSON of
+ * the shape `{ results: [...] }`, with a fallback to the inherited parseJSON()
+ * when the reply is not parseable JSON.
+ */
+class GemmaProvider extends OllamaProvider {
+  /**
+   * @param {object} [options]
+   * @param {string} [options.model='gemma4:e4b'] - Ollama model tag.
+   * @param {string} [options.baseUrl='http://localhost:11434/api/chat'] - Ollama chat endpoint.
+   */
+  constructor(options = {}) {
+    super(null, {
+      model:   options.model   || 'gemma4:e4b',
+      baseUrl: options.baseUrl || 'http://localhost:11434/api/chat',
+    });
+    this.name = 'Gemma4';
+    // num_ctx sent to Ollama. The original note gives 256K tokens for 27b/31b and
+    // 128K for e4b; the values used here are half of those figures.
+    // NOTE(review): window sizes come from that note — confirm against the model cards.
+    // Both large tags named in the note (27b and 31b) get the larger window.
+    const modelTag = String(options.model ?? '');
+    this.contextWindow = /(?:27|31)b/.test(modelTag) ? 131072 : 65536;
+  }
+  /**
+   * Classify findings using Ollama structured output (format: schema).
+   *
+   * @param {Array} findings - Findings to classify (passed to buildClassificationPrompt).
+   * @param {*} context - Extra context for the prompt (passed to buildClassificationPrompt).
+   * @returns {Promise<Array>} Classification results; empty when the reply has none.
+   * @throws {Error} When the HTTP response is not ok.
+   */
+  async classify(findings, context) {
+    const prompt = this.buildClassificationPrompt(findings, context);
+    const response = await fetch(this.baseUrl, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({
+        model:  this.model,
+        format: CLASSIFY_SCHEMA,
+        stream: false,
+        options: { num_ctx: this.contextWindow },
+        messages: [
+          { role: 'system', content: 'You are a security expert. Classify each finding as REAL or FALSE_POSITIVE and suggest a fix.' },
+          { role: 'user',   content: prompt },
+        ],
+      }),
+    });
+    if (!response.ok) {
+      throw new Error(`Gemma4/Ollama error: HTTP ${response.status}`);
+    }
+    const data = await response.json();
+    const text = data.message?.content || '';
+    try {
+      const parsed = JSON.parse(text);
+      // If the schema was not enforced (see fallback below) the model may answer
+      // with a bare JSON array. Previously that parsed fine but had no `.results`,
+      // so every classification was silently replaced by [].
+      if (Array.isArray(parsed)) return parsed;
+      return parsed.results ?? [];
+    } catch {
+      // Fallback: schema enforcement failed (old Ollama version) — try regex parse
+      return this.parseJSON(text);
+    }
+  }
+}
 // =============================================================================
 // OPENAI-COMPATIBLE PROVIDER
 // Handles Groq, Together AI, Mistral API, LM Studio, Azure OpenAI, Bedrock
@@ -239,6 +316,10 @@ const OPENAI_COMPATIBLE_PRESETS = {
   perplexity: { baseUrl: 'https://api.perplexity.ai/chat/completions',               model: 'llama-3.1-sonar-large-128k-online', envKey: 'PERPLEXITY_API_KEY' },
   lmstudio:   { baseUrl: 'http://localhost:1234/v1/chat/completions',                model: null,                         envKey: null },
   xai:        { baseUrl: 'https://api.x.ai/v1/chat/completions',                    model: 'grok-3-mini',                envKey: 'XAI_API_KEY' },
+  // Gemma 4 via Ollama — runs fully local, no API key required
+  // e4b: MoE 4B active params, ~8GB RAM;  27b: dense, ~20GB RAM
+  gemma4:     { baseUrl: 'http://localhost:11434/v1/chat/completions',               model: 'gemma4:e4b',                 envKey: null },
+  'gemma4:27b': { baseUrl: 'http://localhost:11434/v1/chat/completions',             model: 'gemma4:27b',                 envKey: null },
 };
 class OpenAICompatibleProvider extends OpenAIProvider {
@@ -279,6 +360,13 @@ export function createProvider(provider, apiKey, options = {}) {
     case 'ollama':
     case 'local':
       return new OllamaProvider(apiKey, options);
+    case 'gemma4':
+    case 'gemma':
+      // Gemma 4 via Ollama — structured output, no API key needed
+      return new GemmaProvider({
+        model:   options.model,
+        baseUrl: options.baseUrl,
+      });
   }
   // OpenAI-compatible presets