npm - agentaudit - Versions diffs - 3.12.10 → 3.12.12 - Mend

agentaudit 3.12.10 → 3.12.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/cli.mjs CHANGED Viewed

@@ -36,6 +36,19 @@ const __dirname = path.dirname(fileURLToPath(import.meta.url));
 const SKILL_DIR = path.resolve(__dirname);
 const REGISTRY_URL = 'https://agentaudit.dev';
+// ── Global error handlers — catch unhandled errors and exit cleanly ────
+process.on('uncaughtException', (err) => {
+  process.stderr.write(`\nagentaudit: fatal error — ${err.message || err}\n`);
+  if (process.argv.includes('--debug')) process.stderr.write(`${err.stack || ''}\n`);
+  process.exit(2);
+});
+process.on('unhandledRejection', (reason) => {
+  const msg = reason instanceof Error ? reason.message : String(reason);
+  process.stderr.write(`\nagentaudit: unhandled promise rejection — ${msg}\n`);
+  if (process.argv.includes('--debug') && reason instanceof Error) process.stderr.write(`${reason.stack || ''}\n`);
+  process.exit(2);
+});
 // ── Global flags (set in main before command routing) ────
 let jsonMode = false;
 let quietMode = false;
@@ -367,21 +380,23 @@ function multiSelect(items, { title = 'Select items', hint = 'Space=toggle  ↑
     process.stdin.resume();
     process.stdin.setEncoding('utf8');
+    const cleanup = () => {
+      try { process.stdin.setRawMode(false); } catch {}
+      process.stdin.pause();
+      process.stdin.removeListener('data', onData);
+    };
     const onData = (key) => {
-      // Ctrl+C
+      // Ctrl+C — restore terminal state and exit cleanly
       if (key === '\x03') {
-        process.stdin.setRawMode(false);
-        process.stdin.pause();
-        process.stdin.removeListener('data', onData);
+        cleanup();
         console.log();
-        process.exitCode = 0; return;
+        process.exit(0);
       }
       // Enter
       if (key === '\r' || key === '\n') {
-        process.stdin.setRawMode(false);
-        process.stdin.pause();
-        process.stdin.removeListener('data', onData);
+        cleanup();
         resolve(items.filter((_, i) => selected.has(i)).map(i => i.value));
         return;
       }
@@ -1001,23 +1016,34 @@ function formatApiError(error, provider, statusCode) {
   return null;
 }
+/**
+ * Validate that a parsed object looks like a valid audit report.
+ * Must have at least: findings (array) and one of skill_slug/risk_score/result.
+ */
+function isValidReportSchema(obj) {
+  if (!obj || typeof obj !== 'object') return false;
+  if (!Array.isArray(obj.findings)) return false;
+  // Must have at least one identifying field
+  if (!('skill_slug' in obj) && !('risk_score' in obj) && !('result' in obj)) return false;
+  return true;
+}
 function extractJSON(text) {
   // 1. Try parsing the entire text as JSON directly
-  try { return JSON.parse(text.trim()); } catch {}
+  try {
+    const parsed = JSON.parse(text.trim());
+    if (isValidReportSchema(parsed)) return parsed;
+  } catch {}
   // 2. Strip markdown code fences — try last fence first (report is usually at the end)
   const fenceMatches = [...text.matchAll(/```(?:json)?\s*\n?([\s\S]*?)\n?\s*```/g)];
   for (let i = fenceMatches.length - 1; i >= 0; i--) {
-    try {
+    try {
       const parsed = JSON.parse(fenceMatches[i][1].trim());
-      if (parsed && typeof parsed === 'object' && ('risk_score' in parsed || 'findings' in parsed || 'result' in parsed)) return parsed;
+      if (isValidReportSchema(parsed)) return parsed;
     } catch {}
   }
-  // Try any fence even without report keys
-  for (let i = fenceMatches.length - 1; i >= 0; i--) {
-    try { return JSON.parse(fenceMatches[i][1].trim()); } catch {}
-  }
   // 3. Find ALL balanced top-level { ... } blocks, try each (prefer largest valid one)
   const blocks = [];
   let searchFrom = 0;
@@ -1045,9 +1071,12 @@ function extractJSON(text) {
   // Try largest block first (the report JSON is usually the biggest)
   blocks.sort((a, b) => b.length - a.length);
   for (const block of blocks) {
-    try { return JSON.parse(block); } catch {}
+    try {
+      const parsed = JSON.parse(block);
+      if (isValidReportSchema(parsed)) return parsed;
+    } catch {}
   }
   return null;
 }
@@ -1067,8 +1096,15 @@ const SKIP_EXTENSIONS = new Set([
   '.dylib', '.dll', '.exe', '.bin', '.dat', '.db', '.sqlite',
 ]);
-function collectFiles(dir, basePath = '', collected = [], totalSize = { bytes: 0 }) {
+function collectFiles(dir, basePath = '', collected = [], totalSize = { bytes: 0 }, _visitedPaths = new Set()) {
   if (totalSize.bytes >= MAX_TOTAL_SIZE) return collected;
+  // Symlink loop protection: resolve real path and track visited directories
+  let realDir;
+  try { realDir = fs.realpathSync(dir); } catch { return collected; }
+  if (_visitedPaths.has(realDir)) return collected;
+  _visitedPaths.add(realDir);
   let entries;
   try { entries = fs.readdirSync(dir, { withFileTypes: true }); }
   catch { return collected; }
@@ -1077,15 +1113,24 @@ function collectFiles(dir, basePath = '', collected = [], totalSize = { bytes: 0
     if (totalSize.bytes >= MAX_TOTAL_SIZE) break;
     const relPath = basePath ? `${basePath}/${entry.name}` : entry.name;
     const fullPath = path.join(dir, entry.name);
+    // Skip symlinks that point to directories (prevent symlink traversal attacks)
+    if (entry.isSymbolicLink()) {
+      try {
+        const target = fs.realpathSync(fullPath);
+        if (fs.statSync(target).isDirectory()) continue; // skip symlinked dirs entirely
+      } catch { continue; }
+    }
     if (entry.isDirectory()) {
       // Special: scan .github/workflows/ (security-critical CI/CD files)
       if (entry.name === '.github') {
         const wfDir = path.join(fullPath, 'workflows');
-        try { if (fs.statSync(wfDir).isDirectory()) collectFiles(wfDir, relPath + '/workflows', collected, totalSize); } catch {}
+        try { if (fs.statSync(wfDir).isDirectory()) collectFiles(wfDir, relPath + '/workflows', collected, totalSize, _visitedPaths); } catch {}
         continue;
       }
       if (SKIP_DIRS.has(entry.name) || entry.name.startsWith('.')) continue;
-      collectFiles(fullPath, relPath, collected, totalSize);
+      collectFiles(fullPath, relPath, collected, totalSize, _visitedPaths);
     } else {
       const ext = path.extname(entry.name).toLowerCase();
       if (SKIP_EXTENSIONS.has(ext)) continue;
@@ -2745,6 +2790,30 @@ function checkContextLimit(model, systemPrompt, userMessage) {
   return null;
 }
+/**
+ * Safely parse JSON from a fetch response. If the response is not JSON
+ * (e.g. HTML error page from a 502/503), returns {error: {message: ...}}
+ * which the callLlm error handling paths already handle.
+ */
+async function safeJsonParse(res, llmConfig) {
+  const contentType = res.headers.get('content-type') || '';
+  // Read body as text first — we can only consume the stream once
+  let body;
+  try { body = await res.text(); } catch { body = ''; }
+  if (!res.ok && !contentType.includes('application/json')) {
+    // Non-JSON error response (e.g. HTML from a proxy/gateway)
+    const preview = body.slice(0, 200).replace(/<[^>]+>/g, '').trim();
+    return { error: { message: `HTTP ${res.status} from ${llmConfig.provider}${preview ? ': ' + preview : ''}` } };
+  }
+  try {
+    return JSON.parse(body);
+  } catch (parseErr) {
+    const preview = body.slice(0, 200).replace(/<[^>]+>/g, '').trim();
+    return { error: { message: `Invalid JSON from ${llmConfig.provider} (HTTP ${res.status}): ${preview || parseErr.message}` } };
+  }
+}
 async function callLlm(llmConfig, systemPrompt, userMessage) {
   const apiKey = process.env[llmConfig.key];
   if (!apiKey) return { error: `Missing API key: ${llmConfig.key}` };
@@ -2769,7 +2838,7 @@ async function callLlm(llmConfig, systemPrompt, userMessage) {
         body: JSON.stringify({ model: llmConfig.model, max_tokens: 16384, system: systemPrompt, messages: [{ role: 'user', content: userMessage }] }),
         signal: AbortSignal.timeout(180_000),
       });
-      data = await res.json();
+      data = await safeJsonParse(res, llmConfig);
       if (data.error) {
         const friendly = formatApiError(data.error, llmConfig.provider, res.status);
         return { error: friendly?.text || data.error.message || JSON.stringify(data.error), hint: friendly?.hint, duration: Date.now() - start };
@@ -2789,7 +2858,10 @@ async function callLlm(llmConfig, systemPrompt, userMessage) {
       }
       return { report, text: _text, duration: Date.now() - start, truncated: data.stop_reason === 'max_tokens' };
     } else if (llmConfig.type === 'gemini') {
-      const res = await fetch(`${llmConfig.url}/${llmConfig.model}:generateContent?key=${apiKey}`, {
+      // NOTE: Google's Gemini API requires the API key as a URL query parameter.
+      // This is by design (their auth model). We never log the full URL to avoid key leakage.
+      const geminiUrl = `${llmConfig.url}/${llmConfig.model}:generateContent?key=${apiKey}`;
+      const res = await fetch(geminiUrl, {
         method: 'POST',
         headers: { 'Content-Type': 'application/json' },
         body: JSON.stringify({
@@ -2799,7 +2871,7 @@ async function callLlm(llmConfig, systemPrompt, userMessage) {
         }),
         signal: AbortSignal.timeout(180_000),
       });
-      data = await res.json();
+      data = await safeJsonParse(res, llmConfig);
       if (data.error) {
         const friendly = formatApiError(data.error, llmConfig.provider, res.status);
         return { error: friendly?.text || data.error.message || JSON.stringify(data.error), hint: friendly?.hint, duration: Date.now() - start };
@@ -2827,7 +2899,7 @@ async function callLlm(llmConfig, systemPrompt, userMessage) {
         body: JSON.stringify({ model: llmConfig.model, max_tokens: 16384, messages: [{ role: 'system', content: systemPrompt }, { role: 'user', content: userMessage }] }),
         signal: AbortSignal.timeout(180_000),
       });
-      data = await res.json();
+      data = await safeJsonParse(res, llmConfig);
       if (data.error) {
         const friendly = formatApiError(data.error, llmConfig.provider, res.status);
         return { error: friendly?.text || data.error.message || JSON.stringify(data.error), hint: friendly?.hint, duration: Date.now() - start };
@@ -2919,7 +2991,23 @@ function enrichFindings(report, files, pkgInfo) {
     report.max_severity = report.findings.length > 0 ? maxSev : 'none';
   }
+  const VALID_SEVERITIES = new Set(['critical', 'high', 'medium', 'low', 'info']);
   for (const finding of report.findings) {
+    // 0. Validate & sanitize finding fields
+    // Severity: must be one of the known values
+    const sev = (finding.severity || '').toLowerCase();
+    finding.severity = VALID_SEVERITIES.has(sev) ? sev : 'medium';
+    // Line number: must be a positive integer
+    if (finding.line != null) {
+      const lineNum = parseInt(finding.line, 10);
+      finding.line = (Number.isFinite(lineNum) && lineNum > 0) ? lineNum : undefined;
+    }
+    // File path: reject suspicious characters (null bytes, .., protocol schemes)
+    if (finding.file && (/[\x00]|\.\.[\\/]|^[a-z]+:\/\//i.test(finding.file))) {
+      finding.file = undefined;
+    }
     // 1. Fill cwe_id from pattern_id lookup
     if (!finding.cwe_id || finding.cwe_id === '') {
       const prefix = (finding.pattern_id || '').replace(/_\d+$/, '');
@@ -3648,12 +3736,19 @@ async function remoteAudit(url) {
       for (const part of parts) {
         const eventMatch = part.match(/^event:\s*(.+)/m);
-        const dataMatch = part.match(/^data:\s*(.+)/m);
-        if (!eventMatch || !dataMatch) continue;
+        if (!eventMatch) continue;
+        // Accumulate all data: lines per SSE spec (data fields can span multiple lines)
+        const dataLines = [];
+        for (const line of part.split('\n')) {
+          const dm = line.match(/^data:\s?(.*)/);
+          if (dm) dataLines.push(dm[1]);
+        }
+        if (dataLines.length === 0) continue;
+        const dataStr = dataLines.join('\n');
         const event = eventMatch[1].trim();
         let data;
-        try { data = JSON.parse(dataMatch[1]); } catch { continue; }
+        try { data = JSON.parse(dataStr); } catch { continue; }
         switch (event) {
           case 'step': {
@@ -4864,13 +4959,23 @@ async function main() {
       `  agentaudit consensus fastmcp --json`,
     ],
     history: [
-      `${c.bold}agentaudit history${c.reset} [options]`,
+      `${c.bold}agentaudit history${c.reset} [show|upload] [n]`,
       ``,
       `Show your local audit history. Results are stored in ~/.config/agentaudit/history/`,
       `after every audit run. No internet connection required.`,
       ``,
+      `${c.bold}Subcommands:${c.reset}`,
+      `  history              List all local audits (numbered)`,
+      `  history show <n>     Show full report details for entry #n`,
+      `  history upload <n>   Retry upload of entry #n to agentaudit.dev`,
+      ``,
       `${c.bold}Options:${c.reset}`,
       `  --json          Machine-readable JSON output`,
+      ``,
+      `${c.bold}Examples:${c.reset}`,
+      `  agentaudit history`,
+      `  agentaudit history show 1`,
+      `  agentaudit history upload 1`,
     ],
     activity: [
       `${c.bold}agentaudit activity${c.reset} [options]`,
@@ -5033,13 +5138,96 @@ async function main() {
   }
   if (command === 'history') {
     banner();
+    const subCmd = targets[0];
     const entries = loadHistory(30);
-    if (entries.length === 0) {
+    if (entries.length === 0 && !subCmd) {
       console.log(`  ${c.dim}No local audit history yet. Run ${c.cyan}agentaudit audit <url>${c.dim} to start.${c.reset}`);
       console.log();
       return;
     }
+    // history show <n> — show full report details
+    if (subCmd === 'show') {
+      const idx = parseInt(targets[1], 10) - 1;
+      if (isNaN(idx) || idx < 0 || idx >= entries.length) {
+        console.log(`  ${c.red}Invalid index.${c.reset} Use a number from 1 to ${entries.length}.`);
+        console.log(`  ${c.dim}Run ${c.cyan}agentaudit history${c.dim} to see the list.${c.reset}`);
+        return;
+      }
+      const entry = entries[idx];
+      if (jsonMode) {
+        console.log(JSON.stringify(entry, null, 2));
+        return;
+      }
+      console.log(sectionHeader(`Report: ${entry.skill_slug || 'unknown'}`));
+      console.log();
+      console.log(`  Source      ${c.bold}${entry.source_url || '?'}${c.reset}`);
+      console.log(`  Model       ${c.bold}${entry.audit_model || '?'}${c.reset}  ${c.dim}(${entry.audit_provider || '?'})${c.reset}`);
+      console.log(`  Risk        ${riskBadge(entry.risk_score ?? 0)}`);
+      console.log(`  Result      ${entry.result || '?'}`);
+      console.log(`  Files       ${entry.files_scanned || '?'}  ${c.dim}Duration: ${entry.audit_duration_ms ? (entry.audit_duration_ms / 1000).toFixed(1) + 's' : '?'}${c.reset}`);
+      console.log(`  Tokens      ${c.dim}in: ${entry.input_tokens || '?'}  out: ${entry.output_tokens || '?'}${c.reset}`);
+      console.log(`  File        ${c.dim}${entry._file}${c.reset}`);
+      console.log();
+      if (entry.findings && entry.findings.length > 0) {
+        console.log(sectionHeader(`Findings (${entry.findings.length})`));
+        console.log();
+        for (const f of entry.findings) {
+          const sc = severityColor(f.severity);
+          console.log(`  ${sc}┃${c.reset} ${sc}${(f.severity || '').toUpperCase().padEnd(8)}${c.reset}  ${c.bold}${f.title}${c.reset}`);
+          if (f.file) console.log(`  ${sc}┃${c.reset}           ${c.dim}${f.file}${f.line ? ':' + f.line : ''}${c.reset}`);
+          if (f.description) console.log(`  ${sc}┃${c.reset}           ${c.dim}${f.description.slice(0, 200)}${c.reset}`);
+          console.log();
+        }
+      } else {
+        console.log(`  ${c.green}No findings.${c.reset}`);
+        console.log();
+      }
+      return;
+    }
+    // history upload <n> — retry upload of a local report
+    if (subCmd === 'upload') {
+      const idx = parseInt(targets[1], 10) - 1;
+      if (isNaN(idx) || idx < 0 || idx >= entries.length) {
+        console.log(`  ${c.red}Invalid index.${c.reset} Use a number from 1 to ${entries.length}.`);
+        console.log(`  ${c.dim}Run ${c.cyan}agentaudit history${c.dim} to see the list.${c.reset}`);
+        return;
+      }
+      const entry = entries[idx];
+      const creds = loadCredentials();
+      if (!creds) {
+        console.log(`  ${c.red}Not logged in.${c.reset} Run ${c.cyan}agentaudit login${c.reset} first.`);
+        return;
+      }
+      process.stdout.write(`  Uploading ${c.bold}${entry.skill_slug}${c.reset} (${entry.audit_model || '?'})...`);
+      try {
+        const reportCopy = { ...entry };
+        delete reportCopy._file;
+        const res = await fetch(`${REGISTRY_URL}/api/reports`, {
+          method: 'POST',
+          headers: { 'Authorization': `Bearer ${creds.api_key}`, 'Content-Type': 'application/json' },
+          body: JSON.stringify(reportCopy),
+          signal: AbortSignal.timeout(30_000),
+        });
+        if (res.ok) {
+          const data = await res.json();
+          console.log(` ${c.green}done${c.reset} ${c.dim}(report #${data.report_id})${c.reset}`);
+          console.log(`  ${c.dim}${REGISTRY_URL}/packages/${entry.skill_slug}${c.reset}`);
+        } else {
+          const errBody = await res.text().catch(() => '');
+          console.log(` ${c.red}failed (HTTP ${res.status})${c.reset}`);
+          if (errBody) console.log(`  ${c.dim}${errBody.slice(0, 300)}${c.reset}`);
+        }
+      } catch (e) {
+        console.log(` ${c.red}failed: ${e.message}${c.reset}`);
+      }
+      console.log();
+      return;
+    }
+    // Default: list all entries
     if (jsonMode) {
       console.log(JSON.stringify(entries, null, 2));
       return;
@@ -5048,7 +5236,8 @@ async function main() {
     console.log(sectionHeader(`Local History (${entries.length})`));
     console.log();
-    for (const entry of entries) {
+    for (let i = 0; i < entries.length; i++) {
+      const entry = entries[i];
       const slug = entry.skill_slug || 'unknown';
       const risk = entry.risk_score ?? '?';
       const sev = entry.max_severity || 'none';
@@ -5056,10 +5245,13 @@ async function main() {
       const model = entry.audit_model || '?';
       const fc = entry.findings?.length || 0;
       const ts = entry._file?.slice(0, 10) || '';
-      console.log(`  ${sc}┃${c.reset} ${c.bold}${slug.padEnd(30)}${c.reset} ${riskBadge(risk)}  ${c.dim}${model}${c.reset}`);
-      console.log(`  ${sc}┃${c.reset} ${c.dim}${ts}  ${fc} findings  ${sev.toUpperCase()}${c.reset}`);
+      const num = `${c.dim}${String(i + 1).padStart(2)}.${c.reset}`;
+      console.log(`  ${num} ${sc}┃${c.reset} ${c.bold}${slug.padEnd(30)}${c.reset} ${riskBadge(risk)}  ${c.dim}${model}${c.reset}`);
+      console.log(`     ${sc}┃${c.reset} ${c.dim}${ts}  ${fc} findings  ${sev.toUpperCase()}${c.reset}`);
       console.log();
     }
+    console.log(`  ${c.dim}Tip: ${c.cyan}agentaudit history show <n>${c.dim} for details, ${c.cyan}history upload <n>${c.dim} to retry upload${c.reset}`);
+    console.log();
     return;
   }
   if (command === 'activity' || command === 'my') {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "agentaudit",
-  "version": "3.12.10",
+  "version": "3.12.12",
   "description": "Security scanner for AI agent packages — CLI + MCP server",
   "type": "module",
   "bin": {

package/prompts/audit-prompt.md CHANGED Viewed

@@ -237,6 +237,35 @@ A package that integrates multiple APIs requiring multiple credentials is a feat
 - Test files with deliberate vulnerabilities
 - Negation contexts ("never use eval"), install docs (`sudo apt`)
+### ❌ Opt-In Features with Safety Warnings ≠ Default Vulnerabilities
+If a feature must be EXPLICITLY enabled (via env var, config flag, CLI option) AND the naming/docs warn about risks, this is NOT a vulnerability in the default configuration.
+```
+❌ FALSE POSITIVE: MCP server has ENABLE_UNSAFE_SSE_TRANSPORT env var (default: unset/disabled) → NOT Critical (at most LOW/by_design)
+❌ FALSE POSITIVE: Helm chart has useLegacyRules: false with documented "not recommended for production" → NOT a finding (defaults are safe)
+❌ FALSE POSITIVE: Debug mode available via DEBUG=true env var → NOT a finding (operator must enable it)
+✅ TRUE POSITIVE: SSE transport enabled by default without authentication → IS a finding (default is insecure)
+✅ TRUE POSITIVE: Admin panel accessible without auth unless DISABLE_ADMIN=true → IS a finding (default is insecure)
+```
+**Key distinction:** "Vulnerable if operator explicitly opts in" (LOW/by_design) vs "Vulnerable by default" (HIGH/CRITICAL). Count the prerequisites — each explicit opt-in step REDUCES severity.
+### ❌ Secure Code Patterns ≠ Injection Vulnerabilities
+These code patterns are SECURE and must NOT be flagged:
+```
+❌ FALSE POSITIVE: execFileSync("kubectl", cmdArgs) where cmdArgs is an array → NOT shell injection (array args bypass shell)
+❌ FALSE POSITIVE: execFile(command, [arg1, arg2]) → NOT command injection (no shell interpolation)
+❌ FALSE POSITIVE: subprocess.run(["git", "clone", url]) → NOT injection (list form, no shell=True)
+✅ TRUE POSITIVE: exec(`kubectl ${userInput}`) → IS command injection (string concatenation with shell)
+✅ TRUE POSITIVE: execSync("git clone " + url) → IS command injection (string concatenation)
+```
+**Key distinction:** Array-based process spawning (`execFile`/`execFileSync` with args array, `subprocess.run` with list) does NOT use a shell, so shell metacharacters in the arguments CANNOT inject additional commands. Only string-based execution (`exec`, `execSync`, `shell=True`) is vulnerable to shell injection.
+### ❌ Never Fabricate Code That Doesn't Exist
+If you cannot find the EXACT code pattern in the provided source files, do NOT report it. Specifically:
+- Do NOT invent HTTP headers (e.g., `Access-Control-Allow-Origin: *`) that are not in the source code
+- Do NOT assume a file contains code based on its name — VERIFY by reading it
+- Do NOT report line numbers you haven't verified against actual file content
+- If a vulnerability would exist in a dependency (e.g., Express defaults, MCP SDK) but NOT in the scanned package's code, it is NOT a finding for this package
 ## 3.3 Core-Functionality-Exemption (Hard Rule)
 If the pattern is in the Package Profile's "Expected Behaviors" list:
@@ -272,8 +301,9 @@ For each candidate finding, evaluate:
 - **None** (requires code modification) → likely NOT a finding
 ### Attack Complexity
-- **Low**: No special conditions, works out of the box
+- **Low**: No special conditions, works out of the box with default configuration
 - **High**: Requires specific config, race conditions, chained exploits → cap at MEDIUM unless catastrophic impact
+- **Opt-in required**: Vulnerability only exists if operator explicitly enables a feature (env var, config flag) → cap at LOW. Within that cap, each required opt-in step reduces severity by one level.
 ### Privileges & Interaction Required
 - More prerequisites → lower realistic severity