agentaudit 3.12.1 → 3.12.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/cli.mjs CHANGED
@@ -150,6 +150,36 @@ const USER_CRED_DIR = path.join(xdgConfig, 'agentaudit');
150
150
  const USER_CRED_FILE = path.join(USER_CRED_DIR, 'credentials.json');
151
151
  const SKILL_CRED_FILE = path.join(SKILL_DIR, 'config', 'credentials.json');
152
152
  const PROFILE_CACHE_FILE = path.join(USER_CRED_DIR, 'profile-cache.json');
153
// Directory under USER_CRED_DIR where saveHistory() writes, and loadHistory() reads, one JSON file per audit report.
+ const HISTORY_DIR = path.join(USER_CRED_DIR, 'history');
154
+
155
/**
 * Persist an audit report as a pretty-printed JSON file inside HISTORY_DIR.
 *
 * The filename is `<timestamp>_<slug>_<model>.json`, where the timestamp is the
 * current UTC time at second resolution with `:` and `.` replaced by `-`.
 * Saving is best-effort: a failure never propagates to the caller.
 *
 * @param {object} report - Report to store; `skill_slug` and `audit_model` are
 *   used for the filename and fall back to 'unknown' when missing.
 * @returns {void}
 */
function saveHistory(report) {
  // Both filename parts may originate from LLM output or a URL, so anything that
  // could act as a path separator is replaced before it reaches the filesystem.
  const toFilenamePart = (value, allowed, maxLen) =>
    String(value || 'unknown').replace(allowed, '-').slice(0, maxLen) || 'unknown';

  try {
    fs.mkdirSync(HISTORY_DIR, { recursive: true });
    // The slug keeps '.' and '_' (common in package names); '/' and '\' never survive.
    const slug = toFilenamePart(report.skill_slug, /[^a-z0-9._-]/gi, 100);
    const model = toFilenamePart(report.audit_model, /[^a-z0-9-]/gi, 30);
    const ts = new Date().toISOString().replace(/[:.]/g, '-').slice(0, 19);
    const filename = `${ts}_${slug}_${model}.json`;
    fs.writeFileSync(path.join(HISTORY_DIR, filename), JSON.stringify(report, null, 2));
  } catch (err) {
    // History is a convenience feature: never fail the audit because of it,
    // but leave a trace when the user explicitly asked for diagnostics.
    if (process.argv.includes('--debug')) {
      console.error(`  Could not save audit history: ${err?.message ?? err}`);
    }
  }
}
165
+
166
/**
 * Load the most recent saved audit reports from HISTORY_DIR.
 *
 * Files are ordered by filename in descending order (filenames written by
 * saveHistory start with a timestamp, so this is newest first). Files that
 * cannot be read, are not valid JSON, or do not contain a JSON object are
 * skipped and do not count against `limit`.
 *
 * @param {number} [limit=20] - Maximum number of reports to return.
 * @returns {object[]} Parsed reports, each with an added `_file` property
 *   holding its filename. Returns an empty array when the directory is missing
 *   or unreadable, or when `limit` is not a positive number.
 */
function loadHistory(limit = 20) {
  const max = Number(limit);
  if (!(max > 0)) return [];

  try {
    if (!fs.existsSync(HISTORY_DIR)) return [];

    const newestFirst = fs.readdirSync(HISTORY_DIR)
      .filter((name) => name.endsWith('.json'))
      .sort()
      .reverse();

    const reports = [];
    for (const name of newestFirst) {
      if (reports.length >= max) break;
      try {
        const data = JSON.parse(fs.readFileSync(path.join(HISTORY_DIR, name), 'utf8'));
        // A report must be a plain JSON object; anything else cannot carry `_file`.
        if (data === null || typeof data !== 'object' || Array.isArray(data)) continue;
        data._file = name;
        reports.push(data);
      } catch {
        // Corrupt or unreadable entry: skip it and keep looking for valid ones.
      }
    }
    return reports;
  } catch {
    // Directory listing failed (permissions, race with deletion): treat as no history.
    return [];
  }
}
153
183
 
154
184
  function loadCredentials() {
155
185
  for (const f of [SKILL_CRED_FILE, USER_CRED_FILE]) {
@@ -222,6 +252,37 @@ function resolveProvider() {
222
252
  return LLM_PROVIDERS.find(p => process.env[p.key]) || null;
223
253
  }
224
254
 
255
/**
 * Resolve a model name to a provider configuration that has an API key set.
 *
 * Resolution order:
 *   1. Names containing '/' are routed to OpenRouter only.
 *   2. Names starting with a known prefix (claude, gemini, gpt, ...) use the
 *      matching native provider, if its API key is present in the environment.
 *   3. Names listed verbatim in PROVIDER_MODELS use that provider.
 *   4. Otherwise OpenRouter is tried as a last resort.
 *
 * @param {string} modelName - Model identifier as given by the user.
 * @returns {object|null} A copy of the matching LLM_PROVIDERS entry with
 *   `model` set to `modelName`, or null when the name is empty / not a string
 *   or no provider with a configured API key can serve it.
 */
function resolveModel(modelName) {
  if (typeof modelName !== 'string' || modelName.trim() === '') return null;

  // A provider only counts when the environment variable named by its `key` is set.
  const configFor = (providerId) => {
    const entry = LLM_PROVIDERS.find((p) => p.provider === providerId && process.env[p.key]);
    return entry ? { ...entry, model: modelName } : null;
  };

  // model with '/' → OpenRouter
  if (modelName.includes('/')) return configFor('openrouter');

  // Known prefix → native provider
  const prefixes = [
    ['claude', 'anthropic'], ['gemini', 'google'], ['gpt', 'openai'],
    ['deepseek', 'deepseek'], ['mistral', 'mistral'], ['grok', 'xai'], ['glm', 'zhipu'],
  ];
  const lowered = modelName.toLowerCase();
  for (const [prefix, providerId] of prefixes) {
    if (!lowered.startsWith(prefix)) continue;
    const config = configFor(providerId);
    if (config) return config;
    // No key for the native provider: keep going, a later step may still serve it.
  }

  // Check PROVIDER_MODELS for exact match
  for (const [providerId, models] of Object.entries(PROVIDER_MODELS)) {
    if (!Array.isArray(models) || !models.some((m) => m.value === modelName)) continue;
    const config = configFor(providerId);
    if (config) return config;
  }

  // Last resort: OpenRouter
  return configFor('openrouter');
}
285
+
225
286
  function saveCredentials(data) {
226
287
  const json = JSON.stringify(data, null, 2);
227
288
  fs.mkdirSync(USER_CRED_DIR, { recursive: true });
@@ -992,7 +1053,24 @@ function detectPackageInfo(repoPath, files) {
992
1053
  for (const ef of entryFiles) {
993
1054
  if (files.some(f => f.path === ef)) { info.entrypoint = ef; break; }
994
1055
  }
995
-
1056
+
1057
+ // Extract package version from manifest files
1058
+ info.version = null;
1059
+ const versionSources = [
1060
+ { file: 'package.json', extract: c => { try { return JSON.parse(c).version; } catch { return null; } } },
1061
+ { file: 'pyproject.toml', extract: c => { const m = c.match(/^\s*version\s*=\s*["']([^"']+)["']/m); return m?.[1] || null; } },
1062
+ { file: 'setup.py', extract: c => { const m = c.match(/version\s*=\s*["']([^"']+)["']/); return m?.[1] || null; } },
1063
+ { file: 'setup.cfg', extract: c => { const m = c.match(/^\s*version\s*=\s*(.+)$/m); return m?.[1]?.trim() || null; } },
1064
+ { file: 'Cargo.toml', extract: c => { const m = c.match(/^\s*version\s*=\s*["']([^"']+)["']/m); return m?.[1] || null; } },
1065
+ ];
1066
+ for (const vs of versionSources) {
1067
+ const f = files.find(f => f.path === vs.file || f.path.endsWith('/' + vs.file));
1068
+ if (f) {
1069
+ const v = vs.extract(f.content);
1070
+ if (v) { info.version = v; break; }
1071
+ }
1072
+ }
1073
+
996
1074
  return info;
997
1075
  }
998
1076
 
@@ -2509,6 +2587,219 @@ function loadAuditPrompt() {
2509
2587
  return null;
2510
2588
  }
2511
2589
 
2590
/**
 * Send one audit request to an LLM provider and parse the JSON report from its reply.
 *
 * Three wire formats are supported, selected by `llmConfig.type`:
 * 'anthropic' (Messages API), 'gemini' (generateContent) and anything else,
 * which is treated as an OpenAI-compatible chat-completions endpoint.
 *
 * This function never throws; every failure is reported through the result object.
 *
 * @param {object} llmConfig - Provider config: `key` (name of the env var holding
 *   the API key), `url`, `model`, `provider`, and optionally `type`.
 * @param {string} systemPrompt - System instruction for the model.
 * @param {string} userMessage - User message containing the code to audit.
 * @returns {Promise<object>} On success `{ report, text, duration }`, where `report`
 *   is the value returned by extractJSON(text) (falsy when no JSON was found) with
 *   model/provider/token metadata attached. On failure `{ error, hint?, duration }`
 *   (`duration` is omitted only when the API key is missing). Durations are in ms.
 */
async function callLlm(llmConfig, systemPrompt, userMessage) {
  const apiKey = process.env[llmConfig.key];
  if (!apiKey) return { error: `Missing API key: ${llmConfig.key}` };

  const MAX_OUTPUT_TOKENS = 8192;
  const TIMEOUT_MS = 120_000;
  const { type, provider, model } = llmConfig;
  const start = Date.now();
  const elapsed = () => Date.now() - start;

  // Concatenate the text of all textual blocks/parts. Replies can be split into
  // several parts, or start with a non-text block, so reading only the first
  // element can lose the JSON. Gemini "thought" parts are not part of the answer.
  const joinText = (blocks) => (Array.isArray(blocks) ? blocks : [])
    .map((b) => (typeof b?.text === 'string' && b.thought !== true ? b.text : ''))
    .join('');

  try {
    // ── Build the provider-specific request ──
    let url = llmConfig.url;
    let headers;
    let payload;
    if (type === 'anthropic') {
      headers = { 'x-api-key': apiKey, 'anthropic-version': '2023-06-01', 'content-type': 'application/json' };
      payload = { model, max_tokens: MAX_OUTPUT_TOKENS, system: systemPrompt, messages: [{ role: 'user', content: userMessage }] };
    } else if (type === 'gemini') {
      // The key travels in a header rather than the query string so it cannot
      // leak through URLs captured in logs, proxies or error messages.
      url = `${llmConfig.url}/${model}:generateContent`;
      headers = { 'x-goog-api-key': apiKey, 'Content-Type': 'application/json' };
      payload = {
        systemInstruction: { parts: [{ text: systemPrompt }] },
        contents: [{ role: 'user', parts: [{ text: userMessage }] }],
        generationConfig: { maxOutputTokens: MAX_OUTPUT_TOKENS },
      };
    } else {
      headers = { 'Authorization': `Bearer ${apiKey}`, 'Content-Type': 'application/json' };
      if (provider === 'openrouter') {
        headers['HTTP-Referer'] = 'https://agentaudit.dev';
        headers['X-Title'] = 'AgentAudit CLI';
      }
      payload = {
        model,
        max_tokens: MAX_OUTPUT_TOKENS,
        messages: [{ role: 'system', content: systemPrompt }, { role: 'user', content: userMessage }],
      };
    }

    const res = await fetch(url, {
      method: 'POST',
      headers,
      body: JSON.stringify(payload),
      signal: AbortSignal.timeout(TIMEOUT_MS),
    });

    // Read as text first: gateways may answer with HTML or an empty body, and a
    // timeout while reading must still reach the outer catch as a TimeoutError.
    const rawBody = await res.text();
    let data = null;
    try { data = JSON.parse(rawBody); } catch { /* reported just below */ }
    if (data === null || typeof data !== 'object') {
      return {
        error: `Unexpected non-JSON response from ${provider} (HTTP ${res.status})`,
        hint: 'Try again later or use a different model',
        duration: elapsed(),
      };
    }

    if (data.error) {
      const friendly = formatApiError(data.error, provider, res.status);
      return { error: friendly?.text || data.error.message || JSON.stringify(data.error), hint: friendly?.hint, duration: elapsed() };
    }
    if (!res.ok) {
      // Failure status without a structured error object in the body.
      return { error: `HTTP ${res.status} from ${provider}`, duration: elapsed() };
    }

    // ── Extract the reply text and attach provider metadata ──
    let text;
    if (type === 'anthropic') text = joinText(data.content);
    else if (type === 'gemini') text = joinText(data.candidates?.[0]?.content?.parts);
    else text = data.choices?.[0]?.message?.content || '';

    const report = extractJSON(text);
    if (report) {
      report.audit_provider = provider;
      if (type === 'gemini') {
        report.audit_model = data.modelVersion || model;
        if (data.usageMetadata) {
          report.input_tokens = data.usageMetadata.promptTokenCount;
          report.output_tokens = data.usageMetadata.candidatesTokenCount;
        }
      } else {
        report.audit_model = data.model || model;
        if (data.id) report.provider_msg_id = data.id;
        if (type !== 'anthropic' && data.system_fingerprint) report.provider_fingerprint = data.system_fingerprint;
        if (data.usage) {
          const anthropic = type === 'anthropic';
          report.input_tokens = anthropic ? data.usage.input_tokens : data.usage.prompt_tokens;
          report.output_tokens = anthropic ? data.usage.output_tokens : data.usage.completion_tokens;
        }
      }
    }
    return { report, text, duration: elapsed() };
  } catch (err) {
    const dur = elapsed();
    if (err.name === 'TimeoutError' || err.message?.includes('timeout')) return { error: 'Request timed out (120s)', hint: 'Try again or use a faster model', duration: dur };
    if (err.code === 'ENOTFOUND' || err.code === 'ECONNREFUSED' || err.message?.includes('fetch failed')) return { error: `Network error: could not reach ${provider}`, hint: 'Check your internet connection', duration: dur };
    return { error: err.message, duration: dur };
  }
}
2674
+
2675
+ // ── Deterministic post-processing for LLM reports ────────────────────────
2676
+ // Fills in missing fields that LLMs often omit, using deterministic lookups
2677
+
2678
// Lookup tables used to fill in report fields that LLMs often omit.
// They are frozen because they are shared, read-only data: an accidental write
// would otherwise silently change every later report.

// Pattern-id prefix (pattern id without its trailing _NNN counter) → CWE identifier.
const PATTERN_CWE_MAP = Object.freeze({
  CMD_INJECT: 'CWE-78',
  CRED_THEFT: 'CWE-522',
  DATA_EXFIL: 'CWE-200',
  DESTRUCT: 'CWE-912',
  OBF: 'CWE-506',
  SANDBOX_ESC: 'CWE-693',
  SUPPLY_CHAIN: 'CWE-1357',
  SOCIAL_ENG: 'CWE-451',
  PRIV_ESC: 'CWE-269',
  INFO_LEAK: 'CWE-200',
  CRYPTO_WEAK: 'CWE-327',
  DESER: 'CWE-502',
  PATH_TRAV: 'CWE-22',
  SEC_BYPASS: 'CWE-693',
  PERSIST: 'CWE-912',
  AI_PROMPT: 'CWE-1426',
  MCP_POISON: 'CWE-1426',
  MCP_INJECT: 'CWE-94',
  MCP_TRAVERSAL: 'CWE-22',
  MCP_SUPPLY: 'CWE-1357',
  MCP_PERM: 'CWE-269',
  WORM: 'CWE-912',
  CICD: 'CWE-912',
  CORR: 'CWE-829',
  MANUAL: 'CWE-693',
});

// Severity → default score_impact (negative points) for a finding.
const SEVERITY_IMPACT = Object.freeze({ critical: -25, high: -15, medium: -5, low: -1 });

// Pattern-id prefix → default remediation advice.
const REMEDIATION_TEMPLATES = Object.freeze({
  CMD_INJECT: 'Validate and sanitize input; use allowlists or parameterized execution instead of shell strings',
  CRED_THEFT: 'Remove hardcoded credentials; use environment variables or a secrets manager',
  DATA_EXFIL: 'Remove or document the external data transmission; ensure user consent',
  DESTRUCT: 'Add confirmation prompts and safeguards before destructive operations',
  OBF: 'Replace obfuscated code with readable equivalents; document the purpose',
  SANDBOX_ESC: 'Restrict file and process access to configured boundaries',
  SUPPLY_CHAIN: 'Pin dependency versions; verify package integrity',
  SOCIAL_ENG: 'Align documentation with actual code behavior',
  PRIV_ESC: 'Apply principle of least privilege; remove unnecessary elevated permissions',
  INFO_LEAK: 'Restrict exposed information to what is necessary for operation',
  CRYPTO_WEAK: 'Use modern cryptographic algorithms (AES-256, SHA-256+)',
  DESER: 'Use safe deserialization (e.g. yaml.safe_load, JSON) instead of unsafe loaders',
  PATH_TRAV: 'Sanitize file paths; reject inputs containing .. or absolute paths',
  SEC_BYPASS: 'Do not disable security controls; use proper certificate validation',
  PERSIST: 'Remove persistence mechanisms or require explicit user opt-in',
  AI_PROMPT: 'Remove hidden instructions; ensure tool descriptions are transparent',
  MCP_POISON: 'Remove injected instructions from tool descriptions and schemas',
  MCP_INJECT: 'Sanitize tool arguments and descriptions; prevent prompt injection',
  MCP_TRAVERSAL: 'Validate and sandbox file paths in MCP tool handlers',
  MCP_SUPPLY: 'Pin MCP package versions; verify transport configurations',
  MCP_PERM: 'Restrict permissions to minimum required scope; remove wildcard grants',
});
2714
+
2715
/**
 * Deterministically complete and normalize an LLM-produced audit report, in place.
 *
 * Fills `package_version` and `max_severity` when missing, completes every finding
 * (cwe_id, content snippet, remediation, score_impact, confidence, by_design) and
 * then recomputes `risk_score`, `result` and `findings_count` from the findings.
 *
 * @param {object} report - Report to enrich. Returned untouched when it is falsy
 *   or `report.findings` is not an array.
 * @param {Array<{path: string, content: string}>} files - Scanned source files,
 *   used to recover missing code snippets. May be omitted.
 * @param {{version?: string}} pkgInfo - Detected package info. May be omitted.
 * @returns {object} The same `report` object.
 */
function enrichFindings(report, files, pkgInfo) {
  if (!report || !Array.isArray(report.findings)) return report;

  const SEVERITY_ORDER = ['critical', 'high', 'medium', 'low']; // worst first
  const CONFIDENCE_LEVELS = ['high', 'medium', 'low'];
  const sourceFiles = Array.isArray(files) ? files : [];

  // Keys come from LLM output, so only own properties may match: a pattern id
  // such as "constructor" must not resolve to something on Object.prototype.
  const lookup = (table, key, fallback) => (Object.hasOwn(table, key) ? table[key] : fallback);
  const isBlank = (value) => !value || value === '...';
  const severityOf = (finding) => String(finding.severity || '').toLowerCase();
  const patternPrefix = (finding) => String(finding.pattern_id || '').replace(/_\d+$/, '').toUpperCase();

  // Entries that are not objects cannot be enriched and would crash the steps below.
  report.findings = report.findings.filter((f) => f !== null && typeof f === 'object');

  // Ensure package_version
  if (!report.package_version || report.package_version === 'unknown') {
    report.package_version = pkgInfo?.version || 'unknown';
  }

  // Ensure max_severity: the worst severity among the findings, or 'none' when
  // there are no findings with a recognized severity. Only filled when unset.
  let worstIdx = SEVERITY_ORDER.length;
  for (const finding of report.findings) {
    const idx = SEVERITY_ORDER.indexOf(severityOf(finding));
    if (idx !== -1 && idx < worstIdx) worstIdx = idx;
  }
  if (!report.max_severity || report.max_severity === 'none') {
    report.max_severity = worstIdx < SEVERITY_ORDER.length ? SEVERITY_ORDER[worstIdx] : 'none';
  }

  for (const finding of report.findings) {
    const prefix = patternPrefix(finding);

    // 1. Fill cwe_id from pattern_id lookup
    if (!finding.cwe_id) {
      finding.cwe_id = lookup(PATTERN_CWE_MAP, prefix, 'CWE-693');
    }

    // 2. Fill content (code snippet) from the files array: the reported line
    //    plus one line of context on each side.
    const lineNo = Number(finding.line);
    if (isBlank(finding.content) && finding.file && Number.isInteger(lineNo) && lineNo >= 1) {
      const suffix = `/${finding.file}`;
      const match = sourceFiles.find(
        (f) => typeof f?.path === 'string' && (f.path === finding.file || f.path.endsWith(suffix))
      );
      if (match && typeof match.content === 'string') {
        const lines = match.content.split('\n');
        const idx = lineNo - 1;
        if (idx < lines.length) {
          finding.content = lines
            .slice(Math.max(0, idx - 1), Math.min(lines.length, idx + 2))
            .map((l) => l.trimEnd())
            .join('\n')
            .trim();
        }
      }
    }

    // 3. Fill remediation from template
    if (isBlank(finding.remediation)) {
      finding.remediation = lookup(REMEDIATION_TEMPLATES, prefix, 'Review and address the identified security concern');
    }

    // 4. Ensure by_design is boolean. Done before scoring so the score and the
    //    final flag can never disagree (e.g. the string "false" is not by-design).
    if (typeof finding.by_design !== 'boolean') {
      finding.by_design = false;
    }

    // 5. Ensure score_impact is a finite number; otherwise derive it from the
    //    severity. A non-numeric value would turn the risk score into NaN.
    const impact = finding.score_impact == null ? Number.NaN : Number(finding.score_impact);
    if (Number.isFinite(impact)) {
      finding.score_impact = impact;
    } else {
      finding.score_impact = finding.by_design ? 0 : lookup(SEVERITY_IMPACT, severityOf(finding), -5);
    }

    // 6. Ensure confidence has a valid value (matched case-insensitively, like severity).
    const confidence = String(finding.confidence ?? '').toLowerCase();
    finding.confidence = CONFIDENCE_LEVELS.includes(confidence) ? confidence : 'medium';
  }

  // Recalculate risk_score from findings; by-design findings do not count.
  const computedRisk = report.findings.reduce(
    (sum, f) => (f.by_design ? sum : sum + Math.abs(f.score_impact)),
    0
  );
  report.risk_score = Math.min(100, computedRisk);

  // Ensure result matches risk_score
  if (report.risk_score <= 25) report.result = 'safe';
  else if (report.risk_score <= 50) report.result = 'caution';
  else report.result = 'unsafe';

  // Ensure findings_count
  report.findings_count = report.findings.length;

  return report;
}
2802
+
2512
2803
  async function auditRepo(url) {
2513
2804
  const start = Date.now();
2514
2805
  const slug = slugFromUrl(url);
@@ -2547,286 +2838,315 @@ async function auditRepo(url) {
2547
2838
  }
2548
2839
  console.log(` ${c.green}done${c.reset}`);
2549
2840
 
2550
- // Step 4: LLM Analysis
2551
- // Resolve provider: preferred_provider from config → first match fallback
2552
- const activeLlm = resolveProvider();
2553
- const llmApiKey = activeLlm ? process.env[activeLlm.key] : null;
2554
- const activeProvider = activeLlm ? activeLlm.name : null;
2555
-
2556
- // Model override: --model flag > AGENTAUDIT_MODEL env > credentials.json > provider default
2557
- const modelArgIdx = process.argv.indexOf('--model');
2558
- const modelFlag = modelArgIdx !== -1 ? process.argv[modelArgIdx + 1] : null;
2559
- const modelEnv = process.env.AGENTAUDIT_MODEL;
2560
- const modelConfig = loadLlmConfig()?.llm_model;
2561
- const modelOverride = modelFlag || modelEnv || modelConfig || null;
2562
- if (activeLlm && modelOverride) {
2563
- activeLlm.model = modelOverride;
2564
- }
2565
-
2566
- if (!activeLlm) {
2567
- // No LLM API key — compact explanation
2568
- console.log();
2569
- console.log(` ${c.yellow}No LLM API key found.${c.reset} The ${c.bold}audit${c.reset} command needs an LLM to analyze code.`);
2570
- console.log();
2571
- console.log(` ${c.bold}Set an API key${c.reset} (e.g. ${c.cyan}export OPENROUTER_API_KEY=sk-or-...${c.reset})`);
2572
- console.log(` ${c.dim}Run "agentaudit model" to configure provider + model interactively${c.reset}`);
2573
- console.log();
2574
- console.log(` ${c.bold}Or export for manual review:${c.reset} ${c.cyan}agentaudit audit ${url} --export${c.reset}`);
2575
- console.log(` ${c.bold}Or use as MCP server${c.reset} in Cursor/Claude ${c.dim}(no extra API key needed)${c.reset}`);
2576
- console.log(` ${c.dim}{ "agentaudit": { "command": "npx", "args": ["-y", "agentaudit"] } }${c.reset}`);
2577
- console.log();
2578
-
2579
- // Check if --export flag
2580
- if (process.argv.includes('--export')) {
2581
- const exportPath = path.join(process.cwd(), `audit-${slug}.md`);
2582
- const exportContent = [
2583
- `# Security Audit: ${slug}`,
2584
- `**Source:** ${url}`,
2585
- `**Files:** ${files.length}`,
2586
- ``,
2587
- `## Audit Instructions`,
2588
- ``,
2589
- auditPrompt || '(audit prompt not found)',
2590
- ``,
2591
- `## Report Format`,
2592
- ``,
2593
- `After analysis, produce a JSON report:`,
2594
- '```json',
2595
- `{ "skill_slug": "${slug}", "source_url": "${url}", "risk_score": 0, "result": "safe", "findings": [] }`,
2596
- '```',
2597
- ``,
2598
- `## Source Code`,
2599
- ``,
2600
- codeBlock,
2601
- ].join('\n');
2602
- fs.writeFileSync(exportPath, exportContent);
2603
- console.log(` ${icons.safe} Exported to ${c.bold}${exportPath}${c.reset}`);
2604
- console.log(` ${c.dim}Paste this into any LLM (Claude, ChatGPT, etc.) for analysis${c.reset}`);
2605
- }
2606
-
2607
- // Cleanup
2608
- try { fs.rmSync(tmpDir, { recursive: true, force: true }); } catch {}
2609
- return null;
2610
- }
2611
-
2612
- // We have an API key — run LLM audit
2613
- const modelLabel = modelOverride ? `${activeProvider} → ${activeLlm.model}` : activeProvider;
2614
- process.stdout.write(` ${stepProgress(4, 4)} Running LLM analysis ${c.dim}(${modelLabel})${c.reset}...`);
2841
+ // Step 4: Provenance + type detection (needs repoPath on disk)
2842
+ let commitSha = '';
2843
+ try { commitSha = execSync('git rev-parse HEAD', { cwd: repoPath, encoding: 'utf8' }).trim(); } catch {}
2844
+ const sourceHash = crypto.createHash('sha256').update(
2845
+ files.slice().sort((a, b) => a.path.localeCompare(b.path))
2846
+ .map(f => f.path + '\n' + f.content).join('\n')
2847
+ ).digest('hex');
2848
+ const pkgInfo = detectPackageInfo(repoPath, files);
2849
+ const KNOWN_MCP_LIBS = new Set(['fastmcp', 'jlowin-fastmcp', 'mcp-go', 'fastapi-mcp', 'fastapi_mcp', 'mcp-use', 'mcp-agent']);
2850
+ const KNOWN_CLI = new Set(['mcp-cli', 'mcp-scan', 'inspector']);
2851
+ let detectedType = pkgInfo.type === 'unknown' ? 'other' : pkgInfo.type;
2852
+ if (KNOWN_MCP_LIBS.has(slug)) detectedType = 'library';
2853
+ if (KNOWN_CLI.has(slug)) detectedType = 'cli-tool';
2615
2854
 
2855
+ // Cleanup repo (files in memory, provenance captured)
2856
+ try { fs.rmSync(tmpDir, { recursive: true, force: true }); } catch {}
2857
+
2858
+ // Build prompts
2616
2859
  const systemPrompt = auditPrompt || 'You are a security auditor. Analyze the code and report findings as JSON.';
2860
+ const detectedVersion = pkgInfo.version || 'unknown';
2617
2861
  const userMessage = [
2618
2862
  `Audit this package: **${slug}** (${url})`,
2863
+ `Package version detected: ${detectedVersion}`,
2864
+ ``,
2865
+ `Respond with ONLY a valid JSON object. No markdown fences, no explanation, no text before or after.`,
2866
+ ``,
2867
+ `Required top-level fields: skill_slug, source_url, package_type, package_version, risk_score, max_severity, result, findings_count, findings`,
2868
+ `Required finding fields (ALL mandatory): pattern_id, cwe_id, severity, title, description, file, line, content, remediation, confidence, by_design, score_impact`,
2619
2869
  ``,
2620
- `After analysis, respond with ONLY a valid JSON object. No markdown fences, no explanation, no text before or after. Just the raw JSON:`,
2621
- `{ "skill_slug": "${slug}", "source_url": "${url}", "package_type": "<mcp-server|agent-skill|library|cli-tool|other>",`,
2622
- ` "risk_score": <0-100>, "result": "<safe|caution|unsafe>", "max_severity": "<none|low|medium|high|critical>",`,
2623
- ` "findings_count": <n>, "findings": [{ "pattern_id": "CMD_INJECT_001", "title": "...", "severity": "...", "category": "...",`,
2624
- ` "cwe_id": "CWE-78", "description": "...", "file": "...", "line": <n>, "content": "...", "remediation": "...",`,
2625
- ` "confidence": "high|medium|low", "by_design": false, "score_impact": -15 }] }`,
2870
+ `A finding missing cwe_id, content, or remediation is INVALID do not emit it.`,
2626
2871
  ``,
2627
2872
  `## Source Code`,
2628
2873
  codeBlock,
2629
2874
  ].join('\n');
2630
2875
 
2631
- let report = null;
2632
- let _lastLlmText = '';
2876
+ // Helper: add provenance to a report
2877
+ const enrichReport = (report, duration) => {
2878
+ report.skill_slug = slug;
2879
+ report.package_type = detectedType;
2880
+ report.audit_duration_ms = duration || (Date.now() - start);
2881
+ report.files_scanned = files.length;
2882
+ if (commitSha) report.commit_sha = commitSha;
2883
+ report.source_hash = sourceHash;
2884
+ };
2633
2885
 
2634
- try {
2635
- let data;
2636
- if (activeLlm.type === 'anthropic') {
2637
- // Anthropic Messages API (unique format)
2638
- const res = await fetch(activeLlm.url, {
2886
+ // Helper: upload one report
2887
+ const uploadReport = async (report, creds) => {
2888
+ if (!creds) return;
2889
+ process.stdout.write(` Uploading report${report.audit_model ? ` (${report.audit_model})` : ''}...`);
2890
+ try {
2891
+ const res = await fetch(`${REGISTRY_URL}/api/reports`, {
2639
2892
  method: 'POST',
2640
- headers: {
2641
- 'x-api-key': llmApiKey,
2642
- 'anthropic-version': '2023-06-01',
2643
- 'content-type': 'application/json',
2644
- },
2645
- body: JSON.stringify({
2646
- model: activeLlm.model,
2647
- max_tokens: 8192,
2648
- system: systemPrompt,
2649
- messages: [{ role: 'user', content: userMessage }],
2650
- }),
2651
- signal: AbortSignal.timeout(120_000),
2893
+ headers: { 'Authorization': `Bearer ${creds.api_key}`, 'Content-Type': 'application/json' },
2894
+ body: JSON.stringify(report),
2895
+ signal: AbortSignal.timeout(15_000),
2652
2896
  });
2653
- data = await res.json();
2654
- if (data.error) {
2655
- console.log(` ${c.red}failed${c.reset}`);
2656
- const friendly = formatApiError(data.error, activeLlm.provider, res.status);
2657
- if (friendly) {
2658
- console.log(` ${c.red}${friendly.text}${c.reset}`);
2659
- console.log(` ${c.dim}${friendly.hint}${c.reset}`);
2660
- } else {
2661
- console.log(` ${c.red}API error: ${data.error.message || JSON.stringify(data.error)}${c.reset}`);
2662
- }
2663
- try { fs.rmSync(tmpDir, { recursive: true, force: true }); } catch {}
2664
- return null;
2897
+ if (res.ok) {
2898
+ console.log(` ${c.green}done${c.reset}`);
2899
+ } else {
2900
+ let errBody = ''; try { errBody = await res.text(); } catch {}
2901
+ console.log(` ${c.yellow}failed (HTTP ${res.status})${c.reset}`);
2902
+ if (errBody && process.argv.includes('--debug')) console.log(` ${c.dim}Server: ${errBody.slice(0, 300)}${c.reset}`);
2665
2903
  }
2666
- _lastLlmText = data.content?.[0]?.text || '';
2667
- report = extractJSON(_lastLlmText);
2668
- if (report) {
2669
- report.audit_model = data.model || activeLlm.model;
2670
- report.audit_provider = activeLlm.provider;
2671
- if (data.id) report.provider_msg_id = data.id;
2672
- if (data.usage) {
2673
- report.input_tokens = data.usage.input_tokens;
2674
- report.output_tokens = data.usage.output_tokens;
2675
- }
2904
+ } catch { console.log(` ${c.yellow}failed${c.reset}`); }
2905
+ };
2906
+
2907
+ // Step 5: Resolve models
2908
+ const modelsArgIdx = process.argv.indexOf('--models');
2909
+ const modelsFlag = modelsArgIdx !== -1 ? process.argv[modelsArgIdx + 1] : null;
2910
+ const modelNames = modelsFlag ? modelsFlag.split(',').map(m => m.trim()).filter(Boolean) : [];
2911
+ const isMultiModel = modelNames.length > 1;
2912
+
2913
+ // ── Multi-Model Path ─────────────────────────────────────
2914
+ if (isMultiModel) {
2915
+ const resolvedModels = [];
2916
+ const failedModels = [];
2917
+ for (const name of modelNames) {
2918
+ const config = resolveModel(name);
2919
+ if (!config) { failedModels.push(name); continue; }
2920
+ resolvedModels.push({ name, config });
2921
+ }
2922
+
2923
+ if (resolvedModels.length === 0) {
2924
+ console.log();
2925
+ console.log(` ${c.red}No API keys available for requested models${c.reset}`);
2926
+ for (const name of failedModels) console.log(` ${c.dim}${name}: no matching API key${c.reset}`);
2927
+ console.log(` ${c.dim}Run "agentaudit model" to configure providers${c.reset}`);
2928
+ return null;
2929
+ }
2930
+
2931
+ // Progress
2932
+ const totalSteps = resolvedModels.length;
2933
+ console.log(` ${stepProgress(4, 4)} Running LLM analysis ${c.dim}(${totalSteps} models in parallel)${c.reset}`);
2934
+ if (failedModels.length > 0) {
2935
+ for (const name of failedModels) console.log(` ${c.yellow}⚠${c.reset} ${name.padEnd(30)} ${c.dim}skipped (no API key)${c.reset}`);
2936
+ }
2937
+
2938
+ // Parallel LLM calls
2939
+ const results = await Promise.allSettled(
2940
+ resolvedModels.map(async ({ name, config }) => {
2941
+ const result = await callLlm(config, systemPrompt, userMessage);
2942
+ return { name, ...result };
2943
+ })
2944
+ );
2945
+
2946
+ // Process results
2947
+ const reports = [];
2948
+ for (let i = 0; i < results.length; i++) {
2949
+ const name = resolvedModels[i].name;
2950
+ const r = results[i];
2951
+ if (r.status === 'rejected') {
2952
+ console.log(` ${c.red}✗${c.reset} ${name.padEnd(30)} ${c.red}error${c.reset}`);
2953
+ continue;
2676
2954
  }
2677
- } else if (activeLlm.type === 'gemini') {
2678
- // Google Gemini API (unique format)
2679
- const res = await fetch(`${activeLlm.url}/${activeLlm.model}:generateContent?key=${llmApiKey}`, {
2680
- method: 'POST',
2681
- headers: { 'Content-Type': 'application/json' },
2682
- body: JSON.stringify({
2683
- systemInstruction: { parts: [{ text: systemPrompt }] },
2684
- contents: [{ role: 'user', parts: [{ text: userMessage }] }],
2685
- generationConfig: { maxOutputTokens: 8192 },
2686
- }),
2687
- signal: AbortSignal.timeout(120_000),
2688
- });
2689
- data = await res.json();
2690
- if (data.error) {
2691
- console.log(` ${c.red}failed${c.reset}`);
2692
- const friendly = formatApiError(data.error, activeLlm.provider, res.status);
2693
- if (friendly) {
2694
- console.log(` ${c.red}${friendly.text}${c.reset}`);
2695
- console.log(` ${c.dim}${friendly.hint}${c.reset}`);
2696
- } else {
2697
- console.log(` ${c.red}API error: ${data.error.message || JSON.stringify(data.error)}${c.reset}`);
2955
+ const { report, text, error, hint, duration } = r.value;
2956
+ if (error) {
2957
+ console.log(` ${c.red}✗${c.reset} ${name.padEnd(30)} ${c.red}${error}${c.reset}`);
2958
+ if (hint) console.log(` ${c.dim}${hint}${c.reset}`);
2959
+ continue;
2960
+ }
2961
+ if (!report) {
2962
+ console.log(` ${c.yellow}✗${c.reset} ${name.padEnd(30)} ${c.yellow}JSON parse failed${c.reset}`);
2963
+ if (process.argv.includes('--debug') && text) {
2964
+ console.log(` ${c.dim}${text.slice(0, 200)}...${c.reset}`);
2698
2965
  }
2699
- try { fs.rmSync(tmpDir, { recursive: true, force: true }); } catch {}
2700
- return null;
2966
+ continue;
2701
2967
  }
2702
- _lastLlmText = data.candidates?.[0]?.content?.parts?.[0]?.text || '';
2703
- report = extractJSON(_lastLlmText);
2704
- if (report) {
2705
- report.audit_model = data.modelVersion || activeLlm.model;
2706
- report.audit_provider = activeLlm.provider;
2707
- if (data.usageMetadata) {
2708
- report.input_tokens = data.usageMetadata.promptTokenCount;
2709
- report.output_tokens = data.usageMetadata.candidatesTokenCount;
2968
+ const durSec = Math.round((duration || 0) / 1000);
2969
+ console.log(` ${c.green}✓${c.reset} ${name.padEnd(30)} ${c.green}done${c.reset} ${c.dim}(${durSec}s)${c.reset}`);
2970
+ enrichReport(report, duration);
2971
+ enrichFindings(report, files, pkgInfo);
2972
+ saveHistory(report);
2973
+ reports.push({ name, report });
2974
+ }
2975
+
2976
+ if (reports.length === 0) {
2977
+ console.log();
2978
+ console.log(` ${c.red}No models returned valid results${c.reset}`);
2979
+ return null;
2980
+ }
2981
+
2982
+ // Display per-model results
2983
+ console.log();
2984
+ for (const { name, report } of reports) {
2985
+ console.log(sectionHeader(name));
2986
+ console.log(` ${riskBadge(report.risk_score || 0)}`);
2987
+ const fc = report.findings?.length || 0;
2988
+ if (fc > 0) {
2989
+ const counts = {};
2990
+ for (const f of report.findings) { const s = (f.severity || 'info').toLowerCase(); counts[s] = (counts[s] || 0) + 1; }
2991
+ const parts = [];
2992
+ for (const sev of ['critical', 'high', 'medium', 'low', 'info']) { if (counts[sev]) parts.push(`${counts[sev]} ${sev}`); }
2993
+ console.log(` ${c.dim}${fc} findings: ${parts.join(', ')}${c.reset}`);
2994
+ } else {
2995
+ console.log(` ${c.green}No findings${c.reset}`);
2996
+ }
2997
+ console.log();
2998
+ }
2999
+
3000
+ // Consensus comparison
3001
+ if (reports.length > 1) {
3002
+ console.log(sectionHeader('Consensus'));
3003
+
3004
+ // Risk range
3005
+ const risks = reports.map(r => r.report.risk_score || 0);
3006
+ const minRisk = Math.min(...risks);
3007
+ const maxRisk = Math.max(...risks);
3008
+ const avgRisk = Math.round(risks.reduce((a, b) => a + b, 0) / risks.length);
3009
+ console.log(` Risk: ${riskBadge(avgRisk)} ${c.dim}(range ${minRisk}–${maxRisk})${c.reset}`);
3010
+ console.log();
3011
+
3012
+ // Severity agreement
3013
+ const severities = reports.map(r => (r.report.max_severity || 'none').toLowerCase());
3014
+ const allSameSev = severities.every(s => s === severities[0]);
3015
+ if (allSameSev) {
3016
+ console.log(` ${c.green}${reports.length}/${reports.length} models agree:${c.reset} ${severities[0].toUpperCase()}`);
3017
+ } else {
3018
+ console.log(` ${c.yellow}Models disagree on severity:${c.reset}`);
3019
+ for (const { name, report } of reports) {
3020
+ const sev = (report.max_severity || 'none').toUpperCase();
3021
+ const sc = severityColor(report.max_severity);
3022
+ console.log(` ${sc}${sev.padEnd(10)}${c.reset} ${c.dim}${name}${c.reset}`);
2710
3023
  }
2711
3024
  }
2712
- } else {
2713
- // OpenAI-compatible API (OpenAI, Mistral, Groq, OpenRouter, etc.)
2714
- const headers = {
2715
- 'Authorization': `Bearer ${llmApiKey}`,
2716
- 'Content-Type': 'application/json',
2717
- };
2718
- // OpenRouter requires additional headers
2719
- if (activeLlm.provider === 'openrouter') {
2720
- headers['HTTP-Referer'] = 'https://agentaudit.dev';
2721
- headers['X-Title'] = 'AgentAudit CLI';
3025
+ console.log();
3026
+
3027
+ // Finding intersection (match by normalized title)
3028
+ const findingsByTitle = new Map();
3029
+ for (const { name, report } of reports) {
3030
+ for (const f of (report.findings || [])) {
3031
+ const key = (f.title || '').toLowerCase().replace(/[^a-z0-9]+/g, ' ').trim();
3032
+ if (!key) continue;
3033
+ if (!findingsByTitle.has(key)) findingsByTitle.set(key, { title: f.title, severity: f.severity, models: [] });
3034
+ findingsByTitle.get(key).models.push(name);
3035
+ }
2722
3036
  }
2723
- const res = await fetch(activeLlm.url, {
2724
- method: 'POST',
2725
- headers,
2726
- body: JSON.stringify({
2727
- model: activeLlm.model,
2728
- max_tokens: 8192,
2729
- messages: [
2730
- { role: 'system', content: systemPrompt },
2731
- { role: 'user', content: userMessage },
2732
- ],
2733
- }),
2734
- signal: AbortSignal.timeout(120_000),
2735
- });
2736
- data = await res.json();
2737
- if (data.error) {
2738
- console.log(` ${c.red}failed${c.reset}`);
2739
- const friendly = formatApiError(data.error, activeLlm.provider, res.status);
2740
- if (friendly) {
2741
- console.log(` ${c.red}${friendly.text}${c.reset}`);
2742
- console.log(` ${c.dim}${friendly.hint}${c.reset}`);
2743
- } else {
2744
- console.log(` ${c.red}API error: ${data.error.message || JSON.stringify(data.error)}${c.reset}`);
3037
+
3038
+ const shared = [...findingsByTitle.values()].filter(f => f.models.length > 1);
3039
+ const unique = [...findingsByTitle.values()].filter(f => f.models.length === 1);
3040
+
3041
+ if (shared.length > 0) {
3042
+ console.log(` ${c.bold}Shared findings (${shared.length}):${c.reset}`);
3043
+ for (const f of shared) {
3044
+ const sc = severityColor(f.severity);
3045
+ console.log(` ${sc}┃${c.reset} ${sc}${(f.severity || '').toUpperCase().padEnd(8)}${c.reset} ${f.title} ${c.dim}(${f.models.length}/${reports.length})${c.reset}`);
2745
3046
  }
2746
- try { fs.rmSync(tmpDir, { recursive: true, force: true }); } catch {}
2747
- return null;
3047
+ console.log();
2748
3048
  }
2749
- _lastLlmText = data.choices?.[0]?.message?.content || '';
2750
- report = extractJSON(_lastLlmText);
2751
- if (report) {
2752
- report.audit_model = data.model || activeLlm.model;
2753
- report.audit_provider = activeLlm.provider;
2754
- if (data.id) report.provider_msg_id = data.id;
2755
- if (data.system_fingerprint) report.provider_fingerprint = data.system_fingerprint;
2756
- if (data.usage) {
2757
- report.input_tokens = data.usage.prompt_tokens;
2758
- report.output_tokens = data.usage.completion_tokens;
3049
+
3050
+ if (unique.length > 0) {
3051
+ console.log(` ${c.bold}Unique findings (${unique.length}):${c.reset}`);
3052
+ for (const f of unique) {
3053
+ const sc = severityColor(f.severity);
3054
+ console.log(` ${sc}┃${c.reset} ${sc}${(f.severity || '').toUpperCase().padEnd(8)}${c.reset} ${f.title} ${c.dim}(${f.models[0]} only)${c.reset}`);
2759
3055
  }
3056
+ console.log();
2760
3057
  }
2761
3058
  }
2762
-
2763
- console.log(` ${c.green}done${c.reset} ${c.dim}(${elapsed(start)})${c.reset}`);
2764
- } catch (err) {
2765
- console.log(` ${c.red}failed${c.reset}`);
2766
- if (err.name === 'TimeoutError' || err.message?.includes('timeout')) {
2767
- console.log(` ${c.red}Request timed out (120s)${c.reset}`);
2768
- console.log(` ${c.dim}The provider took too long to respond. Try again or use a faster model${c.reset}`);
2769
- } else if (err.code === 'ENOTFOUND' || err.code === 'ECONNREFUSED' || err.message?.includes('fetch failed')) {
2770
- console.log(` ${c.red}Network error: could not reach ${activeProvider}${c.reset}`);
2771
- console.log(` ${c.dim}Check your internet connection or provider status${c.reset}`);
2772
- } else {
2773
- console.log(` ${c.red}${err.message}${c.reset}`);
3059
+
3060
+ // Upload each report
3061
+ const noUpload = process.argv.includes('--no-upload');
3062
+ const creds = loadCredentials();
3063
+ if (!noUpload && creds) {
3064
+ for (const { report } of reports) await uploadReport(report, creds);
3065
+ console.log(` ${c.dim}Reports: ${REGISTRY_URL}/packages/${slug}${c.reset}`);
3066
+ } else if (!noUpload && !creds) {
3067
+ console.log(` ${c.dim}Run ${c.cyan}agentaudit setup${c.dim} to upload reports to agentaudit.dev${c.reset}`);
3068
+ }
3069
+
3070
+ console.log();
3071
+ return reports.map(r => r.report);
3072
+ }
3073
+
3074
+ // ── Single-Model Path ────────────────────────────────────
3075
+ // If --models has exactly 1 model, use it; otherwise resolve via --model / config / env
3076
+ let activeLlm;
3077
+ if (modelNames.length === 1) {
3078
+ activeLlm = resolveModel(modelNames[0]);
3079
+ } else {
3080
+ activeLlm = resolveProvider();
3081
+ // Model override: --model flag > AGENTAUDIT_MODEL env > credentials.json > provider default
3082
+ const modelArgIdx2 = process.argv.indexOf('--model');
3083
+ const modelFlag2 = modelArgIdx2 !== -1 ? process.argv[modelArgIdx2 + 1] : null;
3084
+ const modelOverride = modelFlag2 || process.env.AGENTAUDIT_MODEL || loadLlmConfig()?.llm_model || null;
3085
+ if (activeLlm && modelOverride) activeLlm.model = modelOverride;
3086
+ }
3087
+
3088
+ if (!activeLlm) {
3089
+ console.log();
3090
+ console.log(` ${c.yellow}No LLM API key found.${c.reset} The ${c.bold}audit${c.reset} command needs an LLM to analyze code.`);
3091
+ console.log();
3092
+ console.log(` ${c.bold}Set an API key${c.reset} (e.g. ${c.cyan}export OPENROUTER_API_KEY=sk-or-...${c.reset})`);
3093
+ console.log(` ${c.dim}Run "agentaudit model" to configure provider + model interactively${c.reset}`);
3094
+ console.log();
3095
+ console.log(` ${c.bold}Or export for manual review:${c.reset} ${c.cyan}agentaudit audit ${url} --export${c.reset}`);
3096
+ console.log(` ${c.bold}Or use as MCP server${c.reset} in Cursor/Claude ${c.dim}(no extra API key needed)${c.reset}`);
3097
+ console.log(` ${c.dim}{ "agentaudit": { "command": "npx", "args": ["-y", "agentaudit"] } }${c.reset}`);
3098
+ console.log();
3099
+ if (process.argv.includes('--export')) {
3100
+ const exportPath = path.join(process.cwd(), `audit-${slug}.md`);
3101
+ const exportContent = [
3102
+ `# Security Audit: ${slug}`, `**Source:** ${url}`, `**Files:** ${files.length}`, ``,
3103
+ `## Audit Instructions`, ``, auditPrompt || '(audit prompt not found)', ``,
3104
+ `## Report Format`, ``, `After analysis, produce a JSON report:`,
3105
+ '```json', `{ "skill_slug": "${slug}", "source_url": "${url}", "risk_score": 0, "result": "safe", "findings": [] }`, '```',
3106
+ ``, `## Source Code`, ``, codeBlock,
3107
+ ].join('\n');
3108
+ fs.writeFileSync(exportPath, exportContent);
3109
+ console.log(` ${icons.safe} Exported to ${c.bold}${exportPath}${c.reset}`);
3110
+ console.log(` ${c.dim}Paste this into any LLM (Claude, ChatGPT, etc.) for analysis${c.reset}`);
2774
3111
  }
2775
- try { fs.rmSync(tmpDir, { recursive: true, force: true }); } catch {}
2776
3112
  return null;
2777
3113
  }
2778
-
2779
- // Provenance: compute BEFORE cleanup (needs repoPath on disk)
2780
- let commitSha = '';
2781
- try {
2782
- commitSha = execSync('git rev-parse HEAD', { cwd: repoPath, encoding: 'utf8' }).trim();
2783
- } catch { /* shallow clone without HEAD — unlikely but safe */ }
2784
- const sourceHash = crypto.createHash('sha256').update(
2785
- files.slice().sort((a, b) => a.path.localeCompare(b.path))
2786
- .map(f => f.path + '\n' + f.content).join('\n')
2787
- ).digest('hex');
2788
- // Code-based type detection (uses files array in memory + repoPath for context)
2789
- const pkgInfo = detectPackageInfo(repoPath, files);
2790
- // Known MCP frameworks are libraries, not servers (they contain MCP patterns but ARE the SDK)
2791
- const KNOWN_MCP_LIBS = new Set(['fastmcp', 'jlowin-fastmcp', 'mcp-go', 'fastapi-mcp', 'fastapi_mcp', 'mcp-use', 'mcp-agent']);
2792
- const KNOWN_CLI = new Set(['mcp-cli', 'mcp-scan', 'inspector']);
2793
- let detectedType = pkgInfo.type === 'unknown' ? 'other' : pkgInfo.type;
2794
- if (KNOWN_MCP_LIBS.has(slug)) detectedType = 'library';
2795
- if (KNOWN_CLI.has(slug)) detectedType = 'cli-tool';
2796
3114
 
2797
- // Cleanup repo (safe now — provenance data captured above)
2798
- try { fs.rmSync(tmpDir, { recursive: true, force: true }); } catch {}
3115
+ // Single LLM call via callLlm()
3116
+ const modelLabel = `${activeLlm.name} ${activeLlm.model}`;
3117
+ process.stdout.write(` ${stepProgress(4, 4)} Running LLM analysis ${c.dim}(${modelLabel})${c.reset}...`);
2799
3118
 
3119
+ const llmResult = await callLlm(activeLlm, systemPrompt, userMessage);
3120
+
3121
+ if (llmResult.error) {
3122
+ console.log(` ${c.red}failed${c.reset}`);
3123
+ console.log(` ${c.red}${llmResult.error}${c.reset}`);
3124
+ if (llmResult.hint) console.log(` ${c.dim}${llmResult.hint}${c.reset}`);
3125
+ return null;
3126
+ }
3127
+
3128
+ console.log(` ${c.green}done${c.reset} ${c.dim}(${elapsed(start)})${c.reset}`);
3129
+
3130
+ const report = llmResult.report;
2800
3131
  if (!report) {
2801
3132
  console.log(` ${c.red}Could not parse LLM response as JSON${c.reset}`);
2802
3133
  console.log(` ${c.dim}Hint: run with --debug to see the raw LLM response${c.reset}`);
2803
3134
  if (process.argv.includes('--debug')) {
2804
3135
  console.log(` ${c.dim}--- Raw LLM response (first 2000 chars) ---${c.reset}`);
2805
- console.log((typeof _lastLlmText === 'string' ? _lastLlmText : '(empty)').slice(0, 2000));
3136
+ console.log((llmResult.text || '(empty)').slice(0, 2000));
2806
3137
  console.log(` ${c.dim}--- end ---${c.reset}`);
2807
3138
  }
2808
3139
  return null;
2809
3140
  }
2810
3141
 
2811
- // Force slug from URL — never trust LLM-provided skill_slug
2812
- report.skill_slug = slug;
2813
-
2814
- // Force package_type from code detection — never trust LLM-provided type
2815
- report.package_type = detectedType;
2816
-
2817
- // Add scan metadata for benchmarking
2818
- report.audit_duration_ms = Date.now() - start;
2819
- report.files_scanned = files.length;
2820
-
2821
- // Set provenance data
2822
- if (commitSha) report.commit_sha = commitSha;
2823
- report.source_hash = sourceHash;
3142
+ enrichReport(report);
3143
+ enrichFindings(report, files, pkgInfo);
3144
+ saveHistory(report);
2824
3145
 
2825
3146
  // Display results
2826
3147
  console.log();
2827
- const riskScore = report.risk_score || 0;
2828
3148
  console.log(sectionHeader('Result'));
2829
- console.log(` ${riskBadge(riskScore)}`);
3149
+ console.log(` ${riskBadge(report.risk_score || 0)}`);
2830
3150
  console.log();
2831
3151
 
2832
3152
  if (report.findings && report.findings.length > 0) {
@@ -2839,8 +3159,6 @@ async function auditRepo(url) {
2839
3159
  if (f.description) console.log(` ${sc}┃${c.reset} ${c.dim}${f.description.slice(0, 120)}${c.reset}`);
2840
3160
  console.log();
2841
3161
  }
2842
-
2843
- // Severity histogram
2844
3162
  const histLines = severityHistogram(report.findings);
2845
3163
  if (histLines.length > 1) {
2846
3164
  console.log(sectionHeader('Severity'));
@@ -2851,41 +3169,16 @@ async function auditRepo(url) {
2851
3169
  console.log(` ${c.green}No findings — package looks clean.${c.reset}`);
2852
3170
  console.log();
2853
3171
  }
2854
-
2855
- // Upload to registry (skip with --no-upload)
3172
+
3173
+ // Upload to registry
2856
3174
  const noUpload = process.argv.includes('--no-upload');
2857
3175
  let creds = loadCredentials();
2858
3176
  if (noUpload) {
2859
3177
  // Skip silently
2860
3178
  } else if (creds) {
2861
- process.stdout.write(` Uploading report to registry...`);
2862
- try {
2863
- const res = await fetch(`${REGISTRY_URL}/api/reports`, {
2864
- method: 'POST',
2865
- headers: {
2866
- 'Authorization': `Bearer ${creds.api_key}`,
2867
- 'Content-Type': 'application/json',
2868
- },
2869
- body: JSON.stringify(report),
2870
- signal: AbortSignal.timeout(15_000),
2871
- });
2872
- if (res.ok) {
2873
- const data = await res.json();
2874
- console.log(` ${c.green}done${c.reset}`);
2875
- console.log(` ${c.dim}Report: ${REGISTRY_URL}/packages/${slug}${c.reset}`);
2876
- } else {
2877
- let errBody = '';
2878
- try { errBody = await res.text(); } catch {}
2879
- console.log(` ${c.yellow}failed (HTTP ${res.status})${c.reset}`);
2880
- if (errBody && process.argv.includes('--debug')) {
2881
- console.log(` ${c.dim}Server: ${errBody.slice(0, 300)}${c.reset}`);
2882
- }
2883
- }
2884
- } catch (err) {
2885
- console.log(` ${c.yellow}failed${c.reset}`);
2886
- }
3179
+ await uploadReport(report, creds);
3180
+ console.log(` ${c.dim}Report: ${REGISTRY_URL}/packages/${slug}${c.reset}`);
2887
3181
  } else if (process.stdin.isTTY) {
2888
- // No credentials — prompt to paste key or set up
2889
3182
  console.log();
2890
3183
  console.log(` ${c.bold}Want to upload this report to agentaudit.dev?${c.reset}`);
2891
3184
  console.log(` ${c.dim}Create an API key at ${c.cyan}${REGISTRY_URL}/profile${c.dim} (sign in with GitHub)${c.reset}`);
@@ -2899,27 +3192,8 @@ async function auditRepo(url) {
2899
3192
  saveCredentials({ api_key: pastedKey.trim(), agent_name: agentName });
2900
3193
  creds = { api_key: pastedKey.trim(), agent_name: agentName };
2901
3194
  console.log(` ${c.green}valid!${c.reset}`);
2902
- process.stdout.write(` Uploading report...`);
2903
- try {
2904
- const res = await fetch(`${REGISTRY_URL}/api/reports`, {
2905
- method: 'POST',
2906
- headers: {
2907
- 'Authorization': `Bearer ${creds.api_key}`,
2908
- 'Content-Type': 'application/json',
2909
- },
2910
- body: JSON.stringify(report),
2911
- signal: AbortSignal.timeout(15_000),
2912
- });
2913
- if (res.ok) {
2914
- console.log(` ${c.green}done${c.reset}`);
2915
- console.log(` ${c.dim}Report: ${REGISTRY_URL}/packages/${slug}${c.reset}`);
2916
- } else {
2917
- console.log(` ${c.yellow}failed (HTTP ${res.status})${c.reset}`);
2918
- }
2919
- } catch (err) {
2920
- console.log(` ${c.red}failed${c.reset}`);
2921
- console.log(` ${c.dim}${err.message}${c.reset}`);
2922
- }
3195
+ await uploadReport(report, creds);
3196
+ console.log(` ${c.dim}Report: ${REGISTRY_URL}/packages/${slug}${c.reset}`);
2923
3197
  } else {
2924
3198
  console.log(` ${c.red}invalid key${c.reset}`);
2925
3199
  console.log(` ${c.dim}Run ${c.cyan}agentaudit setup${c.dim} to configure.${c.reset}`);
@@ -2928,7 +3202,7 @@ async function auditRepo(url) {
2928
3202
  } else {
2929
3203
  console.log(` ${c.dim}Run ${c.cyan}agentaudit setup${c.dim} to configure your API key and upload reports${c.reset}`);
2930
3204
  }
2931
-
3205
+
2932
3206
  console.log();
2933
3207
  return report;
2934
3208
  }
@@ -3812,9 +4086,11 @@ async function main() {
3812
4086
  // Strip global flags from args (including --model <value>)
3813
4087
  const globalFlags = new Set(['--json', '--quiet', '-q', '--no-color', '--no-upload']);
3814
4088
  let args = rawArgs.filter(a => !globalFlags.has(a));
3815
- // Remove --model <value> pair
4089
+ // Remove --model <value> and --models <value> pairs
3816
4090
  const modelIdx = args.indexOf('--model');
3817
4091
  if (modelIdx !== -1) args.splice(modelIdx, 2);
4092
+ const modelsIdx = args.indexOf('--models');
4093
+ if (modelsIdx !== -1) args.splice(modelsIdx, 2);
3818
4094
 
3819
4095
  // Detect per-command --help BEFORE stripping (e.g. `agentaudit model --help`)
3820
4096
  const wantsHelp = args.includes('--help') || args.includes('-h');
@@ -3864,15 +4140,16 @@ async function main() {
3864
4140
  `Deep LLM-powered 3-pass security audit (~30s). Requires an LLM API key.`,
3865
4141
  ``,
3866
4142
  `${c.bold}Options:${c.reset}`,
3867
- ` --model <name> Override LLM model for this run`,
3868
- ` --no-upload Skip uploading report to registry`,
3869
- ` --export Export audit payload as markdown (for manual LLM review)`,
3870
- ` --debug Show raw LLM response on parse errors`,
4143
+ ` --model <name> Override LLM model for this run`,
4144
+ ` --models <a,b,c> Multi-model audit (parallel calls, consensus comparison)`,
4145
+ ` --no-upload Skip uploading report to registry`,
4146
+ ` --export Export audit payload as markdown (for manual LLM review)`,
4147
+ ` --debug Show raw LLM response on parse errors`,
3871
4148
  ``,
3872
4149
  `${c.bold}Examples:${c.reset}`,
3873
4150
  ` agentaudit audit https://github.com/owner/repo`,
3874
- ` agentaudit audit https://github.com/owner/repo --no-upload`,
3875
4151
  ` agentaudit audit https://github.com/owner/repo --model gpt-4o`,
4152
+ ` agentaudit audit https://github.com/owner/repo --models gemini-2.5-flash,claude-sonnet-4-20250514`,
3876
4153
  ` agentaudit audit https://github.com/owner/repo --export`,
3877
4154
  ],
3878
4155
  lookup: [
@@ -3986,10 +4263,32 @@ async function main() {
3986
4263
  ` agentaudit benchmark --json`,
3987
4264
  ],
3988
4265
  bench: null, // alias → benchmark
4266
+ consensus: [
4267
+ `${c.bold}agentaudit consensus${c.reset} <package-name>`,
4268
+ ``,
4269
+ `View multi-model consensus status from the AgentAudit registry.`,
4270
+ `Shows agreement across different LLM models and peer reviewers.`,
4271
+ ``,
4272
+ `${c.bold}Options:${c.reset}`,
4273
+ ` --json Machine-readable JSON output`,
4274
+ ``,
4275
+ `${c.bold}Examples:${c.reset}`,
4276
+ ` agentaudit consensus nanobanana-mcp-server`,
4277
+ ` agentaudit consensus fastmcp --json`,
4278
+ ],
4279
+ history: [
4280
+ `${c.bold}agentaudit history${c.reset} [options]`,
4281
+ ``,
4282
+ `Show your local audit history. Results are stored in ~/.config/agentaudit/history/`,
4283
+ `after every audit run. No internet connection required.`,
4284
+ ``,
4285
+ `${c.bold}Options:${c.reset}`,
4286
+ ` --json Machine-readable JSON output`,
4287
+ ],
3989
4288
  activity: [
3990
4289
  `${c.bold}agentaudit activity${c.reset} [options]`,
3991
4290
  ``,
3992
- `Show your recent audits and findings from the AgentAudit registry.`,
4291
+ `Show your recent audits and findings from the AgentAudit registry (online).`,
3993
4292
  `Requires being logged in (run ${c.cyan}agentaudit setup${c.reset} first).`,
3994
4293
  ``,
3995
4294
  `${c.bold}Options:${c.reset}`,
@@ -4087,12 +4386,14 @@ async function main() {
4087
4386
  console.log(` ${c.cyan}audit${c.reset} <url> [url...] Deep LLM-powered security audit (~30s)`);
4088
4387
  console.log(` ${c.cyan}validate${c.reset} [path] Validate SKILL.md format & security`);
4089
4388
  console.log(` ${c.cyan}lookup${c.reset} <name> Look up package in registry`);
4389
+ console.log(` ${c.cyan}consensus${c.reset} <name> View multi-model consensus for a package`);
4090
4390
  console.log();
4091
4391
  console.log(` ${c.bold}COMMUNITY${c.reset}`);
4092
4392
  console.log(` ${c.cyan}dashboard${c.reset} Interactive dashboard (full-screen)`);
4093
4393
  console.log(` ${c.cyan}leaderboard${c.reset} Top contributors ranking`);
4094
4394
  console.log(` ${c.cyan}benchmark${c.reset} LLM model performance comparison`);
4095
- console.log(` ${c.cyan}activity${c.reset} Your recent audits & findings`);
4395
+ console.log(` ${c.cyan}history${c.reset} Your local audit history`);
4396
+ console.log(` ${c.cyan}activity${c.reset} Your recent audits & findings (online)`);
4096
4397
  console.log(` ${c.cyan}search${c.reset} <query> Search packages in registry`);
4097
4398
  console.log();
4098
4399
  console.log(` ${c.bold}CONFIGURATION${c.reset}`);
@@ -4106,6 +4407,7 @@ async function main() {
4106
4407
  console.log(` ${c.dim}--quiet Suppress banner${c.reset}`);
4107
4408
  console.log(` ${c.dim}--no-color Disable ANSI colors (also: NO_COLOR env)${c.reset}`);
4108
4409
  console.log(` ${c.dim}--model <name> Override LLM model for this run${c.reset}`);
4410
+ console.log(` ${c.dim}--models <a,b,c> Multi-model audit (parallel, with consensus)${c.reset}`);
4109
4411
  console.log(` ${c.dim}--no-upload Skip uploading report to registry${c.reset}`);
4110
4412
  console.log(` ${c.dim}--export Export audit payload as markdown${c.reset}`);
4111
4413
  console.log(` ${c.dim}--debug Show raw LLM response on parse errors${c.reset}`);
@@ -4114,6 +4416,7 @@ async function main() {
4114
4416
  console.log(` agentaudit discover --quick`);
4115
4417
  console.log(` agentaudit scan https://github.com/owner/repo`);
4116
4418
  console.log(` agentaudit audit https://github.com/owner/repo`);
4419
+ console.log(` agentaudit audit <url> --models gemini-2.5-flash,claude-sonnet-4-20250514`);
4117
4420
  console.log(` agentaudit lookup fastmcp --json`);
4118
4421
  console.log();
4119
4422
  console.log(` ${c.bold}LEARN MORE${c.reset}`);
@@ -4140,6 +4443,37 @@ async function main() {
4140
4443
  await benchmarkCommand(targets);
4141
4444
  return;
4142
4445
  }
4446
+ if (command === 'history') {
4447
+ banner();
4448
+ const entries = loadHistory(30);
4449
+ if (entries.length === 0) {
4450
+ console.log(` ${c.dim}No local audit history yet. Run ${c.cyan}agentaudit audit <url>${c.dim} to start.${c.reset}`);
4451
+ console.log();
4452
+ return;
4453
+ }
4454
+
4455
+ if (jsonMode) {
4456
+ console.log(JSON.stringify(entries, null, 2));
4457
+ return;
4458
+ }
4459
+
4460
+ console.log(sectionHeader(`Local History (${entries.length})`));
4461
+ console.log();
4462
+
4463
+ for (const entry of entries) {
4464
+ const slug = entry.skill_slug || 'unknown';
4465
+ const risk = entry.risk_score ?? '?';
4466
+ const sev = entry.max_severity || 'none';
4467
+ const sc = severityColor(sev);
4468
+ const model = entry.audit_model || '?';
4469
+ const fc = entry.findings?.length || 0;
4470
+ const ts = entry._file?.slice(0, 10) || '';
4471
+ console.log(` ${sc}┃${c.reset} ${c.bold}${slug.padEnd(30)}${c.reset} ${riskBadge(risk)} ${c.dim}${model}${c.reset}`);
4472
+ console.log(` ${sc}┃${c.reset} ${c.dim}${ts} ${fc} findings ${sev.toUpperCase()}${c.reset}`);
4473
+ console.log();
4474
+ }
4475
+ return;
4476
+ }
4143
4477
  if (command === 'activity' || command === 'my') {
4144
4478
  await activityCommand(targets);
4145
4479
  return;
@@ -4148,6 +4482,73 @@ async function main() {
4148
4482
  await searchCommand(targets);
4149
4483
  return;
4150
4484
  }
4485
+ if (command === 'consensus') {
4486
+ banner();
4487
+ const pkg = targets[0];
4488
+ if (!pkg) {
4489
+ console.log(` ${c.red}Error: package name required${c.reset}`);
4490
+ console.log(` ${c.dim}Usage: ${c.cyan}agentaudit consensus <package-name>${c.reset}`);
4491
+ process.exitCode = 2;
4492
+ return;
4493
+ }
4494
+ const slug = pkg.toLowerCase().replace(/[^a-z0-9-]/g, '-');
4495
+ if (!jsonMode) console.log(` Fetching consensus for ${c.bold}${slug}${c.reset}...`);
4496
+ try {
4497
+ const res = await fetch(`${REGISTRY_URL}/api/packages/${slug}/consensus`, { signal: AbortSignal.timeout(10_000) });
4498
+ if (!res.ok) {
4499
+ if (res.status === 404) {
4500
+ console.log(` ${c.yellow}Not found${c.reset} — "${slug}" hasn't been audited yet.`);
4501
+ console.log(` ${c.dim}Run: ${c.cyan}agentaudit audit <repo-url>${c.dim} to create the first audit${c.reset}`);
4502
+ } else {
4503
+ console.log(` ${c.red}API error (HTTP ${res.status})${c.reset}`);
4504
+ }
4505
+ return;
4506
+ }
4507
+ const data = await res.json();
4508
+ if (jsonMode) { console.log(JSON.stringify(data, null, 2)); return; }
4509
+
4510
+ console.log();
4511
+ console.log(sectionHeader(`Consensus: ${slug}`));
4512
+ console.log();
4513
+
4514
+ // Status
4515
+ const status = data.consensus_status || data.status || 'pending';
4516
+ const statusColor = status === 'reached' ? c.green : status === 'disputed' ? c.yellow : c.dim;
4517
+ console.log(` Status: ${statusColor}${status.toUpperCase()}${c.reset}`);
4518
+
4519
+ // Risk + Severity
4520
+ if (data.consensus_risk_score != null) console.log(` Risk: ${riskBadge(data.consensus_risk_score)}`);
4521
+ if (data.consensus_severity) {
4522
+ const sc = severityColor(data.consensus_severity);
4523
+ console.log(` Severity: ${sc}${data.consensus_severity.toUpperCase()}${c.reset}`);
4524
+ }
4525
+
4526
+ // Models
4527
+ if (data.models && data.models.length > 0) {
4528
+ console.log();
4529
+ console.log(` ${c.bold}Models (${data.models.length}):${c.reset}`);
4530
+ for (const m of data.models) {
4531
+ const sc = severityColor(m.severity || m.max_severity);
4532
+ const risk = m.risk_score ?? '?';
4533
+ console.log(` ${sc}┃${c.reset} ${(m.model || m.audit_model || '?').padEnd(30)} ${c.dim}risk ${risk}${c.reset} ${sc}${(m.severity || m.max_severity || '').toUpperCase()}${c.reset}`);
4534
+ }
4535
+ }
4536
+
4537
+ // Reviewers
4538
+ if (data.reviews != null || data.reviewer_count != null) {
4539
+ const count = data.reviewer_count || data.reviews?.length || 0;
4540
+ console.log();
4541
+ console.log(` ${c.dim}Reviews: ${count} | Threshold: 5 reviewers, >60% agreement${c.reset}`);
4542
+ }
4543
+
4544
+ console.log();
4545
+ console.log(` ${c.dim}Full details: ${REGISTRY_URL}/packages/${slug}${c.reset}`);
4546
+ console.log();
4547
+ } catch (err) {
4548
+ console.log(` ${c.red}Failed: ${err.message}${c.reset}`);
4549
+ }
4550
+ return;
4551
+ }
4151
4552
 
4152
4553
  banner();
4153
4554
 
@@ -4729,8 +5130,13 @@ async function main() {
4729
5130
 
4730
5131
  let hasFindings = false;
4731
5132
  for (const url of urls) {
4732
- const report = await auditRepo(url);
4733
- if (report?.findings?.length > 0) hasFindings = true;
5133
+ const result = await auditRepo(url);
5134
+ // Multi-model returns array, single-model returns object
5135
+ if (Array.isArray(result)) {
5136
+ if (result.some(r => r?.findings?.length > 0)) hasFindings = true;
5137
+ } else if (result?.findings?.length > 0) {
5138
+ hasFindings = true;
5139
+ }
4734
5140
  }
4735
5141
  process.exitCode = hasFindings ? 1 : 0;
4736
5142
  return;