agentaudit 3.9.35 → 3.9.36

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/cli.mjs +227 -130
  2. package/package.json +1 -1
package/cli.mjs CHANGED
@@ -1444,34 +1444,39 @@ async function auditRepo(url) {
1444
1444
  process.stdout.write(` ${c.dim}[3/4]${c.reset} Preparing audit payload...`);
1445
1445
  const auditPrompt = loadAuditPrompt();
1446
1446
 
1447
- // Build code block with smart truncation to fit context windows.
1448
- // Reserve ~16k tokens for system prompt + output budget ~48k tokens for code (~192k chars).
1449
- // Smaller models may have 32-65k context; we aim for safe default.
1450
- const MAX_CODE_CHARS = 180_000; // ~45k tokens
1451
- let codeBlock = '';
1452
- let totalChars = 0;
1453
- let truncatedFiles = 0;
1447
+ // Build code chunks for multi-pass analysis.
1448
+ // Budget ~45k tokens (~180k chars) per chunk for code, leaving room for prompt + output.
1449
+ const MAX_CHUNK_CHARS = 180_000;
1450
+ const chunks = []; // array of code block strings
1451
+ let currentChunk = '';
1452
+ let currentChars = 0;
1454
1453
  for (const file of files) {
1455
1454
  const entry = `\n### FILE: ${file.path}\n\`\`\`\n${file.content}\n\`\`\`\n`;
1456
- if (totalChars + entry.length > MAX_CODE_CHARS) {
1457
- // Try to fit a truncated version of this file
1458
- const remaining = MAX_CODE_CHARS - totalChars;
1459
- if (remaining > 200) {
1460
- const truncContent = file.content.substring(0, remaining - 100);
1461
- codeBlock += `\n### FILE: ${file.path}\n\`\`\`\n${truncContent}\n[... truncated ...]\n\`\`\`\n`;
1462
- }
1463
- truncatedFiles = files.length - codeBlock.split('### FILE:').length + 1;
1464
- break;
1455
+ if (currentChars + entry.length > MAX_CHUNK_CHARS && currentChars > 0) {
1456
+ chunks.push(currentChunk);
1457
+ currentChunk = '';
1458
+ currentChars = 0;
1459
+ }
1460
+ // If a single file exceeds chunk limit, truncate it
1461
+ if (entry.length > MAX_CHUNK_CHARS) {
1462
+ const truncContent = file.content.substring(0, MAX_CHUNK_CHARS - 200);
1463
+ currentChunk += `\n### FILE: ${file.path}\n\`\`\`\n${truncContent}\n[... file truncated, ${file.content.length} chars total ...]\n\`\`\`\n`;
1464
+ currentChars += MAX_CHUNK_CHARS;
1465
+ } else {
1466
+ currentChunk += entry;
1467
+ currentChars += entry.length;
1465
1468
  }
1466
- codeBlock += entry;
1467
- totalChars += entry.length;
1468
1469
  }
1469
- if (truncatedFiles > 0) {
1470
- codeBlock += `\n[⚠ ${truncatedFiles} file(s) omitted due to context window limits]\n`;
1471
- console.log(` ${c.green}done${c.reset} ${c.yellow}(${truncatedFiles} files truncated to fit context window)${c.reset}`);
1470
+ if (currentChunk) chunks.push(currentChunk);
1471
+
1472
+ const needsMultiPass = chunks.length > 1;
1473
+ if (needsMultiPass) {
1474
+ console.log(` ${c.green}done${c.reset} ${c.yellow}(${chunks.length} passes needed — ${files.length} files across ${chunks.length} chunks)${c.reset}`);
1472
1475
  } else {
1473
1476
  console.log(` ${c.green}done${c.reset}`);
1474
1477
  }
1478
+ // For single-pass, use the only chunk as codeBlock
1479
+ const codeBlock = chunks[0] || '';
1475
1480
 
1476
1481
  // Step 4: LLM Analysis
1477
1482
  // Check for API keys to determine which LLM to use
@@ -1567,128 +1572,220 @@ async function auditRepo(url) {
1567
1572
  actualModel = modelOverride || resolvedProvider.model || 'unknown';
1568
1573
  }
1569
1574
 
1570
- // We have an API key run LLM audit
1571
- process.stdout.write(` ${c.dim}[4/4]${c.reset} Running LLM analysis ${c.dim}(${resolvedProvider.id}: ${actualModel})${c.reset}...`);
1572
-
1573
- const systemPrompt = auditPrompt || 'You are a security auditor. Analyze the code and report findings as JSON.';
1574
- const userMessage = [
1575
- `Audit this package: **${slug}** (${url})`,
1576
- ``,
1577
- `After analysis, respond with ONLY a valid JSON object. No markdown fences, no explanation, no text before or after. Just the raw JSON:`,
1578
- `{ "skill_slug": "${slug}", "source_url": "${url}", "package_type": "<mcp-server|agent-skill|library|cli-tool>",`,
1579
- ` "risk_score": <0-100>, "result": "<safe|caution|unsafe>", "max_severity": "<none|low|medium|high|critical>",`,
1580
- ` "findings_count": <n>, "findings": [{ "id": "...", "title": "...", "severity": "...", "category": "...",`,
1581
- ` "description": "...", "file": "...", "line": <n>, "remediation": "...", "confidence": "...", "is_by_design": false }] }`,
1582
- ``,
1583
- `## Source Code`,
1584
- codeBlock,
1585
- ].join('\n');
1586
-
1575
+ // ── LLM call helper (reused for multi-pass) ──
1576
+ async function callLLM(codeContent, passLabel) {
1577
+ const systemPrompt = auditPrompt || 'You are a security auditor. Analyze the code and report findings as JSON.';
1578
+ const userMessage = [
1579
+ `Audit this package: **${slug}** (${url})`,
1580
+ ``,
1581
+ `After analysis, respond with ONLY a valid JSON object. No markdown fences, no explanation, no text before or after. Just the raw JSON:`,
1582
+ `{ "skill_slug": "${slug}", "source_url": "${url}", "package_type": "<mcp-server|agent-skill|library|cli-tool>",`,
1583
+ ` "risk_score": <0-100>, "result": "<safe|caution|unsafe>", "max_severity": "<none|low|medium|high|critical>",`,
1584
+ ` "findings_count": <n>, "findings": [{ "id": "...", "title": "...", "severity": "...", "category": "...",`,
1585
+ ` "description": "...", "file": "...", "line": <n>, "remediation": "...", "confidence": "...", "is_by_design": false }] }`,
1586
+ ``,
1587
+ `## Source Code`,
1588
+ codeContent,
1589
+ ].join('\n');
1590
+
1591
+ let _lastLlmText = '';
1592
+ let result = null;
1593
+ let meta = {};
1594
+
1595
+ try {
1596
+ if (resolvedProvider.id === 'anthropic') {
1597
+ const res = await fetch('https://api.anthropic.com/v1/messages', {
1598
+ method: 'POST',
1599
+ headers: {
1600
+ 'x-api-key': resolvedProvider.key,
1601
+ 'anthropic-version': '2023-06-01',
1602
+ 'content-type': 'application/json',
1603
+ },
1604
+ body: JSON.stringify({
1605
+ model: modelOverride || 'claude-sonnet-4-20250514',
1606
+ max_tokens: 8192,
1607
+ system: systemPrompt,
1608
+ messages: [{ role: 'user', content: userMessage }],
1609
+ }),
1610
+ signal: AbortSignal.timeout(120_000),
1611
+ });
1612
+ const data = await res.json();
1613
+ if (data.error) {
1614
+ return { error: data.error.message || JSON.stringify(data.error) };
1615
+ }
1616
+ const text = data.content?.[0]?.text || '';
1617
+ _lastLlmText = text;
1618
+ result = extractJSON(text);
1619
+ meta = {
1620
+ provider_msg_id: data.id || null,
1621
+ input_tokens: data.usage?.input_tokens || null,
1622
+ output_tokens: data.usage?.output_tokens || null,
1623
+ reported_model: data.model || null,
1624
+ };
1625
+ } else {
1626
+ let apiUrl, modelName, authHeaders;
1627
+ switch (resolvedProvider.id) {
1628
+ case 'openrouter':
1629
+ apiUrl = 'https://openrouter.ai/api/v1/chat/completions';
1630
+ modelName = modelOverride || process.env.OPENROUTER_MODEL || 'anthropic/claude-sonnet-4';
1631
+ authHeaders = { 'Authorization': `Bearer ${resolvedProvider.key}`, 'HTTP-Referer': 'https://agentaudit.dev', 'X-Title': 'AgentAudit' };
1632
+ break;
1633
+ case 'ollama':
1634
+ apiUrl = `${resolvedProvider.host}/v1/chat/completions`;
1635
+ modelName = modelOverride || resolvedProvider.model;
1636
+ authHeaders = {};
1637
+ break;
1638
+ case 'custom':
1639
+ apiUrl = resolvedProvider.url.endsWith('/chat/completions') ? resolvedProvider.url : `${resolvedProvider.url.replace(/\/$/, '')}/chat/completions`;
1640
+ modelName = modelOverride || resolvedProvider.model;
1641
+ authHeaders = resolvedProvider.key ? { 'Authorization': `Bearer ${resolvedProvider.key}` } : {};
1642
+ break;
1643
+ default:
1644
+ apiUrl = 'https://api.openai.com/v1/chat/completions';
1645
+ modelName = modelOverride || 'gpt-4o';
1646
+ authHeaders = { 'Authorization': `Bearer ${resolvedProvider.key}` };
1647
+ }
1648
+
1649
+ const res = await fetch(apiUrl, {
1650
+ method: 'POST',
1651
+ headers: { 'Content-Type': 'application/json', ...authHeaders },
1652
+ body: JSON.stringify({
1653
+ model: modelName,
1654
+ max_tokens: 8192,
1655
+ messages: [
1656
+ { role: 'system', content: systemPrompt },
1657
+ { role: 'user', content: userMessage },
1658
+ ],
1659
+ }),
1660
+ signal: AbortSignal.timeout(resolvedProvider.id === 'ollama' ? 300_000 : 120_000),
1661
+ });
1662
+ const data = await res.json();
1663
+ if (data.error) {
1664
+ return { error: data.error.message || JSON.stringify(data.error) };
1665
+ }
1666
+ const text = data.choices?.[0]?.message?.content || '';
1667
+ _lastLlmText = text;
1668
+ result = extractJSON(text);
1669
+ meta = {
1670
+ provider_msg_id: data.id || null,
1671
+ provider_fingerprint: data.system_fingerprint || null,
1672
+ input_tokens: data.usage?.prompt_tokens || null,
1673
+ output_tokens: data.usage?.completion_tokens || null,
1674
+ reported_model: data.model || null,
1675
+ };
1676
+ }
1677
+ } catch (err) {
1678
+ return { error: err.message };
1679
+ }
1680
+
1681
+ return { report: result, meta, rawText: _lastLlmText };
1682
+ }
1683
+
1684
+ // ── Run LLM analysis (single or multi-pass) ──
1587
1685
  let report = null;
1686
+ let providerMeta = {};
1588
1687
  let _lastLlmText = '';
1589
- let providerMeta = {}; // Collect provider metadata for attestation
1590
1688
 
1591
- try {
1592
- if (resolvedProvider.id === 'anthropic') {
1593
- const res = await fetch('https://api.anthropic.com/v1/messages', {
1594
- method: 'POST',
1595
- headers: {
1596
- 'x-api-key': resolvedProvider.key,
1597
- 'anthropic-version': '2023-06-01',
1598
- 'content-type': 'application/json',
1599
- },
1600
- body: JSON.stringify({
1601
- model: modelOverride || 'claude-sonnet-4-20250514',
1602
- max_tokens: 8192,
1603
- system: systemPrompt,
1604
- messages: [{ role: 'user', content: userMessage }],
1605
- }),
1606
- signal: AbortSignal.timeout(120_000),
1607
- });
1608
- const data = await res.json();
1609
- if (data.error) {
1610
- console.log(` ${c.red}failed${c.reset}`);
1611
- console.log(` ${c.red}API error: ${data.error.message || JSON.stringify(data.error)}${c.reset}`);
1612
- try { fs.rmSync(tmpDir, { recursive: true, force: true }); } catch {}
1613
- return null;
1614
- }
1615
- const text = data.content?.[0]?.text || '';
1616
- _lastLlmText = text;
1617
- report = extractJSON(text);
1618
- providerMeta = {
1619
- provider_msg_id: data.id || null,
1620
- input_tokens: data.usage?.input_tokens || null,
1621
- output_tokens: data.usage?.output_tokens || null,
1622
- reported_model: data.model || null,
1623
- };
1624
- } else {
1625
- // OpenAI, OpenRouter, Ollama, or Custom (all use OpenAI-compatible chat completions API)
1626
- let apiUrl, modelName, authHeaders;
1627
- switch (resolvedProvider.id) {
1628
- case 'openrouter':
1629
- apiUrl = 'https://openrouter.ai/api/v1/chat/completions';
1630
- modelName = modelOverride || process.env.OPENROUTER_MODEL || 'anthropic/claude-sonnet-4';
1631
- authHeaders = { 'Authorization': `Bearer ${resolvedProvider.key}`, 'HTTP-Referer': 'https://agentaudit.dev', 'X-Title': 'AgentAudit' };
1632
- break;
1633
- case 'ollama':
1634
- apiUrl = `${resolvedProvider.host}/v1/chat/completions`;
1635
- modelName = modelOverride || resolvedProvider.model;
1636
- authHeaders = {};
1637
- break;
1638
- case 'custom':
1639
- apiUrl = resolvedProvider.url.endsWith('/chat/completions') ? resolvedProvider.url : `${resolvedProvider.url.replace(/\/$/, '')}/chat/completions`;
1640
- modelName = modelOverride || resolvedProvider.model;
1641
- authHeaders = resolvedProvider.key ? { 'Authorization': `Bearer ${resolvedProvider.key}` } : {};
1642
- break;
1643
- default: // openai
1644
- apiUrl = 'https://api.openai.com/v1/chat/completions';
1645
- modelName = modelOverride || 'gpt-4o';
1646
- authHeaders = { 'Authorization': `Bearer ${resolvedProvider.key}` };
1647
- }
1648
-
1649
- const res = await fetch(apiUrl, {
1650
- method: 'POST',
1651
- headers: { 'Content-Type': 'application/json', ...authHeaders },
1652
- body: JSON.stringify({
1653
- model: modelName,
1654
- max_tokens: 8192,
1655
- messages: [
1656
- { role: 'system', content: systemPrompt },
1657
- { role: 'user', content: userMessage },
1658
- ],
1659
- }),
1660
- signal: AbortSignal.timeout(resolvedProvider.id === 'ollama' ? 300_000 : 120_000), // Ollama: 5min (local can be slow)
1661
- });
1662
- const data = await res.json();
1663
- if (data.error) {
1689
+ if (needsMultiPass) {
1690
+ // Multi-pass: analyze each chunk, merge findings
1691
+ console.log(` ${c.dim}[4/4]${c.reset} Running LLM analysis ${c.dim}(${resolvedProvider.id}: ${actualModel})${c.reset} — ${c.yellow}${chunks.length} passes${c.reset}`);
1692
+ const allFindings = [];
1693
+ let totalInput = 0, totalOutput = 0;
1694
+ let lastMeta = {};
1695
+ let baseReport = null;
1696
+
1697
+ for (let i = 0; i < chunks.length; i++) {
1698
+ process.stdout.write(` ${c.dim} Pass ${i + 1}/${chunks.length}...${c.reset}`);
1699
+ const passStart = Date.now();
1700
+ const result = await callLLM(chunks[i], `pass ${i + 1}`);
1701
+
1702
+ if (result.error) {
1664
1703
  console.log(` ${c.red}failed${c.reset}`);
1665
- const errMsg = data.error.message || JSON.stringify(data.error);
1704
+ const errMsg = result.error;
1666
1705
  console.log(` ${c.red}API error: ${errMsg}${c.reset}`);
1667
1706
  if (/context.length|maximum.*tokens|too.many.tokens/i.test(errMsg)) {
1668
- console.log(` ${c.dim}This model's context window is too small for this repository.${c.reset}`);
1669
- console.log(` ${c.dim}Try a model with a larger context: --model anthropic/claude-sonnet-4 (200k) or --model openai/gpt-4o (128k)${c.reset}`);
1707
+ console.log(` ${c.dim}This model's context window is too small even for chunked analysis.${c.reset}`);
1708
+ console.log(` ${c.dim}Try: --model anthropic/claude-sonnet-4 (200k) or --model openai/gpt-4o (128k)${c.reset}`);
1670
1709
  }
1671
1710
  try { fs.rmSync(tmpDir, { recursive: true, force: true }); } catch {}
1672
1711
  return null;
1673
1712
  }
1674
- const text = data.choices?.[0]?.message?.content || '';
1675
- _lastLlmText = text;
1676
- report = extractJSON(text);
1677
- providerMeta = {
1678
- provider_msg_id: data.id || null,
1679
- provider_fingerprint: data.system_fingerprint || null,
1680
- input_tokens: data.usage?.prompt_tokens || null,
1681
- output_tokens: data.usage?.completion_tokens || null,
1682
- reported_model: data.model || null,
1683
- };
1713
+
1714
+ if (!result.report) {
1715
+ console.log(` ${c.yellow}no findings (empty/unparseable)${c.reset}`);
1716
+ _lastLlmText = result.rawText || '';
1717
+ continue;
1718
+ }
1719
+
1720
+ const passElapsed = ((Date.now() - passStart) / 1000).toFixed(1);
1721
+ const passFindings = result.report.findings?.length || 0;
1722
+ console.log(` ${c.green}done${c.reset} ${c.dim}(${passElapsed}s, ${passFindings} findings)${c.reset}`);
1723
+
1724
+ if (!baseReport) baseReport = result.report;
1725
+ if (result.report.findings) allFindings.push(...result.report.findings);
1726
+ lastMeta = result.meta;
1727
+ totalInput += result.meta.input_tokens || 0;
1728
+ totalOutput += result.meta.output_tokens || 0;
1729
+ }
1730
+
1731
+ if (!baseReport) {
1732
+ console.log(` ${c.red}✖ All passes failed to produce a report${c.reset}`);
1733
+ try { fs.rmSync(tmpDir, { recursive: true, force: true }); } catch {}
1734
+ return null;
1684
1735
  }
1685
1736
 
1737
+ // Merge: deduplicate findings by title+file, recalculate risk score
1738
+ const seen = new Set();
1739
+ const mergedFindings = [];
1740
+ for (const f of allFindings) {
1741
+ const key = `${f.title}::${f.file || ''}`;
1742
+ if (!seen.has(key)) {
1743
+ seen.add(key);
1744
+ mergedFindings.push(f);
1745
+ }
1746
+ }
1747
+
1748
+ // Recalculate severity-based risk
1749
+ const sevWeights = { critical: 25, high: 15, medium: 5, low: 1 };
1750
+ const mergedRisk = Math.min(100, mergedFindings.reduce((s, f) => s + (sevWeights[f.severity] || 0), 0));
1751
+ const maxSev = mergedFindings.length === 0 ? 'none' :
1752
+ mergedFindings.some(f => f.severity === 'critical') ? 'critical' :
1753
+ mergedFindings.some(f => f.severity === 'high') ? 'high' :
1754
+ mergedFindings.some(f => f.severity === 'medium') ? 'medium' : 'low';
1755
+
1756
+ report = {
1757
+ ...baseReport,
1758
+ findings: mergedFindings,
1759
+ findings_count: mergedFindings.length,
1760
+ risk_score: mergedRisk,
1761
+ result: mergedRisk === 0 ? 'safe' : mergedRisk <= 20 ? 'caution' : 'unsafe',
1762
+ max_severity: maxSev,
1763
+ };
1764
+ providerMeta = { ...lastMeta, input_tokens: totalInput || null, output_tokens: totalOutput || null };
1765
+
1766
+ console.log(` ${c.dim} Merged: ${mergedFindings.length} unique findings from ${chunks.length} passes${c.reset}`);
1767
+ console.log(` ${c.green}done${c.reset} ${c.dim}(${elapsed(start)})${c.reset}`);
1768
+ } else {
1769
+ // Single-pass (original flow)
1770
+ process.stdout.write(` ${c.dim}[4/4]${c.reset} Running LLM analysis ${c.dim}(${resolvedProvider.id}: ${actualModel})${c.reset}...`);
1771
+ const result = await callLLM(codeBlock);
1772
+
1773
+ if (result.error) {
1774
+ console.log(` ${c.red}failed${c.reset}`);
1775
+ const errMsg = result.error;
1776
+ console.log(` ${c.red}API error: ${errMsg}${c.reset}`);
1777
+ if (/context.length|maximum.*tokens|too.many.tokens/i.test(errMsg)) {
1778
+ console.log(` ${c.dim}This model's context window is too small for this repository.${c.reset}`);
1779
+ console.log(` ${c.dim}Try a model with a larger context: --model anthropic/claude-sonnet-4 (200k) or --model openai/gpt-4o (128k)${c.reset}`);
1780
+ }
1781
+ try { fs.rmSync(tmpDir, { recursive: true, force: true }); } catch {}
1782
+ return null;
1783
+ }
1784
+
1785
+ report = result.report;
1786
+ providerMeta = result.meta;
1787
+ _lastLlmText = result.rawText || '';
1686
1788
  console.log(` ${c.green}done${c.reset} ${c.dim}(${elapsed(start)})${c.reset}`);
1687
- } catch (err) {
1688
- console.log(` ${c.red}failed${c.reset}`);
1689
- console.log(` ${c.red}${err.message}${c.reset}`);
1690
- try { fs.rmSync(tmpDir, { recursive: true, force: true }); } catch {}
1691
- return null;
1692
1789
  }
1693
1790
 
1694
1791
  // Cleanup repo
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agentaudit",
3
- "version": "3.9.35",
3
+ "version": "3.9.36",
4
4
  "description": "Security scanner for AI packages — MCP server + CLI",
5
5
  "type": "module",
6
6
  "bin": {