agentaudit 3.9.34 → 3.9.36

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/cli.mjs +247 -112
  2. package/package.json +1 -1
package/cli.mjs CHANGED
@@ -1444,11 +1444,39 @@ async function auditRepo(url) {
1444
1444
  process.stdout.write(` ${c.dim}[3/4]${c.reset} Preparing audit payload...`);
1445
1445
  const auditPrompt = loadAuditPrompt();
1446
1446
 
1447
- let codeBlock = '';
1447
+ // Build code chunks for multi-pass analysis.
1448
+ // Budget ~45k tokens (~180k chars) per chunk for code, leaving room for prompt + output.
1449
+ const MAX_CHUNK_CHARS = 180_000;
1450
+ const chunks = []; // array of code block strings
1451
+ let currentChunk = '';
1452
+ let currentChars = 0;
1448
1453
  for (const file of files) {
1449
- codeBlock += `\n### FILE: ${file.path}\n\`\`\`\n${file.content}\n\`\`\`\n`;
1454
+ const entry = `\n### FILE: ${file.path}\n\`\`\`\n${file.content}\n\`\`\`\n`;
1455
+ if (currentChars + entry.length > MAX_CHUNK_CHARS && currentChars > 0) {
1456
+ chunks.push(currentChunk);
1457
+ currentChunk = '';
1458
+ currentChars = 0;
1459
+ }
1460
+ // If a single file exceeds chunk limit, truncate it
1461
+ if (entry.length > MAX_CHUNK_CHARS) {
1462
+ const truncContent = file.content.substring(0, MAX_CHUNK_CHARS - 200);
1463
+ currentChunk += `\n### FILE: ${file.path}\n\`\`\`\n${truncContent}\n[... file truncated, ${file.content.length} chars total ...]\n\`\`\`\n`;
1464
+ currentChars += MAX_CHUNK_CHARS;
1465
+ } else {
1466
+ currentChunk += entry;
1467
+ currentChars += entry.length;
1468
+ }
1469
+ }
1470
+ if (currentChunk) chunks.push(currentChunk);
1471
+
1472
+ const needsMultiPass = chunks.length > 1;
1473
+ if (needsMultiPass) {
1474
+ console.log(` ${c.green}done${c.reset} ${c.yellow}(${chunks.length} passes needed — ${files.length} files across ${chunks.length} chunks)${c.reset}`);
1475
+ } else {
1476
+ console.log(` ${c.green}done${c.reset}`);
1450
1477
  }
1451
- console.log(` ${c.green}done${c.reset}`);
1478
+ // For single-pass, use the only chunk as codeBlock
1479
+ const codeBlock = chunks[0] || '';
1452
1480
 
1453
1481
  // Step 4: LLM Analysis
1454
1482
  // Check for API keys to determine which LLM to use
@@ -1544,134 +1572,241 @@ async function auditRepo(url) {
1544
1572
  actualModel = modelOverride || resolvedProvider.model || 'unknown';
1545
1573
  }
1546
1574
 
1547
- // We have an API key run LLM audit
1548
- process.stdout.write(` ${c.dim}[4/4]${c.reset} Running LLM analysis ${c.dim}(${resolvedProvider.id}: ${actualModel})${c.reset}...`);
1549
-
1550
- const systemPrompt = auditPrompt || 'You are a security auditor. Analyze the code and report findings as JSON.';
1551
- const userMessage = [
1552
- `Audit this package: **${slug}** (${url})`,
1553
- ``,
1554
- `After analysis, respond with ONLY a valid JSON object. No markdown fences, no explanation, no text before or after. Just the raw JSON:`,
1555
- `{ "skill_slug": "${slug}", "source_url": "${url}", "package_type": "<mcp-server|agent-skill|library|cli-tool>",`,
1556
- ` "risk_score": <0-100>, "result": "<safe|caution|unsafe>", "max_severity": "<none|low|medium|high|critical>",`,
1557
- ` "findings_count": <n>, "findings": [{ "id": "...", "title": "...", "severity": "...", "category": "...",`,
1558
- ` "description": "...", "file": "...", "line": <n>, "remediation": "...", "confidence": "...", "is_by_design": false }] }`,
1559
- ``,
1560
- `## Source Code`,
1561
- codeBlock,
1562
- ].join('\n');
1563
-
1575
+ // ── LLM call helper (reused for multi-pass) ──
1576
+ async function callLLM(codeContent, passLabel) {
1577
+ const systemPrompt = auditPrompt || 'You are a security auditor. Analyze the code and report findings as JSON.';
1578
+ const userMessage = [
1579
+ `Audit this package: **${slug}** (${url})`,
1580
+ ``,
1581
+ `After analysis, respond with ONLY a valid JSON object. No markdown fences, no explanation, no text before or after. Just the raw JSON:`,
1582
+ `{ "skill_slug": "${slug}", "source_url": "${url}", "package_type": "<mcp-server|agent-skill|library|cli-tool>",`,
1583
+ ` "risk_score": <0-100>, "result": "<safe|caution|unsafe>", "max_severity": "<none|low|medium|high|critical>",`,
1584
+ ` "findings_count": <n>, "findings": [{ "id": "...", "title": "...", "severity": "...", "category": "...",`,
1585
+ ` "description": "...", "file": "...", "line": <n>, "remediation": "...", "confidence": "...", "is_by_design": false }] }`,
1586
+ ``,
1587
+ `## Source Code`,
1588
+ codeContent,
1589
+ ].join('\n');
1590
+
1591
+ let _lastLlmText = '';
1592
+ let result = null;
1593
+ let meta = {};
1594
+
1595
+ try {
1596
+ if (resolvedProvider.id === 'anthropic') {
1597
+ const res = await fetch('https://api.anthropic.com/v1/messages', {
1598
+ method: 'POST',
1599
+ headers: {
1600
+ 'x-api-key': resolvedProvider.key,
1601
+ 'anthropic-version': '2023-06-01',
1602
+ 'content-type': 'application/json',
1603
+ },
1604
+ body: JSON.stringify({
1605
+ model: modelOverride || 'claude-sonnet-4-20250514',
1606
+ max_tokens: 8192,
1607
+ system: systemPrompt,
1608
+ messages: [{ role: 'user', content: userMessage }],
1609
+ }),
1610
+ signal: AbortSignal.timeout(120_000),
1611
+ });
1612
+ const data = await res.json();
1613
+ if (data.error) {
1614
+ return { error: data.error.message || JSON.stringify(data.error) };
1615
+ }
1616
+ const text = data.content?.[0]?.text || '';
1617
+ _lastLlmText = text;
1618
+ result = extractJSON(text);
1619
+ meta = {
1620
+ provider_msg_id: data.id || null,
1621
+ input_tokens: data.usage?.input_tokens || null,
1622
+ output_tokens: data.usage?.output_tokens || null,
1623
+ reported_model: data.model || null,
1624
+ };
1625
+ } else {
1626
+ let apiUrl, modelName, authHeaders;
1627
+ switch (resolvedProvider.id) {
1628
+ case 'openrouter':
1629
+ apiUrl = 'https://openrouter.ai/api/v1/chat/completions';
1630
+ modelName = modelOverride || process.env.OPENROUTER_MODEL || 'anthropic/claude-sonnet-4';
1631
+ authHeaders = { 'Authorization': `Bearer ${resolvedProvider.key}`, 'HTTP-Referer': 'https://agentaudit.dev', 'X-Title': 'AgentAudit' };
1632
+ break;
1633
+ case 'ollama':
1634
+ apiUrl = `${resolvedProvider.host}/v1/chat/completions`;
1635
+ modelName = modelOverride || resolvedProvider.model;
1636
+ authHeaders = {};
1637
+ break;
1638
+ case 'custom':
1639
+ apiUrl = resolvedProvider.url.endsWith('/chat/completions') ? resolvedProvider.url : `${resolvedProvider.url.replace(/\/$/, '')}/chat/completions`;
1640
+ modelName = modelOverride || resolvedProvider.model;
1641
+ authHeaders = resolvedProvider.key ? { 'Authorization': `Bearer ${resolvedProvider.key}` } : {};
1642
+ break;
1643
+ default:
1644
+ apiUrl = 'https://api.openai.com/v1/chat/completions';
1645
+ modelName = modelOverride || 'gpt-4o';
1646
+ authHeaders = { 'Authorization': `Bearer ${resolvedProvider.key}` };
1647
+ }
1648
+
1649
+ const res = await fetch(apiUrl, {
1650
+ method: 'POST',
1651
+ headers: { 'Content-Type': 'application/json', ...authHeaders },
1652
+ body: JSON.stringify({
1653
+ model: modelName,
1654
+ max_tokens: 8192,
1655
+ messages: [
1656
+ { role: 'system', content: systemPrompt },
1657
+ { role: 'user', content: userMessage },
1658
+ ],
1659
+ }),
1660
+ signal: AbortSignal.timeout(resolvedProvider.id === 'ollama' ? 300_000 : 120_000),
1661
+ });
1662
+ const data = await res.json();
1663
+ if (data.error) {
1664
+ return { error: data.error.message || JSON.stringify(data.error) };
1665
+ }
1666
+ const text = data.choices?.[0]?.message?.content || '';
1667
+ _lastLlmText = text;
1668
+ result = extractJSON(text);
1669
+ meta = {
1670
+ provider_msg_id: data.id || null,
1671
+ provider_fingerprint: data.system_fingerprint || null,
1672
+ input_tokens: data.usage?.prompt_tokens || null,
1673
+ output_tokens: data.usage?.completion_tokens || null,
1674
+ reported_model: data.model || null,
1675
+ };
1676
+ }
1677
+ } catch (err) {
1678
+ return { error: err.message };
1679
+ }
1680
+
1681
+ return { report: result, meta, rawText: _lastLlmText };
1682
+ }
1683
+
1684
+ // ── Run LLM analysis (single or multi-pass) ──
1564
1685
  let report = null;
1686
+ let providerMeta = {};
1565
1687
  let _lastLlmText = '';
1566
- let providerMeta = {}; // Collect provider metadata for attestation
1567
1688
 
1568
- try {
1569
- if (resolvedProvider.id === 'anthropic') {
1570
- const res = await fetch('https://api.anthropic.com/v1/messages', {
1571
- method: 'POST',
1572
- headers: {
1573
- 'x-api-key': resolvedProvider.key,
1574
- 'anthropic-version': '2023-06-01',
1575
- 'content-type': 'application/json',
1576
- },
1577
- body: JSON.stringify({
1578
- model: modelOverride || 'claude-sonnet-4-20250514',
1579
- max_tokens: 8192,
1580
- system: systemPrompt,
1581
- messages: [{ role: 'user', content: userMessage }],
1582
- }),
1583
- signal: AbortSignal.timeout(120_000),
1584
- });
1585
- const data = await res.json();
1586
- if (data.error) {
1689
+ if (needsMultiPass) {
1690
+ // Multi-pass: analyze each chunk, merge findings
1691
+ console.log(` ${c.dim}[4/4]${c.reset} Running LLM analysis ${c.dim}(${resolvedProvider.id}: ${actualModel})${c.reset} — ${c.yellow}${chunks.length} passes${c.reset}`);
1692
+ const allFindings = [];
1693
+ let totalInput = 0, totalOutput = 0;
1694
+ let lastMeta = {};
1695
+ let baseReport = null;
1696
+
1697
+ for (let i = 0; i < chunks.length; i++) {
1698
+ process.stdout.write(` ${c.dim} Pass ${i + 1}/${chunks.length}...${c.reset}`);
1699
+ const passStart = Date.now();
1700
+ const result = await callLLM(chunks[i], `pass ${i + 1}`);
1701
+
1702
+ if (result.error) {
1587
1703
  console.log(` ${c.red}failed${c.reset}`);
1588
- console.log(` ${c.red}API error: ${data.error.message || JSON.stringify(data.error)}${c.reset}`);
1704
+ const errMsg = result.error;
1705
+ console.log(` ${c.red}API error: ${errMsg}${c.reset}`);
1706
+ if (/context.length|maximum.*tokens|too.many.tokens/i.test(errMsg)) {
1707
+ console.log(` ${c.dim}This model's context window is too small even for chunked analysis.${c.reset}`);
1708
+ console.log(` ${c.dim}Try: --model anthropic/claude-sonnet-4 (200k) or --model openai/gpt-4o (128k)${c.reset}`);
1709
+ }
1589
1710
  try { fs.rmSync(tmpDir, { recursive: true, force: true }); } catch {}
1590
1711
  return null;
1591
1712
  }
1592
- const text = data.content?.[0]?.text || '';
1593
- _lastLlmText = text;
1594
- report = extractJSON(text);
1595
- providerMeta = {
1596
- provider_msg_id: data.id || null,
1597
- input_tokens: data.usage?.input_tokens || null,
1598
- output_tokens: data.usage?.output_tokens || null,
1599
- reported_model: data.model || null,
1600
- };
1601
- } else {
1602
- // OpenAI, OpenRouter, Ollama, or Custom (all use OpenAI-compatible chat completions API)
1603
- let apiUrl, modelName, authHeaders;
1604
- switch (resolvedProvider.id) {
1605
- case 'openrouter':
1606
- apiUrl = 'https://openrouter.ai/api/v1/chat/completions';
1607
- modelName = modelOverride || process.env.OPENROUTER_MODEL || 'anthropic/claude-sonnet-4';
1608
- authHeaders = { 'Authorization': `Bearer ${resolvedProvider.key}`, 'HTTP-Referer': 'https://agentaudit.dev', 'X-Title': 'AgentAudit' };
1609
- break;
1610
- case 'ollama':
1611
- apiUrl = `${resolvedProvider.host}/v1/chat/completions`;
1612
- modelName = modelOverride || resolvedProvider.model;
1613
- authHeaders = {};
1614
- break;
1615
- case 'custom':
1616
- apiUrl = resolvedProvider.url.endsWith('/chat/completions') ? resolvedProvider.url : `${resolvedProvider.url.replace(/\/$/, '')}/chat/completions`;
1617
- modelName = modelOverride || resolvedProvider.model;
1618
- authHeaders = resolvedProvider.key ? { 'Authorization': `Bearer ${resolvedProvider.key}` } : {};
1619
- break;
1620
- default: // openai
1621
- apiUrl = 'https://api.openai.com/v1/chat/completions';
1622
- modelName = modelOverride || 'gpt-4o';
1623
- authHeaders = { 'Authorization': `Bearer ${resolvedProvider.key}` };
1713
+
1714
+ if (!result.report) {
1715
+ console.log(` ${c.yellow}no findings (empty/unparseable)${c.reset}`);
1716
+ _lastLlmText = result.rawText || '';
1717
+ continue;
1624
1718
  }
1625
-
1626
- const res = await fetch(apiUrl, {
1627
- method: 'POST',
1628
- headers: { 'Content-Type': 'application/json', ...authHeaders },
1629
- body: JSON.stringify({
1630
- model: modelName,
1631
- max_tokens: 8192,
1632
- messages: [
1633
- { role: 'system', content: systemPrompt },
1634
- { role: 'user', content: userMessage },
1635
- ],
1636
- }),
1637
- signal: AbortSignal.timeout(resolvedProvider.id === 'ollama' ? 300_000 : 120_000), // Ollama: 5min (local can be slow)
1638
- });
1639
- const data = await res.json();
1640
- if (data.error) {
1641
- console.log(` ${c.red}failed${c.reset}`);
1642
- console.log(` ${c.red}API error: ${data.error.message || JSON.stringify(data.error)}${c.reset}`);
1643
- try { fs.rmSync(tmpDir, { recursive: true, force: true }); } catch {}
1644
- return null;
1719
+
1720
+ const passElapsed = ((Date.now() - passStart) / 1000).toFixed(1);
1721
+ const passFindings = result.report.findings?.length || 0;
1722
+ console.log(` ${c.green}done${c.reset} ${c.dim}(${passElapsed}s, ${passFindings} findings)${c.reset}`);
1723
+
1724
+ if (!baseReport) baseReport = result.report;
1725
+ if (result.report.findings) allFindings.push(...result.report.findings);
1726
+ lastMeta = result.meta;
1727
+ totalInput += result.meta.input_tokens || 0;
1728
+ totalOutput += result.meta.output_tokens || 0;
1729
+ }
1730
+
1731
+ if (!baseReport) {
1732
+ console.log(` ${c.red}✖ All passes failed to produce a report${c.reset}`);
1733
+ try { fs.rmSync(tmpDir, { recursive: true, force: true }); } catch {}
1734
+ return null;
1735
+ }
1736
+
1737
+ // Merge: deduplicate findings by title+file, recalculate risk score
1738
+ const seen = new Set();
1739
+ const mergedFindings = [];
1740
+ for (const f of allFindings) {
1741
+ const key = `${f.title}::${f.file || ''}`;
1742
+ if (!seen.has(key)) {
1743
+ seen.add(key);
1744
+ mergedFindings.push(f);
1645
1745
  }
1646
- const text = data.choices?.[0]?.message?.content || '';
1647
- _lastLlmText = text;
1648
- report = extractJSON(text);
1649
- providerMeta = {
1650
- provider_msg_id: data.id || null,
1651
- provider_fingerprint: data.system_fingerprint || null,
1652
- input_tokens: data.usage?.prompt_tokens || null,
1653
- output_tokens: data.usage?.completion_tokens || null,
1654
- reported_model: data.model || null,
1655
- };
1656
1746
  }
1657
1747
 
1748
+ // Recalculate severity-based risk
1749
+ const sevWeights = { critical: 25, high: 15, medium: 5, low: 1 };
1750
+ const mergedRisk = Math.min(100, mergedFindings.reduce((s, f) => s + (sevWeights[f.severity] || 0), 0));
1751
+ const maxSev = mergedFindings.length === 0 ? 'none' :
1752
+ mergedFindings.some(f => f.severity === 'critical') ? 'critical' :
1753
+ mergedFindings.some(f => f.severity === 'high') ? 'high' :
1754
+ mergedFindings.some(f => f.severity === 'medium') ? 'medium' : 'low';
1755
+
1756
+ report = {
1757
+ ...baseReport,
1758
+ findings: mergedFindings,
1759
+ findings_count: mergedFindings.length,
1760
+ risk_score: mergedRisk,
1761
+ result: mergedRisk === 0 ? 'safe' : mergedRisk <= 20 ? 'caution' : 'unsafe',
1762
+ max_severity: maxSev,
1763
+ };
1764
+ providerMeta = { ...lastMeta, input_tokens: totalInput || null, output_tokens: totalOutput || null };
1765
+
1766
+ console.log(` ${c.dim} Merged: ${mergedFindings.length} unique findings from ${chunks.length} passes${c.reset}`);
1767
+ console.log(` ${c.green}done${c.reset} ${c.dim}(${elapsed(start)})${c.reset}`);
1768
+ } else {
1769
+ // Single-pass (original flow)
1770
+ process.stdout.write(` ${c.dim}[4/4]${c.reset} Running LLM analysis ${c.dim}(${resolvedProvider.id}: ${actualModel})${c.reset}...`);
1771
+ const result = await callLLM(codeBlock);
1772
+
1773
+ if (result.error) {
1774
+ console.log(` ${c.red}failed${c.reset}`);
1775
+ const errMsg = result.error;
1776
+ console.log(` ${c.red}API error: ${errMsg}${c.reset}`);
1777
+ if (/context.length|maximum.*tokens|too.many.tokens/i.test(errMsg)) {
1778
+ console.log(` ${c.dim}This model's context window is too small for this repository.${c.reset}`);
1779
+ console.log(` ${c.dim}Try a model with a larger context: --model anthropic/claude-sonnet-4 (200k) or --model openai/gpt-4o (128k)${c.reset}`);
1780
+ }
1781
+ try { fs.rmSync(tmpDir, { recursive: true, force: true }); } catch {}
1782
+ return null;
1783
+ }
1784
+
1785
+ report = result.report;
1786
+ providerMeta = result.meta;
1787
+ _lastLlmText = result.rawText || '';
1658
1788
  console.log(` ${c.green}done${c.reset} ${c.dim}(${elapsed(start)})${c.reset}`);
1659
- } catch (err) {
1660
- console.log(` ${c.red}failed${c.reset}`);
1661
- console.log(` ${c.red}${err.message}${c.reset}`);
1662
- try { fs.rmSync(tmpDir, { recursive: true, force: true }); } catch {}
1663
- return null;
1664
1789
  }
1665
1790
 
1666
1791
  // Cleanup repo
1667
1792
  try { fs.rmSync(tmpDir, { recursive: true, force: true }); } catch {}
1668
1793
 
1669
1794
  if (!report) {
1670
- console.log(` ${c.red}Could not parse LLM response as JSON${c.reset}`);
1671
- console.log(` ${c.dim}Hint: run with --debug to see the raw LLM response${c.reset}`);
1672
- if (process.argv.includes('--debug')) {
1795
+ const rawLen = typeof _lastLlmText === 'string' ? _lastLlmText.length : 0;
1796
+ if (rawLen === 0) {
1797
+ console.log(` ${c.red}✖ Model returned an empty response${c.reset}`);
1798
+ console.log(` ${c.dim}This model may not support structured JSON output or the prompt was too large.${c.reset}`);
1799
+ console.log(` ${c.dim}Try a different model: --model anthropic/claude-sonnet-4 or --model openai/gpt-4o${c.reset}`);
1800
+ } else {
1801
+ console.log(` ${c.red}✖ Could not parse LLM response as JSON${c.reset}`);
1802
+ console.log(` ${c.dim}The model returned ${rawLen} chars but not valid JSON. Try a stronger model.${c.reset}`);
1803
+ if (!process.argv.includes('--debug')) {
1804
+ console.log(` ${c.dim}Hint: run with --debug to see the raw LLM response${c.reset}`);
1805
+ }
1806
+ }
1807
+ if (process.argv.includes('--debug') && rawLen > 0) {
1673
1808
  console.log(` ${c.dim}--- Raw LLM response (first 2000 chars) ---${c.reset}`);
1674
- console.log((typeof _lastLlmText === 'string' ? _lastLlmText : '(empty)').slice(0, 2000));
1809
+ console.log(_lastLlmText.slice(0, 2000));
1675
1810
  console.log(` ${c.dim}--- end ---${c.reset}`);
1676
1811
  }
1677
1812
  return null;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agentaudit",
3
- "version": "3.9.34",
3
+ "version": "3.9.36",
4
4
  "description": "Security scanner for AI packages — MCP server + CLI",
5
5
  "type": "module",
6
6
  "bin": {