agentaudit 3.9.42 → 3.9.44
This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions exactly as they appear in their respective public registries.
- package/cli.mjs +142 -63
- package/package.json +1 -1
package/cli.mjs
CHANGED
|
@@ -48,8 +48,8 @@ function resolveProvider(flagOverride, keys) {
|
|
|
48
48
|
if (preferred) {
|
|
49
49
|
const resolved = aliases[preferred] || preferred;
|
|
50
50
|
const p = providers[resolved];
|
|
51
|
-
if (
|
|
52
|
-
|
|
51
|
+
if (p) return p;
|
|
52
|
+
// Preferred provider not available (no API key) — fall through to inference
|
|
53
53
|
}
|
|
54
54
|
|
|
55
55
|
// Smart inference: if model is set, try to match it to a provider
|
|
@@ -565,8 +565,8 @@ function collectFiles(dir, basePath = '', collected = [], totalSize = { bytes: 0
|
|
|
565
565
|
catch { return collected; }
|
|
566
566
|
entries.sort((a, b) => a.name.localeCompare(b.name));
|
|
567
567
|
for (const entry of entries) {
|
|
568
|
-
if (totalSize.bytes >= MAX_TOTAL_SIZE) { totalSize.truncated = true; totalSize.skippedPaths.push(relPath); continue; }
|
|
569
568
|
const relPath = basePath ? `${basePath}/${entry.name}` : entry.name;
|
|
569
|
+
if (totalSize.bytes >= MAX_TOTAL_SIZE) { totalSize.truncated = true; totalSize.skippedPaths.push(relPath); continue; }
|
|
570
570
|
const fullPath = path.join(dir, entry.name);
|
|
571
571
|
// SECURITY: Never follow symlinks — attacker could link to /etc/passwd or ~/.ssh/
|
|
572
572
|
if (entry.isSymbolicLink()) continue;
|
|
@@ -1471,15 +1471,65 @@ async function auditRepo(url) {
|
|
|
1471
1471
|
if (_collectMeta.skippedPaths.length > 5) console.log(` ${c.dim} ... and ${_collectMeta.skippedPaths.length - 5} more${c.reset}`);
|
|
1472
1472
|
}
|
|
1473
1473
|
|
|
1474
|
-
// Step 3:
|
|
1474
|
+
// Step 3: Resolve provider + model FIRST (needed for dynamic chunk sizing)
|
|
1475
|
+
const anthropicKey = process.env.ANTHROPIC_API_KEY;
|
|
1476
|
+
const openaiKey = process.env.OPENAI_API_KEY;
|
|
1477
|
+
const openrouterKey = process.env.OPENROUTER_API_KEY;
|
|
1478
|
+
const openrouterModel = process.env.OPENROUTER_MODEL || 'anthropic/claude-sonnet-4';
|
|
1479
|
+
const providerFlag = process.argv.find(a => a.startsWith('--provider='))?.split('=')[1]?.toLowerCase()
|
|
1480
|
+
|| (process.argv.includes('--provider') ? process.argv[process.argv.indexOf('--provider') + 1]?.toLowerCase() : null);
|
|
1481
|
+
const resolvedProvider = resolveProvider(providerFlag, { anthropicKey, openaiKey, openrouterKey });
|
|
1482
|
+
// Determine actual model name
|
|
1483
|
+
let actualModel;
|
|
1484
|
+
if (!resolvedProvider) {
|
|
1485
|
+
actualModel = 'unknown';
|
|
1486
|
+
} else if (resolvedProvider.id === 'anthropic') {
|
|
1487
|
+
actualModel = modelOverride || 'claude-sonnet-4-20250514';
|
|
1488
|
+
} else if (resolvedProvider.id === 'openrouter') {
|
|
1489
|
+
actualModel = modelOverride || process.env.OPENROUTER_MODEL || 'anthropic/claude-sonnet-4';
|
|
1490
|
+
} else if (resolvedProvider.id === 'openai') {
|
|
1491
|
+
actualModel = modelOverride || 'gpt-4o';
|
|
1492
|
+
} else if (resolvedProvider.id === 'ollama') {
|
|
1493
|
+
actualModel = modelOverride || resolvedProvider.model;
|
|
1494
|
+
} else {
|
|
1495
|
+
actualModel = modelOverride || resolvedProvider.model || 'unknown';
|
|
1496
|
+
}
|
|
1497
|
+
|
|
1498
|
+
// Step 3b: Determine model context for dynamic chunk sizing
|
|
1499
|
+
let modelContextTokens = 64_000; // conservative default
|
|
1500
|
+
let outputTokenBudget = 4096;
|
|
1501
|
+
|
|
1502
|
+
if (resolvedProvider) {
|
|
1503
|
+
if (resolvedProvider.id === 'openrouter') {
|
|
1504
|
+
try {
|
|
1505
|
+
const modelInfoRes = await fetch(`https://openrouter.ai/api/v1/models`, {
|
|
1506
|
+
signal: AbortSignal.timeout(5000),
|
|
1507
|
+
headers: { 'HTTP-Referer': 'https://agentaudit.dev' },
|
|
1508
|
+
});
|
|
1509
|
+
if (modelInfoRes.ok) {
|
|
1510
|
+
const modelData = await modelInfoRes.json();
|
|
1511
|
+
const modelInfo = modelData.data?.find(m => m.id === actualModel);
|
|
1512
|
+
if (modelInfo?.context_length) {
|
|
1513
|
+
modelContextTokens = modelInfo.context_length;
|
|
1514
|
+
}
|
|
1515
|
+
}
|
|
1516
|
+
} catch { /* ignore — use default */ }
|
|
1517
|
+
} else if (resolvedProvider.id === 'anthropic') {
|
|
1518
|
+
modelContextTokens = 200_000;
|
|
1519
|
+
} else if (resolvedProvider.id === 'openai') {
|
|
1520
|
+
modelContextTokens = 128_000;
|
|
1521
|
+
} else if (resolvedProvider.id === 'ollama') {
|
|
1522
|
+
modelContextTokens = 32_000;
|
|
1523
|
+
}
|
|
1524
|
+
}
|
|
1525
|
+
|
|
1526
|
+
outputTokenBudget = modelContextTokens >= 128_000 ? 8192 : modelContextTokens >= 64_000 ? 4096 : modelContextTokens >= 32_000 ? 2048 : 2048;
|
|
1527
|
+
const dynamicChunkChars = Math.floor(modelContextTokens * 0.5 * 4);
|
|
1528
|
+
const MAX_CHUNK_CHARS = Math.max(40_000, Math.min(dynamicChunkChars, 600_000));
|
|
1529
|
+
|
|
1530
|
+
// Step 3c: Build audit payload
|
|
1475
1531
|
process.stdout.write(` ${c.dim}[3/4]${c.reset} Preparing audit payload...`);
|
|
1476
1532
|
const auditPrompt = loadAuditPrompt();
|
|
1477
|
-
|
|
1478
|
-
// Build code chunks for multi-pass analysis.
|
|
1479
|
-
// Budget ~45k tokens (~180k chars) per chunk for code, leaving room for prompt + output.
|
|
1480
|
-
// ~15k tokens per chunk for code → fits comfortably in 32k+ context models
|
|
1481
|
-
// with room for system prompt (~2k tokens) + output (4k tokens)
|
|
1482
|
-
const MAX_CHUNK_CHARS = 60_000;
|
|
1483
1533
|
// Sort files by directory to keep related files in the same chunk.
|
|
1484
1534
|
// This preserves cross-file context (imports, shared modules) within each pass.
|
|
1485
1535
|
const sortedFiles = [...files].sort((a, b) => {
|
|
@@ -1524,17 +1574,6 @@ async function auditRepo(url) {
|
|
|
1524
1574
|
const codeBlock = chunks[0] || '';
|
|
1525
1575
|
|
|
1526
1576
|
// Step 4: LLM Analysis
|
|
1527
|
-
// Check for API keys to determine which LLM to use
|
|
1528
|
-
const anthropicKey = process.env.ANTHROPIC_API_KEY;
|
|
1529
|
-
const openaiKey = process.env.OPENAI_API_KEY;
|
|
1530
|
-
const openrouterKey = process.env.OPENROUTER_API_KEY;
|
|
1531
|
-
const openrouterModel = process.env.OPENROUTER_MODEL || 'anthropic/claude-sonnet-4';
|
|
1532
|
-
|
|
1533
|
-
// --provider flag overrides auto-detection
|
|
1534
|
-
const providerFlag = process.argv.find(a => a.startsWith('--provider='))?.split('=')[1]?.toLowerCase()
|
|
1535
|
-
|| (process.argv.includes('--provider') ? process.argv[process.argv.indexOf('--provider') + 1]?.toLowerCase() : null);
|
|
1536
|
-
|
|
1537
|
-
const resolvedProvider = resolveProvider(providerFlag, { anthropicKey, openaiKey, openrouterKey });
|
|
1538
1577
|
const activeProvider = resolvedProvider?.label || null;
|
|
1539
1578
|
|
|
1540
1579
|
if (!resolvedProvider) {
|
|
@@ -1603,49 +1642,9 @@ async function auditRepo(url) {
|
|
|
1603
1642
|
return null;
|
|
1604
1643
|
}
|
|
1605
1644
|
|
|
1606
|
-
//
|
|
1607
|
-
let actualModel;
|
|
1608
|
-
if (resolvedProvider.id === 'anthropic') {
|
|
1609
|
-
actualModel = modelOverride || 'claude-sonnet-4-20250514';
|
|
1610
|
-
} else if (resolvedProvider.id === 'openrouter') {
|
|
1611
|
-
actualModel = modelOverride || process.env.OPENROUTER_MODEL || 'anthropic/claude-sonnet-4';
|
|
1612
|
-
} else if (resolvedProvider.id === 'openai') {
|
|
1613
|
-
actualModel = modelOverride || 'gpt-4o';
|
|
1614
|
-
} else if (resolvedProvider.id === 'ollama') {
|
|
1615
|
-
actualModel = modelOverride || resolvedProvider.model;
|
|
1616
|
-
} else {
|
|
1617
|
-
actualModel = modelOverride || resolvedProvider.model || 'unknown';
|
|
1618
|
-
}
|
|
1645
|
+
// actualModel already resolved in Step 3
|
|
1619
1646
|
|
|
1620
1647
|
// ── LLM call helper (reused for multi-pass) ──
|
|
1621
|
-
// Determine optimal max_tokens based on model context size
|
|
1622
|
-
// For large-context models (128k+) we can afford 8192 output tokens
|
|
1623
|
-
// For medium (32k-128k) use 4096, for small (<32k) use 2048
|
|
1624
|
-
let outputTokenBudget = 4096; // safe default
|
|
1625
|
-
if (resolvedProvider.id === 'openrouter') {
|
|
1626
|
-
try {
|
|
1627
|
-
const modelInfoRes = await fetch(`https://openrouter.ai/api/v1/models`, {
|
|
1628
|
-
signal: AbortSignal.timeout(5000),
|
|
1629
|
-
headers: { 'HTTP-Referer': 'https://agentaudit.dev' },
|
|
1630
|
-
});
|
|
1631
|
-
if (modelInfoRes.ok) {
|
|
1632
|
-
const modelData = await modelInfoRes.json();
|
|
1633
|
-
const modelInfo = modelData.data?.find(m => m.id === actualModel);
|
|
1634
|
-
if (modelInfo?.context_length) {
|
|
1635
|
-
const ctx = modelInfo.context_length;
|
|
1636
|
-
outputTokenBudget = ctx >= 128_000 ? 8192 : ctx >= 64_000 ? 4096 : ctx >= 32_000 ? 2048 : 2048;
|
|
1637
|
-
if (process.argv.includes('--debug')) {
|
|
1638
|
-
console.log(` ${c.dim} Model context: ${ctx.toLocaleString()} tokens → max_tokens: ${outputTokenBudget}${c.reset}`);
|
|
1639
|
-
}
|
|
1640
|
-
}
|
|
1641
|
-
}
|
|
1642
|
-
} catch { /* ignore — use default */ }
|
|
1643
|
-
} else if (resolvedProvider.id === 'anthropic') {
|
|
1644
|
-
outputTokenBudget = 8192; // Claude models have 200k context
|
|
1645
|
-
} else if (resolvedProvider.id === 'openai') {
|
|
1646
|
-
outputTokenBudget = 8192; // GPT-4o has 128k context
|
|
1647
|
-
}
|
|
1648
|
-
|
|
1649
1648
|
async function callLLM(codeContent, passLabel) {
|
|
1650
1649
|
const systemPrompt = auditPrompt || 'You are a security auditor. Analyze the code and report findings as JSON.';
|
|
1651
1650
|
const userMessage = [
|
|
@@ -1834,6 +1833,79 @@ async function auditRepo(url) {
|
|
|
1834
1833
|
providerMeta = { ...lastMeta, input_tokens: totalInput || null, output_tokens: totalOutput || null };
|
|
1835
1834
|
|
|
1836
1835
|
console.log(` ${c.dim} Merged: ${mergedFindings.length} unique findings from ${chunks.length} passes${c.reset}`);
|
|
1836
|
+
|
|
1837
|
+
// ── Cross-file correlation pass ──
|
|
1838
|
+
// Build lightweight import/export map and ask LLM to check for multi-file attack patterns
|
|
1839
|
+
// that individual chunk passes couldn't detect (e.g., credential read in file A + exfil in file B)
|
|
1840
|
+
process.stdout.write(` ${c.dim} Cross-file correlation...${c.reset}`);
|
|
1841
|
+
try {
|
|
1842
|
+
const importMap = sortedFiles.map(f => {
|
|
1843
|
+
const imports = [];
|
|
1844
|
+
const exports = [];
|
|
1845
|
+
// JS/TS imports
|
|
1846
|
+
for (const m of f.content.matchAll(/(?:import|require)\s*\(?['"]([^'"]+)['"]\)?/g)) imports.push(m[1]);
|
|
1847
|
+
for (const m of f.content.matchAll(/(?:from)\s+['"]([^'"]+)['"]/g)) imports.push(m[1]);
|
|
1848
|
+
// Python imports
|
|
1849
|
+
for (const m of f.content.matchAll(/(?:from|import)\s+([\w.]+)/g)) imports.push(m[1]);
|
|
1850
|
+
// Exports
|
|
1851
|
+
for (const m of f.content.matchAll(/(?:module\.exports|export\s+(?:default\s+)?(?:function|class|const|let|var)\s+)(\w+)/g)) exports.push(m[1]);
|
|
1852
|
+
// Dangerous function calls (brief)
|
|
1853
|
+
const dangerousCalls = [];
|
|
1854
|
+
if (/\b(?:exec|spawn|execSync|system|eval|Function)\s*\(/.test(f.content)) dangerousCalls.push('exec/eval');
|
|
1855
|
+
if (/\b(?:fetch|https?\.request|axios|got)\s*\(/.test(f.content)) dangerousCalls.push('network');
|
|
1856
|
+
if (/\b(?:readFile|writeFile|createReadStream|open)\s*\(/.test(f.content)) dangerousCalls.push('fs');
|
|
1857
|
+
if (/process\.env|os\.environ|getenv/.test(f.content)) dangerousCalls.push('env-read');
|
|
1858
|
+
return { path: f.path, imports: [...new Set(imports)].slice(0, 10), exports: exports.slice(0, 10), calls: dangerousCalls };
|
|
1859
|
+
}).filter(f => f.imports.length > 0 || f.exports.length > 0 || f.calls.length > 0);
|
|
1860
|
+
|
|
1861
|
+
if (importMap.length > 2) {
|
|
1862
|
+
const correlationPrompt = [
|
|
1863
|
+
`You previously analyzed ${chunks.length} code chunks from package "${slug}" (${url}).`,
|
|
1864
|
+
`Here is a cross-file map showing imports, exports, and dangerous function calls.`,
|
|
1865
|
+
`Check for MULTI-FILE ATTACK PATTERNS that individual chunk analysis could miss:`,
|
|
1866
|
+
`- File A reads credentials/env → File B sends them to network (credential exfiltration pipeline)`,
|
|
1867
|
+
`- File A defines a function with exec/eval → File B calls it with user input (indirect RCE)`,
|
|
1868
|
+
`- Config file grants broad permissions → Code file exploits them`,
|
|
1869
|
+
`- Install hook in scripts/ triggers code in src/ that exfiltrates data`,
|
|
1870
|
+
``,
|
|
1871
|
+
`Respond with ONLY a JSON object: { "cross_file_findings": [...] } where each finding has:`,
|
|
1872
|
+
`{ "title": "...", "severity": "...", "description": "...", "file": "...", "confidence": "...", "pattern_id": "CORR_001", "remediation": "..." }`,
|
|
1873
|
+
`If no cross-file issues found, respond: { "cross_file_findings": [] }`,
|
|
1874
|
+
``,
|
|
1875
|
+
`## File Map`,
|
|
1876
|
+
JSON.stringify(importMap, null, 2),
|
|
1877
|
+
].join('\n');
|
|
1878
|
+
|
|
1879
|
+
const corrResult = await callLLM(correlationPrompt, 'correlation');
|
|
1880
|
+
if (corrResult.report?.cross_file_findings?.length > 0) {
|
|
1881
|
+
const corrFindings = corrResult.report.cross_file_findings;
|
|
1882
|
+
for (const f of corrFindings) {
|
|
1883
|
+
const key = `${f.title}::${f.file || ''}`;
|
|
1884
|
+
if (!seen.has(key)) {
|
|
1885
|
+
seen.add(key);
|
|
1886
|
+
mergedFindings.push(f);
|
|
1887
|
+
}
|
|
1888
|
+
}
|
|
1889
|
+
console.log(` ${c.yellow}${corrFindings.length} cross-file issues found${c.reset}`);
|
|
1890
|
+
// Re-merge into report
|
|
1891
|
+
const newRisk = Math.min(100, mergedFindings.reduce((s, f) => s + (sevWeights[f.severity] || 0), 0));
|
|
1892
|
+
report.findings = mergedFindings;
|
|
1893
|
+
report.findings_count = mergedFindings.length;
|
|
1894
|
+
report.risk_score = newRisk;
|
|
1895
|
+
report.result = newRisk === 0 ? 'safe' : newRisk <= 20 ? 'caution' : 'unsafe';
|
|
1896
|
+
totalInput += corrResult.meta?.input_tokens || 0;
|
|
1897
|
+
totalOutput += corrResult.meta?.output_tokens || 0;
|
|
1898
|
+
providerMeta = { ...providerMeta, input_tokens: totalInput || null, output_tokens: totalOutput || null };
|
|
1899
|
+
} else {
|
|
1900
|
+
console.log(` ${c.green}clean${c.reset}`);
|
|
1901
|
+
}
|
|
1902
|
+
} else {
|
|
1903
|
+
console.log(` ${c.dim}skipped (too few files with imports)${c.reset}`);
|
|
1904
|
+
}
|
|
1905
|
+
} catch (corrErr) {
|
|
1906
|
+
console.log(` ${c.dim}skipped (${corrErr.message?.slice(0, 40)})${c.reset}`);
|
|
1907
|
+
}
|
|
1908
|
+
|
|
1837
1909
|
console.log(` ${c.green}done${c.reset} ${c.dim}(${elapsed(start)})${c.reset}`);
|
|
1838
1910
|
} else {
|
|
1839
1911
|
// Single-pass (original flow)
|
|
@@ -1884,7 +1956,14 @@ async function auditRepo(url) {
|
|
|
1884
1956
|
|
|
1885
1957
|
// Display results
|
|
1886
1958
|
console.log();
|
|
1887
|
-
|
|
1959
|
+
// Always recalculate risk_score from findings severities (never trust LLM's score)
|
|
1960
|
+
const _sevW = { critical: 25, high: 15, medium: 5, low: 1 };
|
|
1961
|
+
const recalcRisk = report.findings && report.findings.length > 0
|
|
1962
|
+
? Math.min(100, report.findings.reduce((s, f) => s + (_sevW[f.severity] || 0), 0))
|
|
1963
|
+
: 0;
|
|
1964
|
+
report.risk_score = recalcRisk;
|
|
1965
|
+
report.result = recalcRisk === 0 ? 'safe' : recalcRisk <= 20 ? 'caution' : 'unsafe';
|
|
1966
|
+
const riskScore = recalcRisk;
|
|
1888
1967
|
const trustScore = 100 - riskScore;
|
|
1889
1968
|
const trustColor = trustScore >= 70 ? c.green : trustScore >= 40 ? c.yellow : c.red;
|
|
1890
1969
|
const trustLabel = trustScore >= 70 ? 'SAFE' : trustScore >= 40 ? 'CAUTION' : 'UNSAFE';
|