agentaudit 3.12.2 → 3.12.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/cli.mjs CHANGED
@@ -1053,7 +1053,24 @@ function detectPackageInfo(repoPath, files) {
1053
1053
  for (const ef of entryFiles) {
1054
1054
  if (files.some(f => f.path === ef)) { info.entrypoint = ef; break; }
1055
1055
  }
1056
-
1056
+
1057
+ // Extract package version from manifest files
1058
+ info.version = null;
1059
+ const versionSources = [
1060
+ { file: 'package.json', extract: c => { try { return JSON.parse(c).version; } catch { return null; } } },
1061
+ { file: 'pyproject.toml', extract: c => { const m = c.match(/^\s*version\s*=\s*["']([^"']+)["']/m); return m?.[1] || null; } },
1062
+ { file: 'setup.py', extract: c => { const m = c.match(/version\s*=\s*["']([^"']+)["']/); return m?.[1] || null; } },
1063
+ { file: 'setup.cfg', extract: c => { const m = c.match(/^\s*version\s*=\s*(.+)$/m); return m?.[1]?.trim() || null; } },
1064
+ { file: 'Cargo.toml', extract: c => { const m = c.match(/^\s*version\s*=\s*["']([^"']+)["']/m); return m?.[1] || null; } },
1065
+ ];
1066
+ for (const vs of versionSources) {
1067
+ const f = files.find(f => f.path === vs.file || f.path.endsWith('/' + vs.file));
1068
+ if (f) {
1069
+ const v = vs.extract(f.content);
1070
+ if (v) { info.version = v; break; }
1071
+ }
1072
+ }
1073
+
1057
1074
  return info;
1058
1075
  }
1059
1076
 
@@ -2655,6 +2672,134 @@ async function callLlm(llmConfig, systemPrompt, userMessage) {
2655
2672
  }
2656
2673
  }
2657
2674
 
2675
// ── Deterministic post-processing for LLM reports ────────────────────────
// Fills in missing fields that LLMs often omit, using deterministic lookups

// Default CWE for each pattern-ID prefix (the pattern_id with its trailing
// "_<digits>" suffix removed).
const PATTERN_CWE_MAP = {
  CMD_INJECT: 'CWE-78', CRED_THEFT: 'CWE-522', DATA_EXFIL: 'CWE-200',
  DESTRUCT: 'CWE-912', OBF: 'CWE-506', SANDBOX_ESC: 'CWE-693',
  SUPPLY_CHAIN: 'CWE-1357', SOCIAL_ENG: 'CWE-451', PRIV_ESC: 'CWE-269',
  INFO_LEAK: 'CWE-200', CRYPTO_WEAK: 'CWE-327', DESER: 'CWE-502',
  PATH_TRAV: 'CWE-22', SEC_BYPASS: 'CWE-693', PERSIST: 'CWE-912',
  AI_PROMPT: 'CWE-1426', MCP_POISON: 'CWE-1426', MCP_INJECT: 'CWE-94',
  MCP_TRAVERSAL: 'CWE-22', MCP_SUPPLY: 'CWE-1357', MCP_PERM: 'CWE-269',
  WORM: 'CWE-912', CICD: 'CWE-912', CORR: 'CWE-829', MANUAL: 'CWE-693',
};

// Default score_impact per severity, used when a finding carries none.
const SEVERITY_IMPACT = { critical: -25, high: -15, medium: -5, low: -1 };

// Fallback remediation text per pattern-ID prefix. Prefixes without an entry
// here receive a generic message in enrichFindings().
const REMEDIATION_TEMPLATES = {
  CMD_INJECT: 'Validate and sanitize input; use allowlists or parameterized execution instead of shell strings',
  CRED_THEFT: 'Remove hardcoded credentials; use environment variables or a secrets manager',
  DATA_EXFIL: 'Remove or document the external data transmission; ensure user consent',
  DESTRUCT: 'Add confirmation prompts and safeguards before destructive operations',
  OBF: 'Replace obfuscated code with readable equivalents; document the purpose',
  SANDBOX_ESC: 'Restrict file and process access to configured boundaries',
  SUPPLY_CHAIN: 'Pin dependency versions; verify package integrity',
  SOCIAL_ENG: 'Align documentation with actual code behavior',
  PRIV_ESC: 'Apply principle of least privilege; remove unnecessary elevated permissions',
  INFO_LEAK: 'Restrict exposed information to what is necessary for operation',
  CRYPTO_WEAK: 'Use modern cryptographic algorithms (AES-256, SHA-256+)',
  DESER: 'Use safe deserialization (e.g. yaml.safe_load, JSON) instead of unsafe loaders',
  PATH_TRAV: 'Sanitize file paths; reject inputs containing .. or absolute paths',
  SEC_BYPASS: 'Do not disable security controls; use proper certificate validation',
  PERSIST: 'Remove persistence mechanisms or require explicit user opt-in',
  AI_PROMPT: 'Remove hidden instructions; ensure tool descriptions are transparent',
  MCP_POISON: 'Remove injected instructions from tool descriptions and schemas',
  MCP_INJECT: 'Sanitize tool arguments and descriptions; prevent prompt injection',
  MCP_TRAVERSAL: 'Validate and sandbox file paths in MCP tool handlers',
  MCP_SUPPLY: 'Pin MCP package versions; verify transport configurations',
  MCP_PERM: 'Restrict permissions to minimum required scope; remove wildcard grants',
};

/**
 * Fill in fields an LLM-produced audit report left out, mutating it in place.
 *
 * Top level: `package_version` and `max_severity` are filled only when
 * missing (or 'unknown' / 'none' respectively); `risk_score`, `result` and
 * `findings_count` are always recomputed from the findings.
 * Per finding: `cwe_id`, `content`, `remediation`, `score_impact`,
 * `confidence` and `by_design` are filled or normalized when absent/invalid.
 *
 * @param {object} report - Parsed report; returned untouched when it is
 *   falsy or `report.findings` is not an array.
 * @param {Array<{path: string, content: string}>} [files] - Scanned files,
 *   used to recover code snippets; a missing or non-array value is treated
 *   as empty.
 * @param {{version?: string}} [pkgInfo] - Detected package info; may be
 *   omitted.
 * @returns {object} The same `report` object.
 */
function enrichFindings(report, files, pkgInfo) {
  if (!report || !Array.isArray(report.findings)) return report;

  // Keys come from LLM output, so only own properties are honoured: a
  // pattern_id such as "constructor" must not resolve to Object.prototype.
  const ownLookup = (table, key) =>
    (Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined);
  const severityOf = (finding) =>
    (typeof finding?.severity === 'string' ? finding.severity.toLowerCase() : '');
  const prefixOf = (finding) =>
    (typeof finding?.pattern_id === 'string' ? finding.pattern_id.replace(/_\d+$/, '') : '');
  const fileList = Array.isArray(files) ? files : [];

  // Ensure package_version
  if (!report.package_version || report.package_version === 'unknown') {
    report.package_version = pkgInfo?.version || 'unknown';
  }

  // Ensure max_severity: pick the most severe recognised severity. The rank
  // starts past the end of the list so a report whose worst finding is "low"
  // is reported as 'low' rather than 'none'.
  const severities = ['critical', 'high', 'medium', 'low'];
  let bestRank = severities.length;
  for (const f of report.findings) {
    const rank = severities.indexOf(severityOf(f));
    if (rank !== -1 && rank < bestRank) bestRank = rank;
  }
  const maxSev = bestRank < severities.length ? severities[bestRank] : 'none';
  // Only fill in when the report has no value (or 'none'); an explicit value
  // supplied by the LLM is left as is.
  if (!report.max_severity || report.max_severity === 'none') {
    report.max_severity = maxSev;
  }

  for (const finding of report.findings) {
    // Malformed entries (null, strings, ...) cannot be enriched.
    if (!finding || typeof finding !== 'object') continue;
    const prefix = prefixOf(finding);

    // 1. Fill cwe_id from pattern_id lookup
    if (!finding.cwe_id) {
      finding.cwe_id = ownLookup(PATTERN_CWE_MAP, prefix) || 'CWE-693';
    }

    // 2. Fill content (code snippet) from files array
    const needsContent = !finding.content || finding.content === '...';
    if (needsContent && finding.file && finding.line) {
      const matchFile = fileList.find(
        (f) => typeof f?.path === 'string'
          && (f.path === finding.file || f.path.endsWith('/' + finding.file)),
      );
      if (matchFile && typeof matchFile.content === 'string') {
        const lines = matchFile.content.split('\n');
        // A non-numeric line yields NaN, which fails the range check below.
        const lineIdx = Math.trunc(Number(finding.line)) - 1;
        if (lineIdx >= 0 && lineIdx < lines.length) {
          // Extract 1-3 lines around the target
          const start = Math.max(0, lineIdx - 1);
          const end = Math.min(lines.length, lineIdx + 2);
          finding.content = lines.slice(start, end).map((l) => l.trimEnd()).join('\n').trim();
        }
      }
    }

    // 3. Fill remediation from template
    if (!finding.remediation || finding.remediation === '...') {
      finding.remediation = ownLookup(REMEDIATION_TEMPLATES, prefix)
        || 'Review and address the identified security concern';
    }

    // 4. Ensure by_design is boolean. Done before scoring so a non-boolean
    //    value (e.g. the string "false") cannot zero out the impact.
    if (typeof finding.by_design !== 'boolean') {
      finding.by_design = false;
    }

    // 5. Ensure score_impact is set
    if (finding.score_impact === undefined || finding.score_impact === null) {
      finding.score_impact = finding.by_design
        ? 0
        : (ownLookup(SEVERITY_IMPACT, severityOf(finding)) ?? -5);
    }

    // 6. Ensure confidence has valid value
    if (!['high', 'medium', 'low'].includes(finding.confidence)) {
      finding.confidence = 'medium';
    }
  }

  // Recalculate risk_score from findings; by-design findings do not count and
  // a non-numeric score_impact contributes 0 instead of producing NaN.
  const computedRisk = report.findings.reduce((sum, f) => {
    if (!f || f.by_design) return sum;
    const impact = Number(f.score_impact);
    return sum + (Number.isFinite(impact) ? Math.abs(impact) : 0);
  }, 0);
  report.risk_score = Math.min(100, computedRisk);

  // Ensure result matches risk_score
  if (report.risk_score <= 25) report.result = 'safe';
  else if (report.risk_score <= 50) report.result = 'caution';
  else report.result = 'unsafe';

  // Ensure findings_count
  report.findings_count = report.findings.length;

  return report;
}
2802
+
2658
2803
  async function auditRepo(url) {
2659
2804
  const start = Date.now();
2660
2805
  const slug = slugFromUrl(url);
@@ -2712,15 +2857,17 @@ async function auditRepo(url) {
2712
2857
 
2713
2858
  // Build prompts
2714
2859
  const systemPrompt = auditPrompt || 'You are a security auditor. Analyze the code and report findings as JSON.';
2860
+ const detectedVersion = pkgInfo.version || 'unknown';
2715
2861
  const userMessage = [
2716
2862
  `Audit this package: **${slug}** (${url})`,
2863
+ `Package version detected: ${detectedVersion}`,
2864
+ ``,
2865
+ `Respond with ONLY a valid JSON object. No markdown fences, no explanation, no text before or after.`,
2866
+ ``,
2867
+ `Required top-level fields: skill_slug, source_url, package_type, package_version, risk_score, max_severity, result, findings_count, findings`,
2868
+ `Required finding fields (ALL mandatory): pattern_id, cwe_id, severity, title, description, file, line, content, remediation, confidence, by_design, score_impact`,
2717
2869
  ``,
2718
- `After analysis, respond with ONLY a valid JSON object. No markdown fences, no explanation, no text before or after. Just the raw JSON:`,
2719
- `{ "skill_slug": "${slug}", "source_url": "${url}", "package_type": "<mcp-server|agent-skill|library|cli-tool|other>",`,
2720
- ` "risk_score": <0-100>, "result": "<safe|caution|unsafe>", "max_severity": "<none|low|medium|high|critical>",`,
2721
- ` "findings_count": <n>, "findings": [{ "pattern_id": "CMD_INJECT_001", "title": "...", "severity": "...", "category": "...",`,
2722
- ` "cwe_id": "CWE-78", "description": "...", "file": "...", "line": <n>, "content": "...", "remediation": "...",`,
2723
- ` "confidence": "high|medium|low", "by_design": false, "score_impact": -15 }] }`,
2870
+ `A finding missing cwe_id, content, or remediation is INVALID do not emit it.`,
2724
2871
  ``,
2725
2872
  `## Source Code`,
2726
2873
  codeBlock,
@@ -2821,6 +2968,7 @@ async function auditRepo(url) {
2821
2968
  const durSec = Math.round((duration || 0) / 1000);
2822
2969
  console.log(` ${c.green}✓${c.reset} ${name.padEnd(30)} ${c.green}done${c.reset} ${c.dim}(${durSec}s)${c.reset}`);
2823
2970
  enrichReport(report, duration);
2971
+ enrichFindings(report, files, pkgInfo);
2824
2972
  saveHistory(report);
2825
2973
  reports.push({ name, report });
2826
2974
  }
@@ -2992,6 +3140,7 @@ async function auditRepo(url) {
2992
3140
  }
2993
3141
 
2994
3142
  enrichReport(report);
3143
+ enrichFindings(report, files, pkgInfo);
2995
3144
  saveHistory(report);
2996
3145
 
2997
3146
  // Display results
package/index.mjs CHANGED
@@ -343,7 +343,7 @@ async function checkRegistry(slug) {
343
343
  // ── MCP Server ───────────────────────────────────────────
344
344
 
345
345
  const server = new Server(
346
- { name: 'agentaudit', version: '3.12.2' },
346
+ { name: 'agentaudit', version: '3.12.3' },
347
347
  { capabilities: { tools: {} } }
348
348
  );
349
349
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agentaudit",
3
- "version": "3.12.2",
3
+ "version": "3.12.3",
4
4
  "description": "Security scanner for AI packages — MCP server + CLI",
5
5
  "type": "module",
6
6
  "bin": {
@@ -4,7 +4,7 @@ You are a security auditor analyzing a software package. Follow the three phases
4
4
 
5
5
  **LANGUAGE REQUIREMENT: Write ALL findings in ENGLISH. This includes `title`, `description`, `remediation` fields in the JSON report.**
6
6
 
7
- **BACKEND ENRICHMENT: The AgentAudit backend automatically extracts version info (package_version, commit_sha, PURL, SWHID) and computes content hashes. Focus on security analysis the backend handles mechanical tasks.**
7
+ **YOU must extract `package_version` from manifest files (package.json, pyproject.toml, setup.py, Cargo.toml). The backend enriches `commit_sha`, PURL, SWHID, and content hashes, but `package_version` must come from YOU.**
8
8
 
9
9
  ---
10
10
 
@@ -452,43 +452,70 @@ For every README, package.json description, tool description, and SKILL.md: comp
452
452
  ## source_url Rules
453
453
  The `source_url` field MUST point to a **source code repository** — never a product website, API endpoint, or marketing page.
454
454
  - **Best:** GitHub/GitLab repository URL
455
- - **OK:** ClaWHub URL (`https://clawhub.ai/skill-slug`)
455
+ - **OK:** AgentAudit package URL (`https://agentaudit.dev/packages/package-slug`)
456
456
  - **OK:** npm/PyPI package URL as last resort
457
457
  - **NEVER:** Company websites, API URLs, app URLs
458
458
 
459
- To find source_url: check `package.json` → `repository.url`, `_meta.json` → `source`/`repository`, `README.md` → GitHub links. If none found, use `https://clawhub.ai/{slug}`.
459
+ To find source_url: check `package.json` → `repository.url`, `_meta.json` → `source`/`repository`, `README.md` → GitHub links. If none found, use `https://agentaudit.dev/packages/{slug}`.
460
460
 
461
461
  ## JSON Report Format
462
462
 
463
+ **EVERY field shown below is REQUIRED. A finding missing ANY field (especially `cwe_id`, `content`, `remediation`) is INVALID — do not emit it.**
464
+
463
465
  ```json
464
466
  {
465
467
  "skill_slug": "package-name",
466
468
  "source_url": "https://github.com/owner/repo",
467
- "risk_score": 8,
469
+ "package_type": "mcp-server",
470
+ "package_version": "1.2.3",
471
+ "risk_score": 23,
472
+ "max_severity": "high",
468
473
  "result": "safe",
469
474
  "findings_count": 2,
470
475
  "findings": [
471
476
  {
472
- "severity": "high",
473
477
  "pattern_id": "CMD_INJECT_001",
474
478
  "cwe_id": "CWE-78",
479
+ "severity": "high",
475
480
  "title": "Unescaped user input passed to exec()",
476
- "description": "User-controlled input from HTTP body is passed directly to exec() without sanitization.",
481
+ "description": "User-controlled input from the 'command' tool argument is passed directly to child_process.exec() without sanitization at runner.js:42. An attacker can inject arbitrary shell commands via the MCP tool call.",
477
482
  "file": "src/runner.js",
478
- "file_hash": "e3b0c442...",
479
483
  "line": 42,
480
484
  "content": "exec(req.body.command)",
485
+ "remediation": "Validate input against an allowlist of permitted commands; use execFile() with explicit argument array instead of exec()",
481
486
  "confidence": "high",
482
- "remediation": "Validate and sanitize input; use allowlist of permitted commands",
483
487
  "by_design": false,
484
488
  "score_impact": -15
489
+ },
490
+ {
491
+ "pattern_id": "INFO_LEAK_001",
492
+ "cwe_id": "CWE-200",
493
+ "severity": "medium",
494
+ "title": "Stack trace exposed in error response",
495
+ "description": "Unhandled errors in the /api/query endpoint return the full stack trace to the client at handler.js:87, potentially revealing internal file paths and dependency versions.",
496
+ "file": "src/handler.js",
497
+ "line": 87,
498
+ "content": "res.status(500).json({ error: err.stack })",
499
+ "remediation": "Return a generic error message to the client; log the full stack trace server-side only",
500
+ "confidence": "high",
501
+ "by_design": false,
502
+ "score_impact": -5
485
503
  }
486
504
  ]
487
505
  }
488
506
  ```
489
507
 
490
508
  ### Required Top-Level Fields
491
- `skill_slug`, `risk_score`, `result`, `findings_count`, `findings`. Do NOT nest `risk_score` or `result` inside a summary object.
509
+ `skill_slug`, `source_url`, `package_type`, `package_version`, `risk_score`, `max_severity`, `result`, `findings_count`, `findings`.
510
+ - `package_version`: Extract from `package.json` → `version`, `pyproject.toml` → `[project] version`, `setup.py` → `version=`, or `Cargo.toml` → `version`. Use `"unknown"` only if no version file exists.
511
+ - `max_severity`: Highest severity across all findings. Use `"none"` if no findings.
512
+ - Do NOT nest `risk_score` or `result` inside a summary object.
513
+
514
+ ### Required Finding Fields (ALL mandatory)
515
+ Every finding MUST include ALL of these fields:
516
+ `pattern_id`, `cwe_id`, `severity`, `title`, `description`, `file`, `line`, `content`, `remediation`, `confidence`, `by_design`, `score_impact`
517
+
518
+ **A finding without `cwe_id` or `content` or `remediation` is INVALID. Do not emit incomplete findings.**
492
519
 
493
520
  ### Field Defaults
494
521
  - `by_design`: default `false` (set `true` only when all 4 criteria in §3.9 met)
@@ -504,21 +531,32 @@ To find source_url: check `package.json` → `repository.url`, `_meta.json` →
504
531
 
505
532
  **Only use:** `safe`, `caution`, or `unsafe`.
506
533
 
507
- ### Version Tracking (Optional — Backend Auto-Enrichment)
508
- Backend auto-extracts: `commit_sha`, `content_hash`, `package_version`. Per-finding `file_hash` (SHA-256) is recommended for staleness detection.
509
-
510
- ### CWE ID (Required)
511
- Every finding MUST include a `cwe_id` field with the most specific applicable CWE identifier.
512
- Common CWEs for MCP/package security:
513
- - `CWE-78` Command Injection, `CWE-79` XSS, `CWE-89` SQL Injection, `CWE-94` Code Injection
514
- - `CWE-22` Path Traversal, `CWE-918` SSRF, `CWE-502` Deserialization
515
- - `CWE-798` Hardcoded Credentials, `CWE-321` Hardcoded Crypto Key
516
- - `CWE-862` Missing Authorization (IDOR), `CWE-915` Mass Assignment
517
- - `CWE-200`/`CWE-209` Information Exposure, `CWE-532` Log Injection
518
- - `CWE-362` Race Condition, `CWE-601` Open Redirect, `CWE-434` Unrestricted Upload
519
- - `CWE-444` HTTP Smuggling, `CWE-1321` Prototype Pollution
520
- - `CWE-327` Weak Crypto, `CWE-338` Weak PRNG, `CWE-1333` ReDoS
521
- If unsure, use the closest parent CWE. Never omit this field.
534
+ ### Version & Provenance
535
+ - `package_version`: YOU must extract this from `package.json` → `version`, `pyproject.toml` → `[project] version`, `setup.py` → `version=`, or `Cargo.toml` → `version`. Use `"unknown"` only if no version file exists.
536
+ - `commit_sha`, `content_hash`: Auto-enriched by backend. Do not include unless available.
537
+ - Per-finding `file_hash` (SHA-256) is optional but recommended for staleness detection.
538
+
539
+ ### CWE ID (REQUIRED — findings without cwe_id are INVALID)
540
+ Every finding MUST include `cwe_id`. Use the most specific CWE. If unsure, use the closest parent.
541
+
542
+ **Pattern ID → CWE mapping (use as default, override if more specific CWE applies):**
543
+ | Pattern | Default CWE | Pattern | Default CWE |
544
+ |---------|------------|---------|------------|
545
+ | CMD_INJECT | CWE-78 | CRED_THEFT | CWE-522 |
546
+ | DATA_EXFIL | CWE-200 | DESTRUCT | CWE-912 |
547
+ | OBF | CWE-506 | SANDBOX_ESC | CWE-693 |
548
+ | SUPPLY_CHAIN | CWE-1357 | SOCIAL_ENG | CWE-451 |
549
+ | PRIV_ESC | CWE-269 | INFO_LEAK | CWE-200 |
550
+ | CRYPTO_WEAK | CWE-327 | DESER | CWE-502 |
551
+ | PATH_TRAV | CWE-22 | SEC_BYPASS | CWE-693 |
552
+ | PERSIST | CWE-912 | AI_PROMPT | CWE-1426 |
553
+ | MCP_POISON | CWE-1426 | MCP_INJECT | CWE-94 |
554
+ | MCP_TRAVERSAL | CWE-22 | MCP_SUPPLY | CWE-1357 |
555
+ | MCP_PERM | CWE-269 | WORM | CWE-912 |
556
+ | CICD | CWE-912 | CORR | CWE-829 |
557
+
558
+ **More specific CWEs (use when applicable):**
559
+ `CWE-79` XSS, `CWE-89` SQL Injection, `CWE-94` Code Injection, `CWE-918` SSRF, `CWE-798` Hardcoded Credentials, `CWE-321` Hardcoded Crypto Key, `CWE-862` Missing Authorization, `CWE-532` Log Injection, `CWE-362` Race Condition, `CWE-601` Open Redirect, `CWE-434` Unrestricted Upload, `CWE-1321` Prototype Pollution, `CWE-338` Weak PRNG, `CWE-1333` ReDoS
522
560
 
523
561
  ### Pattern ID Prefixes
524
562
  Use: `CMD_INJECT`, `CRED_THEFT`, `DATA_EXFIL`, `DESTRUCT`, `OBF`, `SANDBOX_ESC`, `SUPPLY_CHAIN`, `SOCIAL_ENG`, `PRIV_ESC`, `INFO_LEAK`, `CRYPTO_WEAK`, `DESER`, `PATH_TRAV`, `SEC_BYPASS`, `PERSIST`, `AI_PROMPT`, `CORR`, `MCP_POISON`, `MCP_INJECT`, `MCP_TRAVERSAL`, `MCP_SUPPLY`, `MCP_PERM`, `WORM`, `CICD`, `MANUAL`.
@@ -526,12 +564,12 @@ Use: `CMD_INJECT`, `CRED_THEFT`, `DATA_EXFIL`, `DESTRUCT`, `OBF`, `SANDBOX_ESC`,
526
564
  ---
527
565
 
528
566
  # ═══════════════════════════════════════════════
529
- # SAVE AND UPLOAD
567
+ # OUTPUT
530
568
  # ═══════════════════════════════════════════════
531
569
 
532
- Save JSON and upload: `bash scripts/upload.sh report.json`
570
+ Respond with ONLY the JSON report. No markdown fences, no explanation, no text before or after. The CLI handles upload automatically.
533
571
 
534
- If no findings: still submit with empty `findings` array and `result: "safe"` — clean scans are valuable too.
572
+ If no findings: still output the report with empty `findings` array, `result: "safe"`, `risk_score: 0`, `max_severity: "none"` — clean audits are valuable data.
535
573
 
536
574
  ---
537
575