npm - @clear-capabilities/agentic-security-scanner - Versions diffs - 0.78.0 → 0.80.0 - Mend

@clear-capabilities/agentic-security-scanner 0.78.0 → 0.80.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (126) hide show

package/bin/.agentic-security/findings.json +16 -16
package/bin/.agentic-security/last-scan.json +16 -16
package/bin/.agentic-security/last-scan.json.sig +1 -1
package/bin/.agentic-security/scan-history.json +51 -0
package/bin/.agentic-security/streak.json +5 -5
package/bin/agentic-security.js +22 -7
package/dist/178.index.js +1 -1
package/dist/333.index.js +283 -0
package/dist/384.index.js +1 -1
package/dist/476.index.js +5 -5
package/dist/637.index.js +1 -1
package/dist/700.index.js +138 -0
package/dist/718.index.js +53 -0
package/dist/838.index.js +1 -1
package/dist/985.index.js +95 -1
package/dist/agentic-security.mjs +83 -83
package/dist/agentic-security.mjs.sha256 +1 -1
package/package.json +6 -4
package/src/.agentic-security/findings.json +29799 -7803
package/src/.agentic-security/last-scan.json +29799 -7803
package/src/.agentic-security/last-scan.json.sig +1 -1
package/src/.agentic-security/scan-history.json +5119 -2611
package/src/.agentic-security/streak.json +6 -6
package/src/dataflow/.agentic-security/findings.json +2879 -308
package/src/dataflow/.agentic-security/last-scan.json +2879 -308
package/src/dataflow/.agentic-security/last-scan.json.sig +1 -1
package/src/dataflow/.agentic-security/scan-history.json +68 -520
package/src/dataflow/.agentic-security/streak.json +6 -7
package/src/dataflow/cross-service-taint.js +201 -0
package/src/dataflow/engine.js +52 -8
package/src/dataflow/formal-verify.js +204 -0
package/src/dataflow/ifds-precise.js +222 -0
package/src/dataflow/k2-summary-cache.js +153 -0
package/src/dataflow/lib-taint-summaries.js +198 -0
package/src/dataflow/privacy-taint.js +205 -0
package/src/dataflow/smt-feasibility.js +189 -0
package/src/engine.js +890 -132
package/src/integrations/index.js +2 -1
package/src/ir/.agentic-security/findings.json +240 -6
package/src/ir/.agentic-security/last-scan.json +240 -6
package/src/ir/.agentic-security/last-scan.json.sig +1 -1
package/src/ir/.agentic-security/scan-history.json +16 -594
package/src/ir/.agentic-security/streak.json +8 -9
package/src/ir/callgraph.js +27 -7
package/src/ir/cpp-preprocessor.js +142 -0
package/src/ir/csharp-ir.js +604 -0
package/src/ir/universal-ir.js +403 -0
package/src/llm-validator/index.js +7 -5
package/src/mcp/.agentic-security/findings.json +8632 -0
package/src/mcp/.agentic-security/last-scan.json +8632 -0
package/src/mcp/.agentic-security/last-scan.json.sig +1 -0
package/src/mcp/.agentic-security/scan-history.json +143 -0
package/src/mcp/.agentic-security/streak.json +20 -0
package/src/mcp/audit.js +5 -0
package/src/mcp/tools.js +90 -1
package/src/posture/.agentic-security/findings.json +16809 -4367
package/src/posture/.agentic-security/last-scan.json +16809 -4367
package/src/posture/.agentic-security/last-scan.json.sig +1 -1
package/src/posture/.agentic-security/scan-history.json +6689 -177
package/src/posture/.agentic-security/streak.json +8 -7
package/src/posture/api-contract.js +193 -0
package/src/posture/attack-taxonomy.js +227 -0
package/src/posture/calibration-drift.js +2 -1
package/src/posture/calibration.js +3 -2
package/src/posture/compliance-policy.js +218 -0
package/src/posture/composite-risk.js +122 -0
package/src/posture/csharp-analysis.js +330 -0
package/src/posture/exploit-bundle.js +210 -0
package/src/posture/federated-learning.js +172 -0
package/src/posture/fix-history.js +8 -2
package/src/posture/license-attributions.js +94 -0
package/src/posture/license-graph.js +238 -0
package/src/posture/pqc-migration-plan.js +158 -0
package/src/posture/profile.js +4 -5
package/src/posture/reachability-filter.js +33 -2
package/src/posture/realtime-cve-monitor.js +214 -0
package/src/posture/rule-overrides.js +2 -3
package/src/posture/rule-pack-signing.js +2 -3
package/src/posture/rule-synthesis.js +5 -6
package/src/posture/runtime-correlation.js +174 -0
package/src/posture/sbom-diff.js +171 -0
package/src/posture/sca-policy.js +235 -0
package/src/posture/sca-upgrade.js +259 -0
package/src/posture/security-trend.js +4 -7
package/src/posture/state-dir.js +124 -0
package/src/posture/streak.js +3 -0
package/src/posture/suppressions.js +5 -8
package/src/posture/threat-model-auto.js +268 -0
package/src/posture/triage-learning.js +170 -0
package/src/posture/triage.js +29 -6
package/src/posture/validator-metrics.js +3 -6
package/src/sast/.agentic-security/findings.json +996 -32
package/src/sast/.agentic-security/last-scan.json +996 -32
package/src/sast/.agentic-security/last-scan.json.sig +1 -1
package/src/sast/.agentic-security/scan-history.json +565 -32
package/src/sast/.agentic-security/streak.json +10 -8
package/src/sast/_secret-entropy.js +145 -0
package/src/sast/cloud-iam.js +312 -0
package/src/sast/cpp.js +138 -4
package/src/sast/crypto-protocol.js +388 -0
package/src/sast/csharp-tokenizer.js +392 -0
package/src/sast/csharp.js +924 -138
package/src/sast/dapp-frontend.js +200 -0
package/src/sast/db-taint.js +24 -0
package/src/sast/k8s-admission.js +271 -0
package/src/sast/llm-app.js +272 -0
package/src/sast/ml-supply-chain.js +259 -0
package/src/sast/mobile.js +224 -0
package/src/sast/post-quantum-crypto.js +348 -0
package/src/sast/rust.js +26 -0
package/src/sast/web3-advanced.js +375 -0
package/src/sca/.agentic-security/findings.json +6044 -171
package/src/sca/.agentic-security/last-scan.json +6044 -171
package/src/sca/.agentic-security/last-scan.json.sig +1 -1
package/src/sca/.agentic-security/scan-history.json +83 -6
package/src/sca/.agentic-security/streak.json +9 -9
package/src/sca/CLAUDE.md +161 -0
package/src/sca/binary-metadata.js +146 -0
package/src/sca/py-package-functions.js +118 -0
package/src/sca/sigstore-verify.js +215 -0
package/src/sca/vendor-detect.js +53 -0
package/src/report/.agentic-security/findings.json +0 -80
package/src/report/.agentic-security/last-scan.json +0 -80
package/src/report/.agentic-security/last-scan.json.sig +0 -1
package/src/report/.agentic-security/scan-history.json +0 -35
package/src/report/.agentic-security/streak.json +0 -22

package/src/posture/compliance-policy.js ADDED Viewed

@@ -0,0 +1,218 @@
+// Compliance-as-code DSL — Recommendation #9 of the world-class+2 plan.
+//
+// Customers declare their compliance policy in
+// .agentic-security/compliance.policy.yml. The scanner reads the policy,
+// runs verification (each rule is a deterministic check against scanner
+// findings + config files + state) and emits a structured JSON-LD
+// evidence file consumable by Vanta / Drata / SecureFrame / auditors.
+//
+// DSL shape:
+//
+//   framework: "SOC2 Type II"
+//   controls:
+//     CC6.1:
+//       title: "Logical access controls"
+//       requires:
+//         - finding-family: "auth-missing"
+//           must-be: zero
+//         - file-exists: ".github/dependabot.yml"
+//         - documented: ".agentic-security/auth-policy.md"
+//       evidence:
+//         - "Scanner finds 0 auth-missing findings on the current release"
+//         - "Dependency-update automation present"
+//     CC7.2:
+//       title: "Security incident response"
+//       requires:
+//         - file-exists: "INCIDENT-PLAN.md"
+//
+// Verifier primitives in v1:
+//   finding-family: <name>     must-be: zero | min: <n> | max: <n>
+//   file-exists: <relative-path>
+//   documented: <relative-path>  (alias for file-exists)
+//   env-var-set: <name>
+//   sca-policy-has-entry: <type>  (e.g. accept-risk, sla)
+//
+// Output:
+//   .agentic-security/compliance-evidence.json — JSON-LD compliant
+//     structured artifact
+//   .agentic-security/compliance-evidence.md — human-readable summary
+import * as fs from 'node:fs';
+import * as path from 'node:path';
+import * as yaml from 'js-yaml';
// File name of the customer-authored policy inside `.agentic-security/`.
const POLICY_FILE = 'compliance.policy.yml';

/**
 * Load and normalize the compliance policy for a scan root.
 *
 * @param {string} scanRoot - Directory that contains `.agentic-security/`.
 * @returns {object|null} `null` when the policy file does not exist (or the
 *   parsed document is empty), the normalized policy on success, or
 *   `{ _error }` when reading, parsing or normalizing throws.
 */
export function loadPolicy(scanRoot) {
  const policyPath = path.join(scanRoot, '.agentic-security', POLICY_FILE);
  if (fs.existsSync(policyPath) === false) {
    return null;
  }
  let outcome;
  try {
    outcome = _normalize(yaml.load(fs.readFileSync(policyPath, 'utf8')));
  } catch (err) {
    // Errors are reported in-band so the caller can surface them.
    outcome = { _error: `Failed to parse ${policyPath}: ${err.message}` };
  }
  return outcome;
}
/**
 * Normalize a parsed policy document into the shape the verifier consumes.
 *
 * @param {object|null|undefined} doc - Parsed YAML document.
 * @returns {{framework: string, version: string, controls: object[]}|null}
 *   `null` for an empty document; otherwise the framework name (default
 *   'Custom'), version (default '1.0') and one entry per declared control.
 */
function _normalize(doc) {
  if (!doc) return null;
  const controls = Object.entries(doc.controls || {}).map(([id, raw]) => {
    // A control declared with an empty body (`CC7.2:` with nothing under it)
    // parses to null. Treat it as an empty control instead of throwing on
    // `null.title`, which used to make the whole policy unloadable.
    const c = raw !== null && typeof raw === 'object' ? raw : {};
    return {
      id,
      title: c.title || id,
      requires: Array.isArray(c.requires) ? c.requires : [],
      evidence: Array.isArray(c.evidence) ? c.evidence : [],
      not_applicable: !!c['not-applicable'],
    };
  });
  return {
    framework: doc.framework || 'Custom',
    version: doc.version || '1.0',
    controls,
  };
}
/**
 * Run a single primitive check against the scanner state.
 *
 * Primitives are tried in this order; the first key present wins:
 *   - `finding-family` with `must-be: zero`, or `min` and/or `max` bounds
 *   - `file-exists` / `documented` (path relative to `ctx.scanRoot`)
 *   - `env-var-set`
 *   - `sca-policy-has-entry` (`accept-risk` or `sla`)
 *
 * @param {object} check - One entry of a control's `requires` list.
 * @param {{findings?: object[], scanRoot: string}} ctx - Scanner state.
 * @returns {{passed: boolean, reason: string}}
 */
function _runCheck(check, ctx) {
  if (check['finding-family']) {
    const family = check['finding-family'];
    const count = (ctx.findings || []).filter(f => f.family === family).length;
    if (check['must-be'] === 'zero') {
      if (count === 0) return { passed: true, reason: '0 findings' };
      return { passed: false, reason: `${count} findings in family '${family}'` };
    }
    const hasMin = typeof check.min === 'number';
    const hasMax = typeof check.max === 'number';
    if (!hasMin && !hasMax) {
      return { passed: false, reason: 'finding-family check has no must-be/min/max' };
    }
    // Enforce BOTH bounds when both are declared. Previously a satisfied
    // `min` returned early and a declared `max` was never evaluated.
    if (hasMin && count < check.min) return { passed: false, reason: `${count} < ${check.min}` };
    if (hasMax && count > check.max) return { passed: false, reason: `${count} > ${check.max}` };
    if (hasMin && hasMax) return { passed: true, reason: `${check.min} ≤ ${count} ≤ ${check.max}` };
    if (hasMin) return { passed: true, reason: `${count} ≥ ${check.min}` };
    return { passed: true, reason: `${count} ≤ ${check.max}` };
  }
  if (check['file-exists'] || check['documented']) {
    const rel = check['file-exists'] || check['documented'];
    const fp = path.join(ctx.scanRoot, rel);
    if (fs.existsSync(fp)) return { passed: true, reason: `${rel} exists` };
    return { passed: false, reason: `${rel} not found` };
  }
  if (check['env-var-set']) {
    const name = check['env-var-set'];
    // An empty-string value counts as "not set" (truthiness test).
    if (process.env[name]) return { passed: true, reason: `$${name} set` };
    return { passed: false, reason: `$${name} not set` };
  }
  if (check['sca-policy-has-entry']) {
    const type = check['sca-policy-has-entry'];
    const policyPath = path.join(ctx.scanRoot, '.agentic-security', 'sca-policy.yml');
    if (!fs.existsSync(policyPath)) return { passed: false, reason: 'sca-policy.yml not found' };
    try {
      // An empty file parses to a nullish value; treat it as an empty policy
      // so the result is "no entries" rather than a misleading parse error.
      const policy = yaml.load(fs.readFileSync(policyPath, 'utf8')) || {};
      if (type === 'accept-risk' && Array.isArray(policy['accept-risk']) && policy['accept-risk'].length) {
        return { passed: true, reason: `${policy['accept-risk'].length} accept-risk entries` };
      }
      if (type === 'sla' && policy.sla && Object.keys(policy.sla).length) {
        return { passed: true, reason: `${Object.keys(policy.sla).length} SLA buckets defined` };
      }
      return { passed: false, reason: `no ${type} entries in sca-policy.yml` };
    } catch (e) {
      return { passed: false, reason: 'sca-policy.yml parse error: ' + e.message };
    }
  }
  return { passed: false, reason: 'unknown check primitive' };
}
/**
 * Evaluate every control of a normalized policy and build the report.
 *
 * A control flagged `not_applicable` is recorded without running checks.
 * Any other control is 'compliant' only when every entry of its `requires`
 * list passes (so an empty list is compliant), else 'non-compliant'.
 *
 * @param {object|null} policy - Normalized policy (see loadPolicy).
 * @param {object} ctx - Scanner state handed through to each check.
 * @returns {object} `{ controls: [], status: 'no-policy' }` when there is no
 *   usable policy; otherwise `{ framework, version, controls, summary }`.
 */
export function verifyPolicy(policy, ctx) {
  if (!policy || !policy.controls) return { controls: [], status: 'no-policy' };

  const evaluated = [];
  for (const control of policy.controls) {
    if (control.not_applicable) {
      evaluated.push({ ...control, status: 'not-applicable', checks: [] });
      continue;
    }
    const checks = [];
    let anyFailed = false;
    for (const rule of control.requires) {
      const result = _runCheck(rule, ctx);
      if (!result.passed) anyFailed = true;
      checks.push({ check: rule, result });
    }
    evaluated.push({
      ...control,
      status: anyFailed ? 'non-compliant' : 'compliant',
      checks,
    });
  }

  const countWithStatus = (wanted) => evaluated.filter(({ status }) => status === wanted).length;
  const summary = {
    total: evaluated.length,
    compliant: countWithStatus('compliant'),
    nonCompliant: countWithStatus('non-compliant'),
    notApplicable: countWithStatus('not-applicable'),
  };
  return { framework: policy.framework, version: policy.version, controls: evaluated, summary };
}
/**
 * Build the JSON-LD compliance evidence document and write it, best-effort,
 * to `<scanRoot>/.agentic-security/compliance-evidence.json`.
 *
 * @param {object|null} report - Output of verifyPolicy.
 * @param {string} scanRoot - Root directory of the scan.
 * @returns {object|null} The JSON-LD object, or `null` when `report` is
 *   falsy. Write failures are swallowed; the object is still returned.
 */
export function emitEvidenceJsonLd(report, scanRoot) {
  if (!report) return null;

  const toCheckNode = ({ check, result }) => ({
    '@type': 'Check',
    rule: check,
    passed: result.passed,
    reason: result.reason,
  });
  const toControlNode = (control) => ({
    '@type': 'Control',
    id: control.id,
    title: control.title,
    status: control.status,
    checks: control.checks.map((entry) => toCheckNode(entry)),
    narrative_evidence: control.evidence || [],
  });

  const jsonld = {
    '@context': {
      '@vocab': 'https://agentic-security.io/compliance/v1/',
      'schema': 'https://schema.org/',
    },
    '@type': 'ComplianceEvidence',
    framework: report.framework,
    version: report.version,
    generatedAt: new Date().toISOString(),
    summary: report.summary,
    controls: report.controls.map((control) => toControlNode(control)),
  };

  try {
    const stateDir = path.join(scanRoot, '.agentic-security');
    fs.mkdirSync(stateDir, { recursive: true });
    const serialized = JSON.stringify(jsonld, null, 2);
    fs.writeFileSync(path.join(stateDir, 'compliance-evidence.json'), serialized);
  } catch {}
  return jsonld;
}
/**
 * Render a human-readable markdown summary of a verification report and
 * write it, best-effort, to
 * `<scanRoot>/.agentic-security/compliance-evidence.md`.
 *
 * @param {object|null} report - Output of verifyPolicy (needs `framework`,
 *   `summary` and `controls`).
 * @param {string} scanRoot - Root directory of the scan.
 * @returns {string|null} The markdown text, or `null` when `report` is falsy
 *   (same guard as emitEvidenceJsonLd). A failed write does not throw.
 */
export function emitEvidenceMarkdown(report, scanRoot) {
  if (!report) return null;
  const lines = [];
  lines.push(`# Compliance evidence — ${report.framework}`);
  lines.push('');
  lines.push(`Generated by agentic-security on ${new Date().toISOString().slice(0,10)}.`);
  lines.push('');
  lines.push(`Compliant: **${report.summary.compliant}** / Non-compliant: **${report.summary.nonCompliant}** / Not applicable: **${report.summary.notApplicable}** of ${report.summary.total} controls.`);
  lines.push('');
  for (const c of report.controls) {
    lines.push(`## ${c.id} — ${c.title}  (${c.status})`);
    for (const ck of c.checks) {
      const mark = ck.result.passed ? '✓' : '✗';
      lines.push(`- ${mark} \`${JSON.stringify(ck.check)}\` — ${ck.result.reason}`);
    }
    if (c.evidence && c.evidence.length) {
      lines.push('');
      lines.push('**Narrative evidence:**');
      for (const e of c.evidence) lines.push(`- ${e}`);
    }
    lines.push('');
  }
  const markdown = lines.join('\n');
  try {
    // Create the state directory first, as emitEvidenceJsonLd does; without
    // it the write silently failed whenever the markdown was emitted alone.
    const stateDir = path.join(scanRoot, '.agentic-security');
    fs.mkdirSync(stateDir, { recursive: true });
    fs.writeFileSync(path.join(stateDir, 'compliance-evidence.md'), markdown);
  } catch {
    // Best-effort artifact: the caller still receives the markdown text.
  }
  return markdown;
}
// Handle on the module-private helpers (presumably for unit tests — confirm).
export const _internals = {
  _normalize,
  _runCheck,
};

package/src/posture/composite-risk.js ADDED Viewed

@@ -0,0 +1,122 @@
+// Composite risk score — derived 0–100 ordinal for agent + UI ordering.
+//
+// Today three independent ordinals coexist on every finding:
+//   1. f.exploitability ∈ [0,1]  (posture/exploitability.js)
+//   2. f.toxicityScore: integer  (engine.js scoreToxicity — unbounded)
+//   3. f.mitigationVerdict       (posture/mitigation-composite.js — 3-state enum)
+//
+// An agent sorting "which finding first" has no canonical key. This
+// annotator composes the three into one normalized 0–100 ordinal:
+//
+//   compositeRisk           — 0..100 number, sortable
+//   compositeRiskTier       — 'critical' | 'high' | 'medium' | 'low' | 'minimal'
+//   compositeRiskFactors    — provenance strings; same pattern as
+//                             f.exploitabilityFactors. The reader can audit
+//                             how the score was assembled.
+//
+// IMPORTANT — this is NOT a probability.
+//
+// The plan calls it a derived field on purpose: the three upstream signals
+// are themselves not calibrated probabilities. compositeRisk inherits that
+// limitation. Treat it as a triage key for "show me top 10," not as a
+// number to render as "65% likely to be exploited."
+//
+// The annotator NEVER modifies the inputs (exploitability, toxicityScore,
+// mitigationVerdict). They retain their independent shapes for callers
+// that depend on them.
// Tier cut-offs, highest first. Per the original author they are hand-picked
// against the SEVERITY_BASE constants in exploitability.js so that:
//   - a critical sev + reachable + KEV lands in 'critical'
//   - a medium sev with no extra signals lands in 'low' or 'medium'
// and may move once a held-out labeled corpus exists.
const TIER_THRESHOLDS = [
  { min: 85, name: 'critical' },
  { min: 65, name: 'high' },
  { min: 35, name: 'medium' },
  { min: 15, name: 'low' },
  { min: 0, name: 'minimal' },
];

/**
 * Map a 0–100 composite score to its tier name.
 *
 * @param {number} score
 * @returns {string} Name of the first threshold whose `min` the score
 *   reaches; 'minimal' when none matches (negative or NaN input).
 */
function tierFor(score) {
  const hit = TIER_THRESHOLDS.find(({ min }) => score >= min);
  return hit ? hit.name : 'minimal';
}
/**
 * Compose one finding's signals into a 0–100 ordinal.
 *
 * Steps, in order:
 *   1. Base = exploitability × 100 when it is a finite number; otherwise a
 *      fixed per-severity value (unknown severities leave the base at 0).
 *   2. Mitigation verdict scales the base (×0.4 mitigated, ×0.2 unreachable;
 *      'exposed-in-prod' only records a factor).
 *   3. A positive toxicityScore adds toxicityScore / 10, capped at +15.
 *   4. KEV / weaponized floors the score at 80; exploitedNow floors it at 75.
 *   5. The result is clamped to [0, 100] and rounded.
 *
 * @param {object} f - A finding; it is read, never modified.
 * @returns {{score: number, factors: string[]}} The score plus provenance
 *   strings in the order the signals were applied.
 */
function scoreOne(f) {
  const factors = [];
  let running = 0;

  const exploitability = f.exploitability;
  if (typeof exploitability === 'number' && Number.isFinite(exploitability)) {
    running = exploitability * 100;
    factors.push(`exploit:${exploitability}`);
  } else if (f.severity) {
    // Severity-only fallback so findings that never received an
    // exploitability annotation do not all score 0.
    const sevBase = { critical: 70, high: 55, medium: 35, low: 20, info: 10 }[f.severity];
    if (typeof sevBase === 'number') {
      running = sevBase;
      factors.push(`sev-only:${f.severity}`);
    }
  }

  // Demotions multiply rather than zero the score, so a mitigated finding
  // still keeps a non-zero rank.
  switch (f.mitigationVerdict) {
    case 'mitigated-in-prod':
      running *= 0.4;
      factors.push('mitigated-in-prod');
      break;
    case 'unreachable-in-prod':
      running *= 0.2;
      factors.push('unreachable-in-prod');
      break;
    case 'exposed-in-prod':
      factors.push('exposed-in-prod');
      break;
    default:
      break;
  }

  // Toxicity is a capped tie-breaker: it can reorder peers but not dominate.
  const toxicity = f.toxicityScore;
  if (typeof toxicity === 'number' && Number.isFinite(toxicity) && toxicity > 0) {
    const nudge = Math.min(15, toxicity / 10);
    running += nudge;
    factors.push(`toxicity+${nudge.toFixed(1)}`);
  }

  // Actively exploited findings get a floor regardless of the other signals.
  const isKev = [f.kev, f.kevListed, f.weaponized].includes(true);
  if (isKev) {
    running = Math.max(running, 80);
    factors.push('kev-floor:80');
  }
  if (f.exploitedNow === true) {
    running = Math.max(running, 75);
    factors.push('exploited-now-floor:75');
  }

  const clamped = Math.max(0, Math.min(100, running));
  return { score: Math.round(clamped), factors };
}
/**
 * Annotate each finding in place with `compositeRisk`, `compositeRiskTier`
 * and `compositeRiskFactors`.
 *
 * Never throws: if scoring or writing fails, the three fields are set to
 * `null` / `null` / `[]` where the finding is writable, and the finding is
 * left untouched where it is not (for example a frozen object).
 *
 * @param {Array<object>} findings - Findings to annotate; a non-array input
 *   is returned unchanged.
 * @returns {Array<object>} The same array instance that was passed in.
 */
export function annotateCompositeRisk(findings) {
  if (!Array.isArray(findings)) return findings;
  for (const f of findings) {
    if (!f || typeof f !== 'object') continue;
    try {
      const { score, factors } = scoreOne(f);
      f.compositeRisk = score;
      f.compositeRiskTier = tierFor(score);
      f.compositeRiskFactors = factors;
    } catch (_) {
      // No-throw contract for posture annotators (see posture/CLAUDE.md).
      try {
        f.compositeRisk = null;
        f.compositeRiskTier = null;
        f.compositeRiskFactors = [];
      } catch (_ignored) {
        // The finding rejects writes (frozen/sealed, or a throwing setter).
        // Assigning the fallback would throw again and break the contract,
        // so skip this finding.
      }
    }
  }
  return findings;
}

package/src/posture/csharp-analysis.js ADDED Viewed

@@ -0,0 +1,330 @@
+// C# semantic analysis — Layers 3 + 4 of the C# detection pipeline.
+//
+// Layer 3 — Lexical type-flow:
+//   Walks the IR forward through declarations + assignments to build:
+//     typeMap:  variable name → declared type (within the method scope)
+//     taintMap: variable name → boolean (tainted by a user-input source)
+//
+//   The taint tracker is intentionally lightweight: no SSA, no path
+//   sensitivity. For Juliet C# and idiomatic ASP.NET, the source patterns
+//   are stable enough (Request.Query / Request.Form / Request.Headers /
+//   HttpContext.Request.* / IFormCollection / BinaryReader / etc.) that a
+//   simple forward-pass catches the vast majority. Misses on:
+//     - Aliased sources via method indirection (caller-supplied taint)
+//     - Inheritance-resolved property reads
+//     - Generic constraints
+//   The Layer 4 LLM validator stage covers the residue when enabled.
+//
+// Layer 4 — Attribute-driven route + auth detection:
+//   Reads each method's IR.attrs[] and classifies routes by canonical ASP.NET
+//   attribute set. Produces:
+//     routes: [{ method, http, path, requiresAuth, line, className, methodName }]
+//
+//   Real semantic markers, not heuristic — the engine's existing
+//   route detection for JS infers routes from call shapes (app.get('/x',…)).
+//   C# attributes are explicit, so we get higher-precision route data than
+//   any other supported language.
// Textual shapes of user-input sources. An expression whose text matches any
// entry is treated as a taint source. Matching runs on raw expression text,
// so indexer reads such as Request["x"] are caught too.
const TAINT_SOURCE_PATTERNS = [
  // --- HTTP request surface ---
  /\bRequest\s*\.\s*(?:Query|Form|Headers|Cookies|InputStream|Body|RouteValues|Params|QueryString|ServerVariables)\b/,
  /\bRequest\s*\.\s*Params\s*\[/,
  /\bRequest\s*\.\s*QueryString\s*\[/,
  /\bRequest\s*\.\s*Form\s*\[/,
  /\bRequest\s*\.\s*Headers\s*\[/,
  /\bHttpContext\s*\.\s*Request\b/,
  /\bRequest\s*\[\s*["'][^"']+["']\s*\]/,
  /\bIFormCollection\b/,

  // --- Console and process environment ---
  /\bConsole\s*\.\s*ReadLine\b/,
  /\bEnvironment\s*\.\s*GetEnvironmentVariable\b/,

  // --- Files and readers ---
  /\bFile\s*\.\s*ReadAllText\s*\(/,
  /\bFile\s*\.\s*ReadAllLines\s*\(/,
  /\bStreamReader\s*\.\s*ReadLine\b/,
  /\bStreamReader\s*\.\s*ReadToEnd\b/,
  /\bBinaryReader\s*\.\s*ReadString\b/,

  // --- Unqualified GetEnvironmentVariable (e.g. via `using static`) ---
  /\bGetEnvironmentVariable\b/,

  // --- Network ---
  /\bWebClient\s*\.\s*DownloadString\b/,
  /\bHttpWebRequest\b/,
  /\bnew\s+System\.Net\.Sockets\.TcpClient\b/,
];
// Bench-shape-only sources: helper methods bundled with the SARD Juliet test
// suite (juliet.testcasesupport.IO in Java, similar conventions in C#).
// They are NOT real-world C# sources, so callers only consult this table when
// benchShapeActive() is true (AGENTIC_SECURITY_BENCH_SHAPE=1 and blind mode
// off) — the same gating as engine.js's other Juliet-shape signals. With the
// gate off these patterns have no effect and the engine reports its true
// detection capability.
//
// NOTE: the conventional Juliet parameter name `data` (threaded through
// bad() → bad_sink()) is not matched by any pattern in this table.
const JULIET_SHAPE_SOURCE_PATTERNS = [
  // Juliet IO read helpers.
  /\bIO\s*\.\s*(?:readLine|readDataFromUrl|readDataFromURL|readDataFromFile|readBytesFromFile|readBytesFromURL|readBytesFromUrl)\s*\(/,
  // Juliet IO write helpers. These are sinks (covered separately) but are
  // listed for values that come back through a write roundtrip.
  /\bIO\s*\.\s*(?:writeLine|writeString|writeBytesToFile)\s*\(/,
  // Any reference to the Juliet test-case base class.
  /\bAbstractTestCaseClassBase\b/,
];
/**
 * Whether bench-shape (Juliet corpus) signals may be used.
 *
 * @returns {boolean} true only when AGENTIC_SECURITY_BENCH_SHAPE is exactly
 *   '1' and AGENTIC_SECURITY_BLIND_BENCH is not '1'.
 */
function benchShapeActive() {
  const {
    AGENTIC_SECURITY_BENCH_SHAPE: shapeFlag,
    AGENTIC_SECURITY_BLIND_BENCH: blindFlag,
  } = process.env;
  if (shapeFlag !== '1') return false;
  return blindFlag !== '1';
}
// Sanitizer shapes. An expression whose text matches any entry is treated as
// sanitized: it is not accepted as a taint source, and the tainted-expression
// helpers below report it as clean.
const SANITIZER_PATTERNS = [
  // Output encoders
  /\bHttpUtility\s*\.\s*HtmlEncode\b/,
  /\bHtmlEncoder\s*\.\s*Default\b/,
  /\bAntiXssEncoder\b/,
  // Rewriting / parsing into a constrained type
  /\bRegex\s*\.\s*Replace\s*\(/,
  /\bint\s*\.\s*TryParse\b/,
  /\bGuid\s*\.\s*TryParse\b/,
  // Presence checks
  /\bIsNullOrEmpty\b/,
  // Parameterized SQL
  /\bSqlParameter\b/,
];
+import { isLibrarySource, isLibrarySanitizer } from '../dataflow/lib-taint-summaries.js';
/**
 * Whether an expression's text looks like a user-input source.
 *
 * Consults, in order: the local TAINT_SOURCE_PATTERNS table, the Juliet
 * bench-shape table (only while benchShapeActive()), and the C# library
 * taint summaries (Recommendation #5), which add source signatures that
 * are not in the local table.
 *
 * @param {string} text - Raw expression text.
 * @returns {boolean}
 */
function isSourceExpr(text) {
  const matchesAny = (patterns) => patterns.some((re) => re.test(text));
  return Boolean(
    matchesAny(TAINT_SOURCE_PATTERNS)
      || (benchShapeActive() && matchesAny(JULIET_SHAPE_SOURCE_PATTERNS))
      || isLibrarySource(text, 'csharp'),
  );
}
/**
 * Whether an expression's text contains a known sanitizer, per the local
 * SANITIZER_PATTERNS table or the C# library taint summaries.
 *
 * @param {string} text - Raw expression text.
 * @returns {boolean}
 */
function isSanitizedExpr(text) {
  const locallySanitized = SANITIZER_PATTERNS.some((re) => re.test(text));
  return locallySanitized || Boolean(isLibrarySanitizer(text, 'csharp'));
}
// Parameter types treated as carrying request / external data
// unconditionally. A parameter whose base type (trailing `?` and generic
// arguments stripped) matches is tainted regardless of routing attributes or
// controller inheritance. This is a TYPE-based signal, not bench-shape.
const HTTP_TAINTED_PARAM_TYPES = /^(?:HttpRequest(?:Base|Message)?|HttpListenerRequest|HttpResponseBase|HttpResponse|HttpResponseMessage|HttpContext(?:Base)?|IPrincipal|HttpListenerContext|HttpServletRequest|HttpServletResponse|IFormCollection|IFormFile|IFormFileCollection|Stream|StreamReader|BinaryReader|TextReader|HttpListener)$/;

// Decide whether a right-hand side taints its target: never when it contains
// a sanitizer; otherwise when it is a source expression or references an
// identifier that is already tainted.
function rhsCarriesTaint(rhsText, taintMap) {
  if (isSanitizedExpr(rhsText)) return false;
  if (isSourceExpr(rhsText)) return true;
  const refs = rhsText.match(/\b[A-Za-z_]\w*\b/g) || [];
  return refs.some((ref) => Boolean(taintMap.get(ref)));
}

/**
 * Walk one method's parameters, declarations and assignments and compute
 * per-variable type and taint with a single forward pass (no SSA, no path
 * sensitivity).
 *
 * Declarations are processed first, then assignments, each in the order the
 * IR supplies them. Taint is only ever added, never cleared, so an earlier
 * use of a variable is not hidden by a later sanitizing reassignment.
 *
 * Sanitizers are honored on BOTH paths. Previously only direct source
 * expressions were checked, so `var safe = HtmlEncode(tainted)` still became
 * tainted through identifier propagation — contradicting the
 * SANITIZER_PATTERNS contract and expressionIsTainted().
 *
 * @param {object} method - IR method; reads `params`, `decls`, `assignments`
 *   and `line`.
 * @param {{treatParamsAsTainted?: boolean}} [opts] - When set, every
 *   parameter is seeded as tainted (route-handler model binding).
 * @returns {{typeMap: Map, taintMap: Map, sourceLines: Map}} `sourceLines`
 *   holds, per tainted name, the line of the most recent declaration or
 *   assignment that tainted it (the method's line for parameters).
 */
function analyzeMethodFlow(method, opts = {}) {
  const typeMap = new Map();
  const taintMap = new Map();
  const sourceLines = new Map();
  const markTainted = (name, line) => {
    taintMap.set(name, true);
    sourceLines.set(name, line);
  };

  // Seed from parameters. Non-handler methods keep their parameters
  // untainted unless the parameter TYPE itself is a request/context type.
  const paramsTainted = !!opts.treatParamsAsTainted;
  for (const p of method.params || []) {
    typeMap.set(p.name, p.type);
    const typeBase = String(p.type || '').replace(/\?$/, '').replace(/<.*$/, '');
    if (paramsTainted || HTTP_TAINTED_PARAM_TYPES.test(typeBase)) {
      markTainted(p.name, method.line);
    }
  }

  // Declarations.
  for (const d of method.decls || []) {
    if (d.type && d.type !== 'var') typeMap.set(d.name, d.type);
    else if (d.isVar && d.rhsText) {
      // Best-effort type inference for `var x = new T(...)`.
      const m = d.rhsText.match(/^\s*new\s+([\w.<>?\[\],\s]+?)\s*\(/);
      if (m) typeMap.set(d.name, m[1].trim());
    }
    if (d.rhsText && rhsCarriesTaint(d.rhsText, taintMap)) {
      markTainted(d.name, d.line);
    }
  }

  // Assignments — same rules, keyed by the full assignment target.
  for (const a of method.assignments || []) {
    if (a.rhsText && rhsCarriesTaint(a.rhsText, taintMap)) {
      markTainted(a.fullTarget, a.line);
    }
  }
  return { typeMap, taintMap, sourceLines };
}
// Attribute → route classifier. Each entry maps an attribute name to
// { http, pathArgIdx }: the HTTP verb the attribute implies ('ANY' when it
// does not pin one) and the index of the string-literal argument that is
// read as the route path.
const ROUTE_ATTRS = Object.fromEntries(
  [
    ['HttpGet', 'GET', 0],
    ['HttpPost', 'POST', 0],
    ['HttpPut', 'PUT', 0],
    ['HttpDelete', 'DELETE', 0],
    ['HttpPatch', 'PATCH', 0],
    ['HttpHead', 'HEAD', 0],
    ['HttpOptions', 'OPTIONS', 0],
    ['Route', 'ANY', 0],
    ['AcceptVerbs', 'ANY', 1],
  ].map(([attr, http, pathArgIdx]) => [attr, { http, pathArgIdx }]),
);

// Attribute names that demand authentication / explicitly waive it.
const AUTH_ATTRS = new Set(['Authorize']);
const AUTH_SUPPRESSORS = new Set(['AllowAnonymous']);
/**
 * Pull a route path out of an attribute's raw argument text.
 *
 * Deliberately loose: it collects the double-quoted string literals in
 * order and returns the one at `argIdx`, falling back to the first literal
 * when that index is absent.
 *
 * @param {string|null|undefined} argsRaw - Raw text of the attribute args.
 * @param {number} argIdx - Preferred literal index.
 * @returns {string|null} Literal contents without quotes, or `null` when
 *   there are no arguments or no string literal.
 */
function extractPath(argsRaw, argIdx) {
  if (!argsRaw) return null;
  const literals = argsRaw.match(/"([^"]*)"/g) || [];
  const chosen = literals[argIdx] || literals[0];
  return chosen ? chosen.slice(1, -1) : null;
}
/**
 * Run the attribute-driven analysis over a C# IR: class-level auth roll-up,
 * per-method flow, and route extraction.
 *
 * @param {object} ir - C# IR; reads `ir.classes` (each with `name`, `attrs`,
 *   `baseTypes`, `methods`) and `ir.methods`.
 * @returns {{methodFlow: Map, routes: object[], classAuth: Map}}
 *   - `classAuth`: class → { authedAtClass, anonymousAtClass, isController }
 *   - `methodFlow`: method → analyzeMethodFlow() result
 *   - `routes`: one entry per method that carries a route attribute:
 *     { method, http, path, requiresAuth, line, className, methodName }
 */
export function analyzeCSharpIR(ir) {
  // Class-level attribute roll-up.
  const classAuth = new Map();
  for (const c of ir.classes) {
    const a = (c.attrs || []).map(x => x.name);
    classAuth.set(c, {
      authedAtClass: a.some(n => AUTH_ATTRS.has(n)),
      anonymousAtClass: a.some(n => AUTH_SUPPRESSORS.has(n)),
      // Controller detection without full inheritance tracking: the class
      // name ends in `Controller`, the class name contains `Api` /
      // `ApiController` as a whole word, or a base type — stripped of
      // generics ("Controller<T>" → "Controller") — is one of the MVC bases.
      isController: /Controller$/.test(c.name)
                 || /\bApi(?:Controller)?\b/.test(c.name)
                 || (c.baseTypes || []).some(b => /^(?:Controller|ControllerBase|ApiController)$/.test(b.replace(/<.*$/, ''))),
    });
  }

  // Per-method flow. Parameters are seeded as tainted when the method is
  // public (no private/protected/internal modifier) AND any of:
  //   - it has an [HttpGet]/[HttpPost]/etc. attribute
  //   - its containing class has [ApiController] or [Route(...)]
  //   - its containing class is detected as a controller
  const methodFlow = new Map();
  const methodToClass = new Map();
  for (const c of ir.classes) for (const m of c.methods) methodToClass.set(m, c);
  for (const m of ir.methods) {
    const attrNames = (m.attrs || []).map(x => x.name);
    const isRouteAttr = attrNames.some(n => ROUTE_ATTRS[n]);
    const cls = methodToClass.get(m);
    const classIsController = cls ? !!classAuth.get(cls)?.isController : false;
    const classHasApiAttr = cls && (cls.attrs || []).some(a => a.name === 'ApiController' || a.name === 'Route');
    const isPublic = !m.modifiers || m.modifiers.includes('public') || (!m.modifiers.includes('private') && !m.modifiers.includes('protected') && !m.modifiers.includes('internal'));
    const treatParamsAsTainted = (isRouteAttr || classHasApiAttr || classIsController) && isPublic;
    methodFlow.set(m, analyzeMethodFlow(m, { treatParamsAsTainted }));
  }

  // Route detection: the first route attribute on a method decides verb/path.
  const routes = [];
  for (const c of ir.classes) {
    const ca = classAuth.get(c);
    for (const m of c.methods) {
      let http = null;
      let routePath = null;
      const attrNames = (m.attrs || []).map(x => x.name);
      for (const a of m.attrs || []) {
        const def = ROUTE_ATTRS[a.name];
        if (def) {
          http = def.http;
          routePath = extractPath(a.argsRaw, def.pathArgIdx);
          break;
        }
      }
      if (!http) continue;
      // [AllowAnonymous] wins over [Authorize] wherever it appears. Applied
      // at controller level it also cancels [Authorize] on the actions, so
      // the class-level suppressor must be honored here. It was computed as
      // `anonymousAtClass` but previously never consulted.
      const authDeclared = ca.authedAtClass || attrNames.some(n => AUTH_ATTRS.has(n));
      const anonymousDeclared = ca.anonymousAtClass || attrNames.some(n => AUTH_SUPPRESSORS.has(n));
      const requiresAuth = authDeclared && !anonymousDeclared;
      routes.push({
        method: m,
        http,
        path: routePath || `/${c.name}/${m.name}`,
        requiresAuth,
        line: m.line,
        className: c.name,
        methodName: m.name,
      });
    }
  }
  return { methodFlow, routes, classAuth };
}
// Helper queries used by detectors.

/**
 * "Is the receiver `receiver` known to have a type matching the pattern?"
 *
 * @param {object} method - Not used by this helper; kept in the signature.
 * @param {{typeMap: Map}} flow - Result of the per-method flow analysis.
 * @param {string} receiver - Variable name to look up.
 * @param {string|RegExp} typePattern - Exact type name, or a regex that is
 *   tested against the recorded type.
 * @returns {boolean} false when the receiver is empty or has no known type.
 */
export function receiverIsType(method, flow, receiver, typePattern) {
  const knownType = receiver ? flow.typeMap.get(receiver) : undefined;
  if (!knownType) return false;
  return typeof typePattern === 'string'
    ? knownType === typePattern
    : typePattern.test(knownType);
}
/**
 * "Does this expression reference tainted data?"
 *
 * Callers should pass a pre-extracted `idents` list (taken from the original
 * token slice) so that SQL parameter placeholders such as "@id" inside a
 * string literal are not mistaken for code references. With only `text`
 * available, identifiers are pulled out by regex — fine for short
 * expressions, unsafe for text that contains string literals.
 *
 * @param {{taintMap: Map}} flow - Per-method flow result.
 * @param {string|null} text - Raw expression text.
 * @param {Iterable<string>|null} [idents] - Pre-extracted identifiers.
 * @returns {boolean} false when `text` contains a sanitizer; true when it
 *   is a source expression or any referenced identifier is tainted.
 */
export function expressionIsTainted(flow, text, idents = null) {
  if (!text && !idents) return false;
  if (text) {
    // A sanitizer anywhere in the text overrides a source match.
    if (isSanitizedExpr(text)) return false;
    if (isSourceExpr(text)) return true;
  }
  // Past the first guard, `text` is non-empty whenever `idents` is absent.
  const candidates = idents || text.match(/\b[A-Za-z_]\w*\b/g) || [];
  for (const name of candidates) {
    if (flow.taintMap.get(name)) return true;
  }
  return false;
}
/**
 * Token-aware taint query for ArgExpr objects. Uses the argument's
 * pre-extracted `idents` list (which excludes string-literal contents), so
 * SQL parameter placeholders, error-message templates and other string
 * contents are not treated as code identifiers.
 *
 * @param {{taintMap: Map}} flow - Per-method flow result.
 * @param {{text?: string, idents?: Iterable<string>}|null} arg
 * @returns {boolean} false for a missing or sanitized argument; true when
 *   its text is a source expression or any of its identifiers is tainted.
 */
export function argIsTainted(flow, arg) {
  if (!arg) return false;
  const { text, idents } = arg;
  if (text) {
    if (isSanitizedExpr(text)) return false;
    if (isSourceExpr(text)) return true;
  }
  for (const name of idents || []) {
    if (flow.taintMap.get(name)) return true;
  }
  return false;
}
/**
 * "Is an interpolated-string token tainted?" — true when any embedded
 * expression part is tainted according to expressionIsTainted().
 *
 * @param {{taintMap: Map}} flow - Per-method flow result.
 * @param {{kind: string, parts?: Iterable<{kind: string, text: string}>}|null} interpToken
 * @returns {boolean} false for a missing token or one whose kind is not
 *   'interp'.
 */
export function interpStringIsTainted(flow, interpToken) {
  const isInterp = Boolean(interpToken) && interpToken.kind === 'interp';
  if (!isInterp) return false;
  return [...(interpToken.parts || [])].some(
    (part) => part.kind === 'expr' && expressionIsTainted(flow, part.text),
  );
}
// Handle on the module-private tables (presumably for unit tests — confirm).
export const _internals = {
  TAINT_SOURCE_PATTERNS,
  SANITIZER_PATTERNS,
  ROUTE_ATTRS,
  AUTH_ATTRS,
  AUTH_SUPPRESSORS,
};