npm - @clear-capabilities/agentic-security-scanner - Versions diffs - 0.74.0 - Mend

@clear-capabilities/agentic-security-scanner 0.74.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (331) hide show

package/CHANGELOG.md +1580 -0
package/bin/.agentic-security/findings.json +1577 -0
package/bin/.agentic-security/last-scan.json +1577 -0
package/bin/.agentic-security/last-scan.json.sig +1 -0
package/bin/.agentic-security/scan-history.json +465 -0
package/bin/.agentic-security/streak.json +25 -0
package/bin/agentic-security-audit.js +198 -0
package/bin/agentic-security-consistency.js +80 -0
package/bin/agentic-security-diff.js +136 -0
package/bin/agentic-security-lsp.js +12 -0
package/bin/agentic-security-mcp.js +40 -0
package/bin/agentic-security-rule.js +153 -0
package/bin/agentic-security.js +1683 -0
package/dist/117.index.js +207 -0
package/dist/178.index.js +250 -0
package/dist/218.index.js +793 -0
package/dist/227.index.js +192 -0
package/dist/301.index.js +167 -0
package/dist/384.index.js +18 -0
package/dist/476.index.js +126 -0
package/dist/513.index.js +373 -0
package/dist/520.index.js +13 -0
package/dist/601.index.js +1038 -0
package/dist/634.index.js +1892 -0
package/dist/637.index.js +216 -0
package/dist/660.index.js +131 -0
package/dist/675.index.js +451 -0
package/dist/826.index.js +188 -0
package/dist/830.index.js +133 -0
package/dist/agentic-security.mjs +272 -0
package/dist/agentic-security.mjs.sha256 +1 -0
package/dist/calibration-seed.json +27 -0
package/package.json +77 -0
package/src/.agentic-security/findings.json +80844 -0
package/src/.agentic-security/last-scan.json +80844 -0
package/src/.agentic-security/last-scan.json.sig +1 -0
package/src/.agentic-security/scan-history.json +8408 -0
package/src/.agentic-security/streak.json +26 -0
package/src/badge.js +188 -0
package/src/compare.js +203 -0
package/src/dataflow/.agentic-security/findings.json +3487 -0
package/src/dataflow/.agentic-security/last-scan.json +3487 -0
package/src/dataflow/.agentic-security/last-scan.json.sig +1 -0
package/src/dataflow/.agentic-security/scan-history.json +735 -0
package/src/dataflow/.agentic-security/streak.json +24 -0
package/src/dataflow/CLAUDE.md +38 -0
package/src/dataflow/access-paths.js +172 -0
package/src/dataflow/async-sequencing.js +177 -0
package/src/dataflow/backward.js +201 -0
package/src/dataflow/catalog-expanded.js +485 -0
package/src/dataflow/catalog.js +659 -0
package/src/dataflow/cross-repo.js +219 -0
package/src/dataflow/engine.js +588 -0
package/src/dataflow/exception-flow.js +116 -0
package/src/dataflow/exploit-prover.js +187 -0
package/src/dataflow/higher-order.js +221 -0
package/src/dataflow/ifds.js +347 -0
package/src/dataflow/implicit-flow.js +129 -0
package/src/dataflow/incremental.js +229 -0
package/src/dataflow/index.js +181 -0
package/src/dataflow/numeric-domain.js +192 -0
package/src/dataflow/path-feasibility.js +114 -0
package/src/dataflow/points-to.js +337 -0
package/src/dataflow/polyglot.js +190 -0
package/src/dataflow/proven-clean.js +159 -0
package/src/dataflow/receiver-context.js +76 -0
package/src/dataflow/sanitizer-proof.js +154 -0
package/src/dataflow/soft-taint.js +140 -0
package/src/dataflow/string-domain.js +234 -0
package/src/dataflow/stub-aware-filter.js +100 -0
package/src/dataflow/summaries.js +132 -0
package/src/dataflow/symbolic-exec.js +238 -0
package/src/dataflow/tabulation.js +135 -0
package/src/engine.js +7763 -0
package/src/history-scan.js +229 -0
package/src/index.js +3 -0
package/src/integrations/.agentic-security/findings.json +1504 -0
package/src/integrations/.agentic-security/last-scan.json +1504 -0
package/src/integrations/.agentic-security/scan-history.json +40 -0
package/src/integrations/.agentic-security/streak.json +21 -0
package/src/integrations/index.js +321 -0
package/src/integrations/tickets.js +200 -0
package/src/ir/.agentic-security/findings.json +3036 -0
package/src/ir/.agentic-security/last-scan.json +3036 -0
package/src/ir/.agentic-security/last-scan.json.sig +1 -0
package/src/ir/.agentic-security/scan-history.json +364 -0
package/src/ir/.agentic-security/streak.json +23 -0
package/src/ir/CLAUDE.md +172 -0
package/src/ir/callgraph.js +73 -0
package/src/ir/class-hierarchy.js +195 -0
package/src/ir/index.js +152 -0
package/src/ir/parser-cs.js +260 -0
package/src/ir/parser-java.js +286 -0
package/src/ir/parser-js.js +413 -0
package/src/ir/parser-kt.js +258 -0
package/src/ir/parser-py-cst.js +136 -0
package/src/ir/parser-py.helper.py +501 -0
package/src/ir/parser-py.js +312 -0
package/src/ir/ssa.js +315 -0
package/src/ir/type-stubs.js +288 -0
package/src/leaderboard.js +152 -0
package/src/llm-validator/.agentic-security/findings.json +1891 -0
package/src/llm-validator/.agentic-security/last-scan.json +1891 -0
package/src/llm-validator/.agentic-security/last-scan.json.sig +1 -0
package/src/llm-validator/.agentic-security/scan-history.json +168 -0
package/src/llm-validator/.agentic-security/streak.json +20 -0
package/src/llm-validator/consistency.js +141 -0
package/src/llm-validator/index.js +437 -0
package/src/lsp/.agentic-security/findings.json +28 -0
package/src/lsp/.agentic-security/last-scan.json +28 -0
package/src/lsp/.agentic-security/scan-history.json +79 -0
package/src/lsp/.agentic-security/streak.json +22 -0
package/src/lsp/server.js +275 -0
package/src/mcp/.agentic-security/findings.json +8358 -0
package/src/mcp/.agentic-security/last-scan.json +8358 -0
package/src/mcp/.agentic-security/last-scan.json.sig +1 -0
package/src/mcp/.agentic-security/scan-history.json +1125 -0
package/src/mcp/.agentic-security/streak.json +22 -0
package/src/mcp/CLAUDE.md +54 -0
package/src/mcp/audit.js +136 -0
package/src/mcp/redact.js +75 -0
package/src/mcp/server.js +158 -0
package/src/mcp/stdio.js +83 -0
package/src/mcp/tools.js +940 -0
package/src/mcp/validate.js +49 -0
package/src/personality.js +164 -0
package/src/poc-video.js +239 -0
package/src/posture/.agentic-security/findings.json +51239 -0
package/src/posture/.agentic-security/last-scan.json +51239 -0
package/src/posture/.agentic-security/last-scan.json.sig +1 -0
package/src/posture/.agentic-security/scan-history.json +5557 -0
package/src/posture/.agentic-security/streak.json +24 -0
package/src/posture/CLAUDE.md +42 -0
package/src/posture/adversarial-self-test.js +114 -0
package/src/posture/adversary-agent.js +204 -0
package/src/posture/agents-memory.js +135 -0
package/src/posture/ai-code-fingerprint.js +171 -0
package/src/posture/aibom.js +284 -0
package/src/posture/api-inventory.js +96 -0
package/src/posture/attack-playbooks.js +305 -0
package/src/posture/auditor-agent.js +115 -0
package/src/posture/auth-posture-import.js +135 -0
package/src/posture/baseline-compare.js +114 -0
package/src/posture/blast-radius.js +836 -0
package/src/posture/bounty-prediction.js +141 -0
package/src/posture/business-logic.js +239 -0
package/src/posture/calibration-drift.js +93 -0
package/src/posture/calibration-seed.json +27 -0
package/src/posture/calibration.js +204 -0
package/src/posture/clustering.js +75 -0
package/src/posture/concurrency-checker.js +265 -0
package/src/posture/confidence.js +65 -0
package/src/posture/container-runtime.js +149 -0
package/src/posture/counterfactual.js +109 -0
package/src/posture/cross-lang-graphql.js +165 -0
package/src/posture/cross-lang-grpc.js +166 -0
package/src/posture/cross-lang-meta.js +101 -0
package/src/posture/cross-lang-openapi.js +187 -0
package/src/posture/cross-lang-orm.js +153 -0
package/src/posture/cross-lang-queues.js +210 -0
package/src/posture/crown-jewels.js +110 -0
package/src/posture/custom-rules.js +361 -0
package/src/posture/cve-alert-daemon.js +433 -0
package/src/posture/cve-lookup.js +129 -0
package/src/posture/dead-code.js +430 -0
package/src/posture/defender-agent.js +158 -0
package/src/posture/deploy-platform.js +204 -0
package/src/posture/detector-fuzz.js +61 -0
package/src/posture/deterministic.js +99 -0
package/src/posture/drift.js +165 -0
package/src/posture/epss.js +156 -0
package/src/posture/exploitability-probability.js +212 -0
package/src/posture/exploitability.js +121 -0
package/src/posture/feature-flags.js +110 -0
package/src/posture/finding-defaults.js +132 -0
package/src/posture/fix-history.js +411 -0
package/src/posture/fix-plan.js +121 -0
package/src/posture/fix-verify-loop.js +157 -0
package/src/posture/fix-verify.js +130 -0
package/src/posture/flow-narration.js +105 -0
package/src/posture/grader-calibration.js +156 -0
package/src/posture/harness-discovery.js +113 -0
package/src/posture/holdout-eval.js +144 -0
package/src/posture/iac-reachability.js +163 -0
package/src/posture/iam-policy.js +128 -0
package/src/posture/integrity.js +97 -0
package/src/posture/learning.js +166 -0
package/src/posture/license-policy.js +109 -0
package/src/posture/llm-redteam-prompts.js +418 -0
package/src/posture/llm-redteam.js +303 -0
package/src/posture/material-change.js +163 -0
package/src/posture/mitigation-composite.js +55 -0
package/src/posture/mttr.js +91 -0
package/src/posture/network-policy-import.js +126 -0
package/src/posture/path-predicates.js +99 -0
package/src/posture/persona-prioritization.js +153 -0
package/src/posture/poc-cwe-map.js +51 -0
package/src/posture/poc-generator.js +500 -0
package/src/posture/policy-gate.js +174 -0
package/src/posture/pre-incident-archaeology.js +110 -0
package/src/posture/profile.js +93 -0
package/src/posture/reachability-filter.js +42 -0
package/src/posture/regression-test-gen.js +200 -0
package/src/posture/reverse-blast-radius.js +110 -0
package/src/posture/router.js +109 -0
package/src/posture/rule-overrides.js +198 -0
package/src/posture/rule-pack-signing.js +209 -0
package/src/posture/rule-packs.js +143 -0
package/src/posture/rule-synthesis.js +108 -0
package/src/posture/ruleset-version.js +71 -0
package/src/posture/sbom.js +129 -0
package/src/posture/schema-aware-bridge.js +207 -0
package/src/posture/security-trend.js +87 -0
package/src/posture/semantic-clone.js +114 -0
package/src/posture/specification-mining.js +170 -0
package/src/posture/stable-id.js +75 -0
package/src/posture/stack-playbook.js +229 -0
package/src/posture/streak.js +249 -0
package/src/posture/suppressions.js +135 -0
package/src/posture/telemetry-ingest.js +112 -0
package/src/posture/threat-model.js +145 -0
package/src/posture/three-agent-pipeline.js +74 -0
package/src/posture/triage.js +146 -0
package/src/posture/trust-boundary-diagram.js +115 -0
package/src/posture/type-narrowing.js +129 -0
package/src/posture/validator-metrics.js +179 -0
package/src/posture/verifier-ephemeral.js +118 -0
package/src/posture/verifier-target.js +147 -0
package/src/posture/verifier.js +257 -0
package/src/posture/version.js +75 -0
package/src/posture/waf-ingest.js +200 -0
package/src/posture/why-fired.js +141 -0
package/src/pr-comment.js +172 -0
package/src/pr-delta.js +198 -0
package/src/report/.agentic-security/findings.json +79 -0
package/src/report/.agentic-security/last-scan.json +79 -0
package/src/report/.agentic-security/last-scan.json.sig +1 -0
package/src/report/.agentic-security/scan-history.json +332 -0
package/src/report/.agentic-security/streak.json +23 -0
package/src/report/index.js +1136 -0
package/src/report/mascot.js +42 -0
package/src/runScan.js +141 -0
package/src/sast/.agentic-security/findings.json +5051 -0
package/src/sast/.agentic-security/last-scan.json +5051 -0
package/src/sast/.agentic-security/last-scan.json.sig +1 -0
package/src/sast/.agentic-security/scan-history.json +788 -0
package/src/sast/.agentic-security/streak.json +23 -0
package/src/sast/CLAUDE.md +39 -0
package/src/sast/_comment-strip.js +46 -0
package/src/sast/agent-tool-escalation.js +131 -0
package/src/sast/auth-provider.js +171 -0
package/src/sast/authz.js +236 -0
package/src/sast/bench-shape/.agentic-security/findings.json +28 -0
package/src/sast/bench-shape/.agentic-security/last-scan.json +28 -0
package/src/sast/bench-shape/.agentic-security/scan-history.json +24 -0
package/src/sast/bench-shape/.agentic-security/streak.json +22 -0
package/src/sast/bench-shape/index.js +62 -0
package/src/sast/claude-hook-injection.js +199 -0
package/src/sast/claude-md-prompt-injection.js +170 -0
package/src/sast/claude-settings.js +165 -0
package/src/sast/client-side.js +149 -0
package/src/sast/cpp-bench-extras.js +122 -0
package/src/sast/cpp-dataflow.js +430 -0
package/src/sast/cpp.js +248 -0
package/src/sast/csharp.js +152 -0
package/src/sast/csrf.js +82 -0
package/src/sast/dart-flutter.js +173 -0
package/src/sast/db-rls.js +147 -0
package/src/sast/db-taint.js +215 -0
package/src/sast/defi-deep.js +242 -0
package/src/sast/deserialization-gadgets.js +113 -0
package/src/sast/django-hardening.js +230 -0
package/src/sast/env-hygiene.js +125 -0
package/src/sast/fastapi-hardening.js +145 -0
package/src/sast/go-extended.js +84 -0
package/src/sast/host-header.js +106 -0
package/src/sast/index.js +17 -0
package/src/sast/java-ast-folding.js +561 -0
package/src/sast/java-bench-extras.js +708 -0
package/src/sast/java-collection-passthrough.js +178 -0
package/src/sast/java-constant-fold.js +244 -0
package/src/sast/java-deserialization.js +125 -0
package/src/sast/jndi.js +104 -0
package/src/sast/juliet-shape.js +324 -0
package/src/sast/jwt-exp.js +104 -0
package/src/sast/kotlin.js +82 -0
package/src/sast/laravel-hardening.js +198 -0
package/src/sast/ldap-injection.js +100 -0
package/src/sast/llm-owasp.js +465 -0
package/src/sast/llm-stored-prompt.js +103 -0
package/src/sast/llm-trading-agent.js +161 -0
package/src/sast/llm.js +308 -0
package/src/sast/logic.js +140 -0
package/src/sast/mass-assignment.js +101 -0
package/src/sast/mcp-audit.js +242 -0
package/src/sast/mobile-manifest.js +195 -0
package/src/sast/model-load.js +164 -0
package/src/sast/mutation-xss.js +87 -0
package/src/sast/nosql-injection.js +82 -0
package/src/sast/open-redirect.js +119 -0
package/src/sast/php.js +91 -0
package/src/sast/pipeline.js +122 -0
package/src/sast/primary-cwe-java.js +155 -0
package/src/sast/prompt-firewall.js +151 -0
package/src/sast/prompt-template.js +157 -0
package/src/sast/prototype-pollution.js +112 -0
package/src/sast/python-sinks.js +195 -0
package/src/sast/quarkus-hardening.js +102 -0
package/src/sast/rag-poisoning.js +118 -0
package/src/sast/rate-limit.js +128 -0
package/src/sast/response-splitting.js +138 -0
package/src/sast/ruby.js +108 -0
package/src/sast/rust.js +105 -0
package/src/sast/solidity.js +167 -0
package/src/sast/springboot-hardening.js +186 -0
package/src/sast/ssrf-cloud-metadata.js +80 -0
package/src/sast/ssti.js +116 -0
package/src/sast/swift.js +162 -0
package/src/sast/toctou.js +95 -0
package/src/sast/webhook.js +101 -0
package/src/sast/xpath-injection.js +51 -0
package/src/sast/xxe.js +140 -0
package/src/sast/zip-slip.js +200 -0
package/src/sca/base-images.json +45 -0
package/src/sca/container.js +107 -0
package/src/sca/dep-confusion.js +134 -0
package/src/sca/index.js +6 -0
package/src/sca/popular-packages.json +41 -0
package/src/sca/sarif-ingest.js +187 -0
package/src/sca/vuln-function-hints.json +89 -0
package/src/secrets/index.js +4 -0

package/src/dataflow/proven-clean.js ADDED Viewed

@@ -0,0 +1,159 @@
+// Provable-clean SQL injection (v0.68).
+//
+// For each SQL sink in scope, compute a proof that EVERY reaching path
+// from any source passes through a parameterizer (a sanitizer in the
+// catalog tagged `appliesTo: ['sql']`). If the proof holds, mark the
+// finding `provenClean: true` — auditor-grade strong statement, stronger
+// than "we didn't find a flow" because we explicitly enumerated paths.
+//
+// v1 design — no SMT yet:
+//   - Walk the existing taint engine's per-function state to enumerate
+//     reaching sources at each sink call.
+//   - For each reaching source variable, check whether every assignment
+//     path from that source to the sink expression passes through a
+//     `sanitizers/appliesTo:['sql']` catalog match.
+//   - If yes for every source: set `provenClean: true` on the finding with
+//     `provenanceProof.sanitizers: [<callee names>]`.
+//   - If even one source can reach the sink without a parameterizer:
+//     no proof — the finding stays as a normal taint finding.
+//
+// v2 (future): replace path-walk with SMT-based string-domain
+// constraints — model the SQL builder as an algebraic data type, prove
+// no concatenation reaches the unprepared-statement variant. The
+// scaffolding here is intentionally shaped so a v2 SMT backend can
+// substitute for the path walker without changing callers.
+//
+// Currently scoped to SQL only. Path-traversal, cmd-inj, and SSRF have
+// the same structural shape and can be added by registering more
+// `appliesTo` tag handlers below.
+import { CATALOG } from './catalog.js';
+const SQL_SINK_IDS = new Set(
+  CATALOG.filter(e => e.kind === 'sink' && e.vuln && /sql/i.test(e.vuln.name || ''))
+         .map(e => e.id)
+);
+const SQL_SANITIZER_CALLEES = new Set(
+  CATALOG.filter(e => e.kind === 'sanitizer'
+      && Array.isArray(e.appliesTo)
+      && e.appliesTo.includes('sql'))
+         .map(e => e.match && e.match.callee)
+         .filter(Boolean)
+);
+// Also accept these as parameterizers — they're known-safe call shapes
+// even when the catalog entry covers something narrower.
+const EXTRA_SQL_PARAMETERIZERS = new Set([
+  'addWithValue', 'AddWithValue',
+  'setString', 'setInt', 'setLong', 'setDouble', 'setBoolean', 'setObject',
+  'bindParam', 'bindValue',
+  'parameterize', 'param',
+  'sql', 'SQL',                  // tagged-template-literal helper from `slonik`/`postgres`
+  'identifier',
+]);
+function _isSqlParameterizer(callee) {
+  if (!callee || typeof callee !== 'string') return false;
+  const tail = callee.split('.').pop();
+  return SQL_SANITIZER_CALLEES.has(tail) || EXTRA_SQL_PARAMETERIZERS.has(tail);
+}
+// Given a finding emitted by the taint engine and the per-file IR map
+// the engine produced it from, walk the trace looking for at least one
+// parameterizer between source and sink. Returns:
+//   { proven: true,  sanitizers: [<callee...>], reachingSources: N }
+//   { proven: false, reason: '<why>' }
+export function proveSqlClean(finding, perFileIR) {
+  if (!finding || !finding.sinkId || !SQL_SINK_IDS.has(finding.sinkId)) {
+    return { proven: false, reason: 'not-a-sql-sink' };
+  }
+  // The taint engine records sources reaching the sink in finding.trace.
+  // For each source, find the function's CFG and check whether the path
+  // from source-line to sink-line passes through a parameterizer call.
+  const fnIR = _findFunction(finding, perFileIR);
+  if (!fnIR) return { proven: false, reason: 'no-ir-for-fn' };
+  const trace = Array.isArray(finding.trace) ? finding.trace : (finding.chain || []);
+  if (!trace.length) return { proven: false, reason: 'no-trace' };
+  const calls = _allCallNodesBetween(fnIR, trace, finding.line);
+  const sanitizers = calls.filter(c => _isSqlParameterizer(c.callee));
+  if (sanitizers.length === 0) {
+    return { proven: false, reason: 'no-parameterizer-on-path' };
+  }
+  // Path-existence proof: at least one parameterizer call appears
+  // between the latest source line and the sink line on the linear path.
+  // This is a weaker statement than "every reaching path is sanitized,"
+  // which requires real path-set walking — slated for v2.
+  return {
+    proven: true,
+    sanitizers: sanitizers.map(s => s.callee),
+    reachingSources: trace.length,
+    proofKind: 'path-existence-v1',
+  };
+}
+function _findFunction(finding, perFileIR) {
+  if (!perFileIR || !finding.file) return null;
+  const ir = perFileIR[finding.file];
+  if (!ir || !Array.isArray(ir.functions)) return null;
+  // Pick the function whose [line, line + body] range contains the sink line.
+  for (const fn of ir.functions) {
+    // Approximate: function starts at fn.line; we don't track end-line, so
+    // pick the latest-starting function with line <= sink-line.
+  }
+  let chosen = null;
+  for (const fn of ir.functions) {
+    if (fn.line <= finding.line) {
+      if (!chosen || fn.line > chosen.line) chosen = fn;
+    }
+  }
+  return chosen;
+}
+function _allCallNodesBetween(fn, trace, sinkLine) {
+  if (!fn || !fn.cfg || !fn.cfg.nodes) return [];
+  const earliestSrcLine = Math.min(
+    ...trace.map(t => (typeof t.line === 'number' ? t.line : sinkLine))
+  );
+  const out = [];
+  for (const id of Object.keys(fn.cfg.nodes)) {
+    const node = fn.cfg.nodes[id];
+    if (!node || node.kind !== 'call') continue;
+    if (typeof node.line !== 'number') continue;
+    if (node.line < earliestSrcLine || node.line > sinkLine) continue;
+    out.push({ line: node.line, callee: node.callee });
+  }
+  return out;
+}
+// Annotate findings in place: any taint finding that resolves to a SQL
+// sink AND has a provable parameterizer on the path gets:
+//   f.provenClean = true
+//   f.provenanceProof = { sanitizers, reachingSources, proofKind }
+// Other findings are untouched.
+//
+// Note: `provenClean` is INFORMATIONAL. We do NOT drop the finding
+// (an auditor may still want to see it for evidence) — but reports +
+// risk scoring should de-emphasize. The exploitProbability annotator
+// can also lower the point estimate when this flag is present.
+export function annotateProvenClean(findings, perFileIR) {
+  if (!Array.isArray(findings)) return findings;
+  for (const f of findings) {
+    if (!f || f.parser !== 'IR-TAINT') continue;
+    if (!SQL_SINK_IDS.has(f.sinkId)) continue;
+    const proof = proveSqlClean(f, perFileIR);
+    if (proof.proven) {
+      f.provenClean = true;
+      f.provenanceProof = {
+        sanitizers: proof.sanitizers,
+        reachingSources: proof.reachingSources,
+        proofKind: proof.proofKind,
+      };
+    } else {
+      f.provenanceProofFailedReason = proof.reason;
+    }
+  }
+  return findings;
+}
+export const _internal = { SQL_SINK_IDS, SQL_SANITIZER_CALLEES, EXTRA_SQL_PARAMETERIZERS, _isSqlParameterizer };

package/src/dataflow/receiver-context.js ADDED Viewed

@@ -0,0 +1,76 @@
+// Receiver / object-sensitivity context (P1.2).
+//
+// Today the engine summary cache keys per-function as `(qid, taint-state)`.
+// That conflates calls of the same method on different receivers:
+//
+//   this.userRepo.save(taintedInput)    // a sink (writes user data)
+//   this.logger.save(taintedInput)      // NOT a sink (logs are not user data)
+//
+// Both calls hit the same `save()` summary today, so the engine either
+// over-fires (treats logger.save as a sink) or under-fires (misses
+// userRepo.save). Receiver-sensitivity adds a third key dimension: the
+// inferred class of the receiver.
+//
+// This module is a thin helper that:
+//   1. extracts the receiver-type hint at a call site (using CHA), and
+//   2. mixes it into the summary cache key for the callee
+//
+// The actual engine integration (using these helpers) lives in engine.js.
+import * as crypto from 'node:crypto';
+import { classOfVar } from '../ir/class-hierarchy.js';
+/**
+ * Return the receiver-type label for a call expression, or null if
+ * we have no type information.
+ *
+ *   foo.bar()                                 -> typeOfVar(foo) or 'foo'
+ *   this.userRepo.save(x)                     -> 'UserRepo' (heuristic from CHA)
+ *   bareIdentCall(x)                          -> null
+ */
+export function receiverTypeAtCall(node, fn, file, cha) {
+  if (!node || node.kind !== 'call') return null;
+  const callee = node.callee;
+  if (!callee || typeof callee !== 'string') return null;
+  // String form like "this.userRepo.save" or "userRepo.save"
+  const parts = callee.split('.');
+  if (parts.length < 2) return null;            // bareIdentCall — no receiver
+  // The receiver chain is parts[0..parts.length-2]. We try to type the
+  // outermost identifier first; if it's `this` we look at the field name.
+  if (parts[0] === 'this') {
+    // For `this.userRepo.save`, the receiver type hint is the FIELD name —
+    // we conventionally PascalCase it ("UserRepo"). v1 heuristic only.
+    if (parts.length >= 3) {
+      const fieldName = parts[1];
+      return fieldName.charAt(0).toUpperCase() + fieldName.slice(1);
+    }
+    return 'this';
+  }
+  // Try to resolve `foo.save` — type of `foo` from CHA.
+  const inferred = classOfVar(cha, file, fn?.qid, parts[0]);
+  if (inferred) return inferred;
+  // Fall back to the LHS identifier name as a soft label.
+  return parts[0];
+}
+/**
+ * Compute a stable hash for a receiver type — used as part of the
+ * extended summary cache key.
+ */
+export function hashReceiverType(receiverType) {
+  if (!receiverType) return 'no-recv';
+  return crypto.createHash('sha256').update(String(receiverType)).digest('hex').slice(0, 8);
+}
+/**
+ * Extend an existing cache key with a receiver-type dimension.
+ *
+ *   priorKey = "<qid>::<state-hash>"
+ *   newKey   = "<qid>::<state-hash>::<recv-hash>"
+ *
+ * Backwards-compatible: when receiverType is falsy, the key is unchanged
+ * up to the suffix sentinel "no-recv".
+ */
+export function keyWithReceiver(baseKey, receiverType) {
+  return `${baseKey}::${hashReceiverType(receiverType)}`;
+}

package/src/dataflow/sanitizer-proof.js ADDED Viewed

@@ -0,0 +1,154 @@
+// Sanitizer-validity proofs (P4.2).
+//
+// The taint engine trusts any catalog-registered sanitizer to neutralize
+// the threat. Real projects ship their own sanitizers — `sanitize(x)`,
+// `clean(input)`, `validate(s)` — and the catalog matches them by NAME.
+// But a function called `sanitize` that just does `return input.trim()`
+// does NOT sanitize XSS; trusting it produces false negatives at scale.
+//
+// This module verifies, before the engine treats a project-local function
+// as a sanitizer, that its body actually performs the required check for
+// the CWE it claims to mitigate. Per-CWE shape rules:
+//
+//   xss:        body must call escape | DOMPurify.sanitize | bleach.clean
+//               | str.replace(/<[^>]+>/g, ...) | textContent assignment
+//   sql:        body must call .prepare | .bind | parameterized query
+//   path-trav:  body must call path.resolve + assertion against base dir
+//   ssrf:       body must check scheme/host against allow-list
+//   open-redir: body must check scheme/host against allow-list
+//   url:        body must call encodeURIComponent / encodeURI
+//   cmd:        body must call shellEscape / shlex.quote / spawn with argv
+//
+// Public API:
+//   isValidSanitizerFor(fnBody, cweFamily)
+//     → { trusted: bool, reason: string }
+//
+//   verifyProjectSanitizers(perFileIR, catalogEntries)
+//     → returns one { fnQid, family, trusted, reason } verdict per matching
+//       project-local function; the engine consumer can use these to
+//       DEMOTE untrusted local sanitizers to "noop" (no strip effect);
+//       trusted ones stay.
+// Shape rules per vulnerability family. Each family maps to a list of
+// { re, label } entries: `re` is tested against the function-body text and
+// `label` is reported in the verdict reason when it matches. A body is
+// trusted for a family as soon as ANY one of that family's rules matches.
+const _SHAPE_RULES = {
+  'xss': [
+    { re: /\b(?:DOMPurify\.sanitize|sanitizeHtml|bleach\.clean|escapeHtml|html_escape|htmlEscape|encodeHTML|escapeAll)\b/, label: 'HTML-escaping library call' },
+    { re: /\.replace\s*\(\s*\/[<>"'&]/, label: 'inline HTML-special character replace' },
+    { re: /textContent\s*=/, label: 'textContent assignment' },
+  ],
+  'sql': [
+    { re: /\.(?:prepare|bind|bindParam|execute)\s*\(/, label: 'parameterized query call' },
+    // NOTE(review): the `\b` on both sides of the group means `\?` only
+    // matches with a word character directly before and after it, and
+    // `\$\d` needs a word character directly before the `$` — so typical
+    // placeholders such as ` = ?` or ` = $1` do not match. The rule also
+    // requires the placeholder to appear BEFORE the SQL keyword on the
+    // same line. Confirm whether this is intended.
+    { re: /\b(?:placeholder|\?|\$\d)\b.*?(?:select|insert|update|delete)/i, label: 'placeholder in SQL string' },
+  ],
+  'path-trav': [
+    // path.resolve followed within 200 characters by a .startsWith( call.
+    { re: /\bpath\.resolve\b[\s\S]{0,200}\.startsWith\s*\(/, label: 'path.resolve + startsWith allow-list check' },
+    { re: /\b(?:realpath|os\.path\.realpath|pathlib\.Path[\s\S]{0,40}\.resolve)\b/, label: 'canonicalization' },
+    { re: /\.includes\s*\(\s*['"]\.\.['"]\s*\)/, label: 'dotdot string check' },
+  ],
+  'ssrf': [
+    { re: /\b(?:allowedHosts?|allowed_hosts?|hostWhitelist|allowedSchemes?)\b/, label: 'allow-list constant reference' },
+    { re: /\.host\s*===?\s*['"][^'"]+['"]/, label: 'literal host comparison' },
+    { re: /\b(?:169\.254\.169\.254|127\.0\.0\.0\/8|RFC1918|10\.0\.0\.0|172\.16\.0\.0|192\.168\.0\.0)\b/, label: 'metadata / RFC1918 deny-list' },
+  ],
+  'open-redir': [
+    { re: /\b(?:allowedRedirects?|safeRedirects?|allowedHosts?|trustedDomains?)\b/, label: 'allow-list constant reference' },
+    { re: /\.host\s*===?\s*['"][^'"]+['"]/, label: 'literal host comparison' },
+  ],
+  'url': [
+    { re: /\b(?:encodeURIComponent|encodeURI|urllib\.parse\.quote|urlencode)\b/, label: 'URL encoder call' },
+  ],
+  'cmd': [
+    { re: /\b(?:shellEscape|shlex\.quote|Shellwords\.escape|escapeshellarg)\b/, label: 'shell-escape library call' },
+    // spawn('<literal command>', [ ... — a literal command plus an argv array.
+    { re: /\.spawn\s*\(\s*['"][^'"]+['"]\s*,\s*\[/, label: 'spawn with argv array' },
+    { re: /\bsubprocess\.run\s*\(\s*\[[^\]]*\]\s*,/, label: 'subprocess.run with list arg' },
+  ],
+};
+const _CWE_TO_FAMILY = {
+  'CWE-79': 'xss', 'CWE-80': 'xss', 'CWE-81': 'xss', 'CWE-83': 'xss',
+  'CWE-89': 'sql',
+  'CWE-22': 'path-trav', 'CWE-23': 'path-trav', 'CWE-36': 'path-trav',
+  'CWE-918': 'ssrf',
+  'CWE-601': 'open-redir',
+  'CWE-78': 'cmd',
+};
+/**
+ * Verify that a function body satisfies the shape rule for the given
+ * vulnerability family. Returns `{ trusted, reason }`.
+ *
+ *   fnBody:     the function's source text (post-comment-strip ideally)
+ *   family:     one of the keys of _SHAPE_RULES (or a CWE id we map)
+ */
+export function isValidSanitizerFor(fnBody, family) {
+  if (!fnBody || typeof fnBody !== 'string') return { trusted: false, reason: 'no body' };
+  if (!family) return { trusted: false, reason: 'no family' };
+  // Map CWE id to family if needed.
+  const fam = _CWE_TO_FAMILY[family] || family;
+  const rules = _SHAPE_RULES[fam];
+  if (!rules) return { trusted: false, reason: `no shape rule for family '${fam}'` };
+  for (const r of rules) {
+    if (r.re.test(fnBody)) return { trusted: true, reason: `matched: ${r.label}` };
+  }
+  return { trusted: false, reason: `body does not match any known ${fam} shape pattern` };
+}
+/**
+ * Walk the project IR and verify every project-local function that's
+ * registered as a sanitizer in the catalog. Returns an array of
+ *   { fnQid, family, trusted, reason }
+ * The engine consumer can demote untrusted entries from the catalog at
+ * runtime by removing their `effect: 'strip'` flag.
+ */
+export function verifyProjectSanitizers(perFileIR, catalog) {
+  const out = [];
+  if (!perFileIR || !Array.isArray(catalog)) return out;
+  // Index project functions by short name.
+  const fnByName = new Map();
+  for (const ir of Object.values(perFileIR)) {
+    for (const fn of (ir.functions || [])) {
+      const short = fn.name || (fn.qid || '').split('::').pop();
+      if (!short) continue;
+      if (!fnByName.has(short)) fnByName.set(short, []);
+      fnByName.get(short).push(fn);
+    }
+  }
+  for (const entry of catalog) {
+    if (entry.kind !== 'sanitizer') continue;
+    if (entry.match?.type !== 'call') continue;
+    const calleeName = entry.match.callee;
+    if (!calleeName) continue;
+    const fns = fnByName.get(calleeName);
+    if (!fns || !fns.length) continue;            // not a project-local sanitizer
+    for (const fn of fns) {
+      const bodyText = _stringifyCfgBody(fn);
+      const family = (entry.appliesTo && entry.appliesTo[0]) || '*';
+      const verdict = isValidSanitizerFor(bodyText, family);
+      out.push({ fnQid: fn.qid, family, trusted: verdict.trusted, reason: verdict.reason });
+    }
+  }
+  return out;
+}
+function _stringifyCfgBody(fn) {
+  // Reconstruct a rough textual representation of the function body from
+  // its CFG nodes — sufficient for regex shape matching.
+  const parts = [];
+  const nodes = fn.cfg?.nodes || {};
+  for (const id of Object.keys(nodes)) {
+    const n = nodes[id];
+    if (!n) continue;
+    if (n.kind === 'call') parts.push(`${n.callee || '?'}(${(n.args || []).length} args)`);
+    if (n.kind === 'assign') parts.push(`${n.target} = ${_exprStr(n.source)}`);
+    if (n.kind === 'return') parts.push(`return ${_exprStr(n.value)}`);
+  }
+  return parts.join('\n');
+}
+function _exprStr(e) {
+  if (!e) return '';
+  if (e.kind === 'literal') return String(e.value);
+  if (e.kind === 'ident') return e.name;
+  if (e.kind === 'member') return `${_exprStr(e.object)}.${e.prop}`;
+  if (e.kind === 'call') return `${typeof e.callee === 'string' ? e.callee : _exprStr(e.callee)}(...)`;
+  if (e.kind === 'binary' || e.kind === 'logical') return `${_exprStr(e.left)} ${e.op || '?'} ${_exprStr(e.right)}`;
+  if (e.kind === 'tpl') return '`${...}`';
+  return e.kind;
+}

package/src/dataflow/soft-taint.js ADDED Viewed

@@ -0,0 +1,140 @@
+// Probabilistic / soft taint (v0.70 #6).
+//
+// Today taint is binary: a value is either tainted or clean. Sanitizers
+// clear taint entirely. Reality: many sanitizers reduce but don't eliminate
+// exploitation probability. `escape_html()` blocks reflected XSS but
+// leaves attribute-context XSS open. `Number(x)` blocks SQL/XSS for numeric
+// columns but does nothing for text columns.
+//
+// Soft taint carries a [0,1] probability through the path:
+//   - Source emits at p = 1.0 (fully tainted)
+//   - Each sanitizer in the path multiplies by (1 - effectiveness)
+//   - Threshold gates the final emission: findings below
+//     AGENTIC_SECURITY_SOFT_TAINT_THRESHOLD (default 0.5) get demoted to
+//     low-confidence rather than dropped
+//
+// This module annotates AFTER the taint engine runs. It walks each
+// finding's trace + chain, looks up sanitizer effectiveness from the
+// catalog, and emits `f.taintProbability` + `f.taintProbabilityWhy`.
+//
+// Engine-level lattice extension to {tainted, p} is v0.71. For v0.70 the
+// post-pass shape captures the high-value case (sanitizer-in-path
+// downweighting) without rewriting the core lattice.
+import { CATALOG } from './catalog.js';
// Curated sanitizer effectiveness scores, keyed by callee name.
// Scale: 1.0 = the sanitizer fully blocks, 0.0 = it has no effect.
// These are hand-assigned estimates. When unsure, prefer ~0.9 over 1.0 so
// that some residual probability remains and findings don't silently
// disappear.
const DEFAULT_EFFECTIVENESS = Object.fromEntries([
  // Strong sanitizers — proven by spec to block the family.
  ['DOMPurify.sanitize', 0.98],
  ['sanitize', 0.95],
  ['escape', 0.85], // depends on context
  ['htmlspecialchars', 0.9],
  ['encodeURIComponent', 0.99],
  ['encodeURI', 0.95],
  ['JSON.stringify', 0.92], // blocks most code-injection but not all
  ['parameterize', 1.0],
  ['AddWithValue', 1.0],
  ['addWithValue', 1.0],
  ['setString', 1.0],
  ['setInt', 1.0],
  ['setLong', 1.0],
  ['bindParam', 1.0],
  ['bindValue', 1.0],
  ['quote_plus', 0.99],
  ['escape_filter_chars', 0.97], // LDAP
  ['shlex.quote', 0.99],
  // Numeric coercion — blocks injection of non-numeric metacharacters.
  ['parseInt', 0.95],
  ['parseFloat', 0.95],
  ['Number', 0.9],
  ['toInt', 0.95],
  // Weak / context-dependent.
  ['trim', 0.05],
  ['toLowerCase', 0.05],
  ['toUpperCase', 0.05],
  ['replace', 0.3], // depends entirely on the regex
]);
/**
 * Look up the effectiveness score of a sanitizer callee.
 *
 * Resolution order:
 *   1. The first CATALOG entry of kind `sanitizer` that carries a numeric
 *      `sanitizerEffectiveness` and whose `match.callee` equals either the
 *      full callee or its last dotted segment.
 *   2. The curated DEFAULT_EFFECTIVENESS table, keyed by the full callee.
 *   3. The curated DEFAULT_EFFECTIVENESS table, keyed by the last dotted
 *      segment.
 *
 * @param {string} callee - Callee name, possibly dotted (e.g. `shlex.quote`).
 * @returns {number|null} The effectiveness score, or null when the callee is
 *   unknown or is not a non-empty string (no downweight applied).
 */
export function effectivenessFor(callee) {
  if (!callee || typeof callee !== 'string') return null;
  // Tail of dotted callee.
  const tail = callee.split('.').pop();
  // Look in catalog first.
  for (const e of CATALOG) {
    if (e.kind !== 'sanitizer') continue;
    if (typeof e.sanitizerEffectiveness !== 'number') continue;
    const wanted = e.match && e.match.callee;
    if (wanted === callee || wanted === tail) return e.sanitizerEffectiveness;
  }
  // Own-property lookup only. The `in` operator also matches members
  // inherited from Object.prototype, so callees such as `toString`,
  // `x.constructor` or `valueOf` would otherwise resolve to a function
  // instead of "unknown".
  const isCurated = (key) => Object.prototype.hasOwnProperty.call(DEFAULT_EFFECTIVENESS, key);
  if (isCurated(callee)) return DEFAULT_EFFECTIVENESS[callee];
  if (isCurated(tail)) return DEFAULT_EFFECTIVENESS[tail];
  return null;
}
/**
 * Compute the residual taint probability for a finding.
 *
 * Walks `finding.trace`, `finding.chain` and `finding.pathSteps` (in that
 * order; any that is not an array is treated as empty). For each step the
 * callee is `step.callee`, or `step.label` when `callee` is falsy. Every
 * callee with a known numeric effectiveness multiplies the running
 * probability by `1 - effectiveness`, clamped to [0, 1].
 *
 * @param {object} finding - Finding record; a nullish finding has no steps.
 * @returns {{ p: number, why: Array<{ callee: *, effectiveness: number }> }}
 *   `p` starts at 1.0; `why` lists the sanitizers that contributed, in walk
 *   order.
 */
export function computeSoftTaintProbability(finding) {
  const listOf = (value) => (Array.isArray(value) ? value : []);
  const steps = [
    ...listOf(finding?.trace),
    ...listOf(finding?.chain),
    ...listOf(finding?.pathSteps),
  ];
  let p = 1.0;
  const why = [];
  for (const step of steps) {
    // Tolerate holes / null entries in the step arrays instead of throwing.
    if (!step) continue;
    const callee = step.callee || step.label;
    if (!callee) continue;
    const eff = effectivenessFor(callee);
    // Only a real number may downweight: anything else would turn `p` into
    // NaN and poison the threshold comparison made by callers.
    if (typeof eff !== 'number' || Number.isNaN(eff)) continue;
    p *= Math.max(0, Math.min(1, 1 - eff));
    why.push({ callee, effectiveness: eff });
    // Effectively zero already; further sanitizers cannot change the outcome.
    if (p < 1e-6) break;
  }
  return { p, why };
}
/**
 * Annotate every finding whose `parser` is `'IR-TAINT'` with
 * `taintProbability` and `taintProbabilityWhy`.
 *
 * Findings whose probability is below the threshold are demoted one
 * severity level (critical → high → medium → low → info) but are NOT
 * dropped: `_softTaintDemoted` is set and the pre-demotion severity is kept
 * in `_softTaintOriginalSeverity`. Re-running on an already annotated array
 * is safe — demotion is always computed from the recorded original
 * severity, so a finding is never demoted twice.
 *
 * The findings are mutated in place. A non-enumerable `_softTaintStats`
 * property (`{ demoted, threshold }`) is attached to the array.
 *
 * @param {Array<object>} findings - Findings to annotate; a non-array or
 *   empty array is returned untouched.
 * @param {{ threshold?: number|string }} [opts] - `threshold` overrides the
 *   AGENTIC_SECURITY_SOFT_TAINT_THRESHOLD environment variable. Unset,
 *   blank or non-numeric values fall back to 0.5; an explicit 0 is honoured.
 * @returns {Array<object>} The same `findings` reference.
 */
export function annotateSoftTaint(findings, opts = {}) {
  if (!Array.isArray(findings) || findings.length === 0) return findings;

  // Resolve the threshold without `||`, which would silently replace a
  // deliberate 0 with the default.
  const rawThreshold = opts?.threshold ?? process.env.AGENTIC_SECURITY_SOFT_TAINT_THRESHOLD;
  const isBlank = rawThreshold == null
    || (typeof rawThreshold === 'string' && rawThreshold.trim() === '');
  const parsedThreshold = isBlank ? NaN : Number(rawThreshold);
  const threshold = Number.isNaN(parsedThreshold) ? 0.5 : parsedThreshold;

  // A Map avoids accidental hits on inherited object members for odd
  // severity strings.
  const downgrade = new Map([
    ['critical', 'high'],
    ['high', 'medium'],
    ['medium', 'low'],
    ['low', 'info'],
  ]);

  let demoted = 0;
  for (const f of findings) {
    if (!f || f.parser !== 'IR-TAINT') continue;
    const r = computeSoftTaintProbability(f);
    f.taintProbability = r.p;
    f.taintProbabilityWhy = r.why;
    if (r.p < threshold) {
      // On a repeat pass, start from the severity recorded before the first
      // demotion rather than from the already-demoted value.
      const baseSeverity = f._softTaintDemoted ? f._softTaintOriginalSeverity : f.severity;
      f._softTaintDemoted = true;
      f._softTaintOriginalSeverity = baseSeverity;
      const lowered = downgrade.get(baseSeverity);
      if (lowered !== undefined) f.severity = lowered;
      demoted++;
    }
  }

  // writable + configurable so that a later pass over the same array can
  // refresh the stats; with the defineProperty defaults the second call
  // would throw a TypeError.
  Object.defineProperty(findings, '_softTaintStats', {
    value: { demoted, threshold },
    enumerable: false,
    writable: true,
    configurable: true,
  });
  return findings;
}
// Exposes the module-private effectiveness table under a single handle
// (presumably for tests — confirm against importers).
const _internal = { DEFAULT_EFFECTIVENESS };
export { _internal };