npm - @clear-capabilities/agentic-security-scanner - Versions diffs - 0.74.0 - Mend

@clear-capabilities/agentic-security-scanner 0.74.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (331) hide show

package/CHANGELOG.md +1580 -0
package/bin/.agentic-security/findings.json +1577 -0
package/bin/.agentic-security/last-scan.json +1577 -0
package/bin/.agentic-security/last-scan.json.sig +1 -0
package/bin/.agentic-security/scan-history.json +465 -0
package/bin/.agentic-security/streak.json +25 -0
package/bin/agentic-security-audit.js +198 -0
package/bin/agentic-security-consistency.js +80 -0
package/bin/agentic-security-diff.js +136 -0
package/bin/agentic-security-lsp.js +12 -0
package/bin/agentic-security-mcp.js +40 -0
package/bin/agentic-security-rule.js +153 -0
package/bin/agentic-security.js +1683 -0
package/dist/117.index.js +207 -0
package/dist/178.index.js +250 -0
package/dist/218.index.js +793 -0
package/dist/227.index.js +192 -0
package/dist/301.index.js +167 -0
package/dist/384.index.js +18 -0
package/dist/476.index.js +126 -0
package/dist/513.index.js +373 -0
package/dist/520.index.js +13 -0
package/dist/601.index.js +1038 -0
package/dist/634.index.js +1892 -0
package/dist/637.index.js +216 -0
package/dist/660.index.js +131 -0
package/dist/675.index.js +451 -0
package/dist/826.index.js +188 -0
package/dist/830.index.js +133 -0
package/dist/agentic-security.mjs +272 -0
package/dist/agentic-security.mjs.sha256 +1 -0
package/dist/calibration-seed.json +27 -0
package/package.json +77 -0
package/src/.agentic-security/findings.json +80844 -0
package/src/.agentic-security/last-scan.json +80844 -0
package/src/.agentic-security/last-scan.json.sig +1 -0
package/src/.agentic-security/scan-history.json +8408 -0
package/src/.agentic-security/streak.json +26 -0
package/src/badge.js +188 -0
package/src/compare.js +203 -0
package/src/dataflow/.agentic-security/findings.json +3487 -0
package/src/dataflow/.agentic-security/last-scan.json +3487 -0
package/src/dataflow/.agentic-security/last-scan.json.sig +1 -0
package/src/dataflow/.agentic-security/scan-history.json +735 -0
package/src/dataflow/.agentic-security/streak.json +24 -0
package/src/dataflow/CLAUDE.md +38 -0
package/src/dataflow/access-paths.js +172 -0
package/src/dataflow/async-sequencing.js +177 -0
package/src/dataflow/backward.js +201 -0
package/src/dataflow/catalog-expanded.js +485 -0
package/src/dataflow/catalog.js +659 -0
package/src/dataflow/cross-repo.js +219 -0
package/src/dataflow/engine.js +588 -0
package/src/dataflow/exception-flow.js +116 -0
package/src/dataflow/exploit-prover.js +187 -0
package/src/dataflow/higher-order.js +221 -0
package/src/dataflow/ifds.js +347 -0
package/src/dataflow/implicit-flow.js +129 -0
package/src/dataflow/incremental.js +229 -0
package/src/dataflow/index.js +181 -0
package/src/dataflow/numeric-domain.js +192 -0
package/src/dataflow/path-feasibility.js +114 -0
package/src/dataflow/points-to.js +337 -0
package/src/dataflow/polyglot.js +190 -0
package/src/dataflow/proven-clean.js +159 -0
package/src/dataflow/receiver-context.js +76 -0
package/src/dataflow/sanitizer-proof.js +154 -0
package/src/dataflow/soft-taint.js +140 -0
package/src/dataflow/string-domain.js +234 -0
package/src/dataflow/stub-aware-filter.js +100 -0
package/src/dataflow/summaries.js +132 -0
package/src/dataflow/symbolic-exec.js +238 -0
package/src/dataflow/tabulation.js +135 -0
package/src/engine.js +7763 -0
package/src/history-scan.js +229 -0
package/src/index.js +3 -0
package/src/integrations/.agentic-security/findings.json +1504 -0
package/src/integrations/.agentic-security/last-scan.json +1504 -0
package/src/integrations/.agentic-security/scan-history.json +40 -0
package/src/integrations/.agentic-security/streak.json +21 -0
package/src/integrations/index.js +321 -0
package/src/integrations/tickets.js +200 -0
package/src/ir/.agentic-security/findings.json +3036 -0
package/src/ir/.agentic-security/last-scan.json +3036 -0
package/src/ir/.agentic-security/last-scan.json.sig +1 -0
package/src/ir/.agentic-security/scan-history.json +364 -0
package/src/ir/.agentic-security/streak.json +23 -0
package/src/ir/CLAUDE.md +172 -0
package/src/ir/callgraph.js +73 -0
package/src/ir/class-hierarchy.js +195 -0
package/src/ir/index.js +152 -0
package/src/ir/parser-cs.js +260 -0
package/src/ir/parser-java.js +286 -0
package/src/ir/parser-js.js +413 -0
package/src/ir/parser-kt.js +258 -0
package/src/ir/parser-py-cst.js +136 -0
package/src/ir/parser-py.helper.py +501 -0
package/src/ir/parser-py.js +312 -0
package/src/ir/ssa.js +315 -0
package/src/ir/type-stubs.js +288 -0
package/src/leaderboard.js +152 -0
package/src/llm-validator/.agentic-security/findings.json +1891 -0
package/src/llm-validator/.agentic-security/last-scan.json +1891 -0
package/src/llm-validator/.agentic-security/last-scan.json.sig +1 -0
package/src/llm-validator/.agentic-security/scan-history.json +168 -0
package/src/llm-validator/.agentic-security/streak.json +20 -0
package/src/llm-validator/consistency.js +141 -0
package/src/llm-validator/index.js +437 -0
package/src/lsp/.agentic-security/findings.json +28 -0
package/src/lsp/.agentic-security/last-scan.json +28 -0
package/src/lsp/.agentic-security/scan-history.json +79 -0
package/src/lsp/.agentic-security/streak.json +22 -0
package/src/lsp/server.js +275 -0
package/src/mcp/.agentic-security/findings.json +8358 -0
package/src/mcp/.agentic-security/last-scan.json +8358 -0
package/src/mcp/.agentic-security/last-scan.json.sig +1 -0
package/src/mcp/.agentic-security/scan-history.json +1125 -0
package/src/mcp/.agentic-security/streak.json +22 -0
package/src/mcp/CLAUDE.md +54 -0
package/src/mcp/audit.js +136 -0
package/src/mcp/redact.js +75 -0
package/src/mcp/server.js +158 -0
package/src/mcp/stdio.js +83 -0
package/src/mcp/tools.js +940 -0
package/src/mcp/validate.js +49 -0
package/src/personality.js +164 -0
package/src/poc-video.js +239 -0
package/src/posture/.agentic-security/findings.json +51239 -0
package/src/posture/.agentic-security/last-scan.json +51239 -0
package/src/posture/.agentic-security/last-scan.json.sig +1 -0
package/src/posture/.agentic-security/scan-history.json +5557 -0
package/src/posture/.agentic-security/streak.json +24 -0
package/src/posture/CLAUDE.md +42 -0
package/src/posture/adversarial-self-test.js +114 -0
package/src/posture/adversary-agent.js +204 -0
package/src/posture/agents-memory.js +135 -0
package/src/posture/ai-code-fingerprint.js +171 -0
package/src/posture/aibom.js +284 -0
package/src/posture/api-inventory.js +96 -0
package/src/posture/attack-playbooks.js +305 -0
package/src/posture/auditor-agent.js +115 -0
package/src/posture/auth-posture-import.js +135 -0
package/src/posture/baseline-compare.js +114 -0
package/src/posture/blast-radius.js +836 -0
package/src/posture/bounty-prediction.js +141 -0
package/src/posture/business-logic.js +239 -0
package/src/posture/calibration-drift.js +93 -0
package/src/posture/calibration-seed.json +27 -0
package/src/posture/calibration.js +204 -0
package/src/posture/clustering.js +75 -0
package/src/posture/concurrency-checker.js +265 -0
package/src/posture/confidence.js +65 -0
package/src/posture/container-runtime.js +149 -0
package/src/posture/counterfactual.js +109 -0
package/src/posture/cross-lang-graphql.js +165 -0
package/src/posture/cross-lang-grpc.js +166 -0
package/src/posture/cross-lang-meta.js +101 -0
package/src/posture/cross-lang-openapi.js +187 -0
package/src/posture/cross-lang-orm.js +153 -0
package/src/posture/cross-lang-queues.js +210 -0
package/src/posture/crown-jewels.js +110 -0
package/src/posture/custom-rules.js +361 -0
package/src/posture/cve-alert-daemon.js +433 -0
package/src/posture/cve-lookup.js +129 -0
package/src/posture/dead-code.js +430 -0
package/src/posture/defender-agent.js +158 -0
package/src/posture/deploy-platform.js +204 -0
package/src/posture/detector-fuzz.js +61 -0
package/src/posture/deterministic.js +99 -0
package/src/posture/drift.js +165 -0
package/src/posture/epss.js +156 -0
package/src/posture/exploitability-probability.js +212 -0
package/src/posture/exploitability.js +121 -0
package/src/posture/feature-flags.js +110 -0
package/src/posture/finding-defaults.js +132 -0
package/src/posture/fix-history.js +411 -0
package/src/posture/fix-plan.js +121 -0
package/src/posture/fix-verify-loop.js +157 -0
package/src/posture/fix-verify.js +130 -0
package/src/posture/flow-narration.js +105 -0
package/src/posture/grader-calibration.js +156 -0
package/src/posture/harness-discovery.js +113 -0
package/src/posture/holdout-eval.js +144 -0
package/src/posture/iac-reachability.js +163 -0
package/src/posture/iam-policy.js +128 -0
package/src/posture/integrity.js +97 -0
package/src/posture/learning.js +166 -0
package/src/posture/license-policy.js +109 -0
package/src/posture/llm-redteam-prompts.js +418 -0
package/src/posture/llm-redteam.js +303 -0
package/src/posture/material-change.js +163 -0
package/src/posture/mitigation-composite.js +55 -0
package/src/posture/mttr.js +91 -0
package/src/posture/network-policy-import.js +126 -0
package/src/posture/path-predicates.js +99 -0
package/src/posture/persona-prioritization.js +153 -0
package/src/posture/poc-cwe-map.js +51 -0
package/src/posture/poc-generator.js +500 -0
package/src/posture/policy-gate.js +174 -0
package/src/posture/pre-incident-archaeology.js +110 -0
package/src/posture/profile.js +93 -0
package/src/posture/reachability-filter.js +42 -0
package/src/posture/regression-test-gen.js +200 -0
package/src/posture/reverse-blast-radius.js +110 -0
package/src/posture/router.js +109 -0
package/src/posture/rule-overrides.js +198 -0
package/src/posture/rule-pack-signing.js +209 -0
package/src/posture/rule-packs.js +143 -0
package/src/posture/rule-synthesis.js +108 -0
package/src/posture/ruleset-version.js +71 -0
package/src/posture/sbom.js +129 -0
package/src/posture/schema-aware-bridge.js +207 -0
package/src/posture/security-trend.js +87 -0
package/src/posture/semantic-clone.js +114 -0
package/src/posture/specification-mining.js +170 -0
package/src/posture/stable-id.js +75 -0
package/src/posture/stack-playbook.js +229 -0
package/src/posture/streak.js +249 -0
package/src/posture/suppressions.js +135 -0
package/src/posture/telemetry-ingest.js +112 -0
package/src/posture/threat-model.js +145 -0
package/src/posture/three-agent-pipeline.js +74 -0
package/src/posture/triage.js +146 -0
package/src/posture/trust-boundary-diagram.js +115 -0
package/src/posture/type-narrowing.js +129 -0
package/src/posture/validator-metrics.js +179 -0
package/src/posture/verifier-ephemeral.js +118 -0
package/src/posture/verifier-target.js +147 -0
package/src/posture/verifier.js +257 -0
package/src/posture/version.js +75 -0
package/src/posture/waf-ingest.js +200 -0
package/src/posture/why-fired.js +141 -0
package/src/pr-comment.js +172 -0
package/src/pr-delta.js +198 -0
package/src/report/.agentic-security/findings.json +79 -0
package/src/report/.agentic-security/last-scan.json +79 -0
package/src/report/.agentic-security/last-scan.json.sig +1 -0
package/src/report/.agentic-security/scan-history.json +332 -0
package/src/report/.agentic-security/streak.json +23 -0
package/src/report/index.js +1136 -0
package/src/report/mascot.js +42 -0
package/src/runScan.js +141 -0
package/src/sast/.agentic-security/findings.json +5051 -0
package/src/sast/.agentic-security/last-scan.json +5051 -0
package/src/sast/.agentic-security/last-scan.json.sig +1 -0
package/src/sast/.agentic-security/scan-history.json +788 -0
package/src/sast/.agentic-security/streak.json +23 -0
package/src/sast/CLAUDE.md +39 -0
package/src/sast/_comment-strip.js +46 -0
package/src/sast/agent-tool-escalation.js +131 -0
package/src/sast/auth-provider.js +171 -0
package/src/sast/authz.js +236 -0
package/src/sast/bench-shape/.agentic-security/findings.json +28 -0
package/src/sast/bench-shape/.agentic-security/last-scan.json +28 -0
package/src/sast/bench-shape/.agentic-security/scan-history.json +24 -0
package/src/sast/bench-shape/.agentic-security/streak.json +22 -0
package/src/sast/bench-shape/index.js +62 -0
package/src/sast/claude-hook-injection.js +199 -0
package/src/sast/claude-md-prompt-injection.js +170 -0
package/src/sast/claude-settings.js +165 -0
package/src/sast/client-side.js +149 -0
package/src/sast/cpp-bench-extras.js +122 -0
package/src/sast/cpp-dataflow.js +430 -0
package/src/sast/cpp.js +248 -0
package/src/sast/csharp.js +152 -0
package/src/sast/csrf.js +82 -0
package/src/sast/dart-flutter.js +173 -0
package/src/sast/db-rls.js +147 -0
package/src/sast/db-taint.js +215 -0
package/src/sast/defi-deep.js +242 -0
package/src/sast/deserialization-gadgets.js +113 -0
package/src/sast/django-hardening.js +230 -0
package/src/sast/env-hygiene.js +125 -0
package/src/sast/fastapi-hardening.js +145 -0
package/src/sast/go-extended.js +84 -0
package/src/sast/host-header.js +106 -0
package/src/sast/index.js +17 -0
package/src/sast/java-ast-folding.js +561 -0
package/src/sast/java-bench-extras.js +708 -0
package/src/sast/java-collection-passthrough.js +178 -0
package/src/sast/java-constant-fold.js +244 -0
package/src/sast/java-deserialization.js +125 -0
package/src/sast/jndi.js +104 -0
package/src/sast/juliet-shape.js +324 -0
package/src/sast/jwt-exp.js +104 -0
package/src/sast/kotlin.js +82 -0
package/src/sast/laravel-hardening.js +198 -0
package/src/sast/ldap-injection.js +100 -0
package/src/sast/llm-owasp.js +465 -0
package/src/sast/llm-stored-prompt.js +103 -0
package/src/sast/llm-trading-agent.js +161 -0
package/src/sast/llm.js +308 -0
package/src/sast/logic.js +140 -0
package/src/sast/mass-assignment.js +101 -0
package/src/sast/mcp-audit.js +242 -0
package/src/sast/mobile-manifest.js +195 -0
package/src/sast/model-load.js +164 -0
package/src/sast/mutation-xss.js +87 -0
package/src/sast/nosql-injection.js +82 -0
package/src/sast/open-redirect.js +119 -0
package/src/sast/php.js +91 -0
package/src/sast/pipeline.js +122 -0
package/src/sast/primary-cwe-java.js +155 -0
package/src/sast/prompt-firewall.js +151 -0
package/src/sast/prompt-template.js +157 -0
package/src/sast/prototype-pollution.js +112 -0
package/src/sast/python-sinks.js +195 -0
package/src/sast/quarkus-hardening.js +102 -0
package/src/sast/rag-poisoning.js +118 -0
package/src/sast/rate-limit.js +128 -0
package/src/sast/response-splitting.js +138 -0
package/src/sast/ruby.js +108 -0
package/src/sast/rust.js +105 -0
package/src/sast/solidity.js +167 -0
package/src/sast/springboot-hardening.js +186 -0
package/src/sast/ssrf-cloud-metadata.js +80 -0
package/src/sast/ssti.js +116 -0
package/src/sast/swift.js +162 -0
package/src/sast/toctou.js +95 -0
package/src/sast/webhook.js +101 -0
package/src/sast/xpath-injection.js +51 -0
package/src/sast/xxe.js +140 -0
package/src/sast/zip-slip.js +200 -0
package/src/sca/base-images.json +45 -0
package/src/sca/container.js +107 -0
package/src/sca/dep-confusion.js +134 -0
package/src/sca/index.js +6 -0
package/src/sca/popular-packages.json +41 -0
package/src/sca/sarif-ingest.js +187 -0
package/src/sca/vuln-function-hints.json +89 -0
package/src/secrets/index.js +4 -0

package/src/posture/calibration.js ADDED Viewed

@@ -0,0 +1,204 @@
+// Brier-calibrated confidence (P1.3 / FR-UX-1, FR-UX-2).
+//
+// Today's `confidence` field is an ordinal score: combinations of severity,
+// parser type, route-rooting, and a few heuristic adjustments. It correlates
+// with true-positive rate but isn't calibrated — a "0.8" today doesn't mean
+// "80% likely TP," it means "above-the-fold finding."
+//
+// This module turns the ordinal score into a calibrated probability via a
+// per-family bucket map of historical TP rates from `validator-metrics.json`.
+// It also computes:
+//
+//   - 95% Wilson-score confidence intervals (small-sample-safe; never reports
+//     a CI of [0.95, 1.00] from a single observation).
+//   - The running Brier score on the labeled history, so the operator can
+//     see how well the calibration tracks reality.
+//
+// HONESTY: when a family has fewer than `MIN_SAMPLES_FOR_CALIBRATION` labels
+// (default 30), we refuse to ship a calibrated number and instead emit
+// `null` with a reason. Pillar-6 of the parent PRD calls this out: "When the
+// verifier cannot rule a finding in or out, surface 'cannot verify' rather
+// than pick a confidence number out of a hat."
+//
+// Seed corpus: the v1 calibration table is seeded from per-family TP/FP
+// counts collected by the bench-realworld runner against OWASP Benchmark
+// v1.2 and the curated Juliet subsets. Customers' own `validator-metrics.json`
+// extends and overrides per-family.
+import * as fs from 'node:fs';
+import * as path from 'node:path';
+const MIN_SAMPLES_FOR_CALIBRATION = 30; // families with fewer labeled samples (tp + fp) than this get calibrated = null
+// ─── Wilson-score interval ──────────────────────────────────────────────────
+//
+// Returns the 95% Wilson-score interval [lower, upper] for the proportion tp / n, clamped to [0, 1]; [0, 1] when n <= 0.
+// Source: https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval
+const Z_95 = 1.959963984540054; // z-score used for the 95% interval
+export function wilsonInterval(tp, n) {
+  if (n <= 0) return [0, 1]; // no observations: the interval is the whole range
+  const p = tp / n; // observed proportion
+  const z = Z_95;
+  const denom = 1 + (z * z) / n;
+  const centre = p + (z * z) / (2 * n);
+  const margin = z * Math.sqrt((p * (1 - p)) / n + (z * z) / (4 * n * n)); // NOTE(review): assumes 0 <= tp <= n; outside that range the radicand can go negative and yield NaN
+  const lower = Math.max(0, (centre - margin) / denom); // clamp at 0
+  const upper = Math.min(1, (centre + margin) / denom); // clamp at 1
+  return [lower, upper];
+}
+// ─── Brier score ─────────────────────────────────────────────────────────────
+//
+// brier = mean( (prediction - actual)^2 ) over samples shaped { prediction, actual }; returns null when nothing is scorable.
+// 0 = perfect; 0.25 = what always predicting 0.5 scores (coin-flip baseline); 1 = always confidently wrong.
+// PRD success criterion: ≤ 0.10.
+export function brierScore(samples) {
+  if (!Array.isArray(samples) || samples.length === 0) return null; // nothing to score
+  let sum = 0, n = 0;
+  for (const s of samples) {
+    if (!s || typeof s.prediction !== 'number' || typeof s.actual !== 'number') continue; // skip entries without numeric fields. NOTE(review): NaN passes this typeof check and would make the result NaN
+    const p = Math.max(0, Math.min(1, s.prediction)); // clamp to [0, 1]
+    const a = Math.max(0, Math.min(1, s.actual)); // clamp to [0, 1]
+    sum += (p - a) * (p - a);
+    n++;
+  }
+  return n > 0 ? sum / n : null; // null when no entry was usable
+}
+// ─── Per-family calibration table ────────────────────────────────────────────
+//
+// Returns a plain object keyed by family → { tp, fp, n, calibrated, ci95 }
+//   tp           — labeled true positives in this family
+//   fp           — labeled false positives in this family
+//   n            — tp + fp (families with n === 0 are omitted from the table)
+//   calibrated   — tp / n when n >= MIN_SAMPLES_FOR_CALIBRATION, otherwise null
+//   ci95         — [lower, upper] from wilsonInterval(tp, n); set regardless of n
+export function buildCalibrationTable(history) {
+  if (!history || typeof history !== 'object') return {}; // no usable history → empty table
+  const out = {};
+  const families = history.families || history.perFamily || {}; // either key is accepted
+  for (const [fam, raw] of Object.entries(families)) {
+    if (!raw || typeof raw !== 'object') continue;
+    const tp = Number(raw.tp) || 0; // missing or non-numeric counts become 0
+    const fp = Number(raw.fp) || 0;
+    const n = tp + fp;
+    if (n === 0) continue;
+    const calibrated = n >= MIN_SAMPLES_FOR_CALIBRATION ? tp / n : null;
+    const ci95 = wilsonInterval(tp, n);
+    out[fam] = { tp, fp, n, calibrated, ci95 };
+  }
+  return out;
+}
+function _readJsonMaybe(fp) { // best-effort JSON read: returns the parsed value, or null on any read/parse error
+  try { return JSON.parse(fs.readFileSync(fp, 'utf8')); } catch { return null; }
+}
+// Load history from .agentic-security/validator-metrics.json + the bundled
+// seed file. The bundled seed ships with this release; the customer file
+// overrides per-family when N is higher there. Returns { families }, where each row is { tp, fp }.
+export function loadCalibrationHistory(scanRoot) {
+  const customer = _readJsonMaybe(path.join(scanRoot || process.cwd(), '.agentic-security', 'validator-metrics.json')) || {}; // root falls back to cwd; an unreadable file yields {}
+  const seedPath = new URL('./calibration-seed.json', import.meta.url); // resolved relative to this module
+  let seed = null;
+  try { seed = JSON.parse(fs.readFileSync(seedPath, 'utf8')); } catch { seed = null; } // a missing or invalid seed is tolerated
+  // Merge: customer takes precedence when its sample count is strictly higher; on a tie the seed row is kept.
+  const families = {};
+  const merge = (src) => {
+    const fams = src?.families || src?.perFamily || {};
+    for (const [k, v] of Object.entries(fams)) {
+      if (!v || typeof v !== 'object') continue;
+      const tp = Number(v.tp) || 0, fp = Number(v.fp) || 0;
+      const n = tp + fp;
+      const cur = families[k];
+      if (!cur || n > cur.tp + cur.fp) families[k] = { tp, fp }; // only tp/fp are carried over
+    }
+  };
+  if (seed) merge(seed); // seed first, so customer rows are compared against it
+  if (customer) merge(customer); // always runs: customer defaults to {} above
+  return { families };
+}
+// ─── Annotation ──────────────────────────────────────────────────────────────
+//
+// For each finding, set (in place; the function returns nothing):
+//   f.calibrated_confidence       — number in [0,1] or null
+//   f.calibrated_confidence_ci    — [lower, upper] or null
+//   f.calibrated_n                — sample size used (0 when there is no family or no table row)
+//   f.calibration_reason          — null when calibrated; otherwise "no-family" | "no-history" | "insufficient-samples"
+export function annotateCalibratedConfidence(findings, opts = {}) {
+  if (!Array.isArray(findings)) return;
+  const table = opts.table || buildCalibrationTable(opts.history || loadCalibrationHistory(opts.scanRoot)); // precedence: opts.table, then opts.history, then history loaded via opts.scanRoot
+  for (const f of findings) {
+    if (!f || typeof f !== 'object') continue;
+    const fam = f.family || null;
+    if (!fam) { // finding carries no family to look up
+      f.calibrated_confidence = null;
+      f.calibrated_confidence_ci = null;
+      f.calibrated_n = 0;
+      f.calibration_reason = 'no-family';
+      continue;
+    }
+    const row = table[fam];
+    if (!row || typeof row.calibrated !== 'number') { // no row at all, or a row whose calibrated value is not a number
+      f.calibrated_confidence = null;
+      f.calibrated_confidence_ci = null;
+      f.calibrated_n = row ? row.n : 0;
+      f.calibration_reason = row ? 'insufficient-samples' : 'no-history';
+      continue;
+    }
+    f.calibrated_confidence = round3(row.calibrated); // values are rounded to 3 decimals
+    f.calibrated_confidence_ci = [round3(row.ci95[0]), round3(row.ci95[1])];
+    f.calibrated_n = row.n;
+    f.calibration_reason = null;
+  }
+}
+function round3(x) { return Math.round(x * 1000) / 1000; } // round to 3 decimal places
+// ─── Brier on held-out labels ───────────────────────────────────────────────
+//
+// Premortem #9: the previous `computeBrierFromHistory` was tautological — it
+// fed (prediction = row.calibrated, actual = row.calibrated) into brierScore
+// and always returned 0. That number is unsafe to surface anywhere because
+// readers will interpret it as "calibration is perfect," when it actually
+// measures nothing.
+//
+// The honest computation needs *held-out labels* — verdicts the engine did
+// NOT use to fit the calibration table. We accept those as an array of
+// { family, predicted, actual } (`family` is not read here) where `predicted`
+// is the calibrated rate the model gave (e.g. f.calibrated_confidence) and
+// `actual` is the confirmed truth: a boolean, or a number thresholded at 0.5.
+//
+// Callers must supply held-out data. There is no fallback that produces
+// a number from the seed corpus alone — that path is what produced the
+// tautology, and it should fail loudly instead.
+export function computeBrierOnHeldOut(samples) { // returns { brier, n } on success, or { brier: null, reason } when nothing can be scored
+  if (!Array.isArray(samples) || samples.length === 0) {
+    return { brier: null, reason: 'no-held-out-samples' };
+  }
+  const cleaned = []; // samples normalized to the { prediction, actual } shape brierScore reads
+  for (const s of samples) {
+    if (!s || typeof s !== 'object') continue;
+    const p = typeof s.predicted === 'number' ? Math.max(0, Math.min(1, s.predicted)) : null; // clamp to [0, 1]. NOTE(review): a NaN `predicted` passes the typeof check and stays NaN
+    const a = typeof s.actual === 'number'
+      ? (s.actual >= 0.5 ? 1 : 0) // numeric labels are thresholded. NOTE(review): a NaN `actual` lands on 0 here
+      : (s.actual === true ? 1 : s.actual === false ? 0 : null); // booleans map to 1/0; anything else is unusable
+    if (p === null || a === null) continue; // drop samples missing either side
+    cleaned.push({ prediction: p, actual: a });
+  }
+  if (!cleaned.length) return { brier: null, reason: 'no-valid-samples' };
+  const brier = brierScore(cleaned);
+  return { brier, n: cleaned.length }; // n counts only the samples that survived cleaning
+}
+// Removed: computeBrierFromHistory. Anyone relying on it for a dashboard
+// number should switch to computeBrierOnHeldOut(samples) with real labels.
+// Exposes the module-private constants and the rounding helper for tests / introspection.
+export const _internals = { MIN_SAMPLES_FOR_CALIBRATION, Z_95, round3 };

package/src/posture/clustering.js ADDED Viewed

@@ -0,0 +1,75 @@
+// Root-cause clustering (FR-PREC-6).
+//
+// When a single missing sanitizer produces N flow paths converging on the
+// same sink expression, the engine emits N findings. This module collapses
+// them into one finding with `clusterSize` = N and `exampleFlows` containing
+// up to 5 representative paths. Downstream `/fix` operates on the cluster,
+// not on each leaf — one patch fixes all flows.
+//
+// Distinct from the existing `dedupeFindingsWithEvidence`, which clusters by
+// (file, sink-line, family). Root-cause clustering keys on the detector
+// (parser), the rule, the file, and a normalized sink expression, so several
+// flows into the same sink shape within one file collapse into one finding.
+function sinkKey(f) { // returns the bucket key string for finding f, or null when f has no sink expression
+  // Cluster by the detector that fired + rule + file + a normalized form of
+  // the sink expression. We INTENTIONALLY do not cluster across files — the
+  // existing fix-bundling pipeline does that (one buggy helper called from N
+  // routes → one bundle of N findings). Clustering here is for the narrower
+  // case where a single sink has multiple flows feeding it within the same
+  // file.
+  //
+  // The parser tag MUST be in the key (bench-regression May 2026). Without it,
+  // two distinct detectors that happen to share a CWE and target the same
+  // sink line (e.g. structural Open Redirect + host-header redirect detector
+  // both flag CWE-601 on the same res.redirect line) collapse into one,
+  // hiding real findings. Each parser asks its own question; their findings
+  // are not redundant flows of the same vuln.
+  const parser = f.parser || '';
+  const rule = f.cwe || f.family || (f.vuln || '').slice(0, 40); // first available of: CWE, family, first 40 chars of vuln
+  const file = f.file || f.sink?.file || '';
+  const sinkExpr = (f.sink?.label || f.sink?.snippet || f.snippet || '') // normalized below: quoted literals become _S_, whitespace runs collapse to one space, result is trimmed and capped at 120 chars
+    .replace(/['"`][^'"`]*['"`]/g, '_S_')
+    .replace(/\s+/g, ' ')
+    .trim()
+    .slice(0, 120);
+  // Empty sinkExpr keys cluster too eagerly (rate-limit findings that don't
+  // carry a snippet all land in the same bucket). Skip clustering entirely
+  // when we have no sink expression to compare.
+  if (!sinkExpr) return null;
+  return `${parser}::${rule}::${file}::${sinkExpr}`;
+}
+export function clusterByRootCause(findings) { // collapses findings that share a sinkKey into one keeper; returns a filtered array, or the input itself when nothing is dropped
+  if (!Array.isArray(findings) || findings.length === 0) return findings; // non-array / empty input is passed back as-is
+  const buckets = new Map(); // sinkKey → findings sharing that key
+  for (const f of findings) {
+    if (!f || typeof f !== 'object') continue; // not bucketed, but still present in the returned array
+    const k = sinkKey(f);
+    if (!k) continue; // no sink expression: never clustered
+    if (!buckets.has(k)) buckets.set(k, []);
+    buckets.get(k).push(f);
+  }
+  const drop = new Set(); // findings absorbed into a keeper
+  for (const [, group] of buckets) {
+    if (group.length < 2) continue; // singletons are left untouched
+    // Sort by severity (highest first), then by triageScore (higher first) — keep the strongest.
+    const SEV = { critical: 0, high: 1, medium: 2, low: 3, info: 4 }; // lower rank = more severe
+    group.sort((a, b) =>
+      (SEV[a.severity] ?? 9) - (SEV[b.severity] ?? 9) || // unknown severities rank 9, after info
+      (b.triageScore || 0) - (a.triageScore || 0)
+    );
+    const keeper = group[0];
+    keeper.clusterSize = group.length; // mutates the kept finding in place
+    keeper.exampleFlows = group.slice(1, 6).map(f => ({ // summaries of up to 5 of the absorbed findings
+      file: f.file || f.sink?.file,
+      line: f.line || f.sink?.line,
+      source: f.source ? { file: f.source.file, line: f.source.line, label: f.source.label } : null,
+      sink: f.sink ? { file: f.sink.file, line: f.sink.line, label: f.sink.label } : null,
+      snippet: f.snippet,
+    }));
+    for (let i = 1; i < group.length; i++) drop.add(group[i]); // everything except the keeper is removed from the output
+  }
+  if (!drop.size) return findings; // nothing clustered: same array instance is returned
+  return findings.filter(f => !drop.has(f));
+}

package/src/posture/concurrency-checker.js ADDED Viewed

@@ -0,0 +1,265 @@
+// FR-SEM-9 — Bounded concurrency heuristic checker.
+//
+// A real model checker is research; this module ships the high-leverage
+// subset: regex-level detectors for the four most common concurrency bugs
+// commercial SAST misses.
+//
+//   1. Missed-unlock — `mutex.Lock()` / `lock.acquire()` / `synchronized` /
+//      `pthread_mutex_lock` without a matching unlock on every exit path
+//      within the same function body (early returns, exceptions).
+//   2. Data race — a shared variable (file-scope or struct field) written
+//      from a goroutine / async task / Worker without protection from a
+//      detected mutex/Lock/atomic primitive. (Not emitted yet:
+//      scanConcurrency() currently runs no data-race detector.)
+//   3. Deadlock cycle — two functions where one acquires A then B and
+//      another acquires B then A. Bounded to ≤ 50 functions per scan.
+//   4. Fire-and-forget — async function that mutates shared state called
+//      without `await` (Node), `wait()` (Python), `.get()` (futures).
+//
+// Languages: Go, Java, JS/TS, Python. Each pattern is conservative — we
+// only emit when the surface evidence is unambiguous. Severity depends on
+// the detector (lock findings: medium, fire-and-forget: low, deadlock
+// cycle: high); the family is always `concurrency-bug`.
+const PATTERNS = { // Regex tables keyed by the language tag inferLang() returns. All carry the `g` flag, so exec()-style callers must reset lastIndex.
+  go: {
+    lockAcquire: /\b(\w+)\.Lock\(\)/g, // group 1 = identifier directly before `.Lock()`
+    lockRelease: /\b(\w+)\.Unlock\(\)/g, // group 1 = identifier directly before `.Unlock()`
+    asyncStart: /\bgo\s+\w/g, // `go <something>` statement
+    syncOnce: /\bsync\.Once\b/g,
+    shared: /^var\s+(\w+)\s+\w/gm, // `var name Type` starting at column 0
+  },
+  java: {
+    lockAcquire: /\b(\w+)\.lock\(\)/g, // group 1 = identifier directly before `.lock()`
+    lockRelease: /\b(\w+)\.unlock\(\)/g, // group 1 = identifier directly before `.unlock()`
+    synchronized: /\bsynchronized\s*\(/g,
+    asyncStart: /\bnew\s+Thread\(|\.start\(\)|@Async\b|CompletableFuture\.runAsync/g,
+  },
+  js: { // no lockAcquire/lockRelease here, so findMissedUnlocks() returns nothing for JS/TS
+    asyncFn: /\basync\s+function\s+\w+|async\s+\(/g,
+    asyncCallNoAwait: /^(?!.*\bawait\b).*\b\w+\s*\([^)]*\)\.then\(/gm, // a line with no `await` that contains `call(...).then(`
+    workerPost: /\bworker\.postMessage\b/g,
+    sharedAt: /^(?:const|let|var)\s+(\w+)\s*=/gm, // declaration starting at column 0
+  },
+  py: {
+    lockAcquire: /\b(\w+)\.acquire\(\)/g, // group 1 = identifier directly before `.acquire()`
+    lockRelease: /\b(\w+)\.release\(\)/g, // group 1 = identifier directly before `.release()`
+    asyncDef: /\basync\s+def\s+\w+/g,
+    asyncCallNoAwait: /^(?!.*\bawait\b).*\basyncio\.create_task\(/gm, // a line with no `await` that calls asyncio.create_task(
+  },
+};
+function inferLang(fp) {
+  if (/\.go$/i.test(fp)) return 'go';
+  if (/\.(java|kt)$/i.test(fp)) return 'java';
+  if (/\.(js|jsx|ts|tsx|mjs|cjs)$/i.test(fp)) return 'js';
+  if (/\.py$/i.test(fp)) return 'py';
+  return null;
+}
+// Walk a function body and collect lock acquire/release pairs.
+// Naive: assume single-block functions. Good enough for the common case.
+function extractFunctions(text, lang) {
+  const out = [];
+  let m;
+  if (lang === 'go') {
+    const re = /func(?:\s+\(\w+\s+\*?\w+\))?\s+(\w+)\s*\([^)]*\)[^{]*\{/g;
+    while ((m = re.exec(text))) {
+      const body = grabBody(text, m.index + m[0].length - 1);
+      if (body) out.push({ name: m[1], body, startLine: text.slice(0, m.index).split('\n').length });
+    }
+  } else if (lang === 'java') {
+    const re = /(?:public|private|protected|static|final|synchronized)\s+[\w<>,\s\[\]]+\s+(\w+)\s*\([^)]*\)\s*(?:throws\s+[\w,\s]+)?\s*\{/g;
+    while ((m = re.exec(text))) {
+      const body = grabBody(text, m.index + m[0].length - 1);
+      if (body) out.push({ name: m[1], body, startLine: text.slice(0, m.index).split('\n').length });
+    }
+  } else if (lang === 'js') {
+    const re = /(?:function\s+(\w+)\s*\(|(?:const|let|var)\s+(\w+)\s*=\s*(?:async\s*)?\([^)]*\)\s*=>)/g;
+    while ((m = re.exec(text))) {
+      const braceIdx = text.indexOf('{', m.index);
+      if (braceIdx < 0) continue;
+      const body = grabBody(text, braceIdx);
+      if (body) out.push({ name: m[1] || m[2], body, startLine: text.slice(0, m.index).split('\n').length });
+    }
+  } else if (lang === 'py') {
+    const lines = text.split('\n');
+    for (let i = 0; i < lines.length; i++) {
+      const m2 = /^def\s+(\w+)\s*\(|^async\s+def\s+(\w+)\s*\(/.exec(lines[i]);
+      if (!m2) continue;
+      const name = m2[1] || m2[2];
+      const body = lines.slice(i, Math.min(i + 80, lines.length)).join('\n');
+      out.push({ name, body, startLine: i + 1 });
+    }
+  }
+  return out;
+}
+function grabBody(text, openBraceIdx) {
+  if (text[openBraceIdx] !== '{') return null;
+  let depth = 0;
+  for (let i = openBraceIdx; i < Math.min(openBraceIdx + 8000, text.length); i++) {
+    if (text[i] === '{') depth++;
+    else if (text[i] === '}') {
+      depth--;
+      if (depth === 0) return text.slice(openBraceIdx, i + 1);
+    }
+  }
+  return null;
+}
+function findMissedUnlocks(fn, lang) {
+  const out = [];
+  const p = PATTERNS[lang];
+  if (!p || !p.lockAcquire || !p.lockRelease) return out;
+  const acquires = [...fn.body.matchAll(p.lockAcquire)];
+  const releases = [...fn.body.matchAll(p.lockRelease)];
+  if (!acquires.length) return out;
+  // For each acquired lock name, check at least one release in the body.
+  const acquiredNames = new Set(acquires.map(m => m[1]));
+  const releasedNames = new Set(releases.map(m => m[1]));
+  for (const name of acquiredNames) {
+    if (!releasedNames.has(name)) {
+      out.push({
+        kind: 'missed-unlock',
+        lock: name,
+        functionName: fn.name,
+        startLine: fn.startLine,
+      });
+    } else {
+      // Lock+unlock both present, but check that the function has a defer/
+      // try-finally guarantee. Go: `defer`; Java/Py: try/finally; otherwise
+      // early-return-before-unlock is a risk.
+      const guarded =
+        (lang === 'go' && /defer\s+\w+\.Unlock\(\)/.test(fn.body)) ||
+        (lang === 'java' && /try\s*\{[\s\S]*finally\s*\{[\s\S]*\.unlock\(\)/m.test(fn.body)) ||
+        (lang === 'py' && (/with\s+\w+:/.test(fn.body) || /try\s*:[\s\S]*finally\s*:[\s\S]*\.release\(\)/m.test(fn.body)));
+      if (!guarded && /\breturn\b/.test(fn.body)) {
+        out.push({
+          kind: 'unguarded-lock',
+          lock: name,
+          functionName: fn.name,
+          startLine: fn.startLine,
+          remediation: lang === 'go' ? 'use `defer mu.Unlock()`' :
+                       lang === 'java' ? 'wrap in try/finally with unlock in finally' :
+                       lang === 'py' ? 'use `with lock:` context manager' :
+                       'guarantee release on every exit path',
+        });
+      }
+    }
+  }
+  return out;
+}
+function findFireAndForget(fn, lang) {
+  const out = [];
+  const p = PATTERNS[lang];
+  if (!p || !p.asyncCallNoAwait) return out;
+  let m;
+  p.asyncCallNoAwait.lastIndex = 0;
+  while ((m = p.asyncCallNoAwait.exec(fn.body))) {
+    if (/\bvoid\s/.test(m[0])) continue;        // explicit void = intentional
+    out.push({
+      kind: 'fire-and-forget',
+      functionName: fn.name,
+      startLine: fn.startLine,
+      snippet: m[0].slice(0, 80),
+    });
+  }
+  return out;
+}
+function findDeadlockCycles(fns) {
+  // Build a graph: function → list of lock pairs acquired in order.
+  // Cycle if A acquires (x,y) and B acquires (y,x) somewhere — even when
+  // distinct calls are interleaved at runtime.
+  const lockOrders = [];
+  for (const fn of fns) {
+    const acquires = [...fn.body.matchAll(/\b(\w+)\.(?:Lock|lock|acquire)\(\)/g)].map(m => m[1]);
+    const distinct = [...new Set(acquires)];
+    if (distinct.length >= 2) {
+      lockOrders.push({ fn: fn.name, startLine: fn.startLine, pairs: pairsOf(distinct) });
+    }
+    if (lockOrders.length > 50) break;          // bounded
+  }
+  const out = [];
+  for (let i = 0; i < lockOrders.length; i++) {
+    for (let j = i + 1; j < lockOrders.length; j++) {
+      const a = lockOrders[i], b = lockOrders[j];
+      for (const [x, y] of a.pairs) {
+        for (const [bx, by] of b.pairs) {
+          if (x === by && y === bx) {
+            out.push({
+              kind: 'deadlock-cycle',
+              functionA: a.fn,
+              functionB: b.fn,
+              order: `${a.fn} locks (${x}, ${y}); ${b.fn} locks (${bx}, ${by})`,
+              startLineA: a.startLine,
+              startLineB: b.startLine,
+            });
+          }
+        }
+      }
+    }
+  }
+  return out;
+}
+function pairsOf(arr) {
+  const p = [];
+  for (let i = 0; i < arr.length; i++)
+    for (let j = i + 1; j < arr.length; j++) p.push([arr[i], arr[j]]);
+  return p;
+}
+export function scanConcurrency(fileContents) {
+  const findings = [];
+  if (!fileContents) return findings;
+  for (const [fp, text] of Object.entries(fileContents)) {
+    const lang = inferLang(fp);
+    if (!lang || !text) continue;
+    const fns = extractFunctions(text, lang);
+    if (!fns.length) continue;
+    for (const fn of fns) {
+      for (const bug of findMissedUnlocks(fn, lang)) {
+        findings.push({
+          id: `concurrency:${bug.kind}:${fp}:${bug.startLine}:${bug.lock}`,
+          file: fp,
+          line: bug.startLine,
+          vuln: bug.kind === 'missed-unlock'
+            ? `Concurrency: ${fn.name}() acquires ${bug.lock} but no matching unlock`
+            : `Concurrency: ${fn.name}() can return without releasing ${bug.lock}`,
+          severity: 'medium',
+          family: 'concurrency-bug',
+          confidence: 0.5,
+          remediation: bug.remediation || 'Release the lock on every exit path (defer / try-finally / context manager).',
+        });
+      }
+      for (const bug of findFireAndForget(fn, lang)) {
+        findings.push({
+          id: `concurrency:fire-forget:${fp}:${bug.startLine}`,
+          file: fp,
+          line: bug.startLine,
+          vuln: `Concurrency: fire-and-forget async call in ${fn.name}() — result not awaited`,
+          severity: 'low',
+          family: 'concurrency-bug',
+          confidence: 0.4,
+          remediation: 'Await the promise / call .get() on the future / use asyncio.gather.',
+        });
+      }
+    }
+    for (const bug of findDeadlockCycles(fns)) {
+      findings.push({
+        id: `concurrency:deadlock:${fp}:${bug.startLineA}-${bug.startLineB}`,
+        file: fp,
+        line: bug.startLineA,
+        vuln: `Concurrency: potential deadlock — ${bug.order}`,
+        severity: 'high',
+        family: 'concurrency-bug',
+        confidence: 0.4,
+        remediation: 'Acquire locks in a consistent global order across all call sites.',
+      });
+    }
+  }
+  return findings;
+}

package/src/posture/confidence.js ADDED Viewed

@@ -0,0 +1,65 @@
+// Calibrated confidence score (0.0–1.0) per finding.
+//
+// Layered on top of the existing triage score, evidence count, parser type,
+// and sanitizer signals. Maps the engine's various internal trust signals
+// into a single normalized field that downstream consumers (Claude Code UX,
+// SARIF emit, validator pipelines) can rely on.
+//
+// Output:
+//   f.confidence ∈ [0,1]   — combined confidence the finding is real
+//   f.confidenceTier       — 'high' | 'medium' | 'low' | 'very-low'
+//
+// Existing fields preserved (triageScore/triageLabel are unchanged).
+const PARSER_PRIOR = { // Additive confidence boost looked up by f.parser in annotateConfidence(); parsers not listed get no boost.
+  AST: 0.10,        // AST detectors are precise
+  CHAIN: 0.12,      // attack chains are confirmed by multiple findings
+  CONFIRMED: 0.20,  // explicitly confirmed by cross-file taint
+  VALIDATOR: 0.25,  // LLM validator accepted
+};
+const SEVERITY_PRIOR = { // Starting confidence looked up by f.severity when a finding carries no confidence of its own; annotateConfidence() falls back to 0.40 for other severities.
+  critical: 0.85,
+  high: 0.75,
+  medium: 0.55,
+  low: 0.35,
+  info: 0.20,
+};
+export function annotateConfidence(findings) {
+  if (!Array.isArray(findings)) return;
+  for (const f of findings) {
+    if (!f || typeof f !== 'object') continue;
+    // If the finding already shipped with a hand-tuned confidence (e.g. jwt-exp
+    // emits 0.85/0.95), keep that but still normalize the tier label.
+    let conf = typeof f.confidence === 'number' ? f.confidence : null;
+    if (conf == null) {
+      conf = SEVERITY_PRIOR[f.severity] ?? 0.40;
+      // Triage score in [0,100] is the strongest signal we have today; weight it.
+      if (typeof f.triageScore === 'number') {
+        conf = 0.5 * conf + 0.5 * (f.triageScore / 100);
+      }
+      const parserBoost = PARSER_PRIOR[f.parser] || 0;
+      conf = Math.min(1, conf + parserBoost);
+      if (f.evidence && f.evidence.length > 1) conf = Math.min(1, conf + 0.05 * (f.evidence.length - 1));
+      if (f.sanitizerMismatch) conf = Math.min(1, conf + 0.05);
+      if (f.isSanitized) conf *= 0.10;
+      if (f.routeRooted) conf = Math.min(1, conf + 0.05);
+      if (f.guards && f.guards.length) conf *= 0.80;
+      if (f.reachable === false) conf *= 0.55;
+      if (f.unvalidated) conf *= 0.85;   // LLM validator unavailable
+      if (f.llmOnly) conf *= 0.70;       // LLM-only finding, no Layer-2 path
+    }
+    conf = Math.max(0, Math.min(1, conf));
+    f.confidence = Math.round(conf * 1000) / 1000;
+    // Premortem 3R-15: derive tier from the 2-decimal display value so a
+    // finding reported as "0.75" never lands in two tiers depending on the
+    // viewer's rounding. Add a +0.005 epsilon to anchor cutoffs to the
+    // displayed rounded value (3-decimal raw 0.745 → 2-decimal 0.75 → high).
+    const display = Math.round(f.confidence * 100) / 100;
+    if (display >= 0.75) f.confidenceTier = 'high';
+    else if (display >= 0.50) f.confidenceTier = 'medium';
+    else if (display >= 0.25) f.confidenceTier = 'low';
+    else f.confidenceTier = 'very-low';
+  }
+}