@clear-capabilities/agentic-security-scanner 0.74.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (331) hide show
  1. package/CHANGELOG.md +1580 -0
  2. package/bin/.agentic-security/findings.json +1577 -0
  3. package/bin/.agentic-security/last-scan.json +1577 -0
  4. package/bin/.agentic-security/last-scan.json.sig +1 -0
  5. package/bin/.agentic-security/scan-history.json +465 -0
  6. package/bin/.agentic-security/streak.json +25 -0
  7. package/bin/agentic-security-audit.js +198 -0
  8. package/bin/agentic-security-consistency.js +80 -0
  9. package/bin/agentic-security-diff.js +136 -0
  10. package/bin/agentic-security-lsp.js +12 -0
  11. package/bin/agentic-security-mcp.js +40 -0
  12. package/bin/agentic-security-rule.js +153 -0
  13. package/bin/agentic-security.js +1683 -0
  14. package/dist/117.index.js +207 -0
  15. package/dist/178.index.js +250 -0
  16. package/dist/218.index.js +793 -0
  17. package/dist/227.index.js +192 -0
  18. package/dist/301.index.js +167 -0
  19. package/dist/384.index.js +18 -0
  20. package/dist/476.index.js +126 -0
  21. package/dist/513.index.js +373 -0
  22. package/dist/520.index.js +13 -0
  23. package/dist/601.index.js +1038 -0
  24. package/dist/634.index.js +1892 -0
  25. package/dist/637.index.js +216 -0
  26. package/dist/660.index.js +131 -0
  27. package/dist/675.index.js +451 -0
  28. package/dist/826.index.js +188 -0
  29. package/dist/830.index.js +133 -0
  30. package/dist/agentic-security.mjs +272 -0
  31. package/dist/agentic-security.mjs.sha256 +1 -0
  32. package/dist/calibration-seed.json +27 -0
  33. package/package.json +77 -0
  34. package/src/.agentic-security/findings.json +80844 -0
  35. package/src/.agentic-security/last-scan.json +80844 -0
  36. package/src/.agentic-security/last-scan.json.sig +1 -0
  37. package/src/.agentic-security/scan-history.json +8408 -0
  38. package/src/.agentic-security/streak.json +26 -0
  39. package/src/badge.js +188 -0
  40. package/src/compare.js +203 -0
  41. package/src/dataflow/.agentic-security/findings.json +3487 -0
  42. package/src/dataflow/.agentic-security/last-scan.json +3487 -0
  43. package/src/dataflow/.agentic-security/last-scan.json.sig +1 -0
  44. package/src/dataflow/.agentic-security/scan-history.json +735 -0
  45. package/src/dataflow/.agentic-security/streak.json +24 -0
  46. package/src/dataflow/CLAUDE.md +38 -0
  47. package/src/dataflow/access-paths.js +172 -0
  48. package/src/dataflow/async-sequencing.js +177 -0
  49. package/src/dataflow/backward.js +201 -0
  50. package/src/dataflow/catalog-expanded.js +485 -0
  51. package/src/dataflow/catalog.js +659 -0
  52. package/src/dataflow/cross-repo.js +219 -0
  53. package/src/dataflow/engine.js +588 -0
  54. package/src/dataflow/exception-flow.js +116 -0
  55. package/src/dataflow/exploit-prover.js +187 -0
  56. package/src/dataflow/higher-order.js +221 -0
  57. package/src/dataflow/ifds.js +347 -0
  58. package/src/dataflow/implicit-flow.js +129 -0
  59. package/src/dataflow/incremental.js +229 -0
  60. package/src/dataflow/index.js +181 -0
  61. package/src/dataflow/numeric-domain.js +192 -0
  62. package/src/dataflow/path-feasibility.js +114 -0
  63. package/src/dataflow/points-to.js +337 -0
  64. package/src/dataflow/polyglot.js +190 -0
  65. package/src/dataflow/proven-clean.js +159 -0
  66. package/src/dataflow/receiver-context.js +76 -0
  67. package/src/dataflow/sanitizer-proof.js +154 -0
  68. package/src/dataflow/soft-taint.js +140 -0
  69. package/src/dataflow/string-domain.js +234 -0
  70. package/src/dataflow/stub-aware-filter.js +100 -0
  71. package/src/dataflow/summaries.js +132 -0
  72. package/src/dataflow/symbolic-exec.js +238 -0
  73. package/src/dataflow/tabulation.js +135 -0
  74. package/src/engine.js +7763 -0
  75. package/src/history-scan.js +229 -0
  76. package/src/index.js +3 -0
  77. package/src/integrations/.agentic-security/findings.json +1504 -0
  78. package/src/integrations/.agentic-security/last-scan.json +1504 -0
  79. package/src/integrations/.agentic-security/scan-history.json +40 -0
  80. package/src/integrations/.agentic-security/streak.json +21 -0
  81. package/src/integrations/index.js +321 -0
  82. package/src/integrations/tickets.js +200 -0
  83. package/src/ir/.agentic-security/findings.json +3036 -0
  84. package/src/ir/.agentic-security/last-scan.json +3036 -0
  85. package/src/ir/.agentic-security/last-scan.json.sig +1 -0
  86. package/src/ir/.agentic-security/scan-history.json +364 -0
  87. package/src/ir/.agentic-security/streak.json +23 -0
  88. package/src/ir/CLAUDE.md +172 -0
  89. package/src/ir/callgraph.js +73 -0
  90. package/src/ir/class-hierarchy.js +195 -0
  91. package/src/ir/index.js +152 -0
  92. package/src/ir/parser-cs.js +260 -0
  93. package/src/ir/parser-java.js +286 -0
  94. package/src/ir/parser-js.js +413 -0
  95. package/src/ir/parser-kt.js +258 -0
  96. package/src/ir/parser-py-cst.js +136 -0
  97. package/src/ir/parser-py.helper.py +501 -0
  98. package/src/ir/parser-py.js +312 -0
  99. package/src/ir/ssa.js +315 -0
  100. package/src/ir/type-stubs.js +288 -0
  101. package/src/leaderboard.js +152 -0
  102. package/src/llm-validator/.agentic-security/findings.json +1891 -0
  103. package/src/llm-validator/.agentic-security/last-scan.json +1891 -0
  104. package/src/llm-validator/.agentic-security/last-scan.json.sig +1 -0
  105. package/src/llm-validator/.agentic-security/scan-history.json +168 -0
  106. package/src/llm-validator/.agentic-security/streak.json +20 -0
  107. package/src/llm-validator/consistency.js +141 -0
  108. package/src/llm-validator/index.js +437 -0
  109. package/src/lsp/.agentic-security/findings.json +28 -0
  110. package/src/lsp/.agentic-security/last-scan.json +28 -0
  111. package/src/lsp/.agentic-security/scan-history.json +79 -0
  112. package/src/lsp/.agentic-security/streak.json +22 -0
  113. package/src/lsp/server.js +275 -0
  114. package/src/mcp/.agentic-security/findings.json +8358 -0
  115. package/src/mcp/.agentic-security/last-scan.json +8358 -0
  116. package/src/mcp/.agentic-security/last-scan.json.sig +1 -0
  117. package/src/mcp/.agentic-security/scan-history.json +1125 -0
  118. package/src/mcp/.agentic-security/streak.json +22 -0
  119. package/src/mcp/CLAUDE.md +54 -0
  120. package/src/mcp/audit.js +136 -0
  121. package/src/mcp/redact.js +75 -0
  122. package/src/mcp/server.js +158 -0
  123. package/src/mcp/stdio.js +83 -0
  124. package/src/mcp/tools.js +940 -0
  125. package/src/mcp/validate.js +49 -0
  126. package/src/personality.js +164 -0
  127. package/src/poc-video.js +239 -0
  128. package/src/posture/.agentic-security/findings.json +51239 -0
  129. package/src/posture/.agentic-security/last-scan.json +51239 -0
  130. package/src/posture/.agentic-security/last-scan.json.sig +1 -0
  131. package/src/posture/.agentic-security/scan-history.json +5557 -0
  132. package/src/posture/.agentic-security/streak.json +24 -0
  133. package/src/posture/CLAUDE.md +42 -0
  134. package/src/posture/adversarial-self-test.js +114 -0
  135. package/src/posture/adversary-agent.js +204 -0
  136. package/src/posture/agents-memory.js +135 -0
  137. package/src/posture/ai-code-fingerprint.js +171 -0
  138. package/src/posture/aibom.js +284 -0
  139. package/src/posture/api-inventory.js +96 -0
  140. package/src/posture/attack-playbooks.js +305 -0
  141. package/src/posture/auditor-agent.js +115 -0
  142. package/src/posture/auth-posture-import.js +135 -0
  143. package/src/posture/baseline-compare.js +114 -0
  144. package/src/posture/blast-radius.js +836 -0
  145. package/src/posture/bounty-prediction.js +141 -0
  146. package/src/posture/business-logic.js +239 -0
  147. package/src/posture/calibration-drift.js +93 -0
  148. package/src/posture/calibration-seed.json +27 -0
  149. package/src/posture/calibration.js +204 -0
  150. package/src/posture/clustering.js +75 -0
  151. package/src/posture/concurrency-checker.js +265 -0
  152. package/src/posture/confidence.js +65 -0
  153. package/src/posture/container-runtime.js +149 -0
  154. package/src/posture/counterfactual.js +109 -0
  155. package/src/posture/cross-lang-graphql.js +165 -0
  156. package/src/posture/cross-lang-grpc.js +166 -0
  157. package/src/posture/cross-lang-meta.js +101 -0
  158. package/src/posture/cross-lang-openapi.js +187 -0
  159. package/src/posture/cross-lang-orm.js +153 -0
  160. package/src/posture/cross-lang-queues.js +210 -0
  161. package/src/posture/crown-jewels.js +110 -0
  162. package/src/posture/custom-rules.js +361 -0
  163. package/src/posture/cve-alert-daemon.js +433 -0
  164. package/src/posture/cve-lookup.js +129 -0
  165. package/src/posture/dead-code.js +430 -0
  166. package/src/posture/defender-agent.js +158 -0
  167. package/src/posture/deploy-platform.js +204 -0
  168. package/src/posture/detector-fuzz.js +61 -0
  169. package/src/posture/deterministic.js +99 -0
  170. package/src/posture/drift.js +165 -0
  171. package/src/posture/epss.js +156 -0
  172. package/src/posture/exploitability-probability.js +212 -0
  173. package/src/posture/exploitability.js +121 -0
  174. package/src/posture/feature-flags.js +110 -0
  175. package/src/posture/finding-defaults.js +132 -0
  176. package/src/posture/fix-history.js +411 -0
  177. package/src/posture/fix-plan.js +121 -0
  178. package/src/posture/fix-verify-loop.js +157 -0
  179. package/src/posture/fix-verify.js +130 -0
  180. package/src/posture/flow-narration.js +105 -0
  181. package/src/posture/grader-calibration.js +156 -0
  182. package/src/posture/harness-discovery.js +113 -0
  183. package/src/posture/holdout-eval.js +144 -0
  184. package/src/posture/iac-reachability.js +163 -0
  185. package/src/posture/iam-policy.js +128 -0
  186. package/src/posture/integrity.js +97 -0
  187. package/src/posture/learning.js +166 -0
  188. package/src/posture/license-policy.js +109 -0
  189. package/src/posture/llm-redteam-prompts.js +418 -0
  190. package/src/posture/llm-redteam.js +303 -0
  191. package/src/posture/material-change.js +163 -0
  192. package/src/posture/mitigation-composite.js +55 -0
  193. package/src/posture/mttr.js +91 -0
  194. package/src/posture/network-policy-import.js +126 -0
  195. package/src/posture/path-predicates.js +99 -0
  196. package/src/posture/persona-prioritization.js +153 -0
  197. package/src/posture/poc-cwe-map.js +51 -0
  198. package/src/posture/poc-generator.js +500 -0
  199. package/src/posture/policy-gate.js +174 -0
  200. package/src/posture/pre-incident-archaeology.js +110 -0
  201. package/src/posture/profile.js +93 -0
  202. package/src/posture/reachability-filter.js +42 -0
  203. package/src/posture/regression-test-gen.js +200 -0
  204. package/src/posture/reverse-blast-radius.js +110 -0
  205. package/src/posture/router.js +109 -0
  206. package/src/posture/rule-overrides.js +198 -0
  207. package/src/posture/rule-pack-signing.js +209 -0
  208. package/src/posture/rule-packs.js +143 -0
  209. package/src/posture/rule-synthesis.js +108 -0
  210. package/src/posture/ruleset-version.js +71 -0
  211. package/src/posture/sbom.js +129 -0
  212. package/src/posture/schema-aware-bridge.js +207 -0
  213. package/src/posture/security-trend.js +87 -0
  214. package/src/posture/semantic-clone.js +114 -0
  215. package/src/posture/specification-mining.js +170 -0
  216. package/src/posture/stable-id.js +75 -0
  217. package/src/posture/stack-playbook.js +229 -0
  218. package/src/posture/streak.js +249 -0
  219. package/src/posture/suppressions.js +135 -0
  220. package/src/posture/telemetry-ingest.js +112 -0
  221. package/src/posture/threat-model.js +145 -0
  222. package/src/posture/three-agent-pipeline.js +74 -0
  223. package/src/posture/triage.js +146 -0
  224. package/src/posture/trust-boundary-diagram.js +115 -0
  225. package/src/posture/type-narrowing.js +129 -0
  226. package/src/posture/validator-metrics.js +179 -0
  227. package/src/posture/verifier-ephemeral.js +118 -0
  228. package/src/posture/verifier-target.js +147 -0
  229. package/src/posture/verifier.js +257 -0
  230. package/src/posture/version.js +75 -0
  231. package/src/posture/waf-ingest.js +200 -0
  232. package/src/posture/why-fired.js +141 -0
  233. package/src/pr-comment.js +172 -0
  234. package/src/pr-delta.js +198 -0
  235. package/src/report/.agentic-security/findings.json +79 -0
  236. package/src/report/.agentic-security/last-scan.json +79 -0
  237. package/src/report/.agentic-security/last-scan.json.sig +1 -0
  238. package/src/report/.agentic-security/scan-history.json +332 -0
  239. package/src/report/.agentic-security/streak.json +23 -0
  240. package/src/report/index.js +1136 -0
  241. package/src/report/mascot.js +42 -0
  242. package/src/runScan.js +141 -0
  243. package/src/sast/.agentic-security/findings.json +5051 -0
  244. package/src/sast/.agentic-security/last-scan.json +5051 -0
  245. package/src/sast/.agentic-security/last-scan.json.sig +1 -0
  246. package/src/sast/.agentic-security/scan-history.json +788 -0
  247. package/src/sast/.agentic-security/streak.json +23 -0
  248. package/src/sast/CLAUDE.md +39 -0
  249. package/src/sast/_comment-strip.js +46 -0
  250. package/src/sast/agent-tool-escalation.js +131 -0
  251. package/src/sast/auth-provider.js +171 -0
  252. package/src/sast/authz.js +236 -0
  253. package/src/sast/bench-shape/.agentic-security/findings.json +28 -0
  254. package/src/sast/bench-shape/.agentic-security/last-scan.json +28 -0
  255. package/src/sast/bench-shape/.agentic-security/scan-history.json +24 -0
  256. package/src/sast/bench-shape/.agentic-security/streak.json +22 -0
  257. package/src/sast/bench-shape/index.js +62 -0
  258. package/src/sast/claude-hook-injection.js +199 -0
  259. package/src/sast/claude-md-prompt-injection.js +170 -0
  260. package/src/sast/claude-settings.js +165 -0
  261. package/src/sast/client-side.js +149 -0
  262. package/src/sast/cpp-bench-extras.js +122 -0
  263. package/src/sast/cpp-dataflow.js +430 -0
  264. package/src/sast/cpp.js +248 -0
  265. package/src/sast/csharp.js +152 -0
  266. package/src/sast/csrf.js +82 -0
  267. package/src/sast/dart-flutter.js +173 -0
  268. package/src/sast/db-rls.js +147 -0
  269. package/src/sast/db-taint.js +215 -0
  270. package/src/sast/defi-deep.js +242 -0
  271. package/src/sast/deserialization-gadgets.js +113 -0
  272. package/src/sast/django-hardening.js +230 -0
  273. package/src/sast/env-hygiene.js +125 -0
  274. package/src/sast/fastapi-hardening.js +145 -0
  275. package/src/sast/go-extended.js +84 -0
  276. package/src/sast/host-header.js +106 -0
  277. package/src/sast/index.js +17 -0
  278. package/src/sast/java-ast-folding.js +561 -0
  279. package/src/sast/java-bench-extras.js +708 -0
  280. package/src/sast/java-collection-passthrough.js +178 -0
  281. package/src/sast/java-constant-fold.js +244 -0
  282. package/src/sast/java-deserialization.js +125 -0
  283. package/src/sast/jndi.js +104 -0
  284. package/src/sast/juliet-shape.js +324 -0
  285. package/src/sast/jwt-exp.js +104 -0
  286. package/src/sast/kotlin.js +82 -0
  287. package/src/sast/laravel-hardening.js +198 -0
  288. package/src/sast/ldap-injection.js +100 -0
  289. package/src/sast/llm-owasp.js +465 -0
  290. package/src/sast/llm-stored-prompt.js +103 -0
  291. package/src/sast/llm-trading-agent.js +161 -0
  292. package/src/sast/llm.js +308 -0
  293. package/src/sast/logic.js +140 -0
  294. package/src/sast/mass-assignment.js +101 -0
  295. package/src/sast/mcp-audit.js +242 -0
  296. package/src/sast/mobile-manifest.js +195 -0
  297. package/src/sast/model-load.js +164 -0
  298. package/src/sast/mutation-xss.js +87 -0
  299. package/src/sast/nosql-injection.js +82 -0
  300. package/src/sast/open-redirect.js +119 -0
  301. package/src/sast/php.js +91 -0
  302. package/src/sast/pipeline.js +122 -0
  303. package/src/sast/primary-cwe-java.js +155 -0
  304. package/src/sast/prompt-firewall.js +151 -0
  305. package/src/sast/prompt-template.js +157 -0
  306. package/src/sast/prototype-pollution.js +112 -0
  307. package/src/sast/python-sinks.js +195 -0
  308. package/src/sast/quarkus-hardening.js +102 -0
  309. package/src/sast/rag-poisoning.js +118 -0
  310. package/src/sast/rate-limit.js +128 -0
  311. package/src/sast/response-splitting.js +138 -0
  312. package/src/sast/ruby.js +108 -0
  313. package/src/sast/rust.js +105 -0
  314. package/src/sast/solidity.js +167 -0
  315. package/src/sast/springboot-hardening.js +186 -0
  316. package/src/sast/ssrf-cloud-metadata.js +80 -0
  317. package/src/sast/ssti.js +116 -0
  318. package/src/sast/swift.js +162 -0
  319. package/src/sast/toctou.js +95 -0
  320. package/src/sast/webhook.js +101 -0
  321. package/src/sast/xpath-injection.js +51 -0
  322. package/src/sast/xxe.js +140 -0
  323. package/src/sast/zip-slip.js +200 -0
  324. package/src/sca/base-images.json +45 -0
  325. package/src/sca/container.js +107 -0
  326. package/src/sca/dep-confusion.js +134 -0
  327. package/src/sca/index.js +6 -0
  328. package/src/sca/popular-packages.json +41 -0
  329. package/src/sca/sarif-ingest.js +187 -0
  330. package/src/sca/vuln-function-hints.json +89 -0
  331. package/src/secrets/index.js +4 -0
@@ -0,0 +1,74 @@
1
+ // Three-agent adversarial review pipeline.
2
+ //
3
+ // Composes the existing single-step adversary-agent (red team) with the
4
+ // defender-agent and auditor-agent into a single bounded cascade per
5
+ // finding:
6
+ //
7
+ // red.runAgent(finding, target) → attack transcript + outcome
8
+ // blue.runDefender(finding, red) → hardening recommendations
9
+ // auditor.runAuditor(finding, red, blue) → final verdict
10
+ //
11
+ // Each phase is hash-chained; the final output is a structured envelope
12
+ // the slash command can render. Without a configured LLM endpoint, every
13
+ // phase short-circuits to its static-analysis equivalent — the cascade
14
+ // still produces a useful artifact (static hardening + static verdict).
15
+
16
+ import { runAgent as runRedTeam } from './adversary-agent.js';
17
+ import { runDefender } from './defender-agent.js';
18
+ import { runAuditor } from './auditor-agent.js';
19
+
20
/** Default limits for the red-team phase: 30 calls and 8 minutes of wall-clock time. */
const DEFAULT_RED_BUDGET = { maxCalls: 30, maxWallMs: 8 * 60 * 1000 };

/**
 * Run the red-team, defender and auditor phases in sequence for one finding
 * and condense their results into a single envelope.
 *
 * @param {object} finding - Finding under review. Only `stableId`, `file`,
 *   `line`, `vuln`, `family` and `severity` are copied into the envelope.
 * @param {object} [opts]
 * @param {string} [opts.target] - Forwarded to the red team; defaults to ''.
 * @param {number} [opts.maxCalls] - Red-team call budget (default 30).
 * @param {number} [opts.maxWallMs] - Red-team wall-clock budget in ms (default 8 min).
 * @param {*} [opts.redLlmInvoke] - Passed to the red team as `llmInvoke`.
 * @param {*} [opts.redExecuteTool] - Passed to the red team as `executeTool`.
 * @param {*} [opts.blueLlmInvoke] - Passed to the defender as `llmInvoke`.
 * @param {*} [opts.auditorLlmInvoke] - Passed to the auditor as `llmInvoke`.
 * @returns {Promise<object>} `{ startedAt, finishedAt, finding, red, blue, auditor, target }`.
 */
export async function runThreeAgentReview(finding, opts = {}) {
  const startedAt = new Date().toISOString();
  const target = opts.target || '';

  // Small accessors shared by the envelope below.
  const findingField = (key) => finding?.[key] || null;
  const chainHeadOf = (transcript) => transcript?.chainHead || null;

  // Phase 1 — red team, bounded by the call / wall-clock budget.
  const redOptions = {
    target,
    maxCalls: opts.maxCalls ?? DEFAULT_RED_BUDGET.maxCalls,
    maxWallMs: opts.maxWallMs ?? DEFAULT_RED_BUDGET.maxWallMs,
    llmInvoke: opts.redLlmInvoke,
    executeTool: opts.redExecuteTool,
  };
  const red = await runRedTeam(finding, redOptions);

  // Phase 2 — blue team, which is handed the red transcript.
  const blue = await runDefender(finding, red.transcript, { llmInvoke: opts.blueLlmInvoke });

  // Phase 3 — auditor, which is handed the red transcript and the blue result.
  const audit = await runAuditor(finding, red.transcript, blue, { llmInvoke: opts.auditorLlmInvoke });

  // Only transcript entries that carry a `tool` count as tool calls.
  let toolCallCount = 0;
  for (const entry of red.transcript?.entries || []) {
    if (entry.tool) toolCallCount += 1;
  }

  return {
    startedAt,
    finishedAt: new Date().toISOString(),
    finding: {
      stableId: findingField('stableId'),
      file: findingField('file'),
      line: findingField('line'),
      vuln: findingField('vuln'),
      family: findingField('family'),
      severity: findingField('severity'),
    },
    red: {
      outcome: red.outcome,
      toolCallCount,
      transcriptHead: chainHeadOf(red.transcript),
    },
    blue: {
      mode: blue.mode,
      recommendations: blue.recommendations || [],
      transcriptHead: chainHeadOf(blue.transcript),
    },
    auditor: {
      verdict: audit.verdict,
      rationale: audit.rationale,
      mode: audit.mode,
      transcriptHead: chainHeadOf(audit.transcript),
    },
    target,
  };
}
@@ -0,0 +1,146 @@
1
+ // Pro-only triage layer (R6). JSON-backed store at .agentic-security/triage.json.
2
+ // Tracks state, assignees, comments, and transitions per finding. Computes
3
+ // MTTR and trend stats from the transition log.
4
+
5
+ import * as fs from 'node:fs';
6
+ import * as path from 'node:path';
7
+
8
// Location of the triage store, relative to the scan root.
const STORE_PATH = '.agentic-security/triage.json';

// States a finding may be moved to; `transition()` rejects anything else.
export const STATES = ['open', 'in-progress', 'fixed', 'wont-fix', 'false-positive'];

/**
 * Resolve the absolute-or-relative path of the triage store.
 *
 * @param {string} [scanRoot] - Project root; a falsy value falls back to `process.cwd()`.
 * @returns {string} `scanRoot` joined with the store's relative path.
 */
function _storePath(scanRoot) {
  const base = scanRoot || process.cwd();
  return path.join(base, STORE_PATH);
}
15
+
16
/**
 * Load the triage store for a project.
 *
 * Best-effort by design: a missing, unreadable or unparsable store yields an
 * empty store instead of an error. The parsed value is also normalised so
 * callers can rely on `findings` being a plain object and `transitions`
 * being an array — a store that parses but has the wrong shape (for example
 * `{}`, `null`, or an array) previously made every caller throw.
 *
 * @param {string} [scanRoot] - Project root, forwarded to `_storePath`.
 * @returns {{findings: object, transitions: Array}} The store; any extra
 *   top-level keys present in the file are preserved.
 */
export function loadTriage(scanRoot) {
  const emptyStore = () => ({ findings: {}, transitions: [] });
  const isRecord = (value) => value !== null && typeof value === 'object' && !Array.isArray(value);

  let parsed;
  try {
    // A missing file raises ENOENT here, so no separate existence check is needed.
    parsed = JSON.parse(fs.readFileSync(_storePath(scanRoot), 'utf8'));
  } catch (_) {
    return emptyStore();
  }

  if (!isRecord(parsed)) return emptyStore();
  if (!isRecord(parsed.findings)) parsed.findings = {};
  if (!Array.isArray(parsed.transitions)) parsed.transitions = [];
  return parsed;
}
22
+
23
/**
 * Write the triage store to disk as 2-space-indented JSON, creating the
 * containing directory first when it does not exist yet.
 *
 * @param {string} [scanRoot] - Project root, forwarded to `_storePath`.
 * @param {object} data - Store contents to serialise.
 */
function _save(scanRoot, data) {
  const target = _storePath(scanRoot);
  const parentDir = path.dirname(target);
  fs.mkdirSync(parentDir, { recursive: true });
  const serialized = JSON.stringify(data, null, 2);
  fs.writeFileSync(target, serialized);
}
28
+
29
/**
 * Reconcile the triage store with the latest scan and persist the result.
 *
 * - A finding not yet in the store is added in state 'open', with a
 *   `null → open` transition.
 * - A stored finding that the scan no longer reports, and that is still
 *   'open' or 'in-progress', is moved to 'fixed' with an `automatic`
 *   transition. The transition records the state the finding was actually
 *   in (it used to be logged as 'open' even for 'in-progress' findings).
 *
 * NOTE(review): a finding already in 'fixed' that shows up again in a later
 * scan is left untouched — confirm whether it should be reopened.
 *
 * @param {string} [scanRoot] - Project root that owns the store.
 * @param {Iterable<object>} findings - Findings from the scan. Each is keyed
 *   by `id`, or by `file:line:vuln` when `id` is falsy.
 * @returns {object} The updated store, as written to disk.
 */
export function syncWithScan(scanRoot, findings) {
  const data = loadTriage(scanRoot);
  const now = new Date().toISOString();
  const seen = new Set();

  for (const f of findings) {
    const id = f.id || `${f.file}:${f.line}:${f.vuln}`;
    seen.add(id);
    if (data.findings[id]) continue;

    data.findings[id] = {
      id,
      file: f.file,
      line: f.line,
      vuln: f.vuln,
      severity: f.severity,
      state: 'open',
      assignee: null,
      opened_at: now,
      comments: [],
    };
    data.transitions.push({ id, from: null, to: 'open', at: now });
  }

  // Auto-close findings the scanner no longer sees; resolved states stay as they are.
  const resolvedStates = new Set(['fixed', 'wont-fix', 'false-positive']);
  for (const [id, cur] of Object.entries(data.findings)) {
    if (seen.has(id) || resolvedStates.has(cur.state)) continue;
    const from = cur.state;
    cur.state = 'fixed';
    cur.fixed_at = now;
    data.transitions.push({ id, from, to: 'fixed', at: now, automatic: true });
  }

  _save(scanRoot, data);
  return data;
}
65
+
66
/**
 * Set the assignee of a stored finding and stamp `assigned_at`.
 *
 * @param {string} [scanRoot] - Project root that owns the store.
 * @param {string} id - Finding id as stored in the triage file.
 * @param {*} assignee - Value recorded as the new assignee.
 * @returns {{ok: boolean, error?: string}} `{ ok: true }`, or an error
 *   result when the id is not in the store (nothing is written then).
 */
export function assign(scanRoot, id, assignee) {
  const data = loadTriage(scanRoot);
  const entry = data.findings[id];
  if (!entry) {
    return { ok: false, error: 'unknown finding id' };
  }

  entry.assignee = assignee;
  entry.assigned_at = new Date().toISOString();
  _save(scanRoot, data);
  return { ok: true };
}
74
+
75
/**
 * Move a stored finding to a new state and append the change to the
 * transition log.
 *
 * One timestamp is taken per call, so `fixed_at` on the finding and `at` on
 * the logged transition always agree (they used to come from two separate
 * clock reads).
 *
 * @param {string} [scanRoot] - Project root that owns the store.
 * @param {string} id - Finding id as stored in the triage file.
 * @param {string} toState - Target state; must be one of `STATES`.
 * @param {string} [comment] - Optional note stored on the transition entry.
 * @returns {{ok: boolean, error?: string}} `{ ok: true }`, or an error
 *   result for an unknown id or an invalid state (nothing is written then).
 */
export function transition(scanRoot, id, toState, comment) {
  const data = loadTriage(scanRoot);
  const cur = data.findings[id];
  if (!cur) return { ok: false, error: 'unknown finding id' };
  if (!STATES.includes(toState)) return { ok: false, error: `invalid state: ${toState}` };

  const now = new Date().toISOString();
  const from = cur.state;
  cur.state = toState;
  if (toState === 'fixed') cur.fixed_at = now;
  data.transitions.push({ id, from, to: toState, at: now, comment });

  _save(scanRoot, data);
  return { ok: true };
}
87
+
88
/**
 * Append a timestamped comment to a stored finding.
 *
 * @param {string} [scanRoot] - Project root that owns the store.
 * @param {string} id - Finding id as stored in the triage file.
 * @param {*} author - Recorded as the comment's `author`.
 * @param {*} body - Recorded as the comment's `body`.
 * @returns {{ok: boolean, error?: string}} `{ ok: true }`, or an error
 *   result when the id is not in the store (nothing is written then).
 */
export function comment(scanRoot, id, author, body) {
  const data = loadTriage(scanRoot);
  const entry = data.findings[id];
  if (!entry) {
    return { ok: false, error: 'unknown finding id' };
  }

  const note = { author, body, at: new Date().toISOString() };
  entry.comments.push(note);
  _save(scanRoot, data);
  return { ok: true };
}
95
+
96
/**
 * List stored findings, optionally narrowed by a filter. Each filter field
 * is applied only when it is truthy, and all active fields must match.
 *
 * @param {string} [scanRoot] - Project root that owns the store.
 * @param {object} [filter]
 * @param {string} [filter.state] - Keep findings in exactly this state.
 * @param {string} [filter.severity] - Keep findings with exactly this severity.
 * @param {*} [filter.assignee] - Keep findings assigned to exactly this value.
 * @param {boolean} [filter.unassigned] - Keep findings with no assignee.
 * @returns {object[]} Matching findings, in store order.
 */
export function list(scanRoot, filter = {}) {
  const { findings } = loadTriage(scanRoot);

  const matches = (f) =>
    (!filter.state || f.state === filter.state) &&
    (!filter.severity || f.severity === filter.severity) &&
    (!filter.assignee || f.assignee === filter.assignee) &&
    (!filter.unassigned || !f.assignee);

  return Object.values(findings).filter(matches);
}
105
+
106
/**
 * Summarise triage activity over the last `sinceDays` days.
 *
 * Two corrections over the earlier version:
 *  - the median is a true median: for an even number of samples the two
 *    middle values are averaged (previously the upper one was returned);
 *  - only findings whose current state is 'fixed' contribute to MTTR, so a
 *    finding that was fixed and later reopened no longer counts through its
 *    stale `fixed_at`.
 *
 * @param {string} [scanRoot] - Project root that owns the store.
 * @param {number} [sinceDays=30] - Size of the look-back window in days.
 * @returns {{sinceDays: number, opened: number, closed: number, net: number,
 *   openBySev: object, medianMttrDays: (number|null), totalOpen: number}}
 *   `opened` / `closed` count transitions to 'open' / 'fixed' inside the
 *   window, `net` is `closed - opened`, `openBySev` and `totalOpen` cover
 *   findings currently 'open' or 'in-progress', and `medianMttrDays` is
 *   `null` when no finding was fixed inside the window.
 */
export function trend(scanRoot, sinceDays = 30) {
  const DAY_MS = 86400000;
  const data = loadTriage(scanRoot);
  const cutoff = Date.now() - sinceDays * DAY_MS;
  const findings = Object.values(data.findings);

  // Transitions inside the window; an unparsable `at` gives NaN and is skipped.
  let opened = 0;
  let closed = 0;
  for (const t of data.transitions) {
    if (!(new Date(t.at).getTime() >= cutoff)) continue;
    if (t.to === 'open') opened += 1;
    else if (t.to === 'fixed') closed += 1;
  }

  // Findings still awaiting a fix, bucketed by severity.
  const openBySev = { critical: 0, high: 0, medium: 0, low: 0 };
  let totalOpen = 0;
  for (const f of findings) {
    if (f.state !== 'open' && f.state !== 'in-progress') continue;
    totalOpen += 1;
    openBySev[f.severity] = (openBySev[f.severity] || 0) + 1;
  }

  // MTTR samples: (fixed_at - opened_at) for findings fixed inside the window.
  const durations = [];
  for (const f of findings) {
    if (f.state !== 'fixed' || !f.fixed_at) continue;
    const fixedAt = new Date(f.fixed_at).getTime();
    if (!(fixedAt >= cutoff)) continue;
    const dur = fixedAt - new Date(f.opened_at).getTime();
    if (dur > 0) durations.push(dur);
  }
  durations.sort((a, b) => a - b);

  let medianMttrDays = null;
  if (durations.length > 0) {
    const mid = Math.floor(durations.length / 2);
    const medianMs = durations.length % 2 === 1
      ? durations[mid]
      : (durations[mid - 1] + durations[mid]) / 2;
    medianMttrDays = medianMs / DAY_MS;
  }

  return {
    sinceDays,
    opened,
    closed,
    net: closed - opened,
    openBySev,
    medianMttrDays,
    totalOpen,
  };
}
@@ -0,0 +1,115 @@
1
+ // FR-UX-10 — Living trust-boundary diagram.
2
+ //
3
+ // Auto-generate a Mermaid diagram of the project's trust boundaries: HTTP
4
+ // edges (with method+path), queue producers/consumers, gRPC servers, DB
5
+ // edges, IaC-exposed resources. Findings on edges render as decorations.
6
+ //
7
+ // We emit Mermaid (text). The HTML report can render it natively;
8
+ // PR-comment bot can render the source.
9
+ //
10
+ // Output:
11
+ // {
12
+ // mermaid: string, // the Mermaid source
13
+ // nodes: [...], // {id, kind, label}
14
+ // edges: [...], // {from, to, kind, label?}
15
+ // decorations: [...], // {nodeId, severity, vuln, file, line}
16
+ // }
17
+
18
+ import { buildAssetInventory, buildTrustBoundaries } from './threat-model.js';
19
+
20
/**
 * Turn an arbitrary value into a node id made only of `[A-Za-z0-9_]`, at
 * most 40 characters long.
 *
 * Inputs that fit in 40 characters are returned exactly as before. Longer
 * inputs used to be cut at 40 characters, which dropped the trailing
 * `_<line>` of a long `file_line` key and made different boundaries in the
 * same file share one id. They now keep a 31-character prefix followed by
 * `_` and an 8-hex-digit FNV-1a hash of the full input, so distinct inputs
 * stay distinct while the length limit still holds.
 *
 * @param {*} s - Value to convert; it is stringified first.
 * @returns {string} The sanitised id.
 */
function sanitizeId(s) {
  const MAX_LEN = 40;
  const raw = String(s);
  const clean = raw.replace(/[^A-Za-z0-9_]/g, '_');
  if (clean.length <= MAX_LEN) return clean;

  // 32-bit FNV-1a over the raw input; Math.imul keeps the multiply in 32 bits.
  let hash = 0x811c9dc5;
  for (let i = 0; i < raw.length; i += 1) {
    hash ^= raw.charCodeAt(i);
    hash = Math.imul(hash, 0x01000193);
  }
  const suffix = (hash >>> 0).toString(16).padStart(8, '0');
  return `${clean.slice(0, MAX_LEN - suffix.length - 1)}_${suffix}`;
}
23
+
24
/**
 * Map a trust-boundary record to a diagram node.
 *
 * HTTP routes are keyed by their label when they have one, otherwise by
 * `file_line`; every other supported type is keyed by `file_line`.
 *
 * @param {object} b - Boundary with `type`, `file`, `line` and optional `label`.
 * @returns {{kind: string, id: string, label: string}|null} The node, or
 *   `null` for a boundary type the diagram does not draw.
 */
function nodeFor(b) {
  const locationId = () => sanitizeId(`${b.file}_${b.line}`);
  const location = () => `${b.file}:${b.line}`;

  switch (b.type) {
    case 'http-route':
    case 'http-route-py':
    case 'http-route-java':
      return {
        kind: 'route',
        id: 'route_' + sanitizeId(b.label || `${b.file}_${b.line}`),
        label: b.label || `route@${location()}`,
      };
    case 'queue-producer':
      return { kind: 'queue', id: 'qprod_' + locationId(), label: `producer: ${location()}` };
    case 'queue-consumer':
      return { kind: 'queue', id: 'qcons_' + locationId(), label: `consumer: ${location()}` };
    case 'grpc-server':
      return { kind: 'grpc', id: 'grpc_' + locationId(), label: `grpc@${location()}` };
    case 'db-edge':
      return { kind: 'db', id: 'db_' + locationId(), label: `db@${location()}` };
    default:
      return null;
  }
}
42
+
43
/**
 * Select the findings that sit next to a boundary: same file, and a line
 * number within 25 lines of the boundary's line (missing lines count as 0).
 *
 * @param {object} node - Diagram node for the boundary; not consulted here.
 * @param {*} findings - Scan findings; anything that is not an array yields [].
 * @param {object} boundary - Boundary providing `file` and `line`.
 * @returns {object[]} Matching findings, in their original order.
 */
function findingsForNode(node, findings, boundary) {
  if (!Array.isArray(findings)) return [];

  const nearby = [];
  for (const candidate of findings) {
    if (!candidate) continue;
    if (candidate.file !== boundary.file) continue;
    const distance = Math.abs((candidate.line || 0) - (boundary.line || 0));
    if (distance <= 25) nearby.push(candidate);
  }
  return nearby;
}
50
+
51
/**
 * Build the trust-boundary diagram for a project.
 *
 * Boundaries become nodes wired to two fixed nodes, INTERNET and APP; up to
 * 12 assets hang off APP as terminal nodes; findings near a boundary are
 * recorded as decorations and colour the node by their worst severity.
 *
 * Differences from the earlier version:
 *  - edges are de-duplicated, so several boundaries or assets that resolve
 *    to the same node no longer draw the same arrow repeatedly;
 *  - a `class` line is emitted only for a recognised severity, instead of
 *    producing names such as `sev_undefined`;
 *  - an asset without a `name` is labelled with its `file`, matching how its
 *    id is derived;
 *  - labels are stringified before escaping.
 *
 * @param {*} findings - Scan findings, forwarded to `findingsForNode`.
 * @param {*} fileContents - Forwarded to `buildTrustBoundaries` and
 *   `buildAssetInventory`.
 * @returns {{mermaid: string, nodes: object[], edges: object[], decorations: object[]}}
 *   The Mermaid source plus the structured nodes, edges and decorations.
 */
export function buildTrustBoundaryDiagram(findings, fileContents) {
  const boundaries = buildTrustBoundaries(fileContents);
  const assets = buildAssetInventory(fileContents);

  const nodes = new Map();
  const edges = [];
  const edgeKeys = new Set();
  const decorations = [];

  // Record an edge once; repeats of the same from/to/kind triple are dropped.
  const addEdge = (from, to, kind) => {
    const key = `${from}\u0000${to}\u0000${kind}`;
    if (edgeKeys.has(key)) return;
    edgeKeys.add(key);
    edges.push({ from, to, kind });
  };

  // INTERNET ── route ── service-internal
  const INTERNET = { id: 'INTERNET', kind: 'external', label: 'Internet' };
  const APP = { id: 'APP', kind: 'app', label: 'Application' };
  nodes.set(INTERNET.id, INTERNET);
  nodes.set(APP.id, APP);

  for (const b of boundaries) {
    const n = nodeFor(b);
    if (!n) continue;
    if (!nodes.has(n.id)) nodes.set(n.id, n);

    switch (n.kind) {
      case 'route':
        addEdge(INTERNET.id, n.id, 'http');
        addEdge(n.id, APP.id, 'invoke');
        break;
      case 'queue':
        addEdge(APP.id, n.id, 'queue');
        break;
      case 'grpc':
        addEdge(INTERNET.id, n.id, 'grpc');
        break;
      case 'db':
        addEdge(APP.id, n.id, 'db');
        break;
      default:
        break;
    }

    for (const f of findingsForNode(n, findings, b)) {
      decorations.push({
        nodeId: n.id,
        severity: f.severity,
        vuln: (f.vuln || '').slice(0, 60),
        file: f.file,
        line: f.line,
      });
    }
  }

  // Assets → terminal nodes off APP (first 12 only).
  for (const a of assets.slice(0, 12)) {
    const displayName = a.name || a.file || '';
    const id = 'asset_' + sanitizeId(`${a.category}_${a.name || a.file}`);
    if (!nodes.has(id)) nodes.set(id, { id, kind: 'asset', label: `${a.category}: ${displayName}` });
    addEdge(APP.id, id, 'asset');
  }

  // Render Mermaid.
  const lines = ['flowchart LR'];
  for (const n of nodes.values()) {
    // Double quotes would end the quoted Mermaid label early.
    const safe = String(n.label).replace(/"/g, "'").slice(0, 60);
    if (n.kind === 'external') lines.push(`  ${n.id}((${safe}))`);
    else if (n.kind === 'asset') lines.push(`  ${n.id}[/"${safe}"/]`);
    else if (n.kind === 'db') lines.push(`  ${n.id}[("${safe}")]`);
    else lines.push(`  ${n.id}["${safe}"]`);
  }
  for (const e of edges) lines.push(`  ${e.from} -->|${e.kind}| ${e.to}`);

  // Severity-styled class assignments: each decorated node takes its worst
  // recognised severity (lower rank = more severe).
  const severityRank = new Map([
    ['critical', 0],
    ['high', 1],
    ['medium', 2],
    ['low', 3],
    ['info', 4],
  ]);
  const worstByNode = new Map();
  for (const d of decorations) {
    const rank = severityRank.get(d.severity);
    if (rank === undefined) continue;
    const prev = worstByNode.get(d.nodeId);
    if (prev === undefined || rank < severityRank.get(prev)) worstByNode.set(d.nodeId, d.severity);
  }
  for (const [id, sev] of worstByNode) lines.push(`  class ${id} sev_${sev};`);
  lines.push('  classDef sev_critical fill:#ffcccc,stroke:#a00,stroke-width:2px;');
  lines.push('  classDef sev_high fill:#ffe0b2,stroke:#c60,stroke-width:2px;');
  lines.push('  classDef sev_medium fill:#fff3cd,stroke:#a80;');
  lines.push('  classDef sev_low fill:#e8eaf6,stroke:#557;');

  return {
    mermaid: lines.join('\n'),
    nodes: [...nodes.values()],
    edges,
    decorations,
  };
}
@@ -0,0 +1,129 @@
1
+ // FR-SEM-10 — Whole-program type narrowing (heuristic).
2
+ //
3
+ // When a function parameter is declared `any` / `unknown` / `interface{}` /
4
+ // `dynamic` but in practice every call site passes a typed value, downstream
5
+ // taint analysis sees the wide type and falsely warns "could be anything."
6
+ // This module performs a callsite-based narrowing pass: for each function
7
+ // with a wide parameter type, look at all call sites in the project; if every
8
+ // argument is provably typed (literal, typed variable, typed return), narrow
9
+ // the parameter for analysis purposes.
10
+ //
11
+ // The pass is INFORMATIONAL ONLY — it does not silently drop findings. It
12
+ // annotates `f.typeNarrowed: { from, to, callSites }` and lowers confidence
13
+ // by a small amount on the affected finding so downstream ranking accounts
14
+ // for the narrowing without erasing the finding.
15
+ //
16
+ // Languages covered (regex-level):
17
+ // TypeScript : `(x: any)` / `(x: unknown)`
18
+ // Python : `def f(x: Any)` / `def f(x: object)` (unannotated parameters are not matched)
19
+ // Go : `interface{}` / `any`
20
+
21
// Declaration patterns for functions that have at least one wide-typed
// parameter. Capture 1 is the function name, capture 2 the raw parameter list.
// The parameter list is matched with `[^)]*`, so a list that itself contains
// `)` is not matched as a whole.
const TS_ANY_PARAM_RE = /function\s+(\w+)\s*\(([^)]*\b\w+\s*:\s*(?:any|unknown)[^)]*)\)/g;
const TS_ANY_ARROW_RE = /const\s+(\w+)\s*=\s*\(([^)]*\b\w+\s*:\s*(?:any|unknown)[^)]*)\)\s*=>/g;
const PY_ANY_PARAM_RE = /def\s+(\w+)\s*\(([^)]*\b\w+\s*:\s*(?:Any|object)[^)]*)\)/g;
const GO_ANY_PARAM_RE = /func\s+(\w+)\s*\(([^)]*\b\w+\s+(?:interface\{\}|any)[^)]*)\)/g;

// Scan `text` for wide-parameter function declarations of language `lang`
// ('ts' | 'py' | 'go'). Returns one { name, params, pos } record per match,
// grouped by pattern in table order; `pos` is the match offset within `text`.
// Empty text or an unknown language yields an empty array.
function extractWideParamFns(text, lang) {
  if (!text) return [];

  const regexesByLang = {
    ts: [TS_ANY_PARAM_RE, TS_ANY_ARROW_RE],
    py: [PY_ANY_PARAM_RE],
    go: [GO_ANY_PARAM_RE],
  };
  const regexes = regexesByLang[lang] || [];

  const found = [];
  for (const regex of regexes) {
    // The patterns are shared /g regexes: rewind before every scan.
    regex.lastIndex = 0;
    for (let hit = regex.exec(text); hit; hit = regex.exec(text)) {
      const [, name, params] = hit;
      found.push({ name, params, pos: hit.index });
    }
  }
  return found;
}
43
+
44
// Map a file path to the language key used by the narrowing patterns, based
// on its extension (case-insensitive): .ts/.tsx -> 'ts', .py -> 'py',
// .go -> 'go'. Any other path yields null.
function inferLang(filePath) {
  const hit = /\.(tsx?|py|go)$/i.exec(filePath);
  if (!hit) return null;
  const ext = hit[1].toLowerCase();
  return ext === 'tsx' ? 'ts' : ext;
}
50
+
51
// Detect whether a call-site argument is "narrowly typed": a literal, a TS
// cast to a concrete type, or a generic typed call. We deliberately
// under-approximate — anything not recognised here counts as not narrow.
//
// arg: the raw argument text as it appears at the call site.
// Returns true only for the recognised forms below; non-string or blank input
// returns false.
function isNarrowlyTypedArg(arg) {
  if (typeof arg !== 'string') return false;
  const t = arg.trim();
  if (!t) return false;
  if (/^["'`]/.test(t)) return true; // string literal
  if (/^-?\d/.test(t)) return true; // number literal
  if (/^(?:true|false|null|undefined|None|nil)$/.test(t)) return true;
  if (/^\{/.test(t) || /^\[/.test(t)) return true; // object/array literal

  // TS cast. The LAST `as T` decides the resulting type, so `x as unknown as
  // User` is narrow, while `x as any` / `x as unknown` widen the value and
  // must not count as narrowing.
  const casts = [...t.matchAll(/\bas\s+(\w+)/g)];
  if (casts.length > 0) {
    const finalType = casts[casts.length - 1][1];
    return finalType !== 'any' && finalType !== 'unknown';
  }

  if (/<[\w.<>,\s]+>/.test(t) && /^\w+</.test(t)) return true; // generic typed call
  return false;
}
65
+
66
// List every wide-parameter function declared in the project.
//
// fileContents: object mapping file path -> source text.
// Returns [{ name, declaredIn, lang }] in file order; files whose language is
// not recognised by inferLang, or whose content is empty / not a string, are
// skipped. A missing or non-object `fileContents` yields an empty array.
export function findNarrowableFunctions(fileContents) {
  const usable = fileContents && typeof fileContents === 'object';
  if (!usable) return [];

  return Object.entries(fileContents).flatMap(([filePath, source]) => {
    const lang = inferLang(filePath);
    if (!lang) return [];
    if (!source || typeof source !== 'string') return [];
    return extractWideParamFns(source, lang).map((decl) => ({
      name: decl.name,
      declaredIn: filePath,
      lang,
    }));
  });
}
79
+
80
// Find call sites of `name` across all files and classify the FIRST argument
// of each call with isNarrowlyTypedArg.
//
// name:         function name to look for (matched literally).
// fileContents: object mapping file path -> source text; non-string or empty
//               entries are skipped.
// Returns { totalCalls, narrowCalls, examples[] } where `examples` holds up to
// three { file, snippet } records for narrow calls (snippet capped at 80 chars).
//
// The function's own declaration (`function name(`, `def name(`, `func name(`)
// is NOT counted as a call: its "argument" is the parameter list, which would
// otherwise always register as one non-narrow call and skew the ratio.
function probeCallSites(name, fileContents) {
  // Escape so a name containing regex metacharacters is matched literally.
  const literalName = String(name).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  // Argument text is capped at 200 chars and cannot span a `)`.
  const callRe = new RegExp(`\\b${literalName}\\s*\\(\\s*([^)]{0,200})\\)`, 'g');
  const declPrefixRe = /\b(?:function|def|func)\s+$/;

  let total = 0;
  let narrow = 0;
  const examples = [];
  for (const [fp, text] of Object.entries(fileContents || {})) {
    if (!text || typeof text !== 'string') continue;
    for (const m of text.matchAll(callRe)) {
      // Look at the text just before the match to spot a declaration keyword.
      const before = text.slice(Math.max(0, m.index - 32), m.index);
      if (declPrefixRe.test(before)) continue;

      total++;
      const firstArg = m[1].split(',')[0];
      if (isNarrowlyTypedArg(firstArg)) {
        narrow++;
        if (examples.length < 3) examples.push({ file: fp, snippet: m[0].slice(0, 80) });
      }
    }
  }
  return { totalCalls: total, narrowCalls: narrow, examples };
}
101
+
102
// Annotate findings whose enclosing function has a wide-typed parameter but is
// (almost) always called with a narrowly-typed first argument.
//
// findings:     array of finding objects; mutated in place and returned.
// fileContents: object mapping file path -> source text.
//
// A finding is matched through `enclosingFunction`, `functionName` or `fnName`
// (first truthy one). When at least 95% of the probed call sites are narrow,
// the finding gets `typeNarrowed = { from, to, callSites, narrowed, examples }`
// and, if it has a numeric `confidence`, that value is lowered by 0.10 (floored
// at 0) with the delta recorded in `_narrowedConfidenceAdjust`. Findings are
// never removed. Non-array `findings` or missing `fileContents` is returned
// unchanged.
export function annotateTypeNarrowing(findings, fileContents) {
  if (!Array.isArray(findings) || !fileContents) return findings;
  const fns = findNarrowableFunctions(fileContents);
  if (!fns.length) return findings;

  const wideFnNames = new Set(fns.map((fn) => fn.name));
  // Probing scans every file, so do it once per function name rather than once
  // per finding.
  const probeCache = new Map();

  for (const f of findings) {
    if (!f || typeof f !== 'object') continue;
    const enclosing = f.enclosingFunction || f.functionName || f.fnName;
    if (!enclosing || !wideFnNames.has(enclosing)) continue;

    let probe = probeCache.get(enclosing);
    if (!probe) {
      probe = probeCallSites(enclosing, fileContents);
      probeCache.set(enclosing, probe);
    }
    if (probe.totalCalls === 0) continue;
    if (probe.narrowCalls / probe.totalCalls < 0.95) continue;

    f.typeNarrowed = {
      from: 'any/unknown/interface{}',
      to: 'callsite-uniform-typed',
      callSites: probe.totalCalls,
      narrowed: probe.narrowCalls,
      // Copy so findings sharing a cached probe do not share mutable state.
      examples: probe.examples.map((example) => ({ ...example })),
    };
    if (typeof f.confidence === 'number') {
      f.confidence = Math.max(0, f.confidence - 0.10);
      f._narrowedConfidenceAdjust = -0.10;
    }
  }
  return findings;
}
@@ -0,0 +1,179 @@
1
+ // Per-CWE precision/recall metrics persistence.
2
+ //
3
+ // PRD success metric §5: "Recall on top-25 CWE classes ≥ 0.92." Tracking
4
+ // this requires running a labelled benchmark and persisting the per-family
5
+ // scorecard so /security-trend, /report-card, and the dashboard can
6
+ // surface the trend over time.
7
+ //
8
+ // File location: .agentic-security/validator-metrics.json
9
+ //
10
+ // Shape:
11
+ // {
12
+ // "history": [
13
+ // { "when": "2026-05-18T...", "benchmark": "owasp-benchmark-v1.2",
14
+ // "mode": "blind+strict",
15
+ // "aggregate": { "tp": ..., "fp": ..., "fn": ..., "precision": ..., "recall": ..., "f1": ... },
16
+ // "perFamily": { "<family>": { "tp": ..., "fp": ..., "fn": ..., "precision": ..., "recall": ..., "f1": ... } }
17
+ // }
18
+ // ],
19
+ // "floors": {
20
+ // "perFamily": { "default": { "recall": 0.92 }, "<family>": { "recall": 0.92, "precision": 0.85 } },
21
+ // "aggregate": { "f1": 0.90 }
22
+ // }
23
+ // }
24
+
25
+ import * as fs from 'node:fs';
26
+ import * as path from 'node:path';
27
+
28
// Metrics file location, relative to the scan root.
const FILE = '.agentic-security/validator-metrics.json';
// Maximum number of benchmark runs kept in `history`.
const HISTORY_CAP = 100;

// Resolve the metrics file path under `scanRoot`; a falsy root falls back to
// the current working directory.
function _filePath(scanRoot) {
  const base = scanRoot || process.cwd();
  return path.join(base, FILE);
}
32
+
33
// Load the metrics document for `scanRoot`.
//
// Returns the parsed file content when it is a JSON object. A missing or
// unreadable file, invalid JSON, or JSON that is not a plain object (null, an
// array, a number, ...) yields a fresh default document instead, so callers
// can always treat the result as a record.
function _read(scanRoot) {
  const defaults = () => ({
    history: [],
    floors: { perFamily: { default: { recall: 0.92 } }, aggregate: { f1: 0.90 } },
  });

  const fp = _filePath(scanRoot);
  let parsed;
  try {
    // Reading directly (no existsSync pre-check) treats "missing" like any
    // other read failure and avoids a check-then-read race.
    parsed = JSON.parse(fs.readFileSync(fp, 'utf8'));
  } catch {
    return defaults();
  }
  const isRecord = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
  return isRecord ? parsed : defaults();
}
39
+
40
// Persist `data` as pretty-printed JSON at the metrics path for `scanRoot`,
// creating the parent directory first. Directory, serialisation and write
// failures are all swallowed on purpose: telemetry is best-effort.
function _write(scanRoot, data) {
  const target = _filePath(scanRoot);
  try {
    const parentDir = path.dirname(target);
    fs.mkdirSync(parentDir, { recursive: true });
    const serialized = JSON.stringify(data, null, 2);
    fs.writeFileSync(target, serialized);
  } catch {
    /* swallow — telemetry is best-effort */
  }
}
47
+
48
// Round `n` to four decimal places.
function _round(n) {
  const scaled = Math.round(n * 10000);
  return scaled / 10000;
}
49
+
50
// Derive precision, recall and F1 from raw counts.
//
// tp / fp / fn: true-positive, false-positive and false-negative counts.
// Returns { precision, recall, f1 }, each passed through _round. Denominators
// are clamped to at least 1e-9, so all-zero counts give 0 rather than NaN.
function _computeStats(tp, fp, fn) {
  const safeRatio = (numerator, denominator) => numerator / Math.max(denominator, 1e-9);

  const precision = safeRatio(tp, tp + fp);
  const recall = safeRatio(tp, tp + fn);
  const f1 = safeRatio(2 * precision * recall, precision + recall);

  return {
    precision: _round(precision),
    recall: _round(recall),
    f1: _round(f1),
  };
}
56
+
57
// Record one benchmark run and persist it.
// benchmark: 'owasp-benchmark-v1.2' | 'sard-juliet-java' | 'cve-replay' | ...
// mode: 'blind+strict' | 'non-blind+strict' | 'non-blind+wildcard'
// tp / fp / fn: aggregate counts; a missing count is treated as 0, the same
//   way per-family counts are, so the stored stats are never NaN.
// perFamily: { fam: { tp, fp, fn } }; falsy family entries are skipped.
//
// Appends { when, benchmark, mode, aggregate, perFamily } to `history`, keeps
// only the newest HISTORY_CAP entries, writes the document and returns the new
// entry. A stored `history` that is not an array is replaced by a fresh one.
export function recordRun(scanRoot, { benchmark, mode, tp, fp, fn, perFamily } = {}) {
  const orZero = (count) => count || 0;
  const withStats = (counts) => {
    const normalized = { tp: orZero(counts.tp), fp: orZero(counts.fp), fn: orZero(counts.fn) };
    return { ...normalized, ..._computeStats(normalized.tp, normalized.fp, normalized.fn) };
  };

  const data = _read(scanRoot);
  const entry = {
    when: new Date().toISOString(),
    benchmark,
    mode,
    aggregate: withStats({ tp, fp, fn }),
    perFamily: {},
  };
  for (const [fam, counts] of Object.entries(perFamily || {})) {
    if (!counts) continue;
    entry.perFamily[fam] = withStats(counts);
  }

  const history = Array.isArray(data.history) ? data.history : [];
  history.push(entry);
  data.history = history.length > HISTORY_CAP ? history.slice(-HISTORY_CAP) : history;
  _write(scanRoot, data);
  return entry;
}
79
+
80
// Record one PRODUCTION-TRIAGE outcome (operator marked a finding tp/fp).
// This is the per-CWE quality signal from real-world use, complementing the
// per-benchmark numbers recorded by recordRun.
//
// family: the finding's family name (used as the storage key)
// verdict: 'tp' | 'fp' | 'wontfix'
// stableId: accepted for interface compatibility; not stored.
//
// Aggregated counts are stored under
// data.productionTriage[family] = { tp, fp, wontfix, lastAt }.
// Returns the updated row, or null when `family` is missing or `verdict` is
// not one of the three accepted values.
//
// Once a row's total exceeds 10,000 the row is flagged `_capped` and returned
// without being written, so a runaway triage script stops touching the file.
export function recordTriage(scanRoot, { family, verdict, stableId } = {}) {
  if (!family || !['tp', 'fp', 'wontfix'].includes(verdict)) return null;
  const data = _read(scanRoot);

  const stored = data.productionTriage;
  const triage = stored && typeof stored === 'object' ? stored : {};
  data.productionTriage = triage;

  // Only OWN properties count as existing rows. A family named `__proto__` or
  // `constructor` must not resolve to Object.prototype / Object through the
  // inherited lookup — incrementing that would pollute every object.
  const hasRow = Object.prototype.hasOwnProperty.call(triage, family);
  let row = hasRow ? triage[family] : null;
  if (!row || typeof row !== 'object') {
    row = { tp: 0, fp: 0, wontfix: 0, lastAt: null };
    // defineProperty creates a plain own data property even for `__proto__`,
    // where ordinary assignment would change the prototype instead.
    Object.defineProperty(triage, family, { value: row, writable: true, enumerable: true, configurable: true });
  }

  row[verdict] = (row[verdict] || 0) + 1;
  row.lastAt = new Date().toISOString();
  // Cap per-family rows so a runaway triage script can't bloat the file.
  if ((row.tp || 0) + (row.fp || 0) + (row.wontfix || 0) > 10_000) {
    // Stop accumulating; the trend is well-established by now.
    row._capped = true;
    return row;
  }
  void stableId;
  _write(scanRoot, data);
  return row;
}
110
+
111
// Return the most recent history entry, optionally restricted to one
// benchmark name. Returns null when no entry matches.
export function getLatest(scanRoot, benchmark) {
  const { history } = _read(scanRoot);
  const candidates = (history || []).filter((entry) => (benchmark ? entry.benchmark === benchmark : true));
  const newest = candidates.pop();
  return newest || null;
}
117
+
118
// Compare the latest run (optionally for one benchmark) against the stored
// floors.
// Returns { aggregateBelowFloor, familiesBelowFloor: [{fam, metric, value, floor}], latest }
// plus `aggregateFloor` when the aggregate F1 is below its floor. With no
// recorded run, nothing is flagged and `latest` is null.
//
// A family's floor is the `default` per-family floor overlaid with that
// family's own entry; only metrics with a numeric floor are checked.
export function checkFloors(scanRoot, benchmark) {
  const data = _read(scanRoot);
  const latest = getLatest(scanRoot, benchmark);
  if (!latest) {
    return { aggregateBelowFloor: false, familiesBelowFloor: [], latest: null };
  }

  const floors = data.floors || {};
  const aggMin = (floors.aggregate || {}).f1;
  const aggregateBreached = typeof aggMin === 'number' && latest.aggregate.f1 < aggMin;

  const perFamFloors = floors.perFamily || {};
  const baseFloor = perFamFloors.default || {};
  const checkedMetrics = ['precision', 'recall', 'f1'];
  const familiesBelowFloor = Object.entries(latest.perFamily || {}).flatMap(([fam, stats]) => {
    const famFloor = { ...baseFloor, ...(perFamFloors[fam] || {}) };
    return checkedMetrics
      .filter((metric) => typeof famFloor[metric] === 'number' && stats[metric] < famFloor[metric])
      .map((metric) => ({ fam, metric, value: stats[metric], floor: famFloor[metric] }));
  });

  const result = { aggregateBelowFloor: aggregateBreached, familiesBelowFloor, latest };
  if (aggregateBreached) result.aggregateFloor = aggMin;
  return result;
}
143
+
144
// Convenience: render a short human summary including both benchmark
// numbers AND the production-triage trend (per-family TP/FP from operator
// verdicts recorded via recordTriage).
//
// Returns a newline-joined string. The benchmark section shows the latest run
// (optionally for one benchmark) with its ten highest-TP families, or a
// placeholder line when there is none. The triage section lists up to ten
// families with at least one tp/fp verdict, highest FP first; it is omitted
// when there are no such families.
export function summarize(scanRoot, benchmark) {
  const latest = getLatest(scanRoot, benchmark);
  const data = _read(scanRoot);
  const lines = [];
  if (latest) {
    const r = latest.aggregate;
    const benchFams = Object.entries(latest.perFamily || {})
      .sort((a, b) => b[1].tp - a[1].tp);
    lines.push(`Benchmark: ${latest.benchmark} (${latest.mode}) @ ${latest.when.slice(0, 16)}`);
    lines.push(` F1=${r.f1} P=${r.precision} R=${r.recall} (TP=${r.tp} FP=${r.fp} FN=${r.fn})`);
    for (const [fam, s] of benchFams.slice(0, 10)) {
      lines.push(` · ${fam.padEnd(20)} P=${s.precision} R=${s.recall} (TP=${s.tp} FP=${s.fp} FN=${s.fn})`);
    }
  } else {
    lines.push('(no benchmark metrics yet)');
  }

  // Production-triage trend (R3.3 / P1-11 — this is the real-world signal,
  // not the benchmark proxy).
  // Counts are normalised once up front: a row that lacks `tp` must render as
  // TP=0 / P≈0.00 rather than P≈NaN, and a null row must not throw.
  const triageRows = Object.entries(data.productionTriage || {})
    .filter(([, c]) => c && typeof c === 'object')
    .map(([fam, c]) => ({ fam, tp: c.tp || 0, fp: c.fp || 0, wontfix: c.wontfix || 0 }))
    .filter((row) => row.tp + row.fp > 0)
    .sort((a, b) => b.fp - a.fp);
  if (triageRows.length) {
    lines.push('');
    lines.push('Production-triage trend (real-world precision proxy):');
    for (const { fam, tp, fp, wontfix } of triageRows.slice(0, 10)) {
      // tp + fp > 0 is guaranteed by the filter above.
      const pHat = (tp / (tp + fp)).toFixed(2);
      lines.push(` · ${fam.padEnd(20)} P≈${pHat} (TP=${tp} FP=${fp} wontfix=${wontfix})`);
    }
  }
  return lines.join('\n');
}