agent-threat-rules 0.3.1 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (287) hide show
  1. package/README.md +190 -54
  2. package/package.json +3 -1
  3. package/rules/agent-manipulation/{ATR-2026-030-cross-agent-attack.yaml → ATR-2026-00030-cross-agent-attack.yaml} +3 -1
  4. package/rules/agent-manipulation/{ATR-2026-032-goal-hijacking.yaml → ATR-2026-00032-goal-hijacking.yaml} +3 -1
  5. package/rules/agent-manipulation/{ATR-2026-074-cross-agent-privilege-escalation.yaml → ATR-2026-00074-cross-agent-privilege-escalation.yaml} +3 -1
  6. package/rules/agent-manipulation/{ATR-2026-076-inter-agent-message-spoofing.yaml → ATR-2026-00076-inter-agent-message-spoofing.yaml} +3 -1
  7. package/rules/agent-manipulation/{ATR-2026-077-human-trust-exploitation.yaml → ATR-2026-00077-human-trust-exploitation.yaml} +3 -1
  8. package/rules/agent-manipulation/{ATR-2026-108-consensus-sybil-attack.yaml → ATR-2026-00108-consensus-sybil-attack.yaml} +3 -1
  9. package/rules/agent-manipulation/ATR-2026-00116-a2a-message-validation.yaml +92 -0
  10. package/rules/agent-manipulation/ATR-2026-00117-agent-identity-spoofing.yaml +92 -0
  11. package/rules/agent-manipulation/ATR-2026-00118-approval-fatigue.yaml +89 -0
  12. package/rules/agent-manipulation/ATR-2026-00119-social-engineering-via-agent.yaml +89 -0
  13. package/rules/agent-manipulation/ATR-2026-00132-casual-authority-escalation.yaml +105 -0
  14. package/rules/agent-manipulation/ATR-2026-00139-casual-authority-redirect.yaml +53 -0
  15. package/rules/context-exfiltration/{ATR-2026-020-system-prompt-leak.yaml → ATR-2026-00020-system-prompt-leak.yaml} +3 -1
  16. package/rules/context-exfiltration/{ATR-2026-021-api-key-exposure.yaml → ATR-2026-00021-api-key-exposure.yaml} +3 -1
  17. package/rules/context-exfiltration/{ATR-2026-075-agent-memory-manipulation.yaml → ATR-2026-00075-agent-memory-manipulation.yaml} +3 -1
  18. package/rules/context-exfiltration/{ATR-2026-102-disguised-analytics-exfiltration.yaml → ATR-2026-00102-disguised-analytics-exfiltration.yaml} +3 -1
  19. package/rules/context-exfiltration/ATR-2026-00113-credential-theft.yaml +89 -0
  20. package/rules/context-exfiltration/ATR-2026-00114-oauth-token-abuse.yaml +89 -0
  21. package/rules/context-exfiltration/ATR-2026-00115-env-var-harvesting.yaml +90 -0
  22. package/rules/context-exfiltration/ATR-2026-00136-tool-response-data-piggyback.yaml +100 -0
  23. package/rules/context-exfiltration/ATR-2026-00141-example-format-key-leak.yaml +52 -0
  24. package/rules/context-exfiltration/ATR-2026-00142-piggyback-transition-words.yaml +55 -0
  25. package/rules/context-exfiltration/ATR-2026-00145-obfuscated-key-disclosure.yaml +49 -0
  26. package/rules/context-exfiltration/ATR-2026-00146-env-var-existence-probe.yaml +49 -0
  27. package/rules/data-poisoning/{ATR-2026-070-data-poisoning.yaml → ATR-2026-00070-data-poisoning.yaml} +3 -1
  28. package/rules/excessive-autonomy/{ATR-2026-050-runaway-agent-loop.yaml → ATR-2026-00050-runaway-agent-loop.yaml} +3 -1
  29. package/rules/excessive-autonomy/{ATR-2026-051-resource-exhaustion.yaml → ATR-2026-00051-resource-exhaustion.yaml} +3 -1
  30. package/rules/excessive-autonomy/{ATR-2026-052-cascading-failure.yaml → ATR-2026-00052-cascading-failure.yaml} +3 -1
  31. package/rules/excessive-autonomy/{ATR-2026-098-unauthorized-financial-action.yaml → ATR-2026-00098-unauthorized-financial-action.yaml} +3 -1
  32. package/rules/excessive-autonomy/{ATR-2026-099-high-risk-tool-gate.yaml → ATR-2026-00099-high-risk-tool-gate.yaml} +4 -2
  33. package/rules/model-security/{ATR-2026-072-model-behavior-extraction.yaml → ATR-2026-00072-model-behavior-extraction.yaml} +3 -1
  34. package/rules/model-security/{ATR-2026-073-malicious-finetuning-data.yaml → ATR-2026-00073-malicious-finetuning-data.yaml} +3 -1
  35. package/rules/privilege-escalation/{ATR-2026-040-privilege-escalation.yaml → ATR-2026-00040-privilege-escalation.yaml} +3 -1
  36. package/rules/privilege-escalation/{ATR-2026-041-scope-creep.yaml → ATR-2026-00041-scope-creep.yaml} +3 -1
  37. package/rules/privilege-escalation/{ATR-2026-107-delayed-execution-bypass.yaml → ATR-2026-00107-delayed-execution-bypass.yaml} +3 -1
  38. package/rules/privilege-escalation/ATR-2026-00110-eval-injection.yaml +92 -0
  39. package/rules/privilege-escalation/ATR-2026-00111-shell-escape.yaml +93 -0
  40. package/rules/privilege-escalation/ATR-2026-00112-dynamic-import-exploitation.yaml +89 -0
  41. package/rules/privilege-escalation/ATR-2026-00143-casual-privilege-escalation.yaml +53 -0
  42. package/rules/privilege-escalation/ATR-2026-00144-rationalized-safety-bypass.yaml +49 -0
  43. package/rules/prompt-injection/{ATR-2026-001-direct-prompt-injection.yaml → ATR-2026-00001-direct-prompt-injection.yaml} +121 -11
  44. package/rules/prompt-injection/{ATR-2026-002-indirect-prompt-injection.yaml → ATR-2026-00002-indirect-prompt-injection.yaml} +3 -1
  45. package/rules/prompt-injection/{ATR-2026-003-jailbreak-attempt.yaml → ATR-2026-00003-jailbreak-attempt.yaml} +3 -1
  46. package/rules/prompt-injection/{ATR-2026-004-system-prompt-override.yaml → ATR-2026-00004-system-prompt-override.yaml} +3 -1
  47. package/rules/prompt-injection/{ATR-2026-005-multi-turn-injection.yaml → ATR-2026-00005-multi-turn-injection.yaml} +3 -1
  48. package/rules/prompt-injection/{ATR-2026-080-encoding-evasion.yaml → ATR-2026-00080-encoding-evasion.yaml} +3 -1
  49. package/rules/prompt-injection/{ATR-2026-081-semantic-multi-turn.yaml → ATR-2026-00081-semantic-multi-turn.yaml} +3 -1
  50. package/rules/prompt-injection/{ATR-2026-082-fingerprint-evasion.yaml → ATR-2026-00082-fingerprint-evasion.yaml} +3 -1
  51. package/rules/prompt-injection/{ATR-2026-083-indirect-tool-injection.yaml → ATR-2026-00083-indirect-tool-injection.yaml} +3 -1
  52. package/rules/prompt-injection/{ATR-2026-084-structured-data-injection.yaml → ATR-2026-00084-structured-data-injection.yaml} +3 -1
  53. package/rules/prompt-injection/{ATR-2026-085-audit-evasion.yaml → ATR-2026-00085-audit-evasion.yaml} +3 -1
  54. package/rules/prompt-injection/{ATR-2026-086-visual-spoofing.yaml → ATR-2026-00086-visual-spoofing.yaml} +3 -1
  55. package/rules/prompt-injection/{ATR-2026-087-rule-probing.yaml → ATR-2026-00087-rule-probing.yaml} +3 -1
  56. package/rules/prompt-injection/{ATR-2026-088-adaptive-countermeasure.yaml → ATR-2026-00088-adaptive-countermeasure.yaml} +3 -1
  57. package/rules/prompt-injection/{ATR-2026-089-polymorphic-skill.yaml → ATR-2026-00089-polymorphic-skill.yaml} +3 -1
  58. package/rules/prompt-injection/{ATR-2026-090-threat-intel-exfil.yaml → ATR-2026-00090-threat-intel-exfil.yaml} +3 -1
  59. package/rules/prompt-injection/{ATR-2026-091-nested-payload.yaml → ATR-2026-00091-nested-payload.yaml} +3 -1
  60. package/rules/prompt-injection/{ATR-2026-092-consensus-poisoning.yaml → ATR-2026-00092-consensus-poisoning.yaml} +3 -1
  61. package/rules/prompt-injection/{ATR-2026-093-gradual-escalation.yaml → ATR-2026-00093-gradual-escalation.yaml} +3 -1
  62. package/rules/prompt-injection/{ATR-2026-094-audit-bypass.yaml → ATR-2026-00094-audit-bypass.yaml} +3 -1
  63. package/rules/prompt-injection/{ATR-2026-097-cjk-injection-patterns.yaml → ATR-2026-00097-cjk-injection-patterns.yaml} +18 -1
  64. package/rules/prompt-injection/{ATR-2026-104-persona-hijacking.yaml → ATR-2026-00104-persona-hijacking.yaml} +3 -1
  65. package/rules/prompt-injection/ATR-2026-00130-indirect-authority-claim.yaml +103 -0
  66. package/rules/prompt-injection/ATR-2026-00131-fictional-academic-framing.yaml +99 -0
  67. package/rules/prompt-injection/ATR-2026-00133-paraphrase-injection.yaml +117 -0
  68. package/rules/prompt-injection/ATR-2026-00137-authority-claim-injection.yaml +52 -0
  69. package/rules/prompt-injection/ATR-2026-00138-fictional-framing-bypass.yaml +51 -0
  70. package/rules/prompt-injection/ATR-2026-00140-indirect-reference-reversal.yaml +52 -0
  71. package/rules/prompt-injection/ATR-2026-00148-language-switch-injection.yaml +71 -0
  72. package/rules/skill-compromise/{ATR-2026-060-skill-impersonation.yaml → ATR-2026-00060-skill-impersonation.yaml} +3 -1
  73. package/rules/skill-compromise/{ATR-2026-061-description-behavior-mismatch.yaml → ATR-2026-00061-description-behavior-mismatch.yaml} +4 -2
  74. package/rules/skill-compromise/{ATR-2026-062-hidden-capability.yaml → ATR-2026-00062-hidden-capability.yaml} +3 -1
  75. package/rules/skill-compromise/{ATR-2026-063-skill-chain-attack.yaml → ATR-2026-00063-skill-chain-attack.yaml} +5 -2
  76. package/rules/skill-compromise/{ATR-2026-064-over-permissioned-skill.yaml → ATR-2026-00064-over-permissioned-skill.yaml} +3 -1
  77. package/rules/skill-compromise/{ATR-2026-065-skill-update-attack.yaml → ATR-2026-00065-skill-update-attack.yaml} +3 -1
  78. package/rules/skill-compromise/{ATR-2026-066-parameter-injection.yaml → ATR-2026-00066-parameter-injection.yaml} +3 -1
  79. package/rules/skill-compromise/ATR-2026-00120-skill-instruction-injection.yaml +121 -0
  80. package/rules/skill-compromise/ATR-2026-00121-skill-dangerous-script.yaml +165 -0
  81. package/rules/skill-compromise/ATR-2026-00122-skill-weaponized-instruction.yaml +114 -0
  82. package/rules/skill-compromise/ATR-2026-00123-skill-overreach-permissions.yaml +118 -0
  83. package/rules/skill-compromise/ATR-2026-00124-skill-name-squatting.yaml +98 -0
  84. package/rules/skill-compromise/ATR-2026-00125-context-poisoning-compaction.yaml +93 -0
  85. package/rules/skill-compromise/ATR-2026-00126-skill-rug-pull-setup.yaml +99 -0
  86. package/rules/skill-compromise/ATR-2026-00127-subcommand-overflow.yaml +74 -0
  87. package/rules/skill-compromise/ATR-2026-00128-html-comment-hidden-payload.yaml +79 -0
  88. package/rules/skill-compromise/ATR-2026-00129-unicode-smuggling.yaml +73 -0
  89. package/rules/skill-compromise/ATR-2026-00134-fork-claim-impersonation.yaml +93 -0
  90. package/rules/skill-compromise/ATR-2026-00135-exfil-url-in-instructions.yaml +82 -0
  91. package/rules/skill-compromise/ATR-2026-00147-fork-impersonation.yaml +48 -0
  92. package/rules/tool-poisoning/{ATR-2026-010-mcp-malicious-response.yaml → ATR-2026-00010-mcp-malicious-response.yaml} +3 -1
  93. package/rules/tool-poisoning/{ATR-2026-011-tool-output-injection.yaml → ATR-2026-00011-tool-output-injection.yaml} +3 -1
  94. package/rules/tool-poisoning/{ATR-2026-012-unauthorized-tool-call.yaml → ATR-2026-00012-unauthorized-tool-call.yaml} +3 -1
  95. package/rules/tool-poisoning/{ATR-2026-013-tool-ssrf.yaml → ATR-2026-00013-tool-ssrf.yaml} +3 -1
  96. package/rules/tool-poisoning/{ATR-2026-095-supply-chain-poisoning.yaml → ATR-2026-00095-supply-chain-poisoning.yaml} +3 -1
  97. package/rules/tool-poisoning/{ATR-2026-096-registry-poisoning.yaml → ATR-2026-00096-registry-poisoning.yaml} +3 -1
  98. package/rules/tool-poisoning/{ATR-2026-100-consent-bypass-instruction.yaml → ATR-2026-00100-consent-bypass-instruction.yaml} +3 -1
  99. package/rules/tool-poisoning/{ATR-2026-101-trust-escalation-override.yaml → ATR-2026-00101-trust-escalation-override.yaml} +3 -1
  100. package/rules/tool-poisoning/{ATR-2026-103-hidden-safety-bypass-instruction.yaml → ATR-2026-00103-hidden-safety-bypass-instruction.yaml} +3 -1
  101. package/rules/tool-poisoning/{ATR-2026-105-silent-action-concealment.yaml → ATR-2026-00105-silent-action-concealment.yaml} +3 -1
  102. package/rules/tool-poisoning/{ATR-2026-106-schema-description-contradiction.yaml → ATR-2026-00106-schema-description-contradiction.yaml} +3 -1
  103. package/spec/atr-schema.yaml +32 -3
  104. package/dist/action-executor.d.ts +0 -44
  105. package/dist/action-executor.d.ts.map +0 -1
  106. package/dist/action-executor.js +0 -130
  107. package/dist/action-executor.js.map +0 -1
  108. package/dist/adapters/default-adapter.d.ts +0 -24
  109. package/dist/adapters/default-adapter.d.ts.map +0 -1
  110. package/dist/adapters/default-adapter.js +0 -51
  111. package/dist/adapters/default-adapter.js.map +0 -1
  112. package/dist/adapters/stdio-adapter.d.ts +0 -30
  113. package/dist/adapters/stdio-adapter.d.ts.map +0 -1
  114. package/dist/adapters/stdio-adapter.js +0 -128
  115. package/dist/adapters/stdio-adapter.js.map +0 -1
  116. package/dist/capability-extractor.d.ts +0 -35
  117. package/dist/capability-extractor.d.ts.map +0 -1
  118. package/dist/capability-extractor.js +0 -91
  119. package/dist/capability-extractor.js.map +0 -1
  120. package/dist/cli.d.ts +0 -12
  121. package/dist/cli.d.ts.map +0 -1
  122. package/dist/cli.js +0 -820
  123. package/dist/cli.js.map +0 -1
  124. package/dist/converters/elastic.d.ts +0 -36
  125. package/dist/converters/elastic.d.ts.map +0 -1
  126. package/dist/converters/elastic.js +0 -125
  127. package/dist/converters/elastic.js.map +0 -1
  128. package/dist/converters/index.d.ts +0 -28
  129. package/dist/converters/index.d.ts.map +0 -1
  130. package/dist/converters/index.js +0 -36
  131. package/dist/converters/index.js.map +0 -1
  132. package/dist/converters/splunk.d.ts +0 -19
  133. package/dist/converters/splunk.d.ts.map +0 -1
  134. package/dist/converters/splunk.js +0 -148
  135. package/dist/converters/splunk.js.map +0 -1
  136. package/dist/coverage-analyzer.d.ts +0 -43
  137. package/dist/coverage-analyzer.d.ts.map +0 -1
  138. package/dist/coverage-analyzer.js +0 -329
  139. package/dist/coverage-analyzer.js.map +0 -1
  140. package/dist/embedding/build-corpus.d.ts +0 -15
  141. package/dist/embedding/build-corpus.d.ts.map +0 -1
  142. package/dist/embedding/build-corpus.js +0 -105
  143. package/dist/embedding/build-corpus.js.map +0 -1
  144. package/dist/embedding/model-loader.d.ts +0 -41
  145. package/dist/embedding/model-loader.d.ts.map +0 -1
  146. package/dist/embedding/model-loader.js +0 -90
  147. package/dist/embedding/model-loader.js.map +0 -1
  148. package/dist/embedding/vector-store.d.ts +0 -41
  149. package/dist/embedding/vector-store.d.ts.map +0 -1
  150. package/dist/embedding/vector-store.js +0 -70
  151. package/dist/embedding/vector-store.js.map +0 -1
  152. package/dist/engine.d.ts +0 -163
  153. package/dist/engine.d.ts.map +0 -1
  154. package/dist/engine.js +0 -869
  155. package/dist/engine.js.map +0 -1
  156. package/dist/eval/corpus.d.ts +0 -42
  157. package/dist/eval/corpus.d.ts.map +0 -1
  158. package/dist/eval/corpus.js +0 -427
  159. package/dist/eval/corpus.js.map +0 -1
  160. package/dist/eval/eval-harness.d.ts +0 -44
  161. package/dist/eval/eval-harness.d.ts.map +0 -1
  162. package/dist/eval/eval-harness.js +0 -296
  163. package/dist/eval/eval-harness.js.map +0 -1
  164. package/dist/eval/index.d.ts +0 -13
  165. package/dist/eval/index.d.ts.map +0 -1
  166. package/dist/eval/index.js +0 -9
  167. package/dist/eval/index.js.map +0 -1
  168. package/dist/eval/metrics.d.ts +0 -74
  169. package/dist/eval/metrics.d.ts.map +0 -1
  170. package/dist/eval/metrics.js +0 -108
  171. package/dist/eval/metrics.js.map +0 -1
  172. package/dist/eval/pint-corpus.d.ts +0 -34
  173. package/dist/eval/pint-corpus.d.ts.map +0 -1
  174. package/dist/eval/pint-corpus.js +0 -109
  175. package/dist/eval/pint-corpus.js.map +0 -1
  176. package/dist/eval/rule-corpus.d.ts +0 -9
  177. package/dist/eval/rule-corpus.d.ts.map +0 -1
  178. package/dist/eval/rule-corpus.js +0 -4780
  179. package/dist/eval/rule-corpus.js.map +0 -1
  180. package/dist/eval/rule-metrics.d.ts +0 -34
  181. package/dist/eval/rule-metrics.d.ts.map +0 -1
  182. package/dist/eval/rule-metrics.js +0 -92
  183. package/dist/eval/rule-metrics.js.map +0 -1
  184. package/dist/eval/run-eval.d.ts +0 -7
  185. package/dist/eval/run-eval.d.ts.map +0 -1
  186. package/dist/eval/run-eval.js +0 -11
  187. package/dist/eval/run-eval.js.map +0 -1
  188. package/dist/eval/run-pint-benchmark.d.ts +0 -18
  189. package/dist/eval/run-pint-benchmark.d.ts.map +0 -1
  190. package/dist/eval/run-pint-benchmark.js +0 -157
  191. package/dist/eval/run-pint-benchmark.js.map +0 -1
  192. package/dist/flywheel.d.ts +0 -54
  193. package/dist/flywheel.d.ts.map +0 -1
  194. package/dist/flywheel.js +0 -121
  195. package/dist/flywheel.js.map +0 -1
  196. package/dist/hook-handler.d.ts +0 -61
  197. package/dist/hook-handler.d.ts.map +0 -1
  198. package/dist/hook-handler.js +0 -178
  199. package/dist/hook-handler.js.map +0 -1
  200. package/dist/index.d.ts +0 -62
  201. package/dist/index.d.ts.map +0 -1
  202. package/dist/index.js +0 -54
  203. package/dist/index.js.map +0 -1
  204. package/dist/layer-integration.d.ts +0 -55
  205. package/dist/layer-integration.d.ts.map +0 -1
  206. package/dist/layer-integration.js +0 -185
  207. package/dist/layer-integration.js.map +0 -1
  208. package/dist/loader.d.ts +0 -21
  209. package/dist/loader.d.ts.map +0 -1
  210. package/dist/loader.js +0 -124
  211. package/dist/loader.js.map +0 -1
  212. package/dist/mcp-server.d.ts +0 -13
  213. package/dist/mcp-server.d.ts.map +0 -1
  214. package/dist/mcp-server.js +0 -220
  215. package/dist/mcp-server.js.map +0 -1
  216. package/dist/mcp-tools/coverage-gaps.d.ts +0 -13
  217. package/dist/mcp-tools/coverage-gaps.d.ts.map +0 -1
  218. package/dist/mcp-tools/coverage-gaps.js +0 -55
  219. package/dist/mcp-tools/coverage-gaps.js.map +0 -1
  220. package/dist/mcp-tools/list-rules.d.ts +0 -17
  221. package/dist/mcp-tools/list-rules.d.ts.map +0 -1
  222. package/dist/mcp-tools/list-rules.js +0 -45
  223. package/dist/mcp-tools/list-rules.js.map +0 -1
  224. package/dist/mcp-tools/scan.d.ts +0 -24
  225. package/dist/mcp-tools/scan.d.ts.map +0 -1
  226. package/dist/mcp-tools/scan.js +0 -94
  227. package/dist/mcp-tools/scan.js.map +0 -1
  228. package/dist/mcp-tools/submit-proposal.d.ts +0 -12
  229. package/dist/mcp-tools/submit-proposal.d.ts.map +0 -1
  230. package/dist/mcp-tools/submit-proposal.js +0 -103
  231. package/dist/mcp-tools/submit-proposal.js.map +0 -1
  232. package/dist/mcp-tools/threat-summary.d.ts +0 -12
  233. package/dist/mcp-tools/threat-summary.d.ts.map +0 -1
  234. package/dist/mcp-tools/threat-summary.js +0 -74
  235. package/dist/mcp-tools/threat-summary.js.map +0 -1
  236. package/dist/mcp-tools/validate.d.ts +0 -15
  237. package/dist/mcp-tools/validate.d.ts.map +0 -1
  238. package/dist/mcp-tools/validate.js +0 -45
  239. package/dist/mcp-tools/validate.js.map +0 -1
  240. package/dist/modules/embedding.d.ts +0 -71
  241. package/dist/modules/embedding.d.ts.map +0 -1
  242. package/dist/modules/embedding.js +0 -141
  243. package/dist/modules/embedding.js.map +0 -1
  244. package/dist/modules/index.d.ts +0 -144
  245. package/dist/modules/index.d.ts.map +0 -1
  246. package/dist/modules/index.js +0 -82
  247. package/dist/modules/index.js.map +0 -1
  248. package/dist/modules/semantic.d.ts +0 -106
  249. package/dist/modules/semantic.d.ts.map +0 -1
  250. package/dist/modules/semantic.js +0 -359
  251. package/dist/modules/semantic.js.map +0 -1
  252. package/dist/modules/session.d.ts +0 -70
  253. package/dist/modules/session.d.ts.map +0 -1
  254. package/dist/modules/session.js +0 -128
  255. package/dist/modules/session.js.map +0 -1
  256. package/dist/rule-scaffolder.d.ts +0 -53
  257. package/dist/rule-scaffolder.d.ts.map +0 -1
  258. package/dist/rule-scaffolder.js +0 -301
  259. package/dist/rule-scaffolder.js.map +0 -1
  260. package/dist/session-tracker.d.ts +0 -58
  261. package/dist/session-tracker.d.ts.map +0 -1
  262. package/dist/session-tracker.js +0 -176
  263. package/dist/session-tracker.js.map +0 -1
  264. package/dist/shadow-evaluator.d.ts +0 -48
  265. package/dist/shadow-evaluator.d.ts.map +0 -1
  266. package/dist/shadow-evaluator.js +0 -128
  267. package/dist/shadow-evaluator.js.map +0 -1
  268. package/dist/skill-fingerprint.d.ts +0 -85
  269. package/dist/skill-fingerprint.d.ts.map +0 -1
  270. package/dist/skill-fingerprint.js +0 -284
  271. package/dist/skill-fingerprint.js.map +0 -1
  272. package/dist/tier0-invariant.d.ts +0 -49
  273. package/dist/tier0-invariant.d.ts.map +0 -1
  274. package/dist/tier0-invariant.js +0 -184
  275. package/dist/tier0-invariant.js.map +0 -1
  276. package/dist/tier1-blacklist.d.ts +0 -48
  277. package/dist/tier1-blacklist.d.ts.map +0 -1
  278. package/dist/tier1-blacklist.js +0 -91
  279. package/dist/tier1-blacklist.js.map +0 -1
  280. package/dist/types.d.ts +0 -190
  281. package/dist/types.d.ts.map +0 -1
  282. package/dist/types.js +0 -6
  283. package/dist/types.js.map +0 -1
  284. package/dist/verdict.d.ts +0 -26
  285. package/dist/verdict.d.ts.map +0 -1
  286. package/dist/verdict.js +0 -127
  287. package/dist/verdict.js.map +0 -1
@@ -1,296 +0,0 @@
1
- /**
2
- * Evaluation Harness -- orchestrates running the corpus through the ATR engine
3
- * and produces a structured EvalReport.
4
- *
5
- * Supports:
6
- * - Regex-only evaluation (Tier 2)
7
- * - Regex + Embedding evaluation (Tier 2 + 2.5)
8
- * - Full pipeline evaluation (all tiers)
9
- * - Per-sample latency measurement
10
- * - Regression check against baseline thresholds
11
- *
12
- * @module agent-threat-rules/eval/eval-harness
13
- */
14
- import { resolve, join } from 'node:path';
15
- import { existsSync, readFileSync, writeFileSync } from 'node:fs';
16
- import { ATREngine } from '../engine.js';
17
- import { EmbeddingModule } from '../modules/embedding.js';
18
- import { EVAL_CORPUS, getCorpusStats } from './corpus.js';
19
- import { computeEvalReport, checkRegression } from './metrics.js';
20
- import { computeRuleQuality } from './rule-metrics.js';
21
- /**
22
- * Convert a corpus sample to an AgentEvent.
23
- */
24
- function sampleToEvent(sample) {
25
- return {
26
- type: sample.eventType,
27
- content: sample.text,
28
- timestamp: new Date().toISOString(),
29
- fields: sample.fields,
30
- };
31
- }
32
- /**
33
- * Run a single sample through the engine (regex only) and measure results.
34
- * Catches engine errors so a single bad sample doesn't abort the entire eval.
35
- */
36
- function evaluateSampleRegex(engine, sample) {
37
- const event = sampleToEvent(sample);
38
- try {
39
- const start = performance.now();
40
- const matches = engine.evaluate(event);
41
- const latencyMs = performance.now() - start;
42
- const detected = matches.length > 0;
43
- const topMatch = matches[0];
44
- return {
45
- id: sample.id,
46
- category: sample.category,
47
- expectedDetection: sample.expectedDetection,
48
- actualDetection: detected,
49
- matchedRules: matches.map((m) => m.rule.id),
50
- confidence: topMatch?.confidence ?? 0,
51
- latencyMs,
52
- difficulty: sample.difficulty,
53
- tier: sample.tier,
54
- };
55
- }
56
- catch {
57
- return {
58
- id: sample.id,
59
- category: sample.category,
60
- expectedDetection: sample.expectedDetection,
61
- actualDetection: false,
62
- matchedRules: [],
63
- confidence: 0,
64
- latencyMs: 0,
65
- difficulty: sample.difficulty,
66
- tier: sample.tier,
67
- };
68
- }
69
- }
70
- /**
71
- * Run a single sample through the full pipeline (regex + embedding) and measure results.
72
- */
73
- async function evaluateSampleFull(engine, sample) {
74
- const event = sampleToEvent(sample);
75
- try {
76
- const start = performance.now();
77
- const { verdict } = await engine.evaluateWithVerdict(event);
78
- const latencyMs = performance.now() - start;
79
- const detected = verdict.matchCount > 0;
80
- return {
81
- id: sample.id,
82
- category: sample.category,
83
- expectedDetection: sample.expectedDetection,
84
- actualDetection: detected,
85
- matchedRules: verdict.matches.map((m) => m.rule.id),
86
- confidence: verdict.highestConfidence,
87
- latencyMs,
88
- difficulty: sample.difficulty,
89
- tier: sample.tier,
90
- };
91
- }
92
- catch {
93
- return {
94
- id: sample.id,
95
- category: sample.category,
96
- expectedDetection: sample.expectedDetection,
97
- actualDetection: false,
98
- matchedRules: [],
99
- confidence: 0,
100
- latencyMs: 0,
101
- difficulty: sample.difficulty,
102
- tier: sample.tier,
103
- };
104
- }
105
- }
106
- /**
107
- * Try to load the embedding module. Returns null if unavailable.
108
- */
109
- async function tryLoadEmbedding(embeddingsPath) {
110
- if (!existsSync(embeddingsPath))
111
- return null;
112
- try {
113
- const data = JSON.parse(readFileSync(embeddingsPath, 'utf-8'));
114
- const module = new EmbeddingModule({
115
- attackVectorsData: data,
116
- similarityThreshold: 0.65,
117
- });
118
- await module.initialize();
119
- return module.isAvailable() ? module : null;
120
- }
121
- catch {
122
- return null;
123
- }
124
- }
125
- /**
126
- * Run the full evaluation harness.
127
- * Returns the EvalReport and RegressionCheck.
128
- */
129
- export async function runEval(config) {
130
- const corpus = config.corpus ?? EVAL_CORPUS;
131
- const base = resolve(config.rulesDir, '..');
132
- const embeddingsPath = config.embeddingsPath ?? join(base, 'data', 'attack-embeddings.json');
133
- // Try to load embedding module
134
- const shouldEmbed = config.enableEmbedding !== false;
135
- let embeddingModule = null;
136
- const tiersUsed = ['tier2-regex'];
137
- if (shouldEmbed) {
138
- embeddingModule = await tryLoadEmbedding(embeddingsPath);
139
- if (embeddingModule) {
140
- tiersUsed.push('tier2.5-embedding');
141
- }
142
- }
143
- // Initialize engine
144
- const engine = new ATREngine({
145
- rulesDir: config.rulesDir,
146
- embeddingModule: embeddingModule ?? undefined,
147
- });
148
- const ruleCount = await engine.loadRules();
149
- if (ruleCount === 0) {
150
- throw new Error(`No rules loaded from ${config.rulesDir}`);
151
- }
152
- // Run all samples
153
- const results = [];
154
- const useFullPipeline = embeddingModule !== null;
155
- for (const sample of corpus) {
156
- const result = useFullPipeline
157
- ? await evaluateSampleFull(engine, sample)
158
- : evaluateSampleRegex(engine, sample);
159
- results.push(result);
160
- }
161
- // Cleanup embedding module
162
- if (embeddingModule) {
163
- await embeddingModule.destroy();
164
- }
165
- // Compute report
166
- const report = computeEvalReport(results);
167
- const regression = checkRegression(report, config.thresholds);
168
- const corpusStats = getCorpusStats();
169
- // Compute per-rule quality
170
- const loadedRuleIds = engine.getRules().map((r) => r.id);
171
- const ruleQuality = computeRuleQuality(results, loadedRuleIds);
172
- // Save report if output path specified
173
- if (config.outputPath) {
174
- const output = {
175
- report,
176
- regression,
177
- corpusStats,
178
- ruleQuality,
179
- ruleCount,
180
- engine: 'ATREngine',
181
- tiers: tiersUsed,
182
- };
183
- writeFileSync(config.outputPath, JSON.stringify(output, null, 2));
184
- }
185
- return { report, regression, corpusStats, tiersUsed, ruleQuality };
186
- }
187
- // ---------------------------------------------------------------------------
188
- // CLI entry point
189
- // ---------------------------------------------------------------------------
190
- function formatPercent(n) {
191
- return `${(n * 100).toFixed(1)}%`;
192
- }
193
- function formatMs(n) {
194
- return `${n.toFixed(2)}ms`;
195
- }
196
- export async function runEvalCLI() {
197
- const base = resolve(join(import.meta.dirname ?? '.', '..', '..'));
198
- const rulesDir = join(base, 'rules');
199
- const outputPath = join(base, 'data', 'eval-report.json');
200
- console.log('\n=== ATR Evaluation Harness ===\n');
201
- const { report, regression, corpusStats, tiersUsed, ruleQuality } = await runEval({
202
- rulesDir,
203
- outputPath,
204
- });
205
- // Corpus stats
206
- console.log(`Corpus: ${corpusStats.total} samples (${corpusStats.attacks} attacks, ${corpusStats.benign} benign)`);
207
- console.log(`Categories: ${Object.keys(corpusStats.byCategory).join(', ')}`);
208
- console.log(`Tiers: ${tiersUsed.join(' + ')}`);
209
- // Overall metrics
210
- console.log(`\n--- Overall ---`);
211
- console.log(` Precision: ${formatPercent(report.overall.precision)}`);
212
- console.log(` Recall: ${formatPercent(report.overall.recall)}`);
213
- console.log(` F1: ${formatPercent(report.overall.f1)}`);
214
- console.log(` Accuracy: ${formatPercent(report.overall.accuracy)}`);
215
- console.log(` FP Rate: ${formatPercent(report.overall.fpRate)}`);
216
- console.log(` Confusion: TP=${report.overall.confusion.tp} FP=${report.overall.confusion.fp} TN=${report.overall.confusion.tn} FN=${report.overall.confusion.fn}`);
217
- // Latency
218
- console.log(`\n--- Latency ---`);
219
- console.log(` P50: ${formatMs(report.latency.p50)}`);
220
- console.log(` P95: ${formatMs(report.latency.p95)}`);
221
- console.log(` P99: ${formatMs(report.latency.p99)}`);
222
- console.log(` Mean: ${formatMs(report.latency.mean)}`);
223
- console.log(` Max: ${formatMs(report.latency.max)}`);
224
- // Per category
225
- console.log(`\n--- By Category ---`);
226
- for (const cat of report.byCategory) {
227
- const missed = cat.missedSamples.length > 0 ? ` (missed: ${cat.missedSamples.join(', ')})` : '';
228
- const fps = cat.falsePositives.length > 0 ? ` (FP: ${cat.falsePositives.join(', ')})` : '';
229
- console.log(` ${cat.category}: recall=${formatPercent(cat.metrics.recall)} precision=${formatPercent(cat.metrics.precision)} f1=${formatPercent(cat.metrics.f1)}${missed}${fps}`);
230
- }
231
- // Per difficulty
232
- console.log(`\n--- By Difficulty ---`);
233
- for (const diff of report.byDifficulty) {
234
- console.log(` ${diff.difficulty}: recall=${formatPercent(diff.metrics.recall)} precision=${formatPercent(diff.metrics.precision)} f1=${formatPercent(diff.metrics.f1)}`);
235
- }
236
- // Missed attacks
237
- if (report.missedAttacks.length > 0) {
238
- console.log(`\n--- Missed Attacks (${report.missedAttacks.length}) ---`);
239
- for (const m of report.missedAttacks) {
240
- console.log(` [${m.id}] ${m.category}/${m.difficulty}/${m.tier}`);
241
- }
242
- }
243
- // False positives
244
- if (report.falsePositives.length > 0) {
245
- console.log(`\n--- False Positives (${report.falsePositives.length}) ---`);
246
- for (const fp of report.falsePositives) {
247
- console.log(` [${fp.id}] rules: ${fp.matchedRules.join(', ')}`);
248
- }
249
- }
250
- // Rule quality
251
- console.log(`\n--- Rule Quality ---`);
252
- console.log(` Total rules loaded: ${ruleQuality.totalRulesEvaluated}`);
253
- console.log(` Rules fired: ${ruleQuality.rulesFired}`);
254
- console.log(` Rules never fired: ${ruleQuality.rulesNeverFired}`);
255
- if (ruleQuality.topRules.length > 0) {
256
- console.log(`\n Top 10 rules by match count:`);
257
- for (const rule of ruleQuality.topRules.slice(0, 10)) {
258
- const precision = rule.matchCount > 0
259
- ? formatPercent(rule.tpCount / rule.matchCount)
260
- : 'N/A';
261
- console.log(` ${rule.ruleId}: matches=${rule.matchCount} TP=${rule.tpCount} FP=${rule.fpCount} precision=${precision} avgConf=${rule.avgConfidence.toFixed(2)}`);
262
- }
263
- }
264
- if (ruleQuality.weakRules.length > 0) {
265
- console.log(`\n Weak rules (FP > 0 or matchCount <= 1):`);
266
- for (const rule of ruleQuality.weakRules.slice(0, 10)) {
267
- console.log(` ${rule.ruleId}: matches=${rule.matchCount} TP=${rule.tpCount} FP=${rule.fpCount} categories=[${rule.categories.join(', ')}]`);
268
- }
269
- }
270
- if (ruleQuality.neverFiredRuleIds.length > 0) {
271
- console.log(`\n Never-fired rules (${ruleQuality.neverFiredRuleIds.length}):`);
272
- for (const id of ruleQuality.neverFiredRuleIds.slice(0, 20)) {
273
- console.log(` ${id}`);
274
- }
275
- if (ruleQuality.neverFiredRuleIds.length > 20) {
276
- console.log(` ... and ${ruleQuality.neverFiredRuleIds.length - 20} more`);
277
- }
278
- }
279
- // Regression check
280
- console.log(`\n--- Regression Check ---`);
281
- if (regression.passed) {
282
- console.log(' PASSED');
283
- }
284
- else {
285
- console.log(' FAILED:');
286
- for (const v of regression.violations) {
287
- console.log(` - ${v}`);
288
- }
289
- }
290
- console.log(`\nReport saved to: ${outputPath}`);
291
- console.log('Done.\n');
292
- if (!regression.passed) {
293
- process.exitCode = 1;
294
- }
295
- }
296
- //# sourceMappingURL=eval-harness.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"eval-harness.js","sourceRoot":"","sources":["../../src/eval/eval-harness.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAEH,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAC1C,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,aAAa,EAAE,MAAM,SAAS,CAAC;AAClE,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,EAAE,eAAe,EAAE,MAAM,yBAAyB,CAAC;AAG1D,OAAO,EAAE,WAAW,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAE1D,OAAO,EAAE,iBAAiB,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAElE,OAAO,EAAE,kBAAkB,EAAE,MAAM,mBAAmB,CAAC;AAiBvD;;GAEG;AACH,SAAS,aAAa,CAAC,MAAoB;IACzC,OAAO;QACL,IAAI,EAAE,MAAM,CAAC,SAAS;QACtB,OAAO,EAAE,MAAM,CAAC,IAAI;QACpB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACnC,MAAM,EAAE,MAAM,CAAC,MAAM;KACtB,CAAC;AACJ,CAAC;AAED;;;GAGG;AACH,SAAS,mBAAmB,CAC1B,MAAiB,EACjB,MAAoB;IAEpB,MAAM,KAAK,GAAG,aAAa,CAAC,MAAM,CAAC,CAAC;IAEpC,IAAI,CAAC;QACH,MAAM,KAAK,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;QAChC,MAAM,OAAO,GAAG,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;QACvC,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC;QAE5C,MAAM,QAAQ,GAAG,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC;QACpC,MAAM,QAAQ,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;QAE5B,OAAO;YACL,EAAE,EAAE,MAAM,CAAC,EAAE;YACb,QAAQ,EAAE,MAAM,CAAC,QAAQ;YACzB,iBAAiB,EAAE,MAAM,CAAC,iBAAiB;YAC3C,eAAe,EAAE,QAAQ;YACzB,YAAY,EAAE,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC;YAC3C,UAAU,EAAE,QAAQ,EAAE,UAAU,IAAI,CAAC;YACrC,SAAS;YACT,UAAU,EAAE,MAAM,CAAC,UAAU;YAC7B,IAAI,EAAE,MAAM,CAAC,IAAI;SAClB,CAAC;IACJ,CAAC;IAAC,MAAM,CAAC;QACP,OAAO;YACL,EAAE,EAAE,MAAM,CAAC,EAAE;YACb,QAAQ,EAAE,MAAM,CAAC,QAAQ;YACzB,iBAAiB,EAAE,MAAM,CAAC,iBAAiB;YAC3C,eAAe,EAAE,KAAK;YACtB,YAAY,EAAE,EAAE;YAChB,UAAU,EAAE,CAAC;YACb,SAAS,EAAE,CAAC;YACZ,UAAU,EAAE,MAAM,CAAC,UAAU;YAC7B,IAAI,EAAE,MAAM,CAAC,IAAI;SAClB,CAAC;IACJ,CAAC;AACH,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,kBAAkB,CAC/B,MAAiB,EACjB,MAAoB;IAEpB,MAAM,KAAK,GAAG,aAAa,CAAC,MAAM,CAAC,CAAC;IAEpC,IAAI,CAAC;QACH,MAAM,KAAK,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;QAChC,MAAM,EAAE,OAAO,EAAE,GAAG,MAAM,MAAM,CAAC,mBAAmB,CAAC,KAAK,CAAC,CAAC;QAC5D,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC;QAE5C,MAAM,QAAQ,GAAG,OAAO,CAAC,UAAU,GAAG,CAAC,CAAC;QAExC,OAAO;YACL,EAAE,EAAE,MAAM,CAAC,EAAE;YACb,QAAQ,EAAE,MAAM,CAAC,QAAQ;YACzB,iBAAiB,EAAE,MAAM,CAAC,iBAAiB;YAC3C,eAAe,EAAE,QAAQ;YACzB,YAAY,EAAE,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC;YACnD,UAAU,EAAE,OAAO,CAAC,iBAAiB;YACrC,SAAS;YACT,UAAU,EAAE,MAAM,CAAC,UAAU;YAC7B,IAAI,EAAE,MAAM,CAAC,IAAI;SAClB,CAAC;IACJ,CAAC;IAAC,MAAM,CAAC;QACP,OAAO;YACL,EAAE,EAAE,MAAM,CAAC,EAAE;YACb,QAAQ,EAAE,MAAM,CAAC,QAAQ;YACzB,iBAAiB,EAAE,MAAM,CAAC,iBAAiB;YAC3C,eAAe,EAAE,KAAK;YACtB,YAAY,EAAE,EAAE;YAChB,UAAU,EAAE,CAAC;YACb,SAAS,EAAE,CAAC;YACZ,UAAU,EAAE,MAAM,CAAC,UAAU;YAC7B,IAAI,EAAE,MAAM,CAAC,IAAI;SAClB,CAAC;IACJ,CAAC;AACH,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,gBAAgB,CAAC,cAAsB;IACpD,IAAI,CAAC,UAAU,CAAC,cAAc,CAAC;QAAE,OAAO,IAAI,CAAC;IAE7C,IAAI,CAAC;QACH,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,cAAc,EAAE,OAAO,CAAC,CAAC,CAAC;QAC/D,MAAM,MAAM,GAAG,IAAI,eAAe,CAAC;YACjC,iBAAiB,EAAE,IAAI;YACvB,mBAAmB,EAAE,IAAI;SAC1B,CAAC,CAAC;QACH,MAAM,MAAM,CAAC,UAAU,EAAE,CAAC;QAC1B,OAAO,MAAM,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC;IAC9C,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,OAAO,CAAC,MAAkB;IAO9C,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,IAAI,WAAW,CAAC;IAC5C,MAAM,IAAI,GAAG,OAAO,CAAC,MAAM,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC;IAC5C,MAAM,cAAc,GAAG,MAAM,CAAC,cAAc,IAAI,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,wBAAwB,CAAC,CAAC;IAE7F,+BAA+B;IAC/B,MAAM,WAAW,GAAG,MAAM,CAAC,eAAe,KAAK,KAAK,CAAC;IACrD,IAAI,eAAe,GAA2B,IAAI,CAAC;IACnD,MAAM,SAAS,GAAa,CAAC,aAAa,CAAC,CAAC;IAE5C,IAAI,WAAW,EAAE,CAAC;QAChB,eAAe,GAAG,MAAM,gBAAgB,CAAC,cAAc,CAAC,CAAC;QACzD,IAAI,eAAe,EAAE,CAAC;YACpB,SAAS,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC;QACtC,CAAC;IACH,CAAC;IAED,oBAAoB;IACpB,MAAM,MAAM,GAAG,IAAI,SAAS,CAAC;QAC3B,QAAQ,EAAE,MAAM,CAAC,QAAQ;QACzB,eAAe,EAAE,eAAe,IAAI,SAAS;KAC9C,CAAC,CAAC;IACH,MAAM,SAAS,GAAG,MAAM,MAAM,CAAC,SAAS,EAAE,CAAC;IAE3C,IAAI,SAAS,KAAK,CAAC,EAAE,CAAC;QACpB,MAAM,IAAI,KAAK,CAAC,wBAAwB,MAAM,CAAC,QAAQ,EAAE,CAAC,CAAC;IAC7D,CAAC;IAED,kBAAkB;IAClB,MAAM,OAAO,GAAmB,EAAE,CAAC;IACnC,MAAM,eAAe,GAAG,eAAe,KAAK,IAAI,CAAC;IAEjD,KAAK,MAAM,MAAM,IAAI,MAAM,EAAE,CAAC;QAC5B,MAAM,MAAM,GAAG,eAAe;YAC5B,CAAC,CAAC,MAAM,kBAAkB,CAAC,MAAM,EAAE,MAAM,CAAC;YAC1C,CAAC,CAAC,mBAAmB,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;QACxC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IACvB,CAAC;IAED,2BAA2B;IAC3B,IAAI,eAAe,EAAE,CAAC;QACpB,MAAM,eAAe,CAAC,OAAO,EAAE,CAAC;IAClC,CAAC;IAED,iBAAiB;IACjB,MAAM,MAAM,GAAG,iBAAiB,CAAC,OAAO,CAAC,CAAC;IAC1C,MAAM,UAAU,GAAG,eAAe,CAAC,MAAM,EAAE,MAAM,CAAC,UAAU,CAAC,CAAC;IAC9D,MAAM,WAAW,GAAG,cAAc,EAAE,CAAC;IAErC,2BAA2B;IAC3B,MAAM,aAAa,GAAG,MAAM,CAAC,QAAQ,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;IACzD,MAAM,WAAW,GAAG,kBAAkB,CAAC,OAAO,EAAE,aAAa,CAAC,CAAC;IAE/D,uCAAuC;IACvC,IAAI,MAAM,CAAC,UAAU,EAAE,CAAC;QACtB,MAAM,MAAM,GAAG;YACb,MAAM;YACN,UAAU;YACV,WAAW;YACX,WAAW;YACX,SAAS;YACT,MAAM,EAAE,WAAW;YACnB,KAAK,EAAE,SAAS;SACjB,CAAC;QACF,aAAa,CAAC,MAAM,CAAC,UAAU,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IACpE,CAAC;IAED,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,WAAW,EAAE,SAAS,EAAE,WAAW,EAAE,CAAC;AACrE,CAAC;AAED,8EAA8E;AAC9E,kBAAkB;AAClB,8EAA8E;AAE9E,SAAS,aAAa,CAAC,CAAS;IAC9B,OAAO,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC;AACpC,CAAC;AAED,SAAS,QAAQ,CAAC,CAAS;IACzB,OAAO,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC;AAC7B,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,UAAU;IAC9B,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,IAAI,GAAG,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC;IACnE,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IACrC,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,kBAAkB,CAAC,CAAC;IAE1D,OAAO,CAAC,GAAG,CAAC,oCAAoC,CAAC,CAAC;IAElD,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,WAAW,EAAE,SAAS,EAAE,WAAW,EAAE,GAAG,MAAM,OAAO,CAAC;QAChF,QAAQ;QACR,UAAU;KACX,CAAC,CAAC;IAEH,eAAe;IACf,OAAO,CAAC,GAAG,CAAC,WAAW,WAAW,CAAC,KAAK,aAAa,WAAW,CAAC,OAAO,aAAa,WAAW,CAAC,MAAM,UAAU,CAAC,CAAC;IACnH,OAAO,CAAC,GAAG,CAAC,eAAe,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAC7E,OAAO,CAAC,GAAG,CAAC,UAAU,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;IAE/C,kBAAkB;IAClB,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;IACjC,OAAO,CAAC,GAAG,CAAC,iBAAiB,aAAa,CAAC,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE,CAAC,CAAC;IACxE,OAAO,CAAC,GAAG,CAAC,iBAAiB,aAAa,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;IACrE,OAAO,CAAC,GAAG,CAAC,iBAAiB,aAAa,CAAC,MAAM,CAAC,OAAO,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC;IACjE,OAAO,CAAC,GAAG,CAAC,iBAAiB,aAAa,CAAC,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;IACvE,OAAO,CAAC,GAAG,CAAC,iBAAiB,aAAa,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;IACrE,OAAO,CAAC,GAAG,CAAC,oBAAoB,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE,OAAO,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE,OAAO,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE,OAAO,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE,EAAE,CAAC,CAAC;IAErK,UAAU;IACV,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;IACjC,OAAO,CAAC,GAAG,CAAC,WAAW,QAAQ,CAAC,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IACvD,OAAO,CAAC,GAAG,CAAC,WAAW,QAAQ,CAAC,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IACvD,OAAO,CAAC,GAAG,CAAC,WAAW,QAAQ,CAAC,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IACvD,OAAO,CAAC,GAAG,CAAC,WAAW,QAAQ,CAAC,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACxD,OAAO,CAAC,GAAG,CAAC,WAAW,QAAQ,CAAC,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IAEvD,eAAe;IACf,OAAO,CAAC,GAAG,CAAC,uBAAuB,CAAC,CAAC;IACrC,KAAK,MAAM,GAAG,IAAI,MAAM,CAAC,UAAU,EAAE,CAAC;QACpC,MAAM,MAAM,GAAG,GAAG,CAAC,aAAa,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,aAAa,GAAG,CAAC,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;QAChG,MAAM,GAAG,GAAG,GAAG,CAAC,cAAc,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,GAAG,CAAC,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;QAC3F,OAAO,CAAC,GAAG,CAAC,KAAK,GAAG,CAAC,QAAQ,YAAY,aAAa,CAAC,GAAG,CAAC,OAAO,CAAC,MAAM,CAAC,cAAc,aAAa,CAAC,GAAG,CAAC,OAAO,CAAC,SAAS,CAAC,OAAO,aAAa,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC,GAAG,MAAM,GAAG,GAAG,EAAE,CAAC,CAAC;IACrL,CAAC;IAED,iBAAiB;IACjB,OAAO,CAAC,GAAG,CAAC,yBAAyB,CAAC,CAAC;IACvC,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,YAAY,EAAE,CAAC;QACvC,OAAO,CAAC,GAAG,CAAC,KAAK,IAAI,CAAC,UAAU,YAAY,aAAa,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,cAAc,aAAa,CAAC,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,OAAO,aAAa,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC;IAC5K,CAAC;IAED,iBAAiB;IACjB,IAAI,MAAM,CAAC,aAAa,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACpC,OAAO,CAAC,GAAG,CAAC,yBAAyB,MAAM,CAAC,aAAa,CAAC,MAAM,OAAO,CAAC,CAAC;QACzE,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,aAAa,EAAE,CAAC;YACrC,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,QAAQ,IAAI,CAAC,CAAC,UAAU,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;QACrE,CAAC;IACH,CAAC;IAED,kBAAkB;IAClB,IAAI,MAAM,CAAC,cAAc,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACrC,OAAO,CAAC,GAAG,CAAC,0BAA0B,MAAM,CAAC,cAAc,CAAC,MAAM,OAAO,CAAC,CAAC;QAC3E,KAAK,MAAM,EAAE,IAAI,MAAM,CAAC,cAAc,EAAE,CAAC;YACvC,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,YAAY,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACnE,CAAC;IACH,CAAC;IAED,eAAe;IACf,OAAO,CAAC,GAAG,CAAC,wBAAwB,CAAC,CAAC;IACtC,OAAO,CAAC,GAAG,CAAC,yBAAyB,WAAW,CAAC,mBAAmB,EAAE,CAAC,CAAC;IACxE,OAAO,CAAC,GAAG,CAAC,kBAAkB,WAAW,CAAC,UAAU,EAAE,CAAC,CAAC;IACxD,OAAO,CAAC,GAAG,CAAC,wBAAwB,WAAW,CAAC,eAAe,EAAE,CAAC,CAAC;IAEnE,IAAI,WAAW,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACpC,OAAO,CAAC,GAAG,CAAC,kCAAkC,CAAC,CAAC;QAChD,KAAK,MAAM,IAAI,IAAI,WAAW,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC;YACrD,MAAM,SAAS,GAAG,IAAI,CAAC,UAAU,GAAG,CAAC;gBACnC,CAAC,CAAC,aAAa,CAAC,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,UAAU,CAAC;gBAC/C,CAAC,CAAC,KAAK,CAAC;YACV,OAAO,CAAC,GAAG,CAAC,OAAO,IAAI,CAAC,MAAM,aAAa,IAAI,CAAC,UAAU,OAAO,IAAI,CAAC,OAAO,OAAO,IAAI,CAAC,OAAO,cAAc,SAAS,YAAY,IAAI,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QACtK,CAAC;IACH,CAAC;IAED,IAAI,WAAW,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACrC,OAAO,CAAC,GAAG,CAAC,6CAA6C,CAAC,CAAC;QAC3D,KAAK,MAAM,IAAI,IAAI,WAAW,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC;YACtD,OAAO,CAAC,GAAG,CAAC,OAAO,IAAI,CAAC,MAAM,aAAa,IAAI,CAAC,UAAU,OAAO,IAAI,CAAC,OAAO,OAAO,IAAI,CAAC,OAAO,gBAAgB,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACjJ,CAAC;IACH,CAAC;IAED,IAAI,WAAW,CAAC,iBAAiB,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC7C,OAAO,CAAC,GAAG,CAAC,0BAA0B,WAAW,CAAC,iBAAiB,CAAC,MAAM,IAAI,CAAC,CAAC;QAChF,KAAK,MAAM,EAAE,IAAI,WAAW,CAAC,iBAAiB,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC;YAC5D,OAAO,CAAC,GAAG,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;QAC3B,CAAC;QACD,IAAI,WAAW,CAAC,iBAAiB,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;YAC9C,OAAO,CAAC,GAAG,CAAC,eAAe,WAAW,CAAC,iBAAiB,CAAC,MAAM,GAAG,EAAE,OAAO,CAAC,CAAC;QAC/E,CAAC;IACH,CAAC;IAED,mBAAmB;IACnB,OAAO,CAAC,GAAG,CAAC,4BAA4B,CAAC,CAAC;IAC1C,IAAI,UAAU,CAAC,MAAM,EAAE,CAAC;QACtB,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;IAC1B,CAAC;SAAM,CAAC;QACN,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;QACzB,KAAK,MAAM,CAAC,IAAI,UAAU,CAAC,UAAU,EAAE,CAAC;YACtC,OAAO,CAAC,GAAG,CAAC,SAAS,CAAC,EAAE,CAAC,CAAC;QAC5B,CAAC;IACH,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,sBAAsB,UAAU,EAAE,CAAC,CAAC;IAChD,OAAO,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;IAEvB,IAAI,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC;QACvB,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC;IACvB,CAAC;AACH,CAAC"}
@@ -1,13 +0,0 @@
1
- /**
2
- * Evaluation framework public API
3
- * @module agent-threat-rules/eval
4
- */
5
- export { EVAL_CORPUS, getAttackSamples, getBenignSamples, getSamplesByCategory, getSamplesByDifficulty, getCorpusStats } from './corpus.js';
6
- export type { CorpusSample } from './corpus.js';
7
- export { computeEvalReport, checkRegression } from './metrics.js';
8
- export type { SampleResult, ConfusionMatrix, ClassMetrics, LatencyStats, CategoryBreakdown, DifficultyBreakdown, EvalReport, RegressionCheck, BaselineThresholds, } from './metrics.js';
9
- export { runEval, runEvalCLI } from './eval-harness.js';
10
- export type { EvalConfig } from './eval-harness.js';
11
- export { computeRuleQuality } from './rule-metrics.js';
12
- export type { RuleQuality, RuleQualityReport } from './rule-metrics.js';
13
- //# sourceMappingURL=index.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/eval/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,WAAW,EAAE,gBAAgB,EAAE,gBAAgB,EAAE,oBAAoB,EAAE,sBAAsB,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAC5I,YAAY,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAEhD,OAAO,EAAE,iBAAiB,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAClE,YAAY,EACV,YAAY,EACZ,eAAe,EACf,YAAY,EACZ,YAAY,EACZ,iBAAiB,EACjB,mBAAmB,EACnB,UAAU,EACV,eAAe,EACf,kBAAkB,GACnB,MAAM,cAAc,CAAC;AAEtB,OAAO,EAAE,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AACxD,YAAY,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAEpD,OAAO,EAAE,kBAAkB,EAAE,MAAM,mBAAmB,CAAC;AACvD,YAAY,EAAE,WAAW,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAC"}
@@ -1,9 +0,0 @@
1
- /**
2
- * Evaluation framework public API
3
- * @module agent-threat-rules/eval
4
- */
5
- export { EVAL_CORPUS, getAttackSamples, getBenignSamples, getSamplesByCategory, getSamplesByDifficulty, getCorpusStats } from './corpus.js';
6
- export { computeEvalReport, checkRegression } from './metrics.js';
7
- export { runEval, runEvalCLI } from './eval-harness.js';
8
- export { computeRuleQuality } from './rule-metrics.js';
9
- //# sourceMappingURL=index.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/eval/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,WAAW,EAAE,gBAAgB,EAAE,gBAAgB,EAAE,oBAAoB,EAAE,sBAAsB,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAG5I,OAAO,EAAE,iBAAiB,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAalE,OAAO,EAAE,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAGxD,OAAO,EAAE,kBAAkB,EAAE,MAAM,mBAAmB,CAAC"}
@@ -1,74 +0,0 @@
1
- /**
2
- * Evaluation Metrics -- computes precision, recall, F1, confusion matrix,
3
- * per-category breakdowns, and latency percentiles.
4
- *
5
- * All functions are pure (no side effects, no mutation).
6
- *
7
- * @module agent-threat-rules/eval/metrics
8
- */
9
- export interface SampleResult {
10
- readonly id: string;
11
- readonly category: string;
12
- readonly expectedDetection: boolean;
13
- readonly actualDetection: boolean;
14
- readonly matchedRules: readonly string[];
15
- readonly confidence: number;
16
- readonly latencyMs: number;
17
- readonly difficulty: string;
18
- readonly tier: string;
19
- }
20
- export interface ConfusionMatrix {
21
- readonly tp: number;
22
- readonly fp: number;
23
- readonly tn: number;
24
- readonly fn: number;
25
- }
26
- export interface ClassMetrics {
27
- readonly precision: number;
28
- readonly recall: number;
29
- readonly f1: number;
30
- readonly accuracy: number;
31
- readonly fpRate: number;
32
- readonly confusion: ConfusionMatrix;
33
- readonly sampleCount: number;
34
- }
35
- export interface LatencyStats {
36
- readonly p50: number;
37
- readonly p95: number;
38
- readonly p99: number;
39
- readonly mean: number;
40
- readonly max: number;
41
- }
42
- export interface CategoryBreakdown {
43
- readonly category: string;
44
- readonly metrics: ClassMetrics;
45
- readonly missedSamples: readonly string[];
46
- readonly falsePositives: readonly string[];
47
- }
48
- export interface DifficultyBreakdown {
49
- readonly difficulty: string;
50
- readonly metrics: ClassMetrics;
51
- }
52
- export interface EvalReport {
53
- readonly timestamp: string;
54
- readonly corpusSize: number;
55
- readonly overall: ClassMetrics;
56
- readonly latency: LatencyStats;
57
- readonly byCategory: readonly CategoryBreakdown[];
58
- readonly byDifficulty: readonly DifficultyBreakdown[];
59
- readonly missedAttacks: readonly SampleResult[];
60
- readonly falsePositives: readonly SampleResult[];
61
- }
62
- export declare function computeEvalReport(results: readonly SampleResult[]): EvalReport;
63
- export interface RegressionCheck {
64
- readonly passed: boolean;
65
- readonly violations: readonly string[];
66
- }
67
- export interface BaselineThresholds {
68
- readonly minRecall: number;
69
- readonly maxFpRate: number;
70
- readonly minF1: number;
71
- readonly maxP95LatencyMs: number;
72
- }
73
- export declare function checkRegression(report: EvalReport, thresholds?: BaselineThresholds): RegressionCheck;
74
- //# sourceMappingURL=metrics.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"metrics.d.ts","sourceRoot":"","sources":["../../src/eval/metrics.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,MAAM,WAAW,YAAY;IAC3B,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC1B,QAAQ,CAAC,iBAAiB,EAAE,OAAO,CAAC;IACpC,QAAQ,CAAC,eAAe,EAAE,OAAO,CAAC;IAClC,QAAQ,CAAC,YAAY,EAAE,SAAS,MAAM,EAAE,CAAC;IACzC,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,WAAW,eAAe;IAC9B,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,YAAY;IAC3B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC1B,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,SAAS,EAAE,eAAe,CAAC;IACpC,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;CAC9B;AAED,MAAM,WAAW,YAAY;IAC3B,QAAQ,CAAC,GAAG,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,GAAG,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,GAAG,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,GAAG,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,iBAAiB;IAChC,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC1B,QAAQ,CAAC,OAAO,EAAE,YAAY,CAAC;IAC/B,QAAQ,CAAC,aAAa,EAAE,SAAS,MAAM,EAAE,CAAC;IAC1C,QAAQ,CAAC,cAAc,EAAE,SAAS,MAAM,EAAE,CAAC;CAC5C;AAED,MAAM,WAAW,mBAAmB;IAClC,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,OAAO,EAAE,YAAY,CAAC;CAChC;AAED,MAAM,WAAW,UAAU;IACzB,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,OAAO,EAAE,YAAY,CAAC;IAC/B,QAAQ,CAAC,OAAO,EAAE,YAAY,CAAC;IAC/B,QAAQ,CAAC,UAAU,EAAE,SAAS,iBAAiB,EAAE,CAAC;IAClD,QAAQ,CAAC,YAAY,EAAE,SAAS,mBAAmB,EAAE,CAAC;IACtD,QAAQ,CAAC,aAAa,EAAE,SAAS,YAAY,EAAE,CAAC;IAChD,QAAQ,CAAC,cAAc,EAAE,SAAS,YAAY,EAAE,CAAC;CAClD;AAkDD,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,SAAS,YAAY,EAAE,GAAG,UAAU,CAsC9E;AAMD,MAAM,WAAW,eAAe;IAC9B,QAAQ,CAAC,MAAM,EAAE,OAAO,CAAC;IACzB,QAAQ,CAAC,UAAU,EAAE,SAAS,MAAM,EAAE,CAAC;CACxC;AAED,MAAM,WAAW,kBAAkB;IACjC,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,eAAe,EAAE,MAAM,CAAC;CAClC;AASD,wBAAgB,eAAe,CAC7B,MAAM,EAAE,UAAU,EAClB,UAAU,GAAE,kBAAuC,GAClD,eAAe,CA4BjB"}
@@ -1,108 +0,0 @@
1
- /**
2
- * Evaluation Metrics -- computes precision, recall, F1, confusion matrix,
3
- * per-category breakdowns, and latency percentiles.
4
- *
5
- * All functions are pure (no side effects, no mutation).
6
- *
7
- * @module agent-threat-rules/eval/metrics
8
- */
9
- // ---------------------------------------------------------------------------
10
- // Core metric calculations
11
- // ---------------------------------------------------------------------------
12
- function buildConfusionMatrix(results) {
13
- let tp = 0, fp = 0, tn = 0, fn = 0;
14
- for (const r of results) {
15
- if (r.expectedDetection && r.actualDetection)
16
- tp++;
17
- else if (!r.expectedDetection && r.actualDetection)
18
- fp++;
19
- else if (!r.expectedDetection && !r.actualDetection)
20
- tn++;
21
- else
22
- fn++;
23
- }
24
- return { tp, fp, tn, fn };
25
- }
26
- function computeClassMetrics(cm, sampleCount) {
27
- const precision = cm.tp + cm.fp > 0 ? cm.tp / (cm.tp + cm.fp) : 1;
28
- const recall = cm.tp + cm.fn > 0 ? cm.tp / (cm.tp + cm.fn) : 1;
29
- const f1 = precision + recall > 0 ? 2 * precision * recall / (precision + recall) : 0;
30
- const accuracy = sampleCount > 0 ? (cm.tp + cm.tn) / sampleCount : 1;
31
- const fpRate = cm.fp + cm.tn > 0 ? cm.fp / (cm.fp + cm.tn) : 0;
32
- return { precision, recall, f1, accuracy, fpRate, confusion: cm, sampleCount };
33
- }
34
- function computeLatency(results) {
35
- if (results.length === 0) {
36
- return { p50: 0, p95: 0, p99: 0, mean: 0, max: 0 };
37
- }
38
- const sorted = [...results].map((r) => r.latencyMs).sort((a, b) => a - b);
39
- const len = sorted.length;
40
- return {
41
- p50: sorted[Math.floor(len * 0.5)] ?? 0,
42
- p95: sorted[Math.floor(len * 0.95)] ?? 0,
43
- p99: sorted[Math.floor(len * 0.99)] ?? 0,
44
- mean: sorted.reduce((a, b) => a + b, 0) / len,
45
- max: sorted[len - 1] ?? 0,
46
- };
47
- }
48
- // ---------------------------------------------------------------------------
49
- // Report generation
50
- // ---------------------------------------------------------------------------
51
- export function computeEvalReport(results) {
52
- const overallCM = buildConfusionMatrix(results);
53
- const overall = computeClassMetrics(overallCM, results.length);
54
- const latency = computeLatency(results);
55
- // By category
56
- const categories = [...new Set(results.map((r) => r.category))];
57
- const byCategory = categories.map((cat) => {
58
- const catResults = results.filter((r) => r.category === cat);
59
- const cm = buildConfusionMatrix(catResults);
60
- const metrics = computeClassMetrics(cm, catResults.length);
61
- const missed = catResults.filter((r) => r.expectedDetection && !r.actualDetection).map((r) => r.id);
62
- const fps = catResults.filter((r) => !r.expectedDetection && r.actualDetection).map((r) => r.id);
63
- return { category: cat, metrics, missedSamples: missed, falsePositives: fps };
64
- });
65
- // By difficulty
66
- const difficulties = [...new Set(results.map((r) => r.difficulty))];
67
- const byDifficulty = difficulties.map((diff) => {
68
- const diffResults = results.filter((r) => r.difficulty === diff);
69
- const cm = buildConfusionMatrix(diffResults);
70
- const metrics = computeClassMetrics(cm, diffResults.length);
71
- return { difficulty: diff, metrics };
72
- });
73
- const missedAttacks = results.filter((r) => r.expectedDetection && !r.actualDetection);
74
- const falsePositives = results.filter((r) => !r.expectedDetection && r.actualDetection);
75
- return {
76
- timestamp: new Date().toISOString(),
77
- corpusSize: results.length,
78
- overall,
79
- latency,
80
- byCategory,
81
- byDifficulty,
82
- missedAttacks,
83
- falsePositives,
84
- };
85
- }
86
- const DEFAULT_THRESHOLDS = {
87
- minRecall: 0.60,
88
- maxFpRate: 0.05,
89
- minF1: 0.70,
90
- maxP95LatencyMs: 50,
91
- };
92
- export function checkRegression(report, thresholds = DEFAULT_THRESHOLDS) {
93
- const violations = [];
94
- if (report.overall.recall < thresholds.minRecall) {
95
- violations.push(`Recall ${(report.overall.recall * 100).toFixed(1)}% < minimum ${(thresholds.minRecall * 100).toFixed(1)}%`);
96
- }
97
- if (report.overall.fpRate > thresholds.maxFpRate) {
98
- violations.push(`FP rate ${(report.overall.fpRate * 100).toFixed(3)}% > maximum ${(thresholds.maxFpRate * 100).toFixed(3)}%`);
99
- }
100
- if (report.overall.f1 < thresholds.minF1) {
101
- violations.push(`F1 ${(report.overall.f1 * 100).toFixed(1)}% < minimum ${(thresholds.minF1 * 100).toFixed(1)}%`);
102
- }
103
- if (report.latency.p95 > thresholds.maxP95LatencyMs) {
104
- violations.push(`P95 latency ${report.latency.p95.toFixed(1)}ms > maximum ${thresholds.maxP95LatencyMs}ms`);
105
- }
106
- return { passed: violations.length === 0, violations };
107
- }
108
- //# sourceMappingURL=metrics.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"metrics.js","sourceRoot":"","sources":["../../src/eval/metrics.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AA8DH,8EAA8E;AAC9E,2BAA2B;AAC3B,8EAA8E;AAE9E,SAAS,oBAAoB,CAAC,OAAgC;IAC5D,IAAI,EAAE,GAAG,CAAC,EAAE,EAAE,GAAG,CAAC,EAAE,EAAE,GAAG,CAAC,EAAE,EAAE,GAAG,CAAC,CAAC;IAEnC,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;QACxB,IAAI,CAAC,CAAC,iBAAiB,IAAI,CAAC,CAAC,eAAe;YAAE,EAAE,EAAE,CAAC;aAC9C,IAAI,CAAC,CAAC,CAAC,iBAAiB,IAAI,CAAC,CAAC,eAAe;YAAE,EAAE,EAAE,CAAC;aACpD,IAAI,CAAC,CAAC,CAAC,iBAAiB,IAAI,CAAC,CAAC,CAAC,eAAe;YAAE,EAAE,EAAE,CAAC;;YACrD,EAAE,EAAE,CAAC;IACZ,CAAC;IAED,OAAO,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,CAAC;AAC5B,CAAC;AAED,SAAS,mBAAmB,CAAC,EAAmB,EAAE,WAAmB;IACnE,MAAM,SAAS,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAClE,MAAM,MAAM,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC/D,MAAM,EAAE,GAAG,SAAS,GAAG,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,SAAS,GAAG,MAAM,GAAG,CAAC,SAAS,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IACtF,MAAM,QAAQ,GAAG,WAAW,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,CAAC,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC;IACrE,MAAM,MAAM,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAE/D,OAAO,EAAE,SAAS,EAAE,MAAM,EAAE,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,SAAS,EAAE,EAAE,EAAE,WAAW,EAAE,CAAC;AACjF,CAAC;AAED,SAAS,cAAc,CAAC,OAAgC;IACtD,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,CAAC;IACrD,CAAC;IAED,MAAM,MAAM,GAAG,CAAC,GAAG,OAAO,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC1E,MAAM,GAAG,GAAG,MAAM,CAAC,MAAM,CAAC;IAE1B,OAAO;QACL,GAAG,EAAE,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,GAAG,GAAG,CAAC,CAAC,IAAI,CAAC;QACvC,GAAG,EAAE,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,GAAG,IAAI,CAAC,CAAC,IAAI,CAAC;QACxC,GAAG,EAAE,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,GAAG,IAAI,CAAC,CAAC,IAAI,CAAC;QACxC,IAAI,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,GAAG;QAC7C,GAAG,EAAE,MAAM,CAAC,GAAG,GAAG,CAAC,CAAC,IAAI,CAAC;KAC1B,CAAC;AACJ,CAAC;AAED,8EAA8E;AAC9E,oBAAoB;AACpB,8EAA8E;AAE9E,MAAM,UAAU,iBAAiB,CAAC,OAAgC;IAChE,MAAM,SAAS,GAAG,oBAAoB,CAAC,OAAO,CAAC,CAAC;IAChD,MAAM,OAAO,GAAG,mBAAmB,CAAC,SAAS,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC;IAC/D,MAAM,OAAO,GAAG,cAAc,CAAC,OAAO,CAAC,CAAC;IAExC,cAAc;IACd,MAAM,UAAU,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;IAChE,MAAM,UAAU,GAAwB,UAAU,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;QAC7D,MAAM,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,GAAG,CAAC,CAAC;QAC7D,MAAM,EAAE,GAAG,oBAAoB,CAAC,UAAU,CAAC,CAAC;QAC5C,MAAM,OAAO,GAAG,mBAAmB,CAAC,EAAE,EAAE,UAAU,CAAC,MAAM,CAAC,CAAC;QAC3D,MAAM,MAAM,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,iBAAiB,IAAI,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QACpG,MAAM,GAAG,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,iBAAiB,IAAI,CAAC,CAAC,eAAe,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QACjG,OAAO,EAAE,QAAQ,EAAE,GAAG,EAAE,OAAO,EAAE,aAAa,EAAE,MAAM,EAAE,cAAc,EAAE,GAAG,EAAE,CAAC;IAChF,CAAC,CAAC,CAAC;IAEH,gBAAgB;IAChB,MAAM,YAAY,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;IACpE,MAAM,YAAY,GAA0B,YAAY,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QACpE,MAAM,WAAW,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,KAAK,IAAI,CAAC,CAAC;QACjE,MAAM,EAAE,GAAG,oBAAoB,CAAC,WAAW,CAAC,CAAC;QAC7C,MAAM,OAAO,GAAG,mBAAmB,CAAC,EAAE,EAAE,WAAW,CAAC,MAAM,CAAC,CAAC;QAC5D,OAAO,EAAE,UAAU,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC;IACvC,CAAC,CAAC,CAAC;IAEH,MAAM,aAAa,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,iBAAiB,IAAI,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC;IACvF,MAAM,cAAc,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,iBAAiB,IAAI,CAAC,CAAC,eAAe,CAAC,CAAC;IAExF,OAAO;QACL,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACnC,UAAU,EAAE,OAAO,CAAC,MAAM;QAC1B,OAAO;QACP,OAAO;QACP,UAAU;QACV,YAAY;QACZ,aAAa;QACb,cAAc;KACf,CAAC;AACJ,CAAC;AAkBD,MAAM,kBAAkB,GAAuB;IAC7C,SAAS,EAAE,IAAI;IACf,SAAS,EAAE,IAAI;IACf,KAAK,EAAE,IAAI;IACX,eAAe,EAAE,EAAE;CACpB,CAAC;AAEF,MAAM,UAAU,eAAe,CAC7B,MAAkB,EAClB,aAAiC,kBAAkB;IAEnD,MAAM,UAAU,GAAa,EAAE,CAAC;IAEhC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,GAAG,UAAU,CAAC,SAAS,EAAE,CAAC;QACjD,UAAU,CAAC,IAAI,CACb,UAAU,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC,UAAU,CAAC,SAAS,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAC5G,CAAC;IACJ,CAAC;IAED,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,GAAG,UAAU,CAAC,SAAS,EAAE,CAAC;QACjD,UAAU,CAAC,IAAI,CACb,WAAW,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC,UAAU,CAAC,SAAS,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAC7G,CAAC;IACJ,CAAC;IAED,IAAI,MAAM,CAAC,OAAO,CAAC,EAAE,GAAG,UAAU,CAAC,KAAK,EAAE,CAAC;QACzC,UAAU,CAAC,IAAI,CACb,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,EAAE,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC,UAAU,CAAC,KAAK,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAChG,CAAC;IACJ,CAAC;IAED,IAAI,MAAM,CAAC,OAAO,CAAC,GAAG,GAAG,UAAU,CAAC,eAAe,EAAE,CAAC;QACpD,UAAU,CAAC,IAAI,CACb,eAAe,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,gBAAgB,UAAU,CAAC,eAAe,IAAI,CAC3F,CAAC;IACJ,CAAC;IAED,OAAO,EAAE,MAAM,EAAE,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE,UAAU,EAAE,CAAC;AACzD,CAAC"}
@@ -1,34 +0,0 @@
1
- /**
2
- * PINT Benchmark Corpus Loader
3
- *
4
- * Reads the PINT-format dataset (JSON with text/category/label/source/language)
5
- * built from publicly available prompt injection datasets:
6
- * - deepset/prompt-injections (HuggingFace)
7
- * - Lakera/gandalf_ignore_instructions (HuggingFace)
8
- *
9
- * Converts each sample into the CorpusSample interface used by the ATR eval
10
- * harness, allowing the PINT corpus to be evaluated alongside or instead of
11
- * the built-in hand-crafted corpus.
12
- *
13
- * @module agent-threat-rules/eval/pint-corpus
14
- */
15
- import type { CorpusSample } from './corpus.js';
16
- /**
17
- * Load the PINT benchmark corpus from a JSON file on disk.
18
- *
19
- * @param dataPath - Absolute path to pint-corpus.json
20
- * @returns Readonly array of CorpusSample for use with runEval()
21
- */
22
- export declare function loadPintCorpus(dataPath: string): readonly CorpusSample[];
23
- /**
24
- * Get summary statistics for the loaded PINT corpus.
25
- */
26
- export declare function getPintCorpusStats(corpus: readonly CorpusSample[]): {
27
- readonly total: number;
28
- readonly attacks: number;
29
- readonly benign: number;
30
- readonly byCategory: Readonly<Record<string, number>>;
31
- readonly byDifficulty: Readonly<Record<string, number>>;
32
- readonly byLanguage: Readonly<Record<string, number>>;
33
- };
34
- //# sourceMappingURL=pint-corpus.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"pint-corpus.d.ts","sourceRoot":"","sources":["../../src/eval/pint-corpus.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAGH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAsDhD;;;;;GAKG;AACH,wBAAgB,cAAc,CAAC,QAAQ,EAAE,MAAM,GAAG,SAAS,YAAY,EAAE,CAqCxE;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,MAAM,EAAE,SAAS,YAAY,EAAE,GAAG;IACnE,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,UAAU,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;IACtD,QAAQ,CAAC,YAAY,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;IACxD,QAAQ,CAAC,UAAU,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;CACvD,CAoBA"}