@panguard-ai/atr 1.4.3 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (274) hide show
  1. package/dist/action-executor.d.ts +44 -0
  2. package/dist/action-executor.d.ts.map +1 -0
  3. package/dist/action-executor.js +130 -0
  4. package/dist/action-executor.js.map +1 -0
  5. package/dist/adapters/default-adapter.d.ts +24 -0
  6. package/dist/adapters/default-adapter.d.ts.map +1 -0
  7. package/dist/adapters/default-adapter.js +51 -0
  8. package/dist/adapters/default-adapter.js.map +1 -0
  9. package/dist/adapters/stdio-adapter.d.ts +30 -0
  10. package/dist/adapters/stdio-adapter.d.ts.map +1 -0
  11. package/dist/adapters/stdio-adapter.js +128 -0
  12. package/dist/adapters/stdio-adapter.js.map +1 -0
  13. package/dist/badge.d.ts +42 -0
  14. package/dist/badge.d.ts.map +1 -0
  15. package/dist/badge.js +163 -0
  16. package/dist/badge.js.map +1 -0
  17. package/dist/capability-extractor.d.ts +35 -0
  18. package/dist/capability-extractor.d.ts.map +1 -0
  19. package/dist/capability-extractor.js +91 -0
  20. package/dist/capability-extractor.js.map +1 -0
  21. package/dist/cli/scan-handler.d.ts +21 -0
  22. package/dist/cli/scan-handler.d.ts.map +1 -0
  23. package/dist/cli/scan-handler.js +276 -0
  24. package/dist/cli/scan-handler.js.map +1 -0
  25. package/dist/cli/tc-pipeline.d.ts +18 -0
  26. package/dist/cli/tc-pipeline.d.ts.map +1 -0
  27. package/dist/cli/tc-pipeline.js +295 -0
  28. package/dist/cli/tc-pipeline.js.map +1 -0
  29. package/dist/cli.d.ts +12 -0
  30. package/dist/cli.d.ts.map +1 -0
  31. package/dist/cli.js +894 -0
  32. package/dist/cli.js.map +1 -0
  33. package/dist/content-hash.d.ts +7 -0
  34. package/dist/content-hash.d.ts.map +1 -0
  35. package/dist/content-hash.js +10 -0
  36. package/dist/content-hash.js.map +1 -0
  37. package/dist/converters/elastic.d.ts +36 -0
  38. package/dist/converters/elastic.d.ts.map +1 -0
  39. package/dist/converters/elastic.js +125 -0
  40. package/dist/converters/elastic.js.map +1 -0
  41. package/dist/converters/generic-regex.d.ts +37 -0
  42. package/dist/converters/generic-regex.d.ts.map +1 -0
  43. package/dist/converters/generic-regex.js +59 -0
  44. package/dist/converters/generic-regex.js.map +1 -0
  45. package/dist/converters/index.d.ts +32 -0
  46. package/dist/converters/index.d.ts.map +1 -0
  47. package/dist/converters/index.js +38 -0
  48. package/dist/converters/index.js.map +1 -0
  49. package/dist/converters/sarif.d.ts +18 -0
  50. package/dist/converters/sarif.d.ts.map +1 -0
  51. package/dist/converters/sarif.js +142 -0
  52. package/dist/converters/sarif.js.map +1 -0
  53. package/dist/converters/splunk.d.ts +19 -0
  54. package/dist/converters/splunk.d.ts.map +1 -0
  55. package/dist/converters/splunk.js +148 -0
  56. package/dist/converters/splunk.js.map +1 -0
  57. package/dist/coverage-analyzer.d.ts +43 -0
  58. package/dist/coverage-analyzer.d.ts.map +1 -0
  59. package/dist/coverage-analyzer.js +329 -0
  60. package/dist/coverage-analyzer.js.map +1 -0
  61. package/dist/embedding/build-corpus.d.ts +15 -0
  62. package/dist/embedding/build-corpus.d.ts.map +1 -0
  63. package/dist/embedding/build-corpus.js +105 -0
  64. package/dist/embedding/build-corpus.js.map +1 -0
  65. package/dist/embedding/model-loader.d.ts +41 -0
  66. package/dist/embedding/model-loader.d.ts.map +1 -0
  67. package/dist/embedding/model-loader.js +90 -0
  68. package/dist/embedding/model-loader.js.map +1 -0
  69. package/dist/embedding/vector-store.d.ts +41 -0
  70. package/dist/embedding/vector-store.d.ts.map +1 -0
  71. package/dist/embedding/vector-store.js +70 -0
  72. package/dist/embedding/vector-store.js.map +1 -0
  73. package/dist/engine.d.ts +222 -0
  74. package/dist/engine.d.ts.map +1 -0
  75. package/dist/engine.js +1185 -0
  76. package/dist/engine.js.map +1 -0
  77. package/dist/eval/corpus.d.ts +42 -0
  78. package/dist/eval/corpus.d.ts.map +1 -0
  79. package/dist/eval/corpus.js +427 -0
  80. package/dist/eval/corpus.js.map +1 -0
  81. package/dist/eval/eval-harness.d.ts +44 -0
  82. package/dist/eval/eval-harness.d.ts.map +1 -0
  83. package/dist/eval/eval-harness.js +296 -0
  84. package/dist/eval/eval-harness.js.map +1 -0
  85. package/dist/eval/index.d.ts +13 -0
  86. package/dist/eval/index.d.ts.map +1 -0
  87. package/dist/eval/index.js +9 -0
  88. package/dist/eval/index.js.map +1 -0
  89. package/dist/eval/metrics.d.ts +74 -0
  90. package/dist/eval/metrics.d.ts.map +1 -0
  91. package/dist/eval/metrics.js +108 -0
  92. package/dist/eval/metrics.js.map +1 -0
  93. package/dist/eval/pint-corpus.d.ts +34 -0
  94. package/dist/eval/pint-corpus.d.ts.map +1 -0
  95. package/dist/eval/pint-corpus.js +113 -0
  96. package/dist/eval/pint-corpus.js.map +1 -0
  97. package/dist/eval/rule-corpus.d.ts +9 -0
  98. package/dist/eval/rule-corpus.d.ts.map +1 -0
  99. package/dist/eval/rule-corpus.js +4780 -0
  100. package/dist/eval/rule-corpus.js.map +1 -0
  101. package/dist/eval/rule-metrics.d.ts +34 -0
  102. package/dist/eval/rule-metrics.d.ts.map +1 -0
  103. package/dist/eval/rule-metrics.js +92 -0
  104. package/dist/eval/rule-metrics.js.map +1 -0
  105. package/dist/eval/run-eval.d.ts +7 -0
  106. package/dist/eval/run-eval.d.ts.map +1 -0
  107. package/dist/eval/run-eval.js +11 -0
  108. package/dist/eval/run-eval.js.map +1 -0
  109. package/dist/eval/run-pint-benchmark.d.ts +18 -0
  110. package/dist/eval/run-pint-benchmark.d.ts.map +1 -0
  111. package/dist/eval/run-pint-benchmark.js +159 -0
  112. package/dist/eval/run-pint-benchmark.js.map +1 -0
  113. package/dist/eval/skill-benchmark.d.ts +66 -0
  114. package/dist/eval/skill-benchmark.d.ts.map +1 -0
  115. package/dist/eval/skill-benchmark.js +194 -0
  116. package/dist/eval/skill-benchmark.js.map +1 -0
  117. package/dist/flywheel.d.ts +54 -0
  118. package/dist/flywheel.d.ts.map +1 -0
  119. package/dist/flywheel.js +121 -0
  120. package/dist/flywheel.js.map +1 -0
  121. package/dist/hook-handler.d.ts +61 -0
  122. package/dist/hook-handler.d.ts.map +1 -0
  123. package/dist/hook-handler.js +178 -0
  124. package/dist/hook-handler.js.map +1 -0
  125. package/dist/index.d.ts +8 -0
  126. package/dist/index.d.ts.map +1 -0
  127. package/{src/index.ts → dist/index.js} +1 -0
  128. package/dist/index.js.map +1 -0
  129. package/dist/layer-integration.d.ts +55 -0
  130. package/dist/layer-integration.d.ts.map +1 -0
  131. package/dist/layer-integration.js +187 -0
  132. package/dist/layer-integration.js.map +1 -0
  133. package/dist/loader.d.ts +18 -0
  134. package/dist/loader.d.ts.map +1 -0
  135. package/dist/loader.js +129 -0
  136. package/dist/loader.js.map +1 -0
  137. package/dist/mcp-server.d.ts +13 -0
  138. package/dist/mcp-server.d.ts.map +1 -0
  139. package/dist/mcp-server.js +246 -0
  140. package/dist/mcp-server.js.map +1 -0
  141. package/dist/mcp-tools/coverage-gaps.d.ts +13 -0
  142. package/dist/mcp-tools/coverage-gaps.d.ts.map +1 -0
  143. package/dist/mcp-tools/coverage-gaps.js +55 -0
  144. package/dist/mcp-tools/coverage-gaps.js.map +1 -0
  145. package/dist/mcp-tools/list-rules.d.ts +17 -0
  146. package/dist/mcp-tools/list-rules.d.ts.map +1 -0
  147. package/dist/mcp-tools/list-rules.js +45 -0
  148. package/dist/mcp-tools/list-rules.js.map +1 -0
  149. package/dist/mcp-tools/scan-skill.d.ts +17 -0
  150. package/dist/mcp-tools/scan-skill.d.ts.map +1 -0
  151. package/dist/mcp-tools/scan-skill.js +65 -0
  152. package/dist/mcp-tools/scan-skill.js.map +1 -0
  153. package/dist/mcp-tools/scan.d.ts +24 -0
  154. package/dist/mcp-tools/scan.d.ts.map +1 -0
  155. package/dist/mcp-tools/scan.js +94 -0
  156. package/dist/mcp-tools/scan.js.map +1 -0
  157. package/dist/mcp-tools/submit-proposal.d.ts +12 -0
  158. package/dist/mcp-tools/submit-proposal.d.ts.map +1 -0
  159. package/dist/mcp-tools/submit-proposal.js +103 -0
  160. package/dist/mcp-tools/submit-proposal.js.map +1 -0
  161. package/dist/mcp-tools/threat-summary.d.ts +12 -0
  162. package/dist/mcp-tools/threat-summary.d.ts.map +1 -0
  163. package/dist/mcp-tools/threat-summary.js +74 -0
  164. package/dist/mcp-tools/threat-summary.js.map +1 -0
  165. package/dist/mcp-tools/validate.d.ts +15 -0
  166. package/dist/mcp-tools/validate.d.ts.map +1 -0
  167. package/dist/mcp-tools/validate.js +51 -0
  168. package/dist/mcp-tools/validate.js.map +1 -0
  169. package/dist/modules/embedding.d.ts +71 -0
  170. package/dist/modules/embedding.d.ts.map +1 -0
  171. package/dist/modules/embedding.js +141 -0
  172. package/dist/modules/embedding.js.map +1 -0
  173. package/dist/modules/index.d.ts +144 -0
  174. package/dist/modules/index.d.ts.map +1 -0
  175. package/dist/modules/index.js +82 -0
  176. package/dist/modules/index.js.map +1 -0
  177. package/dist/modules/semantic.d.ts +106 -0
  178. package/dist/modules/semantic.d.ts.map +1 -0
  179. package/dist/modules/semantic.js +359 -0
  180. package/dist/modules/semantic.js.map +1 -0
  181. package/dist/modules/session.d.ts +70 -0
  182. package/dist/modules/session.d.ts.map +1 -0
  183. package/dist/modules/session.js +128 -0
  184. package/dist/modules/session.js.map +1 -0
  185. package/dist/quality/adapters/atr.d.ts +65 -0
  186. package/dist/quality/adapters/atr.d.ts.map +1 -0
  187. package/dist/quality/adapters/atr.js +154 -0
  188. package/dist/quality/adapters/atr.js.map +1 -0
  189. package/dist/quality/adapters/index.d.ts +10 -0
  190. package/dist/quality/adapters/index.d.ts.map +1 -0
  191. package/dist/quality/adapters/index.js +10 -0
  192. package/dist/quality/adapters/index.js.map +1 -0
  193. package/dist/quality/compute-confidence.d.ts +45 -0
  194. package/dist/quality/compute-confidence.d.ts.map +1 -0
  195. package/dist/quality/compute-confidence.js +133 -0
  196. package/dist/quality/compute-confidence.js.map +1 -0
  197. package/dist/quality/index.d.ts +36 -0
  198. package/dist/quality/index.d.ts.map +1 -0
  199. package/dist/quality/index.js +39 -0
  200. package/dist/quality/index.js.map +1 -0
  201. package/dist/quality/quality-gate.d.ts +86 -0
  202. package/dist/quality/quality-gate.d.ts.map +1 -0
  203. package/dist/quality/quality-gate.js +187 -0
  204. package/dist/quality/quality-gate.js.map +1 -0
  205. package/dist/quality/types.d.ts +129 -0
  206. package/dist/quality/types.d.ts.map +1 -0
  207. package/dist/quality/types.js +10 -0
  208. package/dist/quality/types.js.map +1 -0
  209. package/dist/quality/validate-maturity.d.ts +51 -0
  210. package/dist/quality/validate-maturity.d.ts.map +1 -0
  211. package/dist/quality/validate-maturity.js +134 -0
  212. package/dist/quality/validate-maturity.js.map +1 -0
  213. package/dist/quality.d.ts +8 -0
  214. package/dist/quality.d.ts.map +1 -0
  215. package/dist/quality.js +8 -0
  216. package/dist/quality.js.map +1 -0
  217. package/dist/rule-scaffolder.d.ts +53 -0
  218. package/dist/rule-scaffolder.d.ts.map +1 -0
  219. package/dist/rule-scaffolder.js +301 -0
  220. package/dist/rule-scaffolder.js.map +1 -0
  221. package/dist/session-tracker.d.ts +58 -0
  222. package/dist/session-tracker.d.ts.map +1 -0
  223. package/dist/session-tracker.js +176 -0
  224. package/dist/session-tracker.js.map +1 -0
  225. package/dist/shadow-evaluator.d.ts +48 -0
  226. package/dist/shadow-evaluator.d.ts.map +1 -0
  227. package/dist/shadow-evaluator.js +129 -0
  228. package/dist/shadow-evaluator.js.map +1 -0
  229. package/dist/skill-fingerprint.d.ts +85 -0
  230. package/dist/skill-fingerprint.d.ts.map +1 -0
  231. package/dist/skill-fingerprint.js +284 -0
  232. package/dist/skill-fingerprint.js.map +1 -0
  233. package/dist/tc-reporter.d.ts +50 -0
  234. package/dist/tc-reporter.d.ts.map +1 -0
  235. package/dist/tc-reporter.js +164 -0
  236. package/dist/tc-reporter.js.map +1 -0
  237. package/dist/tier0-invariant.d.ts +49 -0
  238. package/dist/tier0-invariant.d.ts.map +1 -0
  239. package/dist/tier0-invariant.js +185 -0
  240. package/dist/tier0-invariant.js.map +1 -0
  241. package/dist/tier1-blacklist.d.ts +48 -0
  242. package/dist/tier1-blacklist.d.ts.map +1 -0
  243. package/dist/tier1-blacklist.js +92 -0
  244. package/dist/tier1-blacklist.js.map +1 -0
  245. package/dist/types.d.ts +232 -0
  246. package/dist/types.d.ts.map +1 -0
  247. package/dist/types.js +6 -0
  248. package/dist/types.js.map +1 -0
  249. package/dist/verdict.d.ts +26 -0
  250. package/dist/verdict.d.ts.map +1 -0
  251. package/dist/verdict.js +127 -0
  252. package/dist/verdict.js.map +1 -0
  253. package/package.json +16 -4
  254. package/.github/ISSUE_TEMPLATE/evasion-report.yml +0 -75
  255. package/.github/ISSUE_TEMPLATE/false-positive.yml +0 -31
  256. package/.github/ISSUE_TEMPLATE/mirofish-prediction.yml +0 -128
  257. package/.github/ISSUE_TEMPLATE/new-rule.yml +0 -37
  258. package/.github/PULL_REQUEST_TEMPLATE.md +0 -23
  259. package/.github/workflows/rule-quality.yml +0 -203
  260. package/.github/workflows/validate.yml +0 -42
  261. package/CHANGELOG.md +0 -30
  262. package/CONTRIBUTING.md +0 -168
  263. package/CONTRIBUTORS.md +0 -28
  264. package/COVERAGE.md +0 -135
  265. package/LIMITATIONS.md +0 -154
  266. package/SECURITY.md +0 -48
  267. package/THREAT-MODEL.md +0 -243
  268. package/docs/contribution-paths.md +0 -202
  269. package/docs/mirofish-prediction-guide.md +0 -304
  270. package/docs/quick-start.md +0 -245
  271. package/docs/rule-writing-guide.md +0 -647
  272. package/docs/schema-spec.md +0 -594
  273. package/examples/how-to-write-a-rule.md +0 -251
  274. package/tsconfig.json +0 -17
@@ -0,0 +1 @@
1
+ {"version":3,"file":"skill-benchmark.d.ts","sourceRoot":"","sources":["../../src/eval/skill-benchmark.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AA8BH,UAAU,YAAY;IACpB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,KAAK,EAAE,WAAW,GAAG,QAAQ,CAAC;IACvC,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC;IAC3B,QAAQ,CAAC,WAAW,EAAE,SAAS,MAAM,EAAE,CAAC;IACxC,QAAQ,CAAC,OAAO,EAAE,OAAO,CAAC;IAC1B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,sBAAsB,EAAE,OAAO,CAAC;IACzC,QAAQ,CAAC,gBAAgB,EAAE,OAAO,CAAC;CACpC;AAED,UAAU,YAAY;IACpB,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC1B,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;CACzB;AAED,UAAU,oBAAoB;IAC5B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,eAAe,EAAE,MAAM,CAAC;IACjC,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;IAC9B,QAAQ,CAAC,cAAc,EAAE,MAAM,CAAC;IAChC,QAAQ,CAAC,iBAAiB,EAAE,MAAM,CAAC;IACnC,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,OAAO,EAAE,YAAY,CAAC;IAC/B,QAAQ,CAAC,OAAO,EAAE,YAAY,CAAC;IAC/B,QAAQ,CAAC,OAAO,EAAE,YAAY,CAAC;IAC/B,QAAQ,CAAC,cAAc,EAAE,MAAM,CAAC;IAChC,QAAQ,CAAC,eAAe,EAAE,MAAM,CAAC;IACjC,QAAQ,CAAC,cAAc,EAAE,MAAM,CAAC;IAChC,QAAQ,CAAC,eAAe,EAAE,MAAM,CAAC;IACjC,QAAQ,CAAC,uBAAuB,EAAE,MAAM,CAAC;IACzC,QAAQ,CAAC,iBAAiB,EAAE,MAAM,CAAC;IACnC,QAAQ,CAAC,cAAc,EAAE,MAAM,CAAC;IAChC,QAAQ,CAAC,cAAc,EAAE,MAAM,CAAC;IAChC,QAAQ,CAAC,OAAO,EAAE,SAAS,YAAY,EAAE,CAAC;IAC1C,QAAQ,CAAC,cAAc,EAAE,SAAS,YAAY,EAAE,CAAC;IACjD,QAAQ,CAAC,YAAY,EAAE,SAAS,YAAY,EAAE,CAAC;CAChD;AAMD,wBAAsB,iBAAiB,CAAC,OAAO,CAAC,EAAE;IAChD,QAAQ,CAAC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,SAAS,CAAC,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,UAAU,CAAC,EAAE,MAAM,CAAC;CAC9B,GAAG,OAAO,CAAC,oBAAoB,CAAC,CAuIhC;AAUD,wBAAgB,WAAW,CAAC,MAAM,EAAE,oBAAoB,GAAG,IAAI,CA6C9D"}
@@ -0,0 +1,194 @@
1
+ /**
2
+ * SKILL.md Benchmark Harness
3
+ *
4
+ * Evaluates the ATR scanSkill() method against a labeled corpus of
5
+ * malicious and benign SKILL.md files. Produces per-layer recall,
6
+ * overall precision, and a detailed per-sample report.
7
+ *
8
+ * Corpus: data/skill-benchmark/manifest.json
9
+ * Samples: data/skill-benchmark/malicious/ and data/skill-benchmark/benign/
10
+ *
11
+ * Layers:
12
+ * A = obvious payload (curl|bash, base64 exec, reverse shell)
13
+ * B = obfuscated (bash expansion, paste service relay, encoded)
14
+ * C = semantic (natural language instructions, social engineering)
15
+ *
16
+ * @module agent-threat-rules/eval/skill-benchmark
17
+ */
18
+ import { resolve, join } from 'node:path';
19
+ import { readFileSync, writeFileSync, existsSync } from 'node:fs';
20
+ import { ATREngine } from '../engine.js';
21
+ // ---------------------------------------------------------------------------
22
+ // Benchmark runner
23
+ // ---------------------------------------------------------------------------
24
/**
 * Run the SKILL.md benchmark.
 *
 * Scans every sample listed in `<corpusDir>/manifest.json` with
 * `ATREngine.scanSkill()`, compares the detections with each sample's label
 * (and, when `<corpusDir>/expected-findings.json` exists, with the expected
 * rule ids / categories), and writes the resulting report as JSON.
 *
 * Samples listed in the manifest but missing on disk are skipped with a
 * message on stderr and do not count towards any metric.
 *
 * @param {object} [options]
 * @param {string} [options.rulesDir]   Rules directory handed to `ATREngine`
 *                                      (default: `<repoRoot>/rules`).
 * @param {string} [options.corpusDir]  Directory holding `manifest.json` and the samples
 *                                      (default: `<repoRoot>/data/skill-benchmark`).
 * @param {string} [options.outputPath] Destination of the JSON report
 *                                      (default: `<repoRoot>/data/skill-benchmark/benchmark-report.json`).
 * @returns {Promise<object>} The report object that was written to `outputPath`.
 * @throws {Error} If the manifest file does not exist.
 * @throws {TypeError} If the manifest is not a JSON array.
 */
export async function runSkillBenchmark(options) {
  // The repo root is only needed for defaults; resolve it lazily so callers
  // that pass every path explicitly never touch `import.meta.dirname`.
  let repoRoot;
  const underRepoRoot = (...segments) => {
    repoRoot ??= resolve(import.meta.dirname, '..', '..');
    return join(repoRoot, ...segments);
  };
  const roundTo = (value, scale) => Math.round(value * scale) / scale;

  const rulesDir = options?.rulesDir ?? underRepoRoot('rules');
  const corpusDir = options?.corpusDir ?? underRepoRoot('data', 'skill-benchmark');
  const outputPath = options?.outputPath ?? underRepoRoot('data', 'skill-benchmark', 'benchmark-report.json');

  // Load manifest
  const manifestPath = join(corpusDir, 'manifest.json');
  if (!existsSync(manifestPath)) {
    throw new Error(`Manifest not found: ${manifestPath}`);
  }
  const manifest = JSON.parse(readFileSync(manifestPath, 'utf-8'));
  if (!Array.isArray(manifest)) {
    // Fail with a readable message instead of an opaque "not iterable" error.
    throw new TypeError(`Manifest must be a JSON array: ${manifestPath}`);
  }

  // Load expected findings (Cisco-style ground truth); the file is optional.
  const expectedPath = join(corpusDir, 'expected-findings.json');
  const expectedMap = existsSync(expectedPath)
    ? JSON.parse(readFileSync(expectedPath, 'utf-8'))
    : {};

  // Load engine
  const engine = new ATREngine({ rulesDir });
  await engine.loadRules();

  // Run each sample
  const results = [];
  for (const entry of manifest) {
    const filePath = join(corpusDir, entry.file);
    if (!existsSync(filePath)) {
      console.error(`SKIP: file not found: ${entry.file}`);
      continue;
    }
    const content = readFileSync(filePath, 'utf-8');

    // Time only the scan itself, not the file read.
    const start = performance.now();
    const matches = engine.scanSkill(content);
    const elapsed = performance.now() - start;

    const detected = matches.length > 0;
    const rulesFired = matches.map((m) => m.rule.id);
    const correct = (entry.label === 'malicious' && detected) ||
      (entry.label === 'benign' && !detected);

    // Validate expected findings (Cisco-style category + rule matching).
    // A sample without expected findings passes both checks by default.
    const expectedFindings = expectedMap[entry.file]?.expected_findings;
    const firedRuleIds = new Set(rulesFired);
    const expectedRulesMatched = expectedFindings
      ? expectedFindings.every((ef) => firedRuleIds.has(ef.rule_id))
      : true;
    const expectedCategories = expectedFindings?.map((ef) => ef.category) ?? [];
    const actualCategories = new Set(matches.map((m) => m.rule.tags.category));
    const categoryCorrect = expectedCategories.every((ec) => actualCategories.has(ec));

    results.push({
      file: entry.file,
      label: entry.label,
      layer: entry.layer || '',
      attack_type: entry.attack_type,
      detected,
      rules_fired: rulesFired,
      correct,
      latency_ms: roundTo(elapsed, 100),
      expected_rules_matched: expectedRulesMatched,
      category_correct: categoryCorrect,
    });
  }

  // Compute metrics
  const malicious = results.filter((r) => r.label === 'malicious');
  const benign = results.filter((r) => r.label === 'benign');
  const detectedMalicious = malicious.filter((r) => r.detected);
  const missedAttacks = malicious.filter((r) => !r.detected);
  const falseAlarms = benign.filter((r) => r.detected);

  const tp = detectedMalicious.length;
  const fn = missedAttacks.length;
  const fp = falseAlarms.length;
  const tn = benign.length - fp;

  // Every ratio falls back to 0 when its denominator is empty.
  const recall = malicious.length > 0 ? tp / malicious.length : 0;
  const precision = (tp + fp) > 0 ? tp / (tp + fp) : 0;
  const f1 = (precision + recall) > 0 ? 2 * (precision * recall) / (precision + recall) : 0;
  const fpRate = benign.length > 0 ? fp / benign.length : 0;

  // Per-layer metrics (recall over malicious samples of one layer)
  const layerMetrics = (layer) => {
    const samples = malicious.filter((r) => r.layer === layer);
    const hits = samples.filter((r) => r.detected).length;
    return {
      total: samples.length,
      detected: hits,
      recall: samples.length > 0 ? hits / samples.length : 0,
    };
  };

  // Latency statistics. With no scanned samples both values are 0 rather than
  // NaN / -Infinity, which JSON.stringify would have written as `null`.
  const latencies = results.map((r) => r.latency_ms);
  const avgLatency = latencies.length > 0
    ? latencies.reduce((sum, ms) => sum + ms, 0) / latencies.length
    : 0;
  // reduce() instead of Math.max(...latencies): no argument-count limit.
  const maxLatency = latencies.reduce((max, ms) => Math.max(max, ms), 0);

  // Expected findings accuracy, measured over detected malicious samples only
  const expectedRulesAccuracy = tp > 0
    ? detectedMalicious.filter((r) => r.expected_rules_matched).length / tp
    : 0;
  const categoryAccuracy = tp > 0
    ? detectedMalicious.filter((r) => r.category_correct).length / tp
    : 0;

  const report = {
    timestamp: new Date().toISOString(),
    corpus_size: results.length,
    malicious_count: malicious.length,
    benign_count: benign.length,
    overall_recall: roundTo(recall, 1000),
    overall_precision: roundTo(precision, 1000),
    overall_f1: roundTo(f1, 1000),
    fp_rate: roundTo(fpRate, 1000),
    layer_a: layerMetrics('A'),
    layer_b: layerMetrics('B'),
    layer_c: layerMetrics('C'),
    true_positives: tp,
    false_positives: fp,
    true_negatives: tn,
    false_negatives: fn,
    expected_rules_accuracy: roundTo(expectedRulesAccuracy, 1000),
    category_accuracy: roundTo(categoryAccuracy, 1000),
    avg_latency_ms: roundTo(avgLatency, 100),
    max_latency_ms: roundTo(maxLatency, 100),
    results,
    missed_attacks: missedAttacks,
    false_alarms: falseAlarms,
  };

  // Save report
  writeFileSync(outputPath, JSON.stringify(report, null, 2));
  return report;
}
138
+ // ---------------------------------------------------------------------------
139
+ // CLI runner
140
+ // ---------------------------------------------------------------------------
141
/**
 * Render a ratio as a percentage string with one decimal place,
 * e.g. `0.5` becomes `"50.0%"`.
 */
function formatPercent(n) {
  const scaled = n * 100;
  return scaled.toFixed(1) + '%';
}
144
/**
 * Print a human-readable summary of a benchmark report to stdout:
 * headline metrics, per-layer recall, finding accuracy, the confusion
 * matrix, latency, and the lists of missed attacks / false positives
 * (each list only when non-empty).
 *
 * @param {object} report Report object as returned by `runSkillBenchmark()`.
 */
export function printReport(report) {
  const say = (text) => {
    console.log(text);
  };
  // One row of the headline metrics table.
  const metricRow = (label, ratio) => {
    say(`│ ${label} │ ${formatPercent(ratio).padStart(8)} │`);
  };
  // One per-layer recall line; `tail` carries the trailing blank line.
  const layerLine = (title, layer, tail) => {
    say(` ${title} ${formatPercent(layer.recall)} (${layer.detected}/${layer.total})${tail}`);
  };

  say('\n╔══════════════════════════════════════════════════╗');
  say('║ SKILL.md BENCHMARK REPORT ║');
  say('╚══════════════════════════════════════════════════╝\n');

  say(`Corpus: ${report.corpus_size} samples (${report.malicious_count} malicious, ${report.benign_count} benign)`);
  say(`Timestamp: ${report.timestamp}\n`);

  say('┌─────────────────┬──────────┐');
  metricRow('Overall Recall', report.overall_recall);
  metricRow('Overall Prec.', report.overall_precision);
  metricRow('F1 Score', report.overall_f1);
  metricRow('FP Rate', report.fp_rate);
  say('└─────────────────┴──────────┘\n');

  say('Per-Layer Recall:');
  layerLine('Layer A (obvious):', report.layer_a, '');
  layerLine('Layer B (obfuscated):', report.layer_b, '');
  layerLine('Layer C (semantic):', report.layer_c, '\n');

  say('Finding Accuracy (Cisco-style):');
  say(` Expected rules matched: ${formatPercent(report.expected_rules_accuracy)}`);
  say(` Category accuracy: ${formatPercent(report.category_accuracy)}\n`);

  say('Confusion Matrix:');
  say(` TP: ${report.true_positives} FP: ${report.false_positives}`);
  say(` FN: ${report.false_negatives} TN: ${report.true_negatives}\n`);

  say(`Latency: avg ${report.avg_latency_ms}ms, max ${report.max_latency_ms}ms\n`);

  if (report.missed_attacks.length > 0) {
    say('MISSED ATTACKS:');
    report.missed_attacks.forEach((missed) => {
      say(` ✗ [${missed.layer}] ${missed.file} (${missed.attack_type})`);
    });
    say('');
  }

  if (report.false_alarms.length > 0) {
    say('FALSE POSITIVES:');
    report.false_alarms.forEach((alarm) => {
      say(` ✗ ${alarm.file} → ${alarm.rules_fired.join(', ')}`);
    });
    say('');
  }
}
182
+ // ---------------------------------------------------------------------------
183
+ // Direct execution
184
+ // ---------------------------------------------------------------------------
185
// Run the benchmark only when this module is the process entry point.
//  - `import.meta.filename` is a native filesystem path, so it compares
//    correctly with `process.argv[1]` on Windows and for paths containing
//    characters that are percent-encoded in `import.meta.url`.
//  - The `file://` comparison is kept for runtimes without `import.meta.filename`.
//  - The `.ts` suffix check covers running the TypeScript source through a loader.
// NOTE(review): a symlinked entry path (e.g. an npm bin shim) still will not
// match, as before — confirm whether that invocation needs to be supported.
if (
  process.argv[1] !== undefined &&
  (import.meta.filename === process.argv[1] ||
    import.meta.url === `file://${process.argv[1]}` ||
    process.argv[1].endsWith('skill-benchmark.ts'))
) {
  runSkillBenchmark().then((report) => {
    printReport(report);
    console.log(`Report saved to: data/skill-benchmark/benchmark-report.json`);
  }).catch((err) => {
    console.error('Benchmark failed:', err);
    process.exit(1);
  });
}
194
+ //# sourceMappingURL=skill-benchmark.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"skill-benchmark.js","sourceRoot":"","sources":["../../src/eval/skill-benchmark.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAEH,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAC1C,OAAO,EAAE,YAAY,EAAE,aAAa,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AAClE,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AAsEzC,8EAA8E;AAC9E,mBAAmB;AACnB,8EAA8E;AAE9E,MAAM,CAAC,KAAK,UAAU,iBAAiB,CAAC,OAIvC;IACC,MAAM,QAAQ,GAAG,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC;IAC1D,MAAM,QAAQ,GAAG,OAAO,EAAE,QAAQ,IAAI,IAAI,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IAC9D,MAAM,SAAS,GAAG,OAAO,EAAE,SAAS,IAAI,IAAI,CAAC,QAAQ,EAAE,MAAM,EAAE,iBAAiB,CAAC,CAAC;IAClF,MAAM,UAAU,GAAG,OAAO,EAAE,UAAU,IAAI,IAAI,CAAC,QAAQ,EAAE,MAAM,EAAE,iBAAiB,EAAE,uBAAuB,CAAC,CAAC;IAE7G,gBAAgB;IAChB,MAAM,YAAY,GAAG,IAAI,CAAC,SAAS,EAAE,eAAe,CAAC,CAAC;IACtD,IAAI,CAAC,UAAU,CAAC,YAAY,CAAC,EAAE,CAAC;QAC9B,MAAM,IAAI,KAAK,CAAC,uBAAuB,YAAY,EAAE,CAAC,CAAC;IACzD,CAAC;IACD,MAAM,QAAQ,GAA6B,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,YAAY,EAAE,OAAO,CAAC,CAAC,CAAC;IAE3F,oDAAoD;IACpD,MAAM,YAAY,GAAG,IAAI,CAAC,SAAS,EAAE,wBAAwB,CAAC,CAAC;IAC/D,MAAM,WAAW,GAAkC,UAAU,CAAC,YAAY,CAAC;QACzE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,YAAY,EAAE,OAAO,CAAC,CAAC;QACjD,CAAC,CAAC,EAAE,CAAC;IAEP,cAAc;IACd,MAAM,MAAM,GAAG,IAAI,SAAS,CAAC,EAAE,QAAQ,EAAE,CAAC,CAAC;IAC3C,MAAM,MAAM,CAAC,SAAS,EAAE,CAAC;IAEzB,kBAAkB;IAClB,MAAM,OAAO,GAAmB,EAAE,CAAC;IAEnC,KAAK,MAAM,KAAK,IAAI,QAAQ,EAAE,CAAC;QAC7B,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC;QAC7C,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC1B,OAAO,CAAC,KAAK,CAAC,yBAAyB,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC;YACrD,SAAS;QACX,CAAC;QAED,MAAM,OAAO,GAAG,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;QAChD,MAAM,KAAK,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;QAChC,MAAM,OAAO,GAAG,MAAM,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;QAC1C,MAAM,OAAO,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC;QAE1C,MAAM,QAAQ,GAAG,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC;QACpC,MAAM,UAAU,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAEjD,MAAM,OAAO,GACX,CAAC,KAAK,CAAC,
KAAK,KAAK,WAAW,IAAI,QAAQ,CAAC;YACzC,CAAC,KAAK,CAAC,KAAK,KAAK,QAAQ,IAAI,CAAC,QAAQ,CAAC,CAAC;QAE1C,oEAAoE;QACpE,MAAM,QAAQ,GAAG,WAAW,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QACzC,MAAM,oBAAoB,GAAG,QAAQ,EAAE,iBAAiB;YACtD,CAAC,CAAC,QAAQ,CAAC,iBAAiB,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC,OAAO,CAAC,CAAC;YAC3E,CAAC,CAAC,IAAI,CAAC,CAAC,gCAAgC;QAC1C,MAAM,kBAAkB,GAAG,QAAQ,EAAE,iBAAiB,EAAE,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;QACvF,MAAM,gBAAgB,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAClE,MAAM,eAAe,GAAG,kBAAkB,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,EAAE,CACtD,gBAAgB,CAAC,QAAQ,CAAC,EAAqC,CAAC,CACjE,CAAC;QAEF,OAAO,CAAC,IAAI,CAAC;YACX,IAAI,EAAE,KAAK,CAAC,IAAI;YAChB,KAAK,EAAE,KAAK,CAAC,KAAK;YAClB,KAAK,EAAE,KAAK,CAAC,KAAK,IAAI,EAAE;YACxB,WAAW,EAAE,KAAK,CAAC,WAAW;YAC9B,QAAQ;YACR,WAAW,EAAE,UAAU;YACvB,OAAO;YACP,UAAU,EAAE,IAAI,CAAC,KAAK,CAAC,OAAO,GAAG,GAAG,CAAC,GAAG,GAAG;YAC3C,sBAAsB,EAAE,oBAAoB;YAC5C,gBAAgB,EAAE,eAAe;SAClC,CAAC,CAAC;IACL,CAAC;IAED,kBAAkB;IAClB,MAAM,SAAS,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,KAAK,WAAW,CAAC,CAAC;IACjE,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,KAAK,QAAQ,CAAC,CAAC;IAE3D,MAAM,EAAE,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC;IACtD,MAAM,EAAE,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC;IACvD,MAAM,EAAE,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC;IACnD,MAAM,EAAE,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC;IAEpD,MAAM,MAAM,GAAG,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;IAChE,MAAM,SAAS,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IACrD,MAAM,EAAE,GAAG,CAAC,SAAS,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,SAAS,GAA
G,MAAM,CAAC,GAAG,CAAC,SAAS,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC1F,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;IAE1D,oBAAoB;IACpB,MAAM,YAAY,GAAG,CAAC,KAAa,EAAgB,EAAE;QACnD,MAAM,OAAO,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,KAAK,KAAK,CAAC,CAAC;QAC3D,MAAM,QAAQ,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC;QAC1D,OAAO;YACL,KAAK,EAAE,OAAO,CAAC,MAAM;YACrB,QAAQ;YACR,MAAM,EAAE,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;SAC3D,CAAC;IACJ,CAAC,CAAC;IAEF,MAAM,SAAS,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC;IAEnD,6BAA6B;IAC7B,MAAM,iBAAiB,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;IAC9D,MAAM,qBAAqB,GAAG,iBAAiB,CAAC,MAAM,GAAG,CAAC;QACxD,CAAC,CAAC,iBAAiB,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,sBAAsB,CAAC,CAAC,MAAM,GAAG,iBAAiB,CAAC,MAAM;QAC7F,CAAC,CAAC,CAAC,CAAC;IACN,MAAM,gBAAgB,GAAG,iBAAiB,CAAC,MAAM,GAAG,CAAC;QACnD,CAAC,CAAC,iBAAiB,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,gBAAgB,CAAC,CAAC,MAAM,GAAG,iBAAiB,CAAC,MAAM;QACvF,CAAC,CAAC,CAAC,CAAC;IAEN,MAAM,MAAM,GAAyB;QACnC,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACnC,WAAW,EAAE,OAAO,CAAC,MAAM;QAC3B,eAAe,EAAE,SAAS,CAAC,MAAM;QACjC,YAAY,EAAE,MAAM,CAAC,MAAM;QAC3B,cAAc,EAAE,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,IAAI,CAAC,GAAG,IAAI;QAChD,iBAAiB,EAAE,IAAI,CAAC,KAAK,CAAC,SAAS,GAAG,IAAI,CAAC,GAAG,IAAI;QACtD,UAAU,EAAE,IAAI,CAAC,KAAK,CAAC,EAAE,GAAG,IAAI,CAAC,GAAG,IAAI;QACxC,OAAO,EAAE,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,IAAI,CAAC,GAAG,IAAI;QACzC,OAAO,EAAE,YAAY,CAAC,GAAG,CAAC;QAC1B,OAAO,EAAE,YAAY,CAAC,GAAG,CAAC;QAC1B,OAAO,EAAE,YAAY,CAAC,GAAG,CAAC;QAC1B,cAAc,EAAE,EAAE;QAClB,eAAe,EAAE,EAAE;QACnB,cAAc,EAAE,EAAE;QAClB,eAAe,EAAE,EAAE;QACnB,uBAAuB,EAAE,IAAI,CAAC,KAAK,CAAC,qBAAqB,GAAG,IAAI,CAAC,GAAG,IAAI;QACxE,iBAAiB,EAAE,IAAI,CAAC,KAAK,CAAC,gBAAgB,GAAG,IAAI,CAAC,GAAG,IAAI;QAC7D,cAAc,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,SAAS,CA
AC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,SAAS,CAAC,MAAM,CAAC,GAAG,GAAG,CAAC,GAAG,GAAG;QACjG,cAAc,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,SAAS,CAAC,GAAG,GAAG,CAAC,GAAG,GAAG;QAC9D,OAAO;QACP,cAAc,EAAE,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC;QACpD,YAAY,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC;KAC/C,CAAC;IAEF,cAAc;IACd,aAAa,CAAC,UAAU,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IAE3D,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,8EAA8E;AAC9E,aAAa;AACb,8EAA8E;AAE9E,SAAS,aAAa,CAAC,CAAS;IAC9B,OAAO,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC;AACpC,CAAC;AAED,MAAM,UAAU,WAAW,CAAC,MAA4B;IACtD,OAAO,CAAC,GAAG,CAAC,wDAAwD,CAAC,CAAC;IACtE,OAAO,CAAC,GAAG,CAAC,qDAAqD,CAAC,CAAC;IACnE,OAAO,CAAC,GAAG,CAAC,wDAAwD,CAAC,CAAC;IAEtE,OAAO,CAAC,GAAG,CAAC,WAAW,MAAM,CAAC,WAAW,aAAa,MAAM,CAAC,eAAe,eAAe,MAAM,CAAC,YAAY,UAAU,CAAC,CAAC;IAC1H,OAAO,CAAC,GAAG,CAAC,cAAc,MAAM,CAAC,SAAS,IAAI,CAAC,CAAC;IAEhD,OAAO,CAAC,GAAG,CAAC,gCAAgC,CAAC,CAAC;IAC9C,OAAO,CAAC,GAAG,CAAC,uBAAuB,aAAa,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IACzF,OAAO,CAAC,GAAG,CAAC,uBAAuB,aAAa,CAAC,MAAM,CAAC,iBAAiB,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IAC5F,OAAO,CAAC,GAAG,CAAC,uBAAuB,aAAa,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IACrF,OAAO,CAAC,GAAG,CAAC,uBAAuB,aAAa,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IAClF,OAAO,CAAC,GAAG,CAAC,kCAAkC,CAAC,CAAC;IAEhD,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;IACjC,OAAO,CAAC,GAAG,CAAC,2BAA2B,aAAa,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,KAAK,MAAM,CAAC,OAAO,CAAC,QAAQ,IAAI,MAAM,CAAC,OAAO,CAAC,KAAK,GAAG,CAAC,CAAC;IACpI,OAAO,CAAC,GAAG,CAAC,2BAA2B,aAAa,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,KAAK,MAAM,CAAC,OAAO,CAAC,QAAQ,IAAI,MAAM,CAAC,OAAO,CAAC,KAAK,GAAG,CAAC,CAAC;IACpI,OAAO,CAAC,GAAG,CAAC,2BAA2B,aAAa,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,KAAK,MAAM,CAAC,OAAO,CAAC,QAAQ,IAAI,MAAM,CAAC,OAAO,CAAC,KAAK,KAAK,CAAC,CAAC;IAEt
I,OAAO,CAAC,GAAG,CAAC,iCAAiC,CAAC,CAAC;IAC/C,OAAO,CAAC,GAAG,CAAC,6BAA6B,aAAa,CAAC,MAAM,CAAC,uBAAuB,CAAC,EAAE,CAAC,CAAC;IAC1F,OAAO,CAAC,GAAG,CAAC,6BAA6B,aAAa,CAAC,MAAM,CAAC,iBAAiB,CAAC,IAAI,CAAC,CAAC;IAEtF,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;IACjC,OAAO,CAAC,GAAG,CAAC,SAAS,MAAM,CAAC,cAAc,SAAS,MAAM,CAAC,eAAe,EAAE,CAAC,CAAC;IAC7E,OAAO,CAAC,GAAG,CAAC,SAAS,MAAM,CAAC,eAAe,SAAS,MAAM,CAAC,cAAc,IAAI,CAAC,CAAC;IAE/E,OAAO,CAAC,GAAG,CAAC,gBAAgB,MAAM,CAAC,cAAc,WAAW,MAAM,CAAC,cAAc,MAAM,CAAC,CAAC;IAEzF,IAAI,MAAM,CAAC,cAAc,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACrC,OAAO,CAAC,GAAG,CAAC,iBAAiB,CAAC,CAAC;QAC/B,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,cAAc,EAAE,CAAC;YACtC,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,KAAK,KAAK,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,WAAW,GAAG,CAAC,CAAC;QAC/D,CAAC;QACD,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IAClB,CAAC;IAED,IAAI,MAAM,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACnC,OAAO,CAAC,GAAG,CAAC,kBAAkB,CAAC,CAAC;QAChC,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,YAAY,EAAE,CAAC;YACpC,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,IAAI,MAAM,CAAC,CAAC,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAC7D,CAAC;QACD,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IAClB,CAAC;AACH,CAAC;AAED,8EAA8E;AAC9E,mBAAmB;AACnB,8EAA8E;AAE9E,IAAI,MAAM,CAAC,IAAI,CAAC,GAAG,KAAK,UAAU,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,QAAQ,CAAC,oBAAoB,CAAC,EAAE,CAAC;IACvG,iBAAiB,EAAE,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,EAAE;QAClC,WAAW,CAAC,MAAM,CAAC,CAAC;QACpB,OAAO,CAAC,GAAG,CAAC,6DAA6D,CAAC,CAAC;IAC7E,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,GAAG,EAAE,EAAE;QACf,OAAO,CAAC,KAAK,CAAC,mBAAmB,EAAE,GAAG,CAAC,CAAC;QACxC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC,CAAC,CAAC;AACL,CAAC"}
@@ -0,0 +1,54 @@
1
+ /**
2
+ * Flywheel Manager -- automates the threat detection → rule generation → promotion cycle.
3
+ *
4
+ * Flow:
5
+ * 1. Tier 4 (LLM) detects novel threat → auto-scaffold rule
6
+ * 2. Rule enters shadow mode → ShadowEvaluator tracks FP rate
7
+ * 3. FP < threshold after N evaluations → auto-promote to stable
8
+ * 4. Promoted rule distributes to all users via Threat Cloud
9
+ *
10
+ * Machine speed, not human speed. No manual proposals or voting required.
11
+ *
12
+ * @module agent-threat-rules/flywheel
13
+ */
14
+ import type { ATRRule, ATRMatch, AgentEvent } from './types.js';
15
+ import { type PromotionCandidate } from './shadow-evaluator.js';
16
+ export interface FlywheelConfig { // Options accepted by the FlywheelManager constructor; every field is optional.
17
+ /** Highest false-positive rate at which a shadow rule is still auto-promoted; handed to the shadow evaluator by promoteReady() (default: 0.001 = 0.1%) */
18
+ readonly maxFPRate?: number;
19
+ /** Minimum shadow evaluations before promotion; handed to the shadow evaluator by promoteReady() (default: 1000) */
20
+ readonly minEvaluations?: number;
21
+ /** Callback invoked and awaited once per promoted rule; receives a copy of the rule with status set to stable plus the candidate stats (default: no-op) */
22
+ readonly onPromote?: (rule: ATRRule, stats: PromotionCandidate['stats']) => void | Promise<void>;
23
+ /** Callback invoked and awaited after a newly scaffolded rule has been added to shadow evaluation (default: no-op) */
24
+ readonly onShadowRule?: (rule: ATRRule) => void | Promise<void>;
25
+ }
26
+ export declare class FlywheelManager {
27
+ private readonly scaffolder; // RuleScaffolder instance that turns a detection into rule YAML
28
+ private readonly shadow; // ShadowEvaluator instance that holds and evaluates the shadow rules
29
+ private readonly config; // constructor config with defaults filled in
30
+ private readonly existingIds; // ids of rules scaffolded so far; passed to the scaffolder on each call
31
+ constructor(config?: FlywheelConfig);
32
+ /**
33
+ * Called when Tier 4 (LLM semantic) detects a novel threat.
34
+ * Auto-generates a shadow rule from the detection, marks it experimental and adds it to shadow evaluation.
+ * Resolves to null when the match confidence is below 0.7 or rule generation fails.
35
+ */
36
+ onTier4Detection(match: ATRMatch, event: AgentEvent): Promise<ATRRule | null>;
37
+ /**
38
+ * Called for every event -- runs shadow evaluation.
39
+ * Returns shadow matches (for logging only, not verdict).
40
+ */
41
+ evaluateShadow(event: AgentEvent): readonly ATRMatch[];
42
+ /** Record user feedback (true positive or not) on a shadow match; forwarded to the shadow evaluator */
43
+ recordFeedback(ruleId: string, isTruePositive: boolean): void;
44
+ /**
45
+ * Check for rules ready to promote and execute promotion.
+ * Asks the shadow evaluator for candidates using maxFPRate and minEvaluations, awaits onPromote
+ * for each candidate in order, and resolves to the candidate list.
46
+ * Call periodically (e.g., every 15 minutes).
47
+ */
48
+ promoteReady(): Promise<readonly PromotionCandidate[]>;
49
+ /** Get shadow evaluator stats (returns whatever the evaluator getAllStats() returns) */
50
+ getShadowStats(): ReadonlyMap<string, unknown>;
51
+ /** Number of rules in shadow mode (the evaluator size()) */
52
+ shadowRuleCount(): number;
53
+ }
54
+ //# sourceMappingURL=flywheel.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"flywheel.d.ts","sourceRoot":"","sources":["../src/flywheel.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAEH,OAAO,KAAK,EAAE,OAAO,EAAE,QAAQ,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAEhE,OAAO,EAAmB,KAAK,kBAAkB,EAAE,MAAM,uBAAuB,CAAC;AAEjF,MAAM,WAAW,cAAc;IAC7B,6DAA6D;IAC7D,QAAQ,CAAC,SAAS,CAAC,EAAE,MAAM,CAAC;IAC5B,kEAAkE;IAClE,QAAQ,CAAC,cAAc,CAAC,EAAE,MAAM,CAAC;IACjC,4CAA4C;IAC5C,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,kBAAkB,CAAC,OAAO,CAAC,KAAK,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACjG,mDAAmD;IACnD,QAAQ,CAAC,YAAY,CAAC,EAAE,CAAC,IAAI,EAAE,OAAO,KAAK,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;CACjE;AAED,qBAAa,eAAe;IAC1B,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAiB;IAC5C,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAkB;IACzC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAA2B;IAClD,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAqB;gBAErC,MAAM,GAAE,cAAmB;IAWvC;;;OAGG;IACG,gBAAgB,CAAC,KAAK,EAAE,QAAQ,EAAE,KAAK,EAAE,UAAU,GAAG,OAAO,CAAC,OAAO,GAAG,IAAI,CAAC;IAgEnF;;;OAGG;IACH,cAAc,CAAC,KAAK,EAAE,UAAU,GAAG,SAAS,QAAQ,EAAE;IAItD,6CAA6C;IAC7C,cAAc,CAAC,MAAM,EAAE,MAAM,EAAE,cAAc,EAAE,OAAO,GAAG,IAAI;IAI7D;;;OAGG;IACG,YAAY,IAAI,OAAO,CAAC,SAAS,kBAAkB,EAAE,CAAC;IAe5D,iCAAiC;IACjC,cAAc,IAAI,WAAW,CAAC,MAAM,EAAE,OAAO,CAAC;IAI9C,qCAAqC;IACrC,eAAe,IAAI,MAAM;CAG1B"}
@@ -0,0 +1,121 @@
1
+ /**
2
+ * Flywheel Manager -- automates the threat detection → rule generation → promotion cycle.
3
+ *
4
+ * Flow:
5
+ * 1. Tier 4 (LLM) detects novel threat → auto-scaffold rule
6
+ * 2. Rule enters shadow mode → ShadowEvaluator tracks FP rate
7
+ * 3. FP < threshold after N evaluations → auto-promote to stable
8
+ * 4. Promoted rule distributes to all users via Threat Cloud
9
+ *
10
+ * Machine speed, not human speed. No manual proposals or voting required.
11
+ *
12
+ * @module agent-threat-rules/flywheel
13
+ */
14
+ import { RuleScaffolder } from './rule-scaffolder.js';
15
+ import { ShadowEvaluator } from './shadow-evaluator.js';
16
/**
 * Write a flywheel diagnostic line to stderr.
 *
 * @param {string} context - Short description of the step that failed.
 * @param {unknown} err - The thrown value.
 */
function reportFlywheelError(context, err) {
  const detail = err instanceof Error ? err.message : String(err);
  process.stderr.write(`[atr-flywheel] ${context}: ${detail}\n`);
}

/**
 * Drives the detection -> shadow rule -> promotion cycle.
 *
 * Holds a RuleScaffolder (rule YAML generation), a ShadowEvaluator (shadow
 * rule bookkeeping) and the resolved configuration with defaults applied.
 */
export class FlywheelManager {
  scaffolder;
  shadow;
  config;
  existingIds = new Set();

  /**
   * @param {object} [config] - Optional FlywheelConfig.
   *   maxFPRate defaults to 0.001, minEvaluations to 1000, and both
   *   callbacks default to no-ops.
   */
  constructor(config = {}) {
    this.scaffolder = new RuleScaffolder({ author: 'ATR Flywheel (auto-generated)' });
    this.shadow = new ShadowEvaluator();
    this.config = {
      maxFPRate: config.maxFPRate ?? 0.001,
      minEvaluations: config.minEvaluations ?? 1000,
      onPromote: config.onPromote ?? (() => { }),
      onShadowRule: config.onShadowRule ?? (() => { }),
    };
  }

  /**
   * Called when Tier 4 (LLM semantic) detects a novel threat.
   * Auto-generates a shadow rule from the detection.
   *
   * @param {object} match - The Tier 4 match (reads confidence, rule, matchedPatterns).
   * @param {object} event - The event that produced the match (reads fields, content).
   * @returns {Promise<object|null>} The generated rule (status 'experimental'),
   *   or null when confidence is below 0.7 or rule generation fails.
   */
  async onTier4Detection(match, event) {
    // Only generate from high-confidence Tier 4 matches
    if (match.confidence < 0.7) {
      return null;
    }

    const category = match.rule.tags?.category ?? 'prompt-injection';
    const severity = match.rule.severity ?? 'medium';
    // Tolerate a match without matchedPatterns instead of throwing.
    const matchedPatterns = Array.isArray(match.matchedPatterns) ? match.matchedPatterns : [];

    // Build example payloads from attack signals, in priority order:
    // matched patterns > event fields > event content > rule text.
    const payloads = matchedPatterns.filter((p) => typeof p === 'string' && p.length > 5);

    if (event.fields) {
      for (const value of Object.values(event.fields)) {
        // Only strings are usable payloads; arrays also expose length/slice
        // and would otherwise be pushed as nested arrays.
        if (typeof value === 'string' && value.length > 10) {
          payloads.push(value.slice(0, 500));
        }
      }
    }

    // Event content is a fallback, used only when nothing better was found.
    if (payloads.length === 0 && typeof event.content === 'string' && event.content) {
      payloads.push(event.content.slice(0, 500));
    }

    // Last resort: the rule's own text.
    if (payloads.length === 0) {
      const fallback = match.rule.description ?? match.rule.title;
      if (typeof fallback === 'string') {
        payloads.push(fallback);
      }
    }

    const input = {
      title: `Auto: ${match.rule.description?.slice(0, 60) ?? match.rule.title}`,
      category: category,
      severity: severity,
      attackDescription: match.rule.description ?? matchedPatterns.join('; '),
      examplePayloads: payloads,
    };

    let rule;
    try {
      const result = this.scaffolder.scaffold(input, this.existingIds);
      // Parse the scaffolded YAML back to a rule object.
      // NOTE(review): the YAML embeds payload text taken from the event; this
      // relies on js-yaml's load() being safe by default -- confirm the
      // installed js-yaml major version.
      const { default: yaml } = await import('js-yaml');
      const parsed = yaml.load(result.yaml);
      if (parsed === null || typeof parsed !== 'object') {
        throw new Error('scaffolded YAML did not parse to a rule object');
      }
      rule = parsed;
      rule.status = 'experimental';
      this.existingIds.add(result.id);
      this.shadow.addRule(rule);
    } catch (err) {
      // Best-effort: a failed generation must not break detection, but it
      // should not vanish without a trace either.
      reportFlywheelError('shadow rule generation failed', err);
      return null;
    }

    // The rule is already registered at this point, so a failing callback is
    // reported but does not turn the result into null.
    try {
      await this.config.onShadowRule(rule);
    } catch (err) {
      reportFlywheelError('onShadowRule callback failed', err);
    }
    return rule;
  }

  /**
   * Called for every event -- runs shadow evaluation.
   * Returns shadow matches (for logging only, not verdict).
   */
  evaluateShadow(event) {
    return this.shadow.evaluate(event);
  }

  /** Record user feedback on a shadow match */
  recordFeedback(ruleId, isTruePositive) {
    this.shadow.recordFeedback(ruleId, isTruePositive);
  }

  /**
   * Check for rules ready to promote and execute promotion.
   * Call periodically (e.g., every 15 minutes).
   *
   * Awaits onPromote for each candidate in order with a copy of the rule
   * whose status is 'stable'; a rejecting callback rejects this call.
   * NOTE(review): nothing here removes a promoted rule from the shadow
   * evaluator -- confirm getPromotionCandidates() does not return the same
   * rule again on the next call.
   *
   * @returns {Promise<readonly object[]>} The promotion candidates.
   */
  async promoteReady() {
    const candidates = this.shadow.getPromotionCandidates(this.config.maxFPRate, this.config.minEvaluations);
    for (const candidate of candidates) {
      // Promote: change status from experimental to stable
      const promoted = { ...candidate.rule, status: 'stable' };
      await this.config.onPromote(promoted, candidate.stats);
    }
    return candidates;
  }

  /** Get shadow evaluator stats */
  getShadowStats() {
    return this.shadow.getAllStats();
  }

  /** Number of rules in shadow mode */
  shadowRuleCount() {
    return this.shadow.size();
  }
}
121
+ //# sourceMappingURL=flywheel.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"flywheel.js","sourceRoot":"","sources":["../src/flywheel.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAGH,OAAO,EAAE,cAAc,EAAsB,MAAM,sBAAsB,CAAC;AAC1E,OAAO,EAAE,eAAe,EAA2B,MAAM,uBAAuB,CAAC;AAajF,MAAM,OAAO,eAAe;IACT,UAAU,CAAiB;IAC3B,MAAM,CAAkB;IACxB,MAAM,CAA2B;IACjC,WAAW,GAAG,IAAI,GAAG,EAAU,CAAC;IAEjD,YAAY,SAAyB,EAAE;QACrC,IAAI,CAAC,UAAU,GAAG,IAAI,cAAc,CAAC,EAAE,MAAM,EAAE,+BAA+B,EAAE,CAAC,CAAC;QAClF,IAAI,CAAC,MAAM,GAAG,IAAI,eAAe,EAAE,CAAC;QACpC,IAAI,CAAC,MAAM,GAAG;YACZ,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,KAAK;YACpC,cAAc,EAAE,MAAM,CAAC,cAAc,IAAI,IAAI;YAC7C,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC;YACzC,YAAY,EAAE,MAAM,CAAC,YAAY,IAAI,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC;SAChD,CAAC;IACJ,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,gBAAgB,CAAC,KAAe,EAAE,KAAiB;QACvD,oDAAoD;QACpD,IAAI,KAAK,CAAC,UAAU,GAAG,GAAG;YAAE,OAAO,IAAI,CAAC;QAExC,+CAA+C;QAC/C,MAAM,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,EAAE,QAAQ,IAAI,kBAAkB,CAAC;QACjE,MAAM,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,IAAI,QAAQ,CAAC;QAEjD,qEAAqE;QACrE,4DAA4D;QAC5D,MAAM,QAAQ,GAAa,EAAE,CAAC;QAE9B,+EAA+E;QAC/E,IAAI,KAAK,CAAC,eAAe,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACrC,QAAQ,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,eAAe,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC;QACtE,CAAC;QAED,gFAAgF;QAChF,IAAI,KAAK,CAAC,MAAM,EAAE,CAAC;YACjB,KAAK,MAAM,KAAK,IAAI,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,EAAE,CAAC;gBAChD,IAAI,KAAK,IAAI,KAAK,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;oBAC/B,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC;gBACrC,CAAC;YACH,CAAC;QACH,CAAC;QAED,0EAA0E;QAC1E,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,IAAI,KAAK,CAAC,OAAO,EAAE,CAAC;YAC3C,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC;QAC7C,CAAC;QAED,8BAA8B;QAC9B,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC1B,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,WAAW,IAAI,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAC5D,CAAC;QAED,MAAM,KAAK,GAAkB;YAC3B,KAAK,EAAE,SAAS,KAAK,CAAC,IAAI,CAAC,WAAW,EAAE,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,KAAK,
CAAC,IAAI,CAAC,KAAK,EAAE;YAC1E,QAAQ,EAAE,QAAqC;YAC/C,QAAQ,EAAE,QAAqC;YAC/C,iBAAiB,EAAE,KAAK,CAAC,IAAI,CAAC,WAAW,IAAI,KAAK,CAAC,eAAe,CAAC,IAAI,CAAC,IAAI,CAAC;YAC7E,eAAe,EAAE,QAAQ;SAC1B,CAAC;QAEF,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,KAAK,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;YACjE,MAAM,QAAQ,GAAG,MAAM,CAAC,IAAI,CAAC;YAE7B,+BAA+B;YAC/B,MAAM,EAAE,OAAO,EAAE,IAAI,EAAE,GAAG,MAAM,MAAM,CAAC,SAAS,CAAC,CAAC;YAClD,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAY,CAAC;YAC5C,IAAI,CAAC,MAAM,GAAG,cAAc,CAAC;YAE7B,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;YAChC,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;YAE1B,MAAM,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC;YAErC,OAAO,IAAI,CAAC;QACd,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED;;;OAGG;IACH,cAAc,CAAC,KAAiB;QAC9B,OAAO,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;IACrC,CAAC;IAED,6CAA6C;IAC7C,cAAc,CAAC,MAAc,EAAE,cAAuB;QACpD,IAAI,CAAC,MAAM,CAAC,cAAc,CAAC,MAAM,EAAE,cAAc,CAAC,CAAC;IACrD,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,YAAY;QAChB,MAAM,UAAU,GAAG,IAAI,CAAC,MAAM,CAAC,sBAAsB,CACnD,IAAI,CAAC,MAAM,CAAC,SAAS,EACrB,IAAI,CAAC,MAAM,CAAC,cAAc,CAC3B,CAAC;QAEF,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;YACnC,qDAAqD;YACrD,MAAM,QAAQ,GAAG,EAAE,GAAG,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,QAAiB,EAAE,CAAC;YAClE,MAAM,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,QAAQ,EAAE,SAAS,CAAC,KAAK,CAAC,CAAC;QACzD,CAAC;QAED,OAAO,UAAU,CAAC;IACpB,CAAC;IAED,iCAAiC;IACjC,cAAc;QACZ,OAAO,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,CAAC;IACnC,CAAC;IAED,qCAAqC;IACrC,eAAe;QACb,OAAO,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;IAC5B,CAAC;CACF"}
@@ -0,0 +1,61 @@
1
+ /**
2
+ * Hook Handler - Bridges Claude Code hooks to the ATR engine.
3
+ *
4
+ * Converts HookInput (PreToolUse/PostToolUse) into AgentEvents,
5
+ * evaluates them, and returns HookOutput for the agent host.
6
+ *
7
+ * Supports a stdio JSON-lines loop for use as a Claude Code hook process.
8
+ *
9
+ * CRITICAL: Fail-open by default -- errors resolve to "allow" so a
10
+ * bug in the guard never blocks legitimate agent operations. Set failOpen: false to deny on errors instead.
11
+ *
12
+ * @module agent-threat-rules/hook-handler
13
+ */
14
+ import type { HookInput, HookOutput } from './types.js';
15
+ import type { ATREngine } from './engine.js';
16
+ import type { ActionExecutor } from './action-executor.js';
17
+ export interface HookHandlerConfig { // Constructor options for HookHandler.
18
+ readonly engine: ATREngine; // engine whose evaluateWithVerdict(event, executor) produces the verdict
19
+ readonly executor: ActionExecutor; // passed through to engine.evaluateWithVerdict on every evaluation
20
+ readonly timeoutMs?: number; // evaluation timeout in milliseconds (default: 5000)
21
+ readonly failOpen?: boolean; // true (the default): errors yield an allow output; false: errors yield deny
22
+ }
23
+ export declare class HookHandler {
24
+ private readonly engine; // config.engine
25
+ private readonly executor; // config.executor
26
+ private readonly timeoutMs; // config.timeoutMs, or 5000 when omitted
27
+ private readonly failOpen; // config.failOpen, or true when omitted
28
+ constructor(config: HookHandlerConfig);
29
+ /**
30
+ * Handle a PreToolUse hook event.
31
+ * Converts input to an AgentEvent, evaluates, and returns a HookOutput.
+ * Never rejects: any error is converted to a HookOutput by handleError.
32
+ */
33
+ handlePreToolUse(input: HookInput): Promise<HookOutput>;
34
+ /**
35
+ * Handle a PostToolUse hook event.
36
+ * Converts input to an AgentEvent and evaluates it to scan the tool output for threats.
+ * Never rejects: any error is converted to a HookOutput by handleError.
37
+ */
38
+ handlePostToolUse(input: HookInput): Promise<HookOutput>;
39
+ /**
40
+ * Start a stdio JSON-lines loop.
41
+ *
42
+ * Reads one JSON object per line from stdin, dispatches to the
43
+ * appropriate handler, and writes one JSON line to stdout.
+ * Blank lines are skipped; a line that fails to parse or dispatch
+ * produces the handleError output instead.
44
+ *
45
+ * The returned promise resolves once stdin closes.
46
+ */
47
+ startStdioLoop(): Promise<void>;
48
+ /**
49
+ * Dispatch a HookInput to the appropriate handler based on its hook field.
50
+ */
51
+ private dispatch;
52
+ /**
53
+ * Evaluate an event with timeout and convert the verdict to HookOutput.
+ * Deny verdicts also carry a "Blocked: ..." message; matched rule ids are included when any rule matched.
54
+ */
55
+ private evaluateAndRespond;
56
+ /**
57
+ * Handle errors with fail-open or fail-closed behavior.
+ * Writes the error message to stderr, then returns allow when failOpen is true and deny otherwise.
58
+ */
59
+ private handleError;
60
+ }
61
+ //# sourceMappingURL=hook-handler.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"hook-handler.d.ts","sourceRoot":"","sources":["../src/hook-handler.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAGH,OAAO,KAAK,EAEV,SAAS,EACT,UAAU,EAEX,MAAM,YAAY,CAAC;AACpB,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAC7C,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AAK3D,MAAM,WAAW,iBAAiB;IAChC,QAAQ,CAAC,MAAM,EAAE,SAAS,CAAC;IAC3B,QAAQ,CAAC,QAAQ,EAAE,cAAc,CAAC;IAClC,QAAQ,CAAC,SAAS,CAAC,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,QAAQ,CAAC,EAAE,OAAO,CAAC;CAC7B;AAmED,qBAAa,WAAW;IACtB,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAY;IACnC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAiB;IAC1C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAU;gBAEvB,MAAM,EAAE,iBAAiB;IAOrC;;;OAGG;IACG,gBAAgB,CAAC,KAAK,EAAE,SAAS,GAAG,OAAO,CAAC,UAAU,CAAC;IAS7D;;;OAGG;IACG,iBAAiB,CAAC,KAAK,EAAE,SAAS,GAAG,OAAO,CAAC,UAAU,CAAC;IAS9D;;;;;;;OAOG;IACG,cAAc,IAAI,OAAO,CAAC,IAAI,CAAC;IAuBrC;;OAEG;YACW,QAAQ;IAWtB;;OAEG;YACW,kBAAkB;IAoBhC;;OAEG;IACH,OAAO,CAAC,WAAW;CAapB"}
@@ -0,0 +1,178 @@
1
+ /**
2
+ * Hook Handler - Bridges Claude Code hooks to the ATR engine.
3
+ *
4
+ * Converts HookInput (PreToolUse/PostToolUse) into AgentEvents,
5
+ * evaluates them, and returns HookOutput for the agent host.
6
+ *
7
+ * Supports a stdio JSON-lines loop for use as a Claude Code hook process.
8
+ *
9
+ * CRITICAL: Fail-open by default -- errors resolve to "allow" so a
10
+ * bug in the guard never blocks legitimate agent operations. Set failOpen: false to deny on errors instead.
11
+ *
12
+ * @module agent-threat-rules/hook-handler
13
+ */
14
+ import { createInterface } from 'node:readline';
15
+ /** Default evaluation timeout in milliseconds */
16
+ const DEFAULT_TIMEOUT_MS = 5_000;
17
+ /**
18
+ * Create a frozen "allow" hook output, used as the safe default.
+ *
+ * @param {string} [reason] - Explanation to report; when null or undefined
+ *   the text "No threat detected." is used instead.
+ * @returns {{decision: string, reason: string}} Frozen object whose decision is "allow".
19
+ */
20
+ function allowOutput(reason) {
21
+ return Object.freeze({
22
+ decision: 'allow',
23
+ reason: reason ?? 'No threat detected.',
24
+ });
25
+ }
26
/**
 * Convert a HookInput into an AgentEvent for engine evaluation.
 *
 * The event type is 'tool_call' when input.hook is 'PreToolUse' and
 * 'tool_response' for anything else. content is tool_input.content when that
 * is a string, otherwise the JSON serialization of tool_input.
 *
 * @param {object} input - Hook input (reads hook, tool_name, tool_input,
 *   tool_response, timestamp, session_id).
 * @returns {object} Frozen event with type, timestamp, content, frozen
 *   fields (tool_name, tool_args, content and, for non-PreToolUse inputs
 *   with an output, tool_response) and sessionId.
 */
function hookInputToEvent(input) {
  const isPreTool = input.hook === 'PreToolUse';
  const toolInput = input.tool_input ?? {};
  // Serialize once; reused for tool_args and as the content fallback.
  const serializedArgs = JSON.stringify(toolInput);
  const content = typeof toolInput['content'] === 'string'
    ? toolInput['content']
    : serializedArgs;

  const fields = {
    tool_name: input.tool_name ?? '',
    tool_args: serializedArgs,
    content,
  };

  // For PostToolUse, include output/response if present.
  if (!isPreTool) {
    // NOTE(review): Claude Code documents the PostToolUse output as a
    // top-level tool_response; it is consulted only when tool_input carries
    // no output/response. Confirm HookInput passes that field through.
    const output = toolInput['output'] ?? toolInput['response'] ?? input.tool_response;
    if (typeof output === 'string') {
      fields['tool_response'] = output;
    } else if (output !== undefined && output !== null) {
      // Structured output is serialized so rules can still inspect it,
      // instead of being dropped unscanned.
      fields['tool_response'] = JSON.stringify(output);
    }
  }

  return Object.freeze({
    type: isPreTool ? 'tool_call' : 'tool_response',
    timestamp: input.timestamp ?? new Date().toISOString(),
    content,
    fields: Object.freeze(fields),
    sessionId: input.session_id,
  });
}
56
/**
 * Run a promise with a timeout. Resolves to the promise result
 * or rejects with a timeout error.
 *
 * @param {Promise<*>|*} promise - The work to wait for. A plain
 *   (non-thenable) value is accepted and resolves immediately.
 * @param {number} ms - Timeout in milliseconds.
 * @returns {Promise<*>} Settles like `promise`, or rejects with
 *   Error("Evaluation timed out after <ms>ms") if the timer fires first.
 */
function withTimeout(promise, ms) {
  return new Promise((resolve, reject) => {
    const timer = setTimeout(() => {
      reject(new Error(`Evaluation timed out after ${ms}ms`));
    }, ms);
    // Promise.resolve() lets a synchronous return value through; calling
    // .then on a non-thenable would throw and leave the timer armed.
    Promise.resolve(promise).then(
      (value) => {
        clearTimeout(timer);
        resolve(value);
      },
      (err) => {
        clearTimeout(timer);
        reject(err);
      },
    );
  });
}
68
/**
 * Bridges hook inputs to the ATR engine: converts each input to an event,
 * evaluates it with a timeout, and maps the verdict (or any error) to a
 * hook output.
 */
export class HookHandler {
  engine;
  executor;
  timeoutMs;
  failOpen;

  /**
   * @param {object} config - HookHandlerConfig.
   *   engine and executor are required; timeoutMs defaults to
   *   DEFAULT_TIMEOUT_MS and failOpen defaults to true.
   */
  constructor(config) {
    this.engine = config.engine;
    this.executor = config.executor;
    this.timeoutMs = config.timeoutMs ?? DEFAULT_TIMEOUT_MS;
    this.failOpen = config.failOpen ?? true;
  }

  /**
   * Handle a PreToolUse hook event.
   * Converts input to an AgentEvent, evaluates, and returns a HookOutput.
   *
   * @param {object} input - Hook input.
   * @returns {Promise<object>} Hook output; errors are mapped by handleError.
   */
  async handlePreToolUse(input) {
    try {
      // Pin the hook so the event type follows the method that was called,
      // even when the caller omitted or mis-set input.hook.
      const event = hookInputToEvent({ ...input, hook: 'PreToolUse' });
      return await this.evaluateAndRespond(event);
    } catch (err) {
      return this.handleError(err);
    }
  }

  /**
   * Handle a PostToolUse hook event.
   * Converts input to an AgentEvent and evaluates it to scan the tool output.
   *
   * @param {object} input - Hook input.
   * @returns {Promise<object>} Hook output; errors are mapped by handleError.
   */
  async handlePostToolUse(input) {
    try {
      const event = hookInputToEvent({ ...input, hook: 'PostToolUse' });
      return await this.evaluateAndRespond(event);
    } catch (err) {
      return this.handleError(err);
    }
  }

  /**
   * Start a stdio JSON-lines loop.
   *
   * Reads one JSON object per line from stdin, dispatches to the
   * appropriate handler, and writes one JSON line to stdout. Blank lines
   * are skipped; a line that fails to parse or dispatch produces the
   * handleError output.
   *
   * The returned promise resolves once stdin closes.
   */
  async startStdioLoop() {
    const rl = createInterface({
      input: process.stdin,
      crlfDelay: Infinity,
    });
    for await (const line of rl) {
      const trimmed = line.trim();
      if (!trimmed) {
        continue;
      }
      let output;
      try {
        const input = JSON.parse(trimmed);
        output = await this.dispatch(input);
      } catch (err) {
        output = this.handleError(err);
      }
      process.stdout.write(JSON.stringify(output) + '\n');
    }
  }

  /**
   * Dispatch a HookInput to the appropriate handler.
   *
   * An unknown hook type is allowed in fail-open mode (unchanged output) but
   * is treated as an error in fail-closed mode, so a malformed input cannot
   * bypass a guard configured to deny on failure.
   */
  async dispatch(input) {
    switch (input.hook) {
      case 'PreToolUse':
        return this.handlePreToolUse(input);
      case 'PostToolUse':
        return this.handlePostToolUse(input);
      default: {
        const message = `Unknown hook type: ${String(input.hook)}`;
        return this.failOpen
          ? allowOutput(message)
          : this.handleError(new Error(message));
      }
    }
  }

  /**
   * Evaluate an event with timeout and convert the verdict to HookOutput.
   *
   * @param {object} event - Event produced by hookInputToEvent.
   * @returns {Promise<object>} Frozen output with decision and reason, a
   *   "Blocked: ..." message for deny verdicts, and matched_rules when any
   *   rule matched (both are undefined otherwise).
   */
  async evaluateAndRespond(event) {
    const { verdict } = await withTimeout(
      this.engine.evaluateWithVerdict(event, this.executor),
      this.timeoutMs,
    );
    const matchedRules = verdict.matches.map((m) => m.rule.id);
    return Object.freeze({
      decision: verdict.outcome,
      reason: verdict.reason,
      message: verdict.outcome === 'deny'
        ? `Blocked: ${verdict.reason}`
        : undefined,
      matched_rules: matchedRules.length > 0
        ? Object.freeze(matchedRules)
        : undefined,
    });
  }

  /**
   * Handle errors with fail-open or fail-closed behavior.
   *
   * Writes the error message to stderr, then returns an allow output when
   * failOpen is true and a frozen deny output otherwise.
   *
   * @param {unknown} err - The thrown value.
   * @returns {object} Hook output.
   */
  handleError(err) {
    const message = err instanceof Error ? err.message : String(err);
    process.stderr.write(`[atr-guard] Error: ${message}\n`);
    if (this.failOpen) {
      return allowOutput(`Guard error (fail-open): ${message}`);
    }
    return Object.freeze({
      decision: 'deny',
      reason: `Guard error (fail-closed): ${message}`,
    });
  }
}
178
+ //# sourceMappingURL=hook-handler.js.map