@panguard-ai/atr 1.4.3 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (274) hide show
  1. package/dist/action-executor.d.ts +44 -0
  2. package/dist/action-executor.d.ts.map +1 -0
  3. package/dist/action-executor.js +130 -0
  4. package/dist/action-executor.js.map +1 -0
  5. package/dist/adapters/default-adapter.d.ts +24 -0
  6. package/dist/adapters/default-adapter.d.ts.map +1 -0
  7. package/dist/adapters/default-adapter.js +51 -0
  8. package/dist/adapters/default-adapter.js.map +1 -0
  9. package/dist/adapters/stdio-adapter.d.ts +30 -0
  10. package/dist/adapters/stdio-adapter.d.ts.map +1 -0
  11. package/dist/adapters/stdio-adapter.js +128 -0
  12. package/dist/adapters/stdio-adapter.js.map +1 -0
  13. package/dist/badge.d.ts +42 -0
  14. package/dist/badge.d.ts.map +1 -0
  15. package/dist/badge.js +163 -0
  16. package/dist/badge.js.map +1 -0
  17. package/dist/capability-extractor.d.ts +35 -0
  18. package/dist/capability-extractor.d.ts.map +1 -0
  19. package/dist/capability-extractor.js +91 -0
  20. package/dist/capability-extractor.js.map +1 -0
  21. package/dist/cli/scan-handler.d.ts +21 -0
  22. package/dist/cli/scan-handler.d.ts.map +1 -0
  23. package/dist/cli/scan-handler.js +276 -0
  24. package/dist/cli/scan-handler.js.map +1 -0
  25. package/dist/cli/tc-pipeline.d.ts +18 -0
  26. package/dist/cli/tc-pipeline.d.ts.map +1 -0
  27. package/dist/cli/tc-pipeline.js +295 -0
  28. package/dist/cli/tc-pipeline.js.map +1 -0
  29. package/dist/cli.d.ts +12 -0
  30. package/dist/cli.d.ts.map +1 -0
  31. package/dist/cli.js +894 -0
  32. package/dist/cli.js.map +1 -0
  33. package/dist/content-hash.d.ts +7 -0
  34. package/dist/content-hash.d.ts.map +1 -0
  35. package/dist/content-hash.js +10 -0
  36. package/dist/content-hash.js.map +1 -0
  37. package/dist/converters/elastic.d.ts +36 -0
  38. package/dist/converters/elastic.d.ts.map +1 -0
  39. package/dist/converters/elastic.js +125 -0
  40. package/dist/converters/elastic.js.map +1 -0
  41. package/dist/converters/generic-regex.d.ts +37 -0
  42. package/dist/converters/generic-regex.d.ts.map +1 -0
  43. package/dist/converters/generic-regex.js +59 -0
  44. package/dist/converters/generic-regex.js.map +1 -0
  45. package/dist/converters/index.d.ts +32 -0
  46. package/dist/converters/index.d.ts.map +1 -0
  47. package/dist/converters/index.js +38 -0
  48. package/dist/converters/index.js.map +1 -0
  49. package/dist/converters/sarif.d.ts +18 -0
  50. package/dist/converters/sarif.d.ts.map +1 -0
  51. package/dist/converters/sarif.js +142 -0
  52. package/dist/converters/sarif.js.map +1 -0
  53. package/dist/converters/splunk.d.ts +19 -0
  54. package/dist/converters/splunk.d.ts.map +1 -0
  55. package/dist/converters/splunk.js +148 -0
  56. package/dist/converters/splunk.js.map +1 -0
  57. package/dist/coverage-analyzer.d.ts +43 -0
  58. package/dist/coverage-analyzer.d.ts.map +1 -0
  59. package/dist/coverage-analyzer.js +329 -0
  60. package/dist/coverage-analyzer.js.map +1 -0
  61. package/dist/embedding/build-corpus.d.ts +15 -0
  62. package/dist/embedding/build-corpus.d.ts.map +1 -0
  63. package/dist/embedding/build-corpus.js +105 -0
  64. package/dist/embedding/build-corpus.js.map +1 -0
  65. package/dist/embedding/model-loader.d.ts +41 -0
  66. package/dist/embedding/model-loader.d.ts.map +1 -0
  67. package/dist/embedding/model-loader.js +90 -0
  68. package/dist/embedding/model-loader.js.map +1 -0
  69. package/dist/embedding/vector-store.d.ts +41 -0
  70. package/dist/embedding/vector-store.d.ts.map +1 -0
  71. package/dist/embedding/vector-store.js +70 -0
  72. package/dist/embedding/vector-store.js.map +1 -0
  73. package/dist/engine.d.ts +222 -0
  74. package/dist/engine.d.ts.map +1 -0
  75. package/dist/engine.js +1185 -0
  76. package/dist/engine.js.map +1 -0
  77. package/dist/eval/corpus.d.ts +42 -0
  78. package/dist/eval/corpus.d.ts.map +1 -0
  79. package/dist/eval/corpus.js +427 -0
  80. package/dist/eval/corpus.js.map +1 -0
  81. package/dist/eval/eval-harness.d.ts +44 -0
  82. package/dist/eval/eval-harness.d.ts.map +1 -0
  83. package/dist/eval/eval-harness.js +296 -0
  84. package/dist/eval/eval-harness.js.map +1 -0
  85. package/dist/eval/index.d.ts +13 -0
  86. package/dist/eval/index.d.ts.map +1 -0
  87. package/dist/eval/index.js +9 -0
  88. package/dist/eval/index.js.map +1 -0
  89. package/dist/eval/metrics.d.ts +74 -0
  90. package/dist/eval/metrics.d.ts.map +1 -0
  91. package/dist/eval/metrics.js +108 -0
  92. package/dist/eval/metrics.js.map +1 -0
  93. package/dist/eval/pint-corpus.d.ts +34 -0
  94. package/dist/eval/pint-corpus.d.ts.map +1 -0
  95. package/dist/eval/pint-corpus.js +113 -0
  96. package/dist/eval/pint-corpus.js.map +1 -0
  97. package/dist/eval/rule-corpus.d.ts +9 -0
  98. package/dist/eval/rule-corpus.d.ts.map +1 -0
  99. package/dist/eval/rule-corpus.js +4780 -0
  100. package/dist/eval/rule-corpus.js.map +1 -0
  101. package/dist/eval/rule-metrics.d.ts +34 -0
  102. package/dist/eval/rule-metrics.d.ts.map +1 -0
  103. package/dist/eval/rule-metrics.js +92 -0
  104. package/dist/eval/rule-metrics.js.map +1 -0
  105. package/dist/eval/run-eval.d.ts +7 -0
  106. package/dist/eval/run-eval.d.ts.map +1 -0
  107. package/dist/eval/run-eval.js +11 -0
  108. package/dist/eval/run-eval.js.map +1 -0
  109. package/dist/eval/run-pint-benchmark.d.ts +18 -0
  110. package/dist/eval/run-pint-benchmark.d.ts.map +1 -0
  111. package/dist/eval/run-pint-benchmark.js +159 -0
  112. package/dist/eval/run-pint-benchmark.js.map +1 -0
  113. package/dist/eval/skill-benchmark.d.ts +66 -0
  114. package/dist/eval/skill-benchmark.d.ts.map +1 -0
  115. package/dist/eval/skill-benchmark.js +194 -0
  116. package/dist/eval/skill-benchmark.js.map +1 -0
  117. package/dist/flywheel.d.ts +54 -0
  118. package/dist/flywheel.d.ts.map +1 -0
  119. package/dist/flywheel.js +121 -0
  120. package/dist/flywheel.js.map +1 -0
  121. package/dist/hook-handler.d.ts +61 -0
  122. package/dist/hook-handler.d.ts.map +1 -0
  123. package/dist/hook-handler.js +178 -0
  124. package/dist/hook-handler.js.map +1 -0
  125. package/dist/index.d.ts +8 -0
  126. package/dist/index.d.ts.map +1 -0
  127. package/{src/index.ts → dist/index.js} +1 -0
  128. package/dist/index.js.map +1 -0
  129. package/dist/layer-integration.d.ts +55 -0
  130. package/dist/layer-integration.d.ts.map +1 -0
  131. package/dist/layer-integration.js +187 -0
  132. package/dist/layer-integration.js.map +1 -0
  133. package/dist/loader.d.ts +18 -0
  134. package/dist/loader.d.ts.map +1 -0
  135. package/dist/loader.js +129 -0
  136. package/dist/loader.js.map +1 -0
  137. package/dist/mcp-server.d.ts +13 -0
  138. package/dist/mcp-server.d.ts.map +1 -0
  139. package/dist/mcp-server.js +246 -0
  140. package/dist/mcp-server.js.map +1 -0
  141. package/dist/mcp-tools/coverage-gaps.d.ts +13 -0
  142. package/dist/mcp-tools/coverage-gaps.d.ts.map +1 -0
  143. package/dist/mcp-tools/coverage-gaps.js +55 -0
  144. package/dist/mcp-tools/coverage-gaps.js.map +1 -0
  145. package/dist/mcp-tools/list-rules.d.ts +17 -0
  146. package/dist/mcp-tools/list-rules.d.ts.map +1 -0
  147. package/dist/mcp-tools/list-rules.js +45 -0
  148. package/dist/mcp-tools/list-rules.js.map +1 -0
  149. package/dist/mcp-tools/scan-skill.d.ts +17 -0
  150. package/dist/mcp-tools/scan-skill.d.ts.map +1 -0
  151. package/dist/mcp-tools/scan-skill.js +65 -0
  152. package/dist/mcp-tools/scan-skill.js.map +1 -0
  153. package/dist/mcp-tools/scan.d.ts +24 -0
  154. package/dist/mcp-tools/scan.d.ts.map +1 -0
  155. package/dist/mcp-tools/scan.js +94 -0
  156. package/dist/mcp-tools/scan.js.map +1 -0
  157. package/dist/mcp-tools/submit-proposal.d.ts +12 -0
  158. package/dist/mcp-tools/submit-proposal.d.ts.map +1 -0
  159. package/dist/mcp-tools/submit-proposal.js +103 -0
  160. package/dist/mcp-tools/submit-proposal.js.map +1 -0
  161. package/dist/mcp-tools/threat-summary.d.ts +12 -0
  162. package/dist/mcp-tools/threat-summary.d.ts.map +1 -0
  163. package/dist/mcp-tools/threat-summary.js +74 -0
  164. package/dist/mcp-tools/threat-summary.js.map +1 -0
  165. package/dist/mcp-tools/validate.d.ts +15 -0
  166. package/dist/mcp-tools/validate.d.ts.map +1 -0
  167. package/dist/mcp-tools/validate.js +51 -0
  168. package/dist/mcp-tools/validate.js.map +1 -0
  169. package/dist/modules/embedding.d.ts +71 -0
  170. package/dist/modules/embedding.d.ts.map +1 -0
  171. package/dist/modules/embedding.js +141 -0
  172. package/dist/modules/embedding.js.map +1 -0
  173. package/dist/modules/index.d.ts +144 -0
  174. package/dist/modules/index.d.ts.map +1 -0
  175. package/dist/modules/index.js +82 -0
  176. package/dist/modules/index.js.map +1 -0
  177. package/dist/modules/semantic.d.ts +106 -0
  178. package/dist/modules/semantic.d.ts.map +1 -0
  179. package/dist/modules/semantic.js +359 -0
  180. package/dist/modules/semantic.js.map +1 -0
  181. package/dist/modules/session.d.ts +70 -0
  182. package/dist/modules/session.d.ts.map +1 -0
  183. package/dist/modules/session.js +128 -0
  184. package/dist/modules/session.js.map +1 -0
  185. package/dist/quality/adapters/atr.d.ts +65 -0
  186. package/dist/quality/adapters/atr.d.ts.map +1 -0
  187. package/dist/quality/adapters/atr.js +154 -0
  188. package/dist/quality/adapters/atr.js.map +1 -0
  189. package/dist/quality/adapters/index.d.ts +10 -0
  190. package/dist/quality/adapters/index.d.ts.map +1 -0
  191. package/dist/quality/adapters/index.js +10 -0
  192. package/dist/quality/adapters/index.js.map +1 -0
  193. package/dist/quality/compute-confidence.d.ts +45 -0
  194. package/dist/quality/compute-confidence.d.ts.map +1 -0
  195. package/dist/quality/compute-confidence.js +133 -0
  196. package/dist/quality/compute-confidence.js.map +1 -0
  197. package/dist/quality/index.d.ts +36 -0
  198. package/dist/quality/index.d.ts.map +1 -0
  199. package/dist/quality/index.js +39 -0
  200. package/dist/quality/index.js.map +1 -0
  201. package/dist/quality/quality-gate.d.ts +86 -0
  202. package/dist/quality/quality-gate.d.ts.map +1 -0
  203. package/dist/quality/quality-gate.js +187 -0
  204. package/dist/quality/quality-gate.js.map +1 -0
  205. package/dist/quality/types.d.ts +129 -0
  206. package/dist/quality/types.d.ts.map +1 -0
  207. package/dist/quality/types.js +10 -0
  208. package/dist/quality/types.js.map +1 -0
  209. package/dist/quality/validate-maturity.d.ts +51 -0
  210. package/dist/quality/validate-maturity.d.ts.map +1 -0
  211. package/dist/quality/validate-maturity.js +134 -0
  212. package/dist/quality/validate-maturity.js.map +1 -0
  213. package/dist/quality.d.ts +8 -0
  214. package/dist/quality.d.ts.map +1 -0
  215. package/dist/quality.js +8 -0
  216. package/dist/quality.js.map +1 -0
  217. package/dist/rule-scaffolder.d.ts +53 -0
  218. package/dist/rule-scaffolder.d.ts.map +1 -0
  219. package/dist/rule-scaffolder.js +301 -0
  220. package/dist/rule-scaffolder.js.map +1 -0
  221. package/dist/session-tracker.d.ts +58 -0
  222. package/dist/session-tracker.d.ts.map +1 -0
  223. package/dist/session-tracker.js +176 -0
  224. package/dist/session-tracker.js.map +1 -0
  225. package/dist/shadow-evaluator.d.ts +48 -0
  226. package/dist/shadow-evaluator.d.ts.map +1 -0
  227. package/dist/shadow-evaluator.js +129 -0
  228. package/dist/shadow-evaluator.js.map +1 -0
  229. package/dist/skill-fingerprint.d.ts +85 -0
  230. package/dist/skill-fingerprint.d.ts.map +1 -0
  231. package/dist/skill-fingerprint.js +284 -0
  232. package/dist/skill-fingerprint.js.map +1 -0
  233. package/dist/tc-reporter.d.ts +50 -0
  234. package/dist/tc-reporter.d.ts.map +1 -0
  235. package/dist/tc-reporter.js +164 -0
  236. package/dist/tc-reporter.js.map +1 -0
  237. package/dist/tier0-invariant.d.ts +49 -0
  238. package/dist/tier0-invariant.d.ts.map +1 -0
  239. package/dist/tier0-invariant.js +185 -0
  240. package/dist/tier0-invariant.js.map +1 -0
  241. package/dist/tier1-blacklist.d.ts +48 -0
  242. package/dist/tier1-blacklist.d.ts.map +1 -0
  243. package/dist/tier1-blacklist.js +92 -0
  244. package/dist/tier1-blacklist.js.map +1 -0
  245. package/dist/types.d.ts +232 -0
  246. package/dist/types.d.ts.map +1 -0
  247. package/dist/types.js +6 -0
  248. package/dist/types.js.map +1 -0
  249. package/dist/verdict.d.ts +26 -0
  250. package/dist/verdict.d.ts.map +1 -0
  251. package/dist/verdict.js +127 -0
  252. package/dist/verdict.js.map +1 -0
  253. package/package.json +16 -4
  254. package/.github/ISSUE_TEMPLATE/evasion-report.yml +0 -75
  255. package/.github/ISSUE_TEMPLATE/false-positive.yml +0 -31
  256. package/.github/ISSUE_TEMPLATE/mirofish-prediction.yml +0 -128
  257. package/.github/ISSUE_TEMPLATE/new-rule.yml +0 -37
  258. package/.github/PULL_REQUEST_TEMPLATE.md +0 -23
  259. package/.github/workflows/rule-quality.yml +0 -203
  260. package/.github/workflows/validate.yml +0 -42
  261. package/CHANGELOG.md +0 -30
  262. package/CONTRIBUTING.md +0 -168
  263. package/CONTRIBUTORS.md +0 -28
  264. package/COVERAGE.md +0 -135
  265. package/LIMITATIONS.md +0 -154
  266. package/SECURITY.md +0 -48
  267. package/THREAT-MODEL.md +0 -243
  268. package/docs/contribution-paths.md +0 -202
  269. package/docs/mirofish-prediction-guide.md +0 -304
  270. package/docs/quick-start.md +0 -245
  271. package/docs/rule-writing-guide.md +0 -647
  272. package/docs/schema-spec.md +0 -594
  273. package/examples/how-to-write-a-rule.md +0 -251
  274. package/tsconfig.json +0 -17
@@ -0,0 +1,296 @@
1
+ /**
2
+ * Evaluation Harness -- orchestrates running the corpus through the ATR engine
3
+ * and produces a structured EvalReport.
4
+ *
5
+ * Supports:
6
+ * - Regex-only evaluation (Tier 2)
7
+ * - Regex + Embedding evaluation (Tier 2 + 2.5)
8
+ * - Full pipeline evaluation (all tiers)
9
+ * - Per-sample latency measurement
10
+ * - Regression check against baseline thresholds
11
+ *
12
+ * @module agent-threat-rules/eval/eval-harness
13
+ */
14
+ import { resolve, join } from 'node:path';
15
+ import { existsSync, readFileSync, writeFileSync } from 'node:fs';
16
+ import { ATREngine } from '../engine.js';
17
+ import { EmbeddingModule } from '../modules/embedding.js';
18
+ import { EVAL_CORPUS, getCorpusStats } from './corpus.js';
19
+ import { computeEvalReport, checkRegression } from './metrics.js';
20
+ import { computeRuleQuality } from './rule-metrics.js';
21
/**
 * Build the event object handed to the engine from one corpus sample.
 *
 * @param sample - Corpus entry; its `eventType`, `text` and `fields` are read.
 * @returns An object with `type`, `content`, `timestamp` and `fields`.
 */
function sampleToEvent(sample) {
    const { eventType, text, fields } = sample;
    // The timestamp is the moment of conversion; nothing is read from the sample for it.
    const timestamp = new Date().toISOString();
    return { type: eventType, content: text, timestamp, fields };
}
32
/**
 * Run one corpus sample through the engine's synchronous `evaluate` path
 * (regex only) and record the outcome.
 *
 * Engine failures are contained so that a single bad sample cannot abort the
 * whole evaluation: the sample is reported as not detected, with no matched
 * rules, zero confidence and zero latency. The failure message is attached as
 * `error` so that an engine crash can be told apart from a genuine miss.
 *
 * @param engine - Object exposing `evaluate(event)` that returns an array of matches.
 * @param sample - Corpus sample to evaluate.
 * @returns Per-sample result: `id`, `category`, `expectedDetection`,
 *   `actualDetection`, `matchedRules`, `confidence`, `latencyMs`, `difficulty`,
 *   `tier`, plus `error` (string) only when the engine threw.
 */
function evaluateSampleRegex(engine, sample) {
    const event = sampleToEvent(sample);
    try {
        const start = performance.now();
        const matches = engine.evaluate(event);
        const latencyMs = performance.now() - start;
        const detected = matches.length > 0;
        // Confidence is taken from the first match the engine returned.
        const topMatch = matches[0];
        return {
            id: sample.id,
            category: sample.category,
            expectedDetection: sample.expectedDetection,
            actualDetection: detected,
            matchedRules: matches.map((m) => m.rule.id),
            confidence: topMatch?.confidence ?? 0,
            latencyMs,
            difficulty: sample.difficulty,
            tier: sample.tier,
        };
    }
    catch (err) {
        // Best-effort: keep the run going, but do not hide that this sample errored.
        return {
            id: sample.id,
            category: sample.category,
            expectedDetection: sample.expectedDetection,
            actualDetection: false,
            matchedRules: [],
            confidence: 0,
            latencyMs: 0,
            difficulty: sample.difficulty,
            tier: sample.tier,
            error: err instanceof Error ? err.message : String(err),
        };
    }
}
70
/**
 * Run one corpus sample through the engine's asynchronous
 * `evaluateWithVerdict` path and record the outcome.
 *
 * Engine failures (thrown or rejected) are contained so that a single bad
 * sample cannot abort the whole evaluation: the sample is reported as not
 * detected, with no matched rules, zero confidence and zero latency. The
 * failure message is attached as `error` so that an engine crash can be told
 * apart from a genuine miss.
 *
 * @param engine - Object exposing `evaluateWithVerdict(event)` that resolves to
 *   `{ verdict }`, where `verdict` carries `matchCount`, `matches` and
 *   `highestConfidence`.
 * @param sample - Corpus sample to evaluate.
 * @returns Promise of the per-sample result: `id`, `category`,
 *   `expectedDetection`, `actualDetection`, `matchedRules`, `confidence`,
 *   `latencyMs`, `difficulty`, `tier`, plus `error` (string) only on failure.
 */
async function evaluateSampleFull(engine, sample) {
    const event = sampleToEvent(sample);
    try {
        const start = performance.now();
        const { verdict } = await engine.evaluateWithVerdict(event);
        const latencyMs = performance.now() - start;
        const detected = verdict.matchCount > 0;
        return {
            id: sample.id,
            category: sample.category,
            expectedDetection: sample.expectedDetection,
            actualDetection: detected,
            matchedRules: verdict.matches.map((m) => m.rule.id),
            confidence: verdict.highestConfidence,
            latencyMs,
            difficulty: sample.difficulty,
            tier: sample.tier,
        };
    }
    catch (err) {
        // Best-effort: keep the run going, but do not hide that this sample errored.
        return {
            id: sample.id,
            category: sample.category,
            expectedDetection: sample.expectedDetection,
            actualDetection: false,
            matchedRules: [],
            confidence: 0,
            latencyMs: 0,
            difficulty: sample.difficulty,
            tier: sample.tier,
            error: err instanceof Error ? err.message : String(err),
        };
    }
}
106
/**
 * Attempt to build an initialized EmbeddingModule from a JSON file.
 *
 * @param embeddingsPath - Path of the JSON file whose parsed content is passed
 *   to the module as `attackVectorsData`.
 * @returns Promise of the initialized module when it reports itself available;
 *   `null` when the file does not exist, the module reports itself unavailable,
 *   or any step (read, parse, construct, initialize) throws.
 */
async function tryLoadEmbedding(embeddingsPath) {
    if (existsSync(embeddingsPath)) {
        try {
            const rawJson = readFileSync(embeddingsPath, 'utf-8');
            const embedder = new EmbeddingModule({
                attackVectorsData: JSON.parse(rawJson),
                similarityThreshold: 0.65,
            });
            await embedder.initialize();
            if (embedder.isAvailable()) {
                return embedder;
            }
        }
        catch {
            // Any failure means "no embedding tier"; fall through to the null result.
        }
    }
    return null;
}
125
/**
 * Run the full evaluation harness.
 *
 * Steps: optionally load the embedding module, load rules into an ATREngine,
 * evaluate every corpus sample, compute the report, regression check and
 * per-rule quality, and optionally write everything to `config.outputPath`.
 *
 * The embedding module, when one was loaded, is destroyed in a `finally`
 * block so it is released even if rule loading or the "no rules" check throws.
 *
 * @param config - Evaluation options. Fields read here:
 *   `rulesDir` (required), `corpus` (defaults to EVAL_CORPUS),
 *   `embeddingsPath` (defaults to `<rulesDir>/../data/attack-embeddings.json`),
 *   `enableEmbedding` (embedding is attempted unless this is exactly `false`),
 *   `thresholds` (forwarded to checkRegression), and `outputPath` (when truthy,
 *   the JSON report is written there).
 * @returns Promise of `{ report, regression, corpusStats, tiersUsed, ruleQuality }`.
 * @throws Error when the engine loads zero rules from `config.rulesDir`.
 */
export async function runEval(config) {
    const corpus = config.corpus ?? EVAL_CORPUS;
    const base = resolve(config.rulesDir, '..');
    const embeddingsPath = config.embeddingsPath ?? join(base, 'data', 'attack-embeddings.json');
    // Try to load embedding module
    const shouldEmbed = config.enableEmbedding !== false;
    let embeddingModule = null;
    const tiersUsed = ['tier2-regex'];
    if (shouldEmbed) {
        embeddingModule = await tryLoadEmbedding(embeddingsPath);
        if (embeddingModule) {
            tiersUsed.push('tier2.5-embedding');
        }
    }
    let engine;
    let ruleCount;
    const results = [];
    try {
        // Initialize engine
        engine = new ATREngine({
            rulesDir: config.rulesDir,
            embeddingModule: embeddingModule ?? undefined,
        });
        ruleCount = await engine.loadRules();
        if (ruleCount === 0) {
            throw new Error(`No rules loaded from ${config.rulesDir}`);
        }
        // Run all samples; the async verdict path is used only when embeddings loaded.
        const useFullPipeline = embeddingModule !== null;
        for (const sample of corpus) {
            const result = useFullPipeline
                ? await evaluateSampleFull(engine, sample)
                : evaluateSampleRegex(engine, sample);
            results.push(result);
        }
    }
    finally {
        // Cleanup embedding module on success and on failure alike.
        if (embeddingModule) {
            await embeddingModule.destroy();
        }
    }
    // Compute report
    const report = computeEvalReport(results);
    const regression = checkRegression(report, config.thresholds);
    // NOTE(review): getCorpusStats() takes no arguments here, so when
    // config.corpus overrides the default these stats may not describe the
    // samples that were actually evaluated — confirm against corpus.js.
    const corpusStats = getCorpusStats();
    // Compute per-rule quality
    const loadedRuleIds = engine.getRules().map((r) => r.id);
    const ruleQuality = computeRuleQuality(results, loadedRuleIds);
    // Save report if output path specified
    if (config.outputPath) {
        const output = {
            report,
            regression,
            corpusStats,
            ruleQuality,
            ruleCount,
            engine: 'ATREngine',
            tiers: tiersUsed,
        };
        writeFileSync(config.outputPath, JSON.stringify(output, null, 2));
    }
    return { report, regression, corpusStats, tiersUsed, ruleQuality };
}
187
+ // ---------------------------------------------------------------------------
188
+ // CLI entry point
189
+ // ---------------------------------------------------------------------------
190
/**
 * Render a fraction as a percentage string with one decimal place.
 *
 * @param n - Fraction to render (for example 0.5).
 * @returns The value times 100, fixed to one decimal, followed by `%`.
 */
function formatPercent(n) {
    const scaled = n * 100;
    return scaled.toFixed(1) + '%';
}
193
/**
 * Render a number as a string with two decimal places and an `ms` suffix.
 *
 * @param n - Value to render.
 * @returns The value fixed to two decimals, followed by `ms`.
 */
function formatMs(n) {
    const fixed = n.toFixed(2);
    return fixed + 'ms';
}
196
/**
 * Command-line entry point for the evaluation harness.
 *
 * Resolves the directory two levels above this module's directory (falling
 * back to '.' when `import.meta.dirname` is undefined), runs the evaluation
 * with `<that dir>/rules` as the rules directory and
 * `<that dir>/data/eval-report.json` as the output path, then prints a summary
 * to the console: corpus stats, overall metrics, latency, per-category and
 * per-difficulty metrics, missed attacks, false positives, rule quality and
 * the regression check. Sets `process.exitCode` to 1 when the regression
 * check did not pass.
 *
 * @returns Promise that resolves once the summary has been printed.
 */
export async function runEvalCLI() {
    const say = (line) => console.log(line);
    const baseDir = resolve(join(import.meta.dirname ?? '.', '..', '..'));
    const rulesDir = join(baseDir, 'rules');
    const outputPath = join(baseDir, 'data', 'eval-report.json');
    say('\n=== ATR Evaluation Harness ===\n');
    const outcome = await runEval({ rulesDir, outputPath });
    const { report, regression, corpusStats, tiersUsed, ruleQuality } = outcome;

    // Corpus stats
    say(`Corpus: ${corpusStats.total} samples (${corpusStats.attacks} attacks, ${corpusStats.benign} benign)`);
    say(`Categories: ${Object.keys(corpusStats.byCategory).join(', ')}`);
    say(`Tiers: ${tiersUsed.join(' + ')}`);

    // Overall metrics
    say(`\n--- Overall ---`);
    say(` Precision: ${formatPercent(report.overall.precision)}`);
    say(` Recall: ${formatPercent(report.overall.recall)}`);
    say(` F1: ${formatPercent(report.overall.f1)}`);
    say(` Accuracy: ${formatPercent(report.overall.accuracy)}`);
    say(` FP Rate: ${formatPercent(report.overall.fpRate)}`);
    say(` Confusion: TP=${report.overall.confusion.tp} FP=${report.overall.confusion.fp} TN=${report.overall.confusion.tn} FN=${report.overall.confusion.fn}`);

    // Latency
    say(`\n--- Latency ---`);
    say(` P50: ${formatMs(report.latency.p50)}`);
    say(` P95: ${formatMs(report.latency.p95)}`);
    say(` P99: ${formatMs(report.latency.p99)}`);
    say(` Mean: ${formatMs(report.latency.mean)}`);
    say(` Max: ${formatMs(report.latency.max)}`);

    // Per category
    say(`\n--- By Category ---`);
    for (const entry of report.byCategory) {
        let missedNote = '';
        if (entry.missedSamples.length > 0) {
            missedNote = ` (missed: ${entry.missedSamples.join(', ')})`;
        }
        let fpNote = '';
        if (entry.falsePositives.length > 0) {
            fpNote = ` (FP: ${entry.falsePositives.join(', ')})`;
        }
        say(` ${entry.category}: recall=${formatPercent(entry.metrics.recall)} precision=${formatPercent(entry.metrics.precision)} f1=${formatPercent(entry.metrics.f1)}${missedNote}${fpNote}`);
    }

    // Per difficulty
    say(`\n--- By Difficulty ---`);
    for (const level of report.byDifficulty) {
        say(` ${level.difficulty}: recall=${formatPercent(level.metrics.recall)} precision=${formatPercent(level.metrics.precision)} f1=${formatPercent(level.metrics.f1)}`);
    }

    // Missed attacks
    const missedCount = report.missedAttacks.length;
    if (missedCount > 0) {
        say(`\n--- Missed Attacks (${missedCount}) ---`);
        for (const attack of report.missedAttacks) {
            say(` [${attack.id}] ${attack.category}/${attack.difficulty}/${attack.tier}`);
        }
    }

    // False positives
    const falsePositiveCount = report.falsePositives.length;
    if (falsePositiveCount > 0) {
        say(`\n--- False Positives (${falsePositiveCount}) ---`);
        for (const benign of report.falsePositives) {
            say(` [${benign.id}] rules: ${benign.matchedRules.join(', ')}`);
        }
    }

    // Rule quality
    say(`\n--- Rule Quality ---`);
    say(` Total rules loaded: ${ruleQuality.totalRulesEvaluated}`);
    say(` Rules fired: ${ruleQuality.rulesFired}`);
    say(` Rules never fired: ${ruleQuality.rulesNeverFired}`);
    if (ruleQuality.topRules.length > 0) {
        say(`\n Top 10 rules by match count:`);
        for (const entry of ruleQuality.topRules.slice(0, 10)) {
            // Guard the division: a rule with no matches has no precision.
            let precision = 'N/A';
            if (entry.matchCount > 0) {
                precision = formatPercent(entry.tpCount / entry.matchCount);
            }
            say(` ${entry.ruleId}: matches=${entry.matchCount} TP=${entry.tpCount} FP=${entry.fpCount} precision=${precision} avgConf=${entry.avgConfidence.toFixed(2)}`);
        }
    }
    if (ruleQuality.weakRules.length > 0) {
        say(`\n Weak rules (FP > 0 or matchCount <= 1):`);
        for (const entry of ruleQuality.weakRules.slice(0, 10)) {
            say(` ${entry.ruleId}: matches=${entry.matchCount} TP=${entry.tpCount} FP=${entry.fpCount} categories=[${entry.categories.join(', ')}]`);
        }
    }
    const silentCount = ruleQuality.neverFiredRuleIds.length;
    if (silentCount > 0) {
        say(`\n Never-fired rules (${silentCount}):`);
        // Only the first 20 ids are listed; the remainder is summarized.
        for (const ruleId of ruleQuality.neverFiredRuleIds.slice(0, 20)) {
            say(` ${ruleId}`);
        }
        if (silentCount > 20) {
            say(` ... and ${silentCount - 20} more`);
        }
    }

    // Regression check
    say(`\n--- Regression Check ---`);
    const failedRegression = !regression.passed;
    if (failedRegression) {
        say(' FAILED:');
        for (const violation of regression.violations) {
            say(` - ${violation}`);
        }
    }
    else {
        say(' PASSED');
    }
    say(`\nReport saved to: ${outputPath}`);
    say('Done.\n');
    if (failedRegression) {
        // Signal failure without cutting off pending output.
        process.exitCode = 1;
    }
}
296
+ //# sourceMappingURL=eval-harness.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"eval-harness.js","sourceRoot":"","sources":["../../src/eval/eval-harness.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAEH,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAC1C,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,aAAa,EAAE,MAAM,SAAS,CAAC;AAClE,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,EAAE,eAAe,EAAE,MAAM,yBAAyB,CAAC;AAG1D,OAAO,EAAE,WAAW,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAE1D,OAAO,EAAE,iBAAiB,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAElE,OAAO,EAAE,kBAAkB,EAAE,MAAM,mBAAmB,CAAC;AAiBvD;;GAEG;AACH,SAAS,aAAa,CAAC,MAAoB;IACzC,OAAO;QACL,IAAI,EAAE,MAAM,CAAC,SAAS;QACtB,OAAO,EAAE,MAAM,CAAC,IAAI;QACpB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACnC,MAAM,EAAE,MAAM,CAAC,MAAM;KACtB,CAAC;AACJ,CAAC;AAED;;;GAGG;AACH,SAAS,mBAAmB,CAC1B,MAAiB,EACjB,MAAoB;IAEpB,MAAM,KAAK,GAAG,aAAa,CAAC,MAAM,CAAC,CAAC;IAEpC,IAAI,CAAC;QACH,MAAM,KAAK,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;QAChC,MAAM,OAAO,GAAG,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;QACvC,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC;QAE5C,MAAM,QAAQ,GAAG,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC;QACpC,MAAM,QAAQ,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;QAE5B,OAAO;YACL,EAAE,EAAE,MAAM,CAAC,EAAE;YACb,QAAQ,EAAE,MAAM,CAAC,QAAQ;YACzB,iBAAiB,EAAE,MAAM,CAAC,iBAAiB;YAC3C,eAAe,EAAE,QAAQ;YACzB,YAAY,EAAE,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC;YAC3C,UAAU,EAAE,QAAQ,EAAE,UAAU,IAAI,CAAC;YACrC,SAAS;YACT,UAAU,EAAE,MAAM,CAAC,UAAU;YAC7B,IAAI,EAAE,MAAM,CAAC,IAAI;SAClB,CAAC;IACJ,CAAC;IAAC,MAAM,CAAC;QACP,OAAO;YACL,EAAE,EAAE,MAAM,CAAC,EAAE;YACb,QAAQ,EAAE,MAAM,CAAC,QAAQ;YACzB,iBAAiB,EAAE,MAAM,CAAC,iBAAiB;YAC3C,eAAe,EAAE,KAAK;YACtB,YAAY,EAAE,EAAE;YAChB,UAAU,EAAE,CAAC;YACb,SAAS,EAAE,CAAC;YACZ,UAAU,EAAE,MAAM,CAAC,UAAU;YAC7B,IAAI,EAAE,MAAM,CAAC,IAAI;SAClB,CAAC;IACJ,CAAC;AACH,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,kBAAkB,CAC/B,MAAiB,EACjB,MAAoB;IAEpB,MAAM,KAAK,GAAG,aAAa,CAAC,MAAM,CAAC,CAAC;IAEpC,IAAI,CAAC;QACH,MAAM,KAAK,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;QAChC,MAAM,EAAE,OAAO,EAAE,GAAG,MAAM,MAAM,CAAC,mBAAmB,CAAC,KAAK,CAAC,CAAC;QAC5D,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,KAA
K,CAAC;QAE5C,MAAM,QAAQ,GAAG,OAAO,CAAC,UAAU,GAAG,CAAC,CAAC;QAExC,OAAO;YACL,EAAE,EAAE,MAAM,CAAC,EAAE;YACb,QAAQ,EAAE,MAAM,CAAC,QAAQ;YACzB,iBAAiB,EAAE,MAAM,CAAC,iBAAiB;YAC3C,eAAe,EAAE,QAAQ;YACzB,YAAY,EAAE,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC;YACnD,UAAU,EAAE,OAAO,CAAC,iBAAiB;YACrC,SAAS;YACT,UAAU,EAAE,MAAM,CAAC,UAAU;YAC7B,IAAI,EAAE,MAAM,CAAC,IAAI;SAClB,CAAC;IACJ,CAAC;IAAC,MAAM,CAAC;QACP,OAAO;YACL,EAAE,EAAE,MAAM,CAAC,EAAE;YACb,QAAQ,EAAE,MAAM,CAAC,QAAQ;YACzB,iBAAiB,EAAE,MAAM,CAAC,iBAAiB;YAC3C,eAAe,EAAE,KAAK;YACtB,YAAY,EAAE,EAAE;YAChB,UAAU,EAAE,CAAC;YACb,SAAS,EAAE,CAAC;YACZ,UAAU,EAAE,MAAM,CAAC,UAAU;YAC7B,IAAI,EAAE,MAAM,CAAC,IAAI;SAClB,CAAC;IACJ,CAAC;AACH,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,gBAAgB,CAAC,cAAsB;IACpD,IAAI,CAAC,UAAU,CAAC,cAAc,CAAC;QAAE,OAAO,IAAI,CAAC;IAE7C,IAAI,CAAC;QACH,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,cAAc,EAAE,OAAO,CAAC,CAAC,CAAC;QAC/D,MAAM,MAAM,GAAG,IAAI,eAAe,CAAC;YACjC,iBAAiB,EAAE,IAAI;YACvB,mBAAmB,EAAE,IAAI;SAC1B,CAAC,CAAC;QACH,MAAM,MAAM,CAAC,UAAU,EAAE,CAAC;QAC1B,OAAO,MAAM,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC;IAC9C,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,OAAO,CAAC,MAAkB;IAO9C,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,IAAI,WAAW,CAAC;IAC5C,MAAM,IAAI,GAAG,OAAO,CAAC,MAAM,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC;IAC5C,MAAM,cAAc,GAAG,MAAM,CAAC,cAAc,IAAI,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,wBAAwB,CAAC,CAAC;IAE7F,+BAA+B;IAC/B,MAAM,WAAW,GAAG,MAAM,CAAC,eAAe,KAAK,KAAK,CAAC;IACrD,IAAI,eAAe,GAA2B,IAAI,CAAC;IACnD,MAAM,SAAS,GAAa,CAAC,aAAa,CAAC,CAAC;IAE5C,IAAI,WAAW,EAAE,CAAC;QAChB,eAAe,GAAG,MAAM,gBAAgB,CAAC,cAAc,CAAC,CAAC;QACzD,IAAI,eAAe,EAAE,CAAC;YACpB,SAAS,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC;QACtC,CAAC;IACH,CAAC;IAED,oBAAoB;IACpB,MAAM,MAAM,GAAG,IAAI,SAAS,CAAC;QAC3B,QAAQ,EAAE,MAAM,CAAC,QAAQ;QACzB,eAAe,EAAE,eAAe,IAAI,SAAS;KAC9C,CAAC,CAAC;IACH,MAAM,SAAS,GAAG,MAAM,MAAM,CAAC,SAAS,EAAE,CAAC;IAE3C,IAAI,SAAS,KAAK,CAAC,EAAE,CAAC;QACpB,MAAM,IAAI,KAAK,CAAC,wBAAwB,MAAM,CAAC,QAAQ,EAA
E,CAAC,CAAC;IAC7D,CAAC;IAED,kBAAkB;IAClB,MAAM,OAAO,GAAmB,EAAE,CAAC;IACnC,MAAM,eAAe,GAAG,eAAe,KAAK,IAAI,CAAC;IAEjD,KAAK,MAAM,MAAM,IAAI,MAAM,EAAE,CAAC;QAC5B,MAAM,MAAM,GAAG,eAAe;YAC5B,CAAC,CAAC,MAAM,kBAAkB,CAAC,MAAM,EAAE,MAAM,CAAC;YAC1C,CAAC,CAAC,mBAAmB,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;QACxC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IACvB,CAAC;IAED,2BAA2B;IAC3B,IAAI,eAAe,EAAE,CAAC;QACpB,MAAM,eAAe,CAAC,OAAO,EAAE,CAAC;IAClC,CAAC;IAED,iBAAiB;IACjB,MAAM,MAAM,GAAG,iBAAiB,CAAC,OAAO,CAAC,CAAC;IAC1C,MAAM,UAAU,GAAG,eAAe,CAAC,MAAM,EAAE,MAAM,CAAC,UAAU,CAAC,CAAC;IAC9D,MAAM,WAAW,GAAG,cAAc,EAAE,CAAC;IAErC,2BAA2B;IAC3B,MAAM,aAAa,GAAG,MAAM,CAAC,QAAQ,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;IACzD,MAAM,WAAW,GAAG,kBAAkB,CAAC,OAAO,EAAE,aAAa,CAAC,CAAC;IAE/D,uCAAuC;IACvC,IAAI,MAAM,CAAC,UAAU,EAAE,CAAC;QACtB,MAAM,MAAM,GAAG;YACb,MAAM;YACN,UAAU;YACV,WAAW;YACX,WAAW;YACX,SAAS;YACT,MAAM,EAAE,WAAW;YACnB,KAAK,EAAE,SAAS;SACjB,CAAC;QACF,aAAa,CAAC,MAAM,CAAC,UAAU,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IACpE,CAAC;IAED,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,WAAW,EAAE,SAAS,EAAE,WAAW,EAAE,CAAC;AACrE,CAAC;AAED,8EAA8E;AAC9E,kBAAkB;AAClB,8EAA8E;AAE9E,SAAS,aAAa,CAAC,CAAS;IAC9B,OAAO,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC;AACpC,CAAC;AAED,SAAS,QAAQ,CAAC,CAAS;IACzB,OAAO,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC;AAC7B,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,UAAU;IAC9B,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,IAAI,GAAG,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC;IACnE,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IACrC,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,kBAAkB,CAAC,CAAC;IAE1D,OAAO,CAAC,GAAG,CAAC,oCAAoC,CAAC,CAAC;IAElD,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,WAAW,EAAE,SAAS,EAAE,WAAW,EAAE,GAAG,MAAM,OAAO,CAAC;QAChF,QAAQ;QACR,UAAU;KACX,CAAC,CAAC;IAEH,eAAe;IACf,OAAO,CAAC,GAAG,CAAC,WAAW,WAAW,CAAC,KAAK,aAAa,WAAW,CAAC,OAAO,aAAa,WAAW,CAAC,MAAM,UAAU,CAAC,CAAC;IACnH,OAAO,CAAC,GAAG,CAAC,eAAe,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,
CAAC,CAAC;IAC7E,OAAO,CAAC,GAAG,CAAC,UAAU,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;IAE/C,kBAAkB;IAClB,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;IACjC,OAAO,CAAC,GAAG,CAAC,iBAAiB,aAAa,CAAC,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE,CAAC,CAAC;IACxE,OAAO,CAAC,GAAG,CAAC,iBAAiB,aAAa,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;IACrE,OAAO,CAAC,GAAG,CAAC,iBAAiB,aAAa,CAAC,MAAM,CAAC,OAAO,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC;IACjE,OAAO,CAAC,GAAG,CAAC,iBAAiB,aAAa,CAAC,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;IACvE,OAAO,CAAC,GAAG,CAAC,iBAAiB,aAAa,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;IACrE,OAAO,CAAC,GAAG,CAAC,oBAAoB,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE,OAAO,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE,OAAO,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE,OAAO,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE,EAAE,CAAC,CAAC;IAErK,UAAU;IACV,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;IACjC,OAAO,CAAC,GAAG,CAAC,WAAW,QAAQ,CAAC,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IACvD,OAAO,CAAC,GAAG,CAAC,WAAW,QAAQ,CAAC,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IACvD,OAAO,CAAC,GAAG,CAAC,WAAW,QAAQ,CAAC,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IACvD,OAAO,CAAC,GAAG,CAAC,WAAW,QAAQ,CAAC,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACxD,OAAO,CAAC,GAAG,CAAC,WAAW,QAAQ,CAAC,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IAEvD,eAAe;IACf,OAAO,CAAC,GAAG,CAAC,uBAAuB,CAAC,CAAC;IACrC,KAAK,MAAM,GAAG,IAAI,MAAM,CAAC,UAAU,EAAE,CAAC;QACpC,MAAM,MAAM,GAAG,GAAG,CAAC,aAAa,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,aAAa,GAAG,CAAC,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;QAChG,MAAM,GAAG,GAAG,GAAG,CAAC,cAAc,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,GAAG,CAAC,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;QAC3F,OAAO,CAAC,GAAG,CAAC,KAAK,GAAG,CAAC,QAAQ,YAAY,aAAa,CAAC,GAAG,CAAC,OAAO,CAAC,MAAM,CAAC,cAAc,aAAa,CAAC,GAAG,CAAC,OAAO,CAAC,SAAS,CAAC,OAAO,aAAa,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC,GAAG,MAAM,GAAG,GAAG,EAAE,CAAC,CAAC;IACrL,CAAC;IAED,iBAAiB;IACjB,OAAO,CAAC,GAAG,CAAC,yBAAyB,CAAC,CAAC;IACvC,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,YAAY,EAAE,CAAC;QAC
vC,OAAO,CAAC,GAAG,CAAC,KAAK,IAAI,CAAC,UAAU,YAAY,aAAa,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,cAAc,aAAa,CAAC,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,OAAO,aAAa,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC;IAC5K,CAAC;IAED,iBAAiB;IACjB,IAAI,MAAM,CAAC,aAAa,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACpC,OAAO,CAAC,GAAG,CAAC,yBAAyB,MAAM,CAAC,aAAa,CAAC,MAAM,OAAO,CAAC,CAAC;QACzE,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,aAAa,EAAE,CAAC;YACrC,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,QAAQ,IAAI,CAAC,CAAC,UAAU,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;QACrE,CAAC;IACH,CAAC;IAED,kBAAkB;IAClB,IAAI,MAAM,CAAC,cAAc,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACrC,OAAO,CAAC,GAAG,CAAC,0BAA0B,MAAM,CAAC,cAAc,CAAC,MAAM,OAAO,CAAC,CAAC;QAC3E,KAAK,MAAM,EAAE,IAAI,MAAM,CAAC,cAAc,EAAE,CAAC;YACvC,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,YAAY,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACnE,CAAC;IACH,CAAC;IAED,eAAe;IACf,OAAO,CAAC,GAAG,CAAC,wBAAwB,CAAC,CAAC;IACtC,OAAO,CAAC,GAAG,CAAC,yBAAyB,WAAW,CAAC,mBAAmB,EAAE,CAAC,CAAC;IACxE,OAAO,CAAC,GAAG,CAAC,kBAAkB,WAAW,CAAC,UAAU,EAAE,CAAC,CAAC;IACxD,OAAO,CAAC,GAAG,CAAC,wBAAwB,WAAW,CAAC,eAAe,EAAE,CAAC,CAAC;IAEnE,IAAI,WAAW,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACpC,OAAO,CAAC,GAAG,CAAC,kCAAkC,CAAC,CAAC;QAChD,KAAK,MAAM,IAAI,IAAI,WAAW,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC;YACrD,MAAM,SAAS,GAAG,IAAI,CAAC,UAAU,GAAG,CAAC;gBACnC,CAAC,CAAC,aAAa,CAAC,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,UAAU,CAAC;gBAC/C,CAAC,CAAC,KAAK,CAAC;YACV,OAAO,CAAC,GAAG,CAAC,OAAO,IAAI,CAAC,MAAM,aAAa,IAAI,CAAC,UAAU,OAAO,IAAI,CAAC,OAAO,OAAO,IAAI,CAAC,OAAO,cAAc,SAAS,YAAY,IAAI,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QACtK,CAAC;IACH,CAAC;IAED,IAAI,WAAW,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACrC,OAAO,CAAC,GAAG,CAAC,6CAA6C,CAAC,CAAC;QAC3D,KAAK,MAAM,IAAI,IAAI,WAAW,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC;YACtD,OAAO,CAAC,GAAG,CAAC,OAAO,IAAI,CAAC,MAAM,aAAa,IAAI,CAAC,UAAU,OAAO,IAAI,CAAC,OAAO,OAAO,IAAI,CAAC,OAAO,gBAAgB,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACjJ,CAAC;IACH,CAAC;IAED,I
AAI,WAAW,CAAC,iBAAiB,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC7C,OAAO,CAAC,GAAG,CAAC,0BAA0B,WAAW,CAAC,iBAAiB,CAAC,MAAM,IAAI,CAAC,CAAC;QAChF,KAAK,MAAM,EAAE,IAAI,WAAW,CAAC,iBAAiB,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC;YAC5D,OAAO,CAAC,GAAG,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;QAC3B,CAAC;QACD,IAAI,WAAW,CAAC,iBAAiB,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;YAC9C,OAAO,CAAC,GAAG,CAAC,eAAe,WAAW,CAAC,iBAAiB,CAAC,MAAM,GAAG,EAAE,OAAO,CAAC,CAAC;QAC/E,CAAC;IACH,CAAC;IAED,mBAAmB;IACnB,OAAO,CAAC,GAAG,CAAC,4BAA4B,CAAC,CAAC;IAC1C,IAAI,UAAU,CAAC,MAAM,EAAE,CAAC;QACtB,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;IAC1B,CAAC;SAAM,CAAC;QACN,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;QACzB,KAAK,MAAM,CAAC,IAAI,UAAU,CAAC,UAAU,EAAE,CAAC;YACtC,OAAO,CAAC,GAAG,CAAC,SAAS,CAAC,EAAE,CAAC,CAAC;QAC5B,CAAC;IACH,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,sBAAsB,UAAU,EAAE,CAAC,CAAC;IAChD,OAAO,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;IAEvB,IAAI,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC;QACvB,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC;IACvB,CAAC;AACH,CAAC"}
@@ -0,0 +1,13 @@
1
+ /**
2
+ * Evaluation framework public API
3
+ * @module agent-threat-rules/eval
4
+ */
5
+ export { EVAL_CORPUS, getAttackSamples, getBenignSamples, getSamplesByCategory, getSamplesByDifficulty, getCorpusStats } from './corpus.js';
6
+ export type { CorpusSample } from './corpus.js';
7
+ export { computeEvalReport, checkRegression } from './metrics.js';
8
+ export type { SampleResult, ConfusionMatrix, ClassMetrics, LatencyStats, CategoryBreakdown, DifficultyBreakdown, EvalReport, RegressionCheck, BaselineThresholds, } from './metrics.js';
9
+ export { runEval, runEvalCLI } from './eval-harness.js';
10
+ export type { EvalConfig } from './eval-harness.js';
11
+ export { computeRuleQuality } from './rule-metrics.js';
12
+ export type { RuleQuality, RuleQualityReport } from './rule-metrics.js';
13
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/eval/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,WAAW,EAAE,gBAAgB,EAAE,gBAAgB,EAAE,oBAAoB,EAAE,sBAAsB,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAC5I,YAAY,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAEhD,OAAO,EAAE,iBAAiB,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAClE,YAAY,EACV,YAAY,EACZ,eAAe,EACf,YAAY,EACZ,YAAY,EACZ,iBAAiB,EACjB,mBAAmB,EACnB,UAAU,EACV,eAAe,EACf,kBAAkB,GACnB,MAAM,cAAc,CAAC;AAEtB,OAAO,EAAE,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AACxD,YAAY,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAEpD,OAAO,EAAE,kBAAkB,EAAE,MAAM,mBAAmB,CAAC;AACvD,YAAY,EAAE,WAAW,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAC"}
@@ -0,0 +1,9 @@
1
+ /**
2
+ * Evaluation framework public API
3
+ * @module agent-threat-rules/eval
4
+ */
5
+ export { EVAL_CORPUS, getAttackSamples, getBenignSamples, getSamplesByCategory, getSamplesByDifficulty, getCorpusStats } from './corpus.js';
6
+ export { computeEvalReport, checkRegression } from './metrics.js';
7
+ export { runEval, runEvalCLI } from './eval-harness.js';
8
+ export { computeRuleQuality } from './rule-metrics.js';
9
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/eval/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,WAAW,EAAE,gBAAgB,EAAE,gBAAgB,EAAE,oBAAoB,EAAE,sBAAsB,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAG5I,OAAO,EAAE,iBAAiB,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAalE,OAAO,EAAE,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAGxD,OAAO,EAAE,kBAAkB,EAAE,MAAM,mBAAmB,CAAC"}
@@ -0,0 +1,74 @@
1
+ /**
2
+ * Evaluation Metrics -- computes precision, recall, F1, confusion matrix,
3
+ * per-category breakdowns, and latency percentiles.
4
+ *
5
+ * No function mutates its input or has side effects; only the timestamp written by computeEvalReport varies between calls.
6
+ *
7
+ * @module agent-threat-rules/eval/metrics
8
+ */
9
+ export interface SampleResult { // Outcome of evaluating a single corpus sample.
10
+ readonly id: string; // identifier listed in CategoryBreakdown.missedSamples / falsePositives
11
+ readonly category: string; // grouping key for EvalReport.byCategory
12
+ readonly expectedDetection: boolean; // label: true when the sample should be detected
13
+ readonly actualDetection: boolean; // true when the evaluated run reported a detection
14
+ readonly matchedRules: readonly string[]; // presumably ids of the rules that fired; not read by the metrics code -- confirm against the harness
15
+ readonly confidence: number; // not read by the metrics code; range is set by the producer (TODO confirm)
16
+ readonly latencyMs: number; // per-sample latency in milliseconds, summarised in LatencyStats
17
+ readonly difficulty: string; // grouping key for EvalReport.byDifficulty
18
+ readonly tier: string; // not read by the metrics code; meaning is defined by the producer (TODO confirm)
19
+ }
20
+ export interface ConfusionMatrix { // Counts of the four expected/actual detection combinations.
21
+ readonly tp: number; // expected a detection and got one
22
+ readonly fp: number; // expected no detection but got one
23
+ readonly tn: number; // expected no detection and got none
24
+ readonly fn: number; // expected a detection but got none
25
+ }
26
+ export interface ClassMetrics { // Ratios derived from one confusion matrix.
27
+ readonly precision: number; // tp / (tp + fp); 1 when nothing was detected
28
+ readonly recall: number; // tp / (tp + fn); 1 when no detection was expected
29
+ readonly f1: number; // harmonic mean of precision and recall; 0 when both are 0
30
+ readonly accuracy: number; // (tp + tn) / sampleCount; 1 when sampleCount is 0
31
+ readonly fpRate: number; // fp / (fp + tn); 0 when there are no negative samples
32
+ readonly confusion: ConfusionMatrix; // the counts the ratios above were computed from
33
+ readonly sampleCount: number; // number of samples in the evaluated subset
34
+ }
35
+ export interface LatencyStats { // Summary of SampleResult.latencyMs, in milliseconds; every field is 0 for an empty result set.
36
+ readonly p50: number; // 50th-percentile latency
37
+ readonly p95: number; // 95th-percentile latency (compared with BaselineThresholds.maxP95LatencyMs)
38
+ readonly p99: number; // 99th-percentile latency
39
+ readonly mean: number; // arithmetic mean latency
40
+ readonly max: number; // largest observed latency
41
+ }
42
+ export interface CategoryBreakdown { // Metrics restricted to the results of one category.
43
+ readonly category: string; // the SampleResult.category value shared by the subset
44
+ readonly metrics: ClassMetrics; // metrics computed over that subset only
45
+ readonly missedSamples: readonly string[]; // ids of samples expected to be detected that were not
46
+ readonly falsePositives: readonly string[]; // ids of samples detected although no detection was expected
47
+ }
48
+ export interface DifficultyBreakdown { // Metrics restricted to the results of one difficulty level.
49
+ readonly difficulty: string; // the SampleResult.difficulty value shared by the subset
50
+ readonly metrics: ClassMetrics; // metrics computed over that subset only
51
+ }
52
+ export interface EvalReport { // Full evaluation summary produced by computeEvalReport.
53
+ readonly timestamp: string; // ISO-8601 time at which the report was built
54
+ readonly corpusSize: number; // number of results the report was computed from
55
+ readonly overall: ClassMetrics; // metrics over all results
56
+ readonly latency: LatencyStats; // latency summary over all results
57
+ readonly byCategory: readonly CategoryBreakdown[]; // one entry per distinct category, in first-seen order
58
+ readonly byDifficulty: readonly DifficultyBreakdown[]; // one entry per distinct difficulty, in first-seen order
59
+ readonly missedAttacks: readonly SampleResult[]; // results expected to be detected that were not
60
+ readonly falsePositives: readonly SampleResult[]; // results detected although no detection was expected
61
+ }
62
+ export declare function computeEvalReport(results: readonly SampleResult[]): EvalReport; // Builds overall, per-category and per-difficulty metrics plus latency stats; the input array is not modified.
63
+ export interface RegressionCheck { // Result of comparing a report against baseline thresholds.
64
+ readonly passed: boolean; // true exactly when violations is empty
65
+ readonly violations: readonly string[]; // one human-readable message per threshold that was breached
66
+ }
67
+ export interface BaselineThresholds { // Limits enforced by checkRegression on EvalReport.overall and EvalReport.latency.
68
+ readonly minRecall: number; // overall recall below this ratio is a violation (default 0.60)
69
+ readonly maxFpRate: number; // overall false-positive rate above this ratio is a violation (default 0.05)
70
+ readonly minF1: number; // overall F1 below this ratio is a violation (default 0.70)
71
+ readonly maxP95LatencyMs: number; // p95 latency above this many milliseconds is a violation (default 50)
72
+ }
73
+ export declare function checkRegression(report: EvalReport, thresholds?: BaselineThresholds): RegressionCheck; // Checks recall, FP rate, F1 and p95 latency against thresholds; built-in defaults apply when thresholds is omitted.
74
+ //# sourceMappingURL=metrics.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"metrics.d.ts","sourceRoot":"","sources":["../../src/eval/metrics.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,MAAM,WAAW,YAAY;IAC3B,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC1B,QAAQ,CAAC,iBAAiB,EAAE,OAAO,CAAC;IACpC,QAAQ,CAAC,eAAe,EAAE,OAAO,CAAC;IAClC,QAAQ,CAAC,YAAY,EAAE,SAAS,MAAM,EAAE,CAAC;IACzC,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,WAAW,eAAe;IAC9B,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,YAAY;IAC3B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC1B,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,SAAS,EAAE,eAAe,CAAC;IACpC,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;CAC9B;AAED,MAAM,WAAW,YAAY;IAC3B,QAAQ,CAAC,GAAG,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,GAAG,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,GAAG,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,GAAG,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,iBAAiB;IAChC,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC1B,QAAQ,CAAC,OAAO,EAAE,YAAY,CAAC;IAC/B,QAAQ,CAAC,aAAa,EAAE,SAAS,MAAM,EAAE,CAAC;IAC1C,QAAQ,CAAC,cAAc,EAAE,SAAS,MAAM,EAAE,CAAC;CAC5C;AAED,MAAM,WAAW,mBAAmB;IAClC,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,OAAO,EAAE,YAAY,CAAC;CAChC;AAED,MAAM,WAAW,UAAU;IACzB,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,OAAO,EAAE,YAAY,CAAC;IAC/B,QAAQ,CAAC,OAAO,EAAE,YAAY,CAAC;IAC/B,QAAQ,CAAC,UAAU,EAAE,SAAS,iBAAiB,EAAE,CAAC;IAClD,QAAQ,CAAC,YAAY,EAAE,SAAS,mBAAmB,EAAE,CAAC;IACtD,QAAQ,CAAC,aAAa,EAAE,SAAS,YAAY,EAAE,CAAC;IAChD,QAAQ,CAAC,cAAc,EAAE,SAAS,YAAY,EAAE,CAAC;CAClD;AAkDD,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,SAAS,YAAY,EAAE,GAAG,UAAU,CAsC9E;AAMD,MAAM,WAAW,eAAe;IAC9B,QAAQ,CAAC,MAAM,EAAE,OAAO,CAAC;IACzB,QAAQ,CAAC,UAAU,EAAE,SAAS,MAAM,EAAE,CAAC;CACxC;AAED,MAAM,WAAW,kBAAkB;IACjC
,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,eAAe,EAAE,MAAM,CAAC;CAClC;AASD,wBAAgB,eAAe,CAC7B,MAAM,EAAE,UAAU,EAClB,UAAU,GAAE,kBAAuC,GAClD,eAAe,CA4BjB"}
@@ -0,0 +1,108 @@
1
+ /**
2
+ * Evaluation Metrics -- computes precision, recall, F1, confusion matrix,
3
+ * per-category breakdowns, and latency percentiles.
4
+ *
5
+ * No function mutates its input or has side effects; only the timestamp written by computeEvalReport varies between calls.
6
+ *
7
+ * @module agent-threat-rules/eval/metrics
8
+ */
9
+ // ---------------------------------------------------------------------------
10
+ // Core metric calculations
11
+ // ---------------------------------------------------------------------------
12
/**
 * Tallies detection outcomes into a confusion matrix.
 *
 * @param results - evaluated samples; `expectedDetection` is the label and
 *   `actualDetection` is what was reported
 * @returns counts of true positives, false positives, true negatives and
 *   false negatives
 */
function buildConfusionMatrix(results) {
  const counts = { tp: 0, fp: 0, tn: 0, fn: 0 };
  for (const sample of results) {
    // Branch on the reported outcome first, then on the label.
    if (sample.actualDetection) {
      if (sample.expectedDetection) {
        counts.tp += 1;
      } else {
        counts.fp += 1;
      }
    } else if (sample.expectedDetection) {
      counts.fn += 1;
    } else {
      counts.tn += 1;
    }
  }
  return counts;
}
26
/**
 * Derives precision, recall, F1, accuracy and false-positive rate from a
 * confusion matrix.
 *
 * Ratios with an empty denominator fall back to fixed values: precision,
 * recall and accuracy become 1, while F1 and the false-positive rate become 0.
 *
 * @param cm - confusion matrix counts (`tp`, `fp`, `tn`, `fn`)
 * @param sampleCount - number of samples the matrix was built from
 * @returns the derived metrics together with `cm` and `sampleCount`
 */
function computeClassMetrics(cm, sampleCount) {
  const detected = cm.tp + cm.fp;
  const expectedPositive = cm.tp + cm.fn;
  const expectedNegative = cm.fp + cm.tn;

  let precision = 1;
  if (detected > 0) {
    precision = cm.tp / detected;
  }

  let recall = 1;
  if (expectedPositive > 0) {
    recall = cm.tp / expectedPositive;
  }

  let f1 = 0;
  if (precision + recall > 0) {
    f1 = 2 * precision * recall / (precision + recall);
  }

  let accuracy = 1;
  if (sampleCount > 0) {
    accuracy = (cm.tp + cm.tn) / sampleCount;
  }

  let fpRate = 0;
  if (expectedNegative > 0) {
    fpRate = cm.fp / expectedNegative;
  }

  return { precision, recall, f1, accuracy, fpRate, confusion: cm, sampleCount };
}
34
+ function computeLatency(results) {
35
+ if (results.length === 0) {
36
+ return { p50: 0, p95: 0, p99: 0, mean: 0, max: 0 };
37
+ }
38
+ const sorted = [...results].map((r) => r.latencyMs).sort((a, b) => a - b);
39
+ const len = sorted.length;
40
+ return {
41
+ p50: sorted[Math.floor(len * 0.5)] ?? 0,
42
+ p95: sorted[Math.floor(len * 0.95)] ?? 0,
43
+ p99: sorted[Math.floor(len * 0.99)] ?? 0,
44
+ mean: sorted.reduce((a, b) => a + b, 0) / len,
45
+ max: sorted[len - 1] ?? 0,
46
+ };
47
+ }
48
+ // ---------------------------------------------------------------------------
49
+ // Report generation
50
+ // ---------------------------------------------------------------------------
51
/**
 * Builds the evaluation report for a set of sample results.
 *
 * Produces overall metrics, latency statistics, one breakdown per distinct
 * category and per distinct difficulty (both in first-seen order), and the
 * lists of missed attacks and false positives. The report is stamped with the
 * time at which it was assembled.
 *
 * @param results - evaluated samples; the array is not modified
 * @returns the assembled report
 */
export function computeEvalReport(results) {
  const isMissedAttack = (r) => r.expectedDetection && !r.actualDetection;
  const isFalsePositive = (r) => !r.expectedDetection && r.actualDetection;
  const metricsFor = (subset) => computeClassMetrics(buildConfusionMatrix(subset), subset.length);
  // Distinct values of one field, in the order they first appear.
  const distinctValues = (pick) => Array.from(new Set(results.map(pick)));

  const overall = metricsFor(results);
  const latency = computeLatency(results);

  // By category
  const byCategory = distinctValues((r) => r.category).map((category) => {
    const subset = results.filter((r) => r.category === category);
    return {
      category,
      metrics: metricsFor(subset),
      missedSamples: subset.filter(isMissedAttack).map((r) => r.id),
      falsePositives: subset.filter(isFalsePositive).map((r) => r.id),
    };
  });

  // By difficulty
  const byDifficulty = distinctValues((r) => r.difficulty).map((difficulty) => {
    const subset = results.filter((r) => r.difficulty === difficulty);
    return { difficulty, metrics: metricsFor(subset) };
  });

  const missedAttacks = results.filter(isMissedAttack);
  const falsePositives = results.filter(isFalsePositive);

  return {
    timestamp: new Date().toISOString(),
    corpusSize: results.length,
    overall,
    latency,
    byCategory,
    byDifficulty,
    missedAttacks,
    falsePositives,
  };
}
86
/** Baseline limits applied when the caller supplies no thresholds. */
const DEFAULT_THRESHOLDS = {
  minRecall: 0.60,
  maxFpRate: 0.05,
  minF1: 0.70,
  maxP95LatencyMs: 50,
};

/**
 * Compares a report against baseline thresholds.
 *
 * Checks run in a fixed order (recall, false-positive rate, F1, p95 latency)
 * and every breached threshold contributes one message.
 *
 * @param report - report whose `overall` metrics and `latency` are checked
 * @param thresholds - limits to enforce; defaults to `DEFAULT_THRESHOLDS`
 * @returns `passed` (true when nothing was breached) and the violation messages
 */
export function checkRegression(report, thresholds = DEFAULT_THRESHOLDS) {
  const asPercent = (ratio, digits) => (ratio * 100).toFixed(digits);

  // Messages are built lazily so values are only formatted for breached checks.
  const checks = [
    {
      breached: report.overall.recall < thresholds.minRecall,
      describe: () => `Recall ${asPercent(report.overall.recall, 1)}% < minimum ${asPercent(thresholds.minRecall, 1)}%`,
    },
    {
      breached: report.overall.fpRate > thresholds.maxFpRate,
      describe: () => `FP rate ${asPercent(report.overall.fpRate, 3)}% > maximum ${asPercent(thresholds.maxFpRate, 3)}%`,
    },
    {
      breached: report.overall.f1 < thresholds.minF1,
      describe: () => `F1 ${asPercent(report.overall.f1, 1)}% < minimum ${asPercent(thresholds.minF1, 1)}%`,
    },
    {
      breached: report.latency.p95 > thresholds.maxP95LatencyMs,
      describe: () => `P95 latency ${report.latency.p95.toFixed(1)}ms > maximum ${thresholds.maxP95LatencyMs}ms`,
    },
  ];

  const violations = [];
  for (const check of checks) {
    if (check.breached) {
      violations.push(check.describe());
    }
  }

  return { passed: violations.length === 0, violations };
}
108
+ //# sourceMappingURL=metrics.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"metrics.js","sourceRoot":"","sources":["../../src/eval/metrics.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AA8DH,8EAA8E;AAC9E,2BAA2B;AAC3B,8EAA8E;AAE9E,SAAS,oBAAoB,CAAC,OAAgC;IAC5D,IAAI,EAAE,GAAG,CAAC,EAAE,EAAE,GAAG,CAAC,EAAE,EAAE,GAAG,CAAC,EAAE,EAAE,GAAG,CAAC,CAAC;IAEnC,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;QACxB,IAAI,CAAC,CAAC,iBAAiB,IAAI,CAAC,CAAC,eAAe;YAAE,EAAE,EAAE,CAAC;aAC9C,IAAI,CAAC,CAAC,CAAC,iBAAiB,IAAI,CAAC,CAAC,eAAe;YAAE,EAAE,EAAE,CAAC;aACpD,IAAI,CAAC,CAAC,CAAC,iBAAiB,IAAI,CAAC,CAAC,CAAC,eAAe;YAAE,EAAE,EAAE,CAAC;;YACrD,EAAE,EAAE,CAAC;IACZ,CAAC;IAED,OAAO,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,CAAC;AAC5B,CAAC;AAED,SAAS,mBAAmB,CAAC,EAAmB,EAAE,WAAmB;IACnE,MAAM,SAAS,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAClE,MAAM,MAAM,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC/D,MAAM,EAAE,GAAG,SAAS,GAAG,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,SAAS,GAAG,MAAM,GAAG,CAAC,SAAS,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IACtF,MAAM,QAAQ,GAAG,WAAW,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,CAAC,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC;IACrE,MAAM,MAAM,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAE/D,OAAO,EAAE,SAAS,EAAE,MAAM,EAAE,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,SAAS,EAAE,EAAE,EAAE,WAAW,EAAE,CAAC;AACjF,CAAC;AAED,SAAS,cAAc,CAAC,OAAgC;IACtD,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,CAAC;IACrD,CAAC;IAED,MAAM,MAAM,GAAG,CAAC,GAAG,OAAO,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC1E,MAAM,GAAG,GAAG,MAAM,CAAC,MA
AM,CAAC;IAE1B,OAAO;QACL,GAAG,EAAE,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,GAAG,GAAG,CAAC,CAAC,IAAI,CAAC;QACvC,GAAG,EAAE,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,GAAG,IAAI,CAAC,CAAC,IAAI,CAAC;QACxC,GAAG,EAAE,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,GAAG,IAAI,CAAC,CAAC,IAAI,CAAC;QACxC,IAAI,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,GAAG;QAC7C,GAAG,EAAE,MAAM,CAAC,GAAG,GAAG,CAAC,CAAC,IAAI,CAAC;KAC1B,CAAC;AACJ,CAAC;AAED,8EAA8E;AAC9E,oBAAoB;AACpB,8EAA8E;AAE9E,MAAM,UAAU,iBAAiB,CAAC,OAAgC;IAChE,MAAM,SAAS,GAAG,oBAAoB,CAAC,OAAO,CAAC,CAAC;IAChD,MAAM,OAAO,GAAG,mBAAmB,CAAC,SAAS,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC;IAC/D,MAAM,OAAO,GAAG,cAAc,CAAC,OAAO,CAAC,CAAC;IAExC,cAAc;IACd,MAAM,UAAU,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;IAChE,MAAM,UAAU,GAAwB,UAAU,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;QAC7D,MAAM,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,GAAG,CAAC,CAAC;QAC7D,MAAM,EAAE,GAAG,oBAAoB,CAAC,UAAU,CAAC,CAAC;QAC5C,MAAM,OAAO,GAAG,mBAAmB,CAAC,EAAE,EAAE,UAAU,CAAC,MAAM,CAAC,CAAC;QAC3D,MAAM,MAAM,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,iBAAiB,IAAI,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QACpG,MAAM,GAAG,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,iBAAiB,IAAI,CAAC,CAAC,eAAe,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QACjG,OAAO,EAAE,QAAQ,EAAE,GAAG,EAAE,OAAO,EAAE,aAAa,EAAE,MAAM,EAAE,cAAc,EAAE,GAAG,EAAE,CAAC;IAChF,CAAC,CAAC,CAAC;IAEH,gBAAgB;IAChB,MAAM,YAAY,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;IACpE,MAAM,YAAY,GAA0B,YAAY,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QACpE,MAAM,WAAW,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,KAAK,IAAI,CAAC,CAAC;QACjE,MAAM,EAAE,GAAG,oBAAoB,CAAC,WAAW,CAAC,CAAC;QAC7C,MAAM,OAAO,GAAG,mBAAmB,CAAC,EAAE,EAAE,WAAW,CAAC,MAAM,CAAC,CAAC;QAC5D,OAAO,EAAE,U
AAU,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC;IACvC,CAAC,CAAC,CAAC;IAEH,MAAM,aAAa,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,iBAAiB,IAAI,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC;IACvF,MAAM,cAAc,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,iBAAiB,IAAI,CAAC,CAAC,eAAe,CAAC,CAAC;IAExF,OAAO;QACL,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACnC,UAAU,EAAE,OAAO,CAAC,MAAM;QAC1B,OAAO;QACP,OAAO;QACP,UAAU;QACV,YAAY;QACZ,aAAa;QACb,cAAc;KACf,CAAC;AACJ,CAAC;AAkBD,MAAM,kBAAkB,GAAuB;IAC7C,SAAS,EAAE,IAAI;IACf,SAAS,EAAE,IAAI;IACf,KAAK,EAAE,IAAI;IACX,eAAe,EAAE,EAAE;CACpB,CAAC;AAEF,MAAM,UAAU,eAAe,CAC7B,MAAkB,EAClB,aAAiC,kBAAkB;IAEnD,MAAM,UAAU,GAAa,EAAE,CAAC;IAEhC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,GAAG,UAAU,CAAC,SAAS,EAAE,CAAC;QACjD,UAAU,CAAC,IAAI,CACb,UAAU,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC,UAAU,CAAC,SAAS,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAC5G,CAAC;IACJ,CAAC;IAED,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,GAAG,UAAU,CAAC,SAAS,EAAE,CAAC;QACjD,UAAU,CAAC,IAAI,CACb,WAAW,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC,UAAU,CAAC,SAAS,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAC7G,CAAC;IACJ,CAAC;IAED,IAAI,MAAM,CAAC,OAAO,CAAC,EAAE,GAAG,UAAU,CAAC,KAAK,EAAE,CAAC;QACzC,UAAU,CAAC,IAAI,CACb,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,EAAE,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC,UAAU,CAAC,KAAK,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAChG,CAAC;IACJ,CAAC;IAED,IAAI,MAAM,CAAC,OAAO,CAAC,GAAG,GAAG,UAAU,CAAC,eAAe,EAAE,CAAC;QACpD,UAAU,CAAC,IAAI,CACb,eAAe,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,gBAAgB,UAAU,CAAC,eAAe,IAAI,CAC3F,CAAC;IACJ,CAAC;IAED,OAAO,EAAE,MAAM,EAAE,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE,UAAU,EAAE,CAAC;AACzD,CAAC"}
@@ -0,0 +1,34 @@
1
+ /**
2
+ * PINT Benchmark Corpus Loader
3
+ *
4
+ * Reads the PINT-format dataset (JSON with text/category/label/source/language)
5
+ * built from publicly available prompt injection datasets:
6
+ * - deepset/prompt-injections (HuggingFace)
7
+ * - Lakera/gandalf_ignore_instructions (HuggingFace)
8
+ *
9
+ * Converts each sample into the CorpusSample interface used by the ATR eval
10
+ * harness, allowing the PINT corpus to be evaluated alongside or instead of
11
+ * the built-in hand-crafted corpus.
12
+ *
13
+ * @module agent-threat-rules/eval/pint-corpus
14
+ */
15
+ import type { CorpusSample } from './corpus.js';
16
+ /**
17
+ * Load the PINT benchmark corpus from a JSON file on disk.
18
+ *
19
+ * @param dataPath - Absolute path to pint-corpus.json
20
+ * @returns Readonly array of CorpusSample for use with runEval()
21
+ */
22
+ export declare function loadPintCorpus(dataPath: string): readonly CorpusSample[];
23
+ /**
24
+ * Get summary statistics for the loaded PINT corpus.
25
+ */
26
+ export declare function getPintCorpusStats(corpus: readonly CorpusSample[]): {
27
+ readonly total: number;
28
+ readonly attacks: number;
29
+ readonly benign: number;
30
+ readonly byCategory: Readonly<Record<string, number>>;
31
+ readonly byDifficulty: Readonly<Record<string, number>>;
32
+ readonly byLanguage: Readonly<Record<string, number>>;
33
+ };
34
+ //# sourceMappingURL=pint-corpus.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"pint-corpus.d.ts","sourceRoot":"","sources":["../../src/eval/pint-corpus.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAGH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAsDhD;;;;;GAKG;AACH,wBAAgB,cAAc,CAAC,QAAQ,EAAE,MAAM,GAAG,SAAS,YAAY,EAAE,CAyCxE;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,MAAM,EAAE,SAAS,YAAY,EAAE,GAAG;IACnE,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,UAAU,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;IACtD,QAAQ,CAAC,YAAY,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;IACxD,QAAQ,CAAC,UAAU,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;CACvD,CAoBA"}