veto-sdk 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +209 -0
- package/dist/benchmark/cli.d.ts +22 -0
- package/dist/benchmark/cli.d.ts.map +1 -0
- package/dist/benchmark/cli.js +238 -0
- package/dist/benchmark/cli.js.map +1 -0
- package/dist/benchmark/index.d.ts +10 -0
- package/dist/benchmark/index.d.ts.map +1 -0
- package/dist/benchmark/index.js +10 -0
- package/dist/benchmark/index.js.map +1 -0
- package/dist/benchmark/loader.d.ts +19 -0
- package/dist/benchmark/loader.d.ts.map +1 -0
- package/dist/benchmark/loader.js +321 -0
- package/dist/benchmark/loader.js.map +1 -0
- package/dist/benchmark/metrics.d.ts +35 -0
- package/dist/benchmark/metrics.d.ts.map +1 -0
- package/dist/benchmark/metrics.js +195 -0
- package/dist/benchmark/metrics.js.map +1 -0
- package/dist/benchmark/runner.d.ts +39 -0
- package/dist/benchmark/runner.d.ts.map +1 -0
- package/dist/benchmark/runner.js +279 -0
- package/dist/benchmark/runner.js.map +1 -0
- package/dist/benchmark/types.d.ts +188 -0
- package/dist/benchmark/types.d.ts.map +1 -0
- package/dist/benchmark/types.js +24 -0
- package/dist/benchmark/types.js.map +1 -0
- package/dist/cli/bin.d.ts +8 -0
- package/dist/cli/bin.d.ts.map +1 -0
- package/dist/cli/bin.js +120 -0
- package/dist/cli/bin.js.map +1 -0
- package/dist/cli/config.d.ts +126 -0
- package/dist/cli/config.d.ts.map +1 -0
- package/dist/cli/config.js +137 -0
- package/dist/cli/config.js.map +1 -0
- package/dist/cli/index.d.ts +9 -0
- package/dist/cli/index.d.ts.map +1 -0
- package/dist/cli/index.js +9 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/cli/init.d.ts +64 -0
- package/dist/cli/init.d.ts.map +1 -0
- package/dist/cli/init.js +160 -0
- package/dist/cli/init.js.map +1 -0
- package/dist/cli/templates.d.ts +22 -0
- package/dist/cli/templates.d.ts.map +1 -0
- package/dist/cli/templates.js +132 -0
- package/dist/cli/templates.js.map +1 -0
- package/dist/core/history.d.ts +104 -0
- package/dist/core/history.d.ts.map +1 -0
- package/dist/core/history.js +148 -0
- package/dist/core/history.js.map +1 -0
- package/dist/core/index.d.ts +10 -0
- package/dist/core/index.d.ts.map +1 -0
- package/dist/core/index.js +10 -0
- package/dist/core/index.js.map +1 -0
- package/dist/core/interceptor.d.ts +96 -0
- package/dist/core/interceptor.d.ts.map +1 -0
- package/dist/core/interceptor.js +227 -0
- package/dist/core/interceptor.js.map +1 -0
- package/dist/core/validator.d.ts +107 -0
- package/dist/core/validator.d.ts.map +1 -0
- package/dist/core/validator.js +263 -0
- package/dist/core/validator.js.map +1 -0
- package/dist/core/veto.d.ts +265 -0
- package/dist/core/veto.d.ts.map +1 -0
- package/dist/core/veto.js +681 -0
- package/dist/core/veto.js.map +1 -0
- package/dist/index.d.ts +43 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +40 -0
- package/dist/index.js.map +1 -0
- package/dist/kernel/client.d.ts +82 -0
- package/dist/kernel/client.d.ts.map +1 -0
- package/dist/kernel/client.js +162 -0
- package/dist/kernel/client.js.map +1 -0
- package/dist/kernel/index.d.ts +9 -0
- package/dist/kernel/index.d.ts.map +1 -0
- package/dist/kernel/index.js +9 -0
- package/dist/kernel/index.js.map +1 -0
- package/dist/kernel/prompt.d.ts +27 -0
- package/dist/kernel/prompt.d.ts.map +1 -0
- package/dist/kernel/prompt.js +127 -0
- package/dist/kernel/prompt.js.map +1 -0
- package/dist/kernel/types.d.ts +85 -0
- package/dist/kernel/types.d.ts.map +1 -0
- package/dist/kernel/types.js +52 -0
- package/dist/kernel/types.js.map +1 -0
- package/dist/providers/adapters.d.ts +167 -0
- package/dist/providers/adapters.d.ts.map +1 -0
- package/dist/providers/adapters.js +244 -0
- package/dist/providers/adapters.js.map +1 -0
- package/dist/providers/index.d.ts +11 -0
- package/dist/providers/index.d.ts.map +1 -0
- package/dist/providers/index.js +11 -0
- package/dist/providers/index.js.map +1 -0
- package/dist/providers/types.d.ts +92 -0
- package/dist/providers/types.d.ts.map +1 -0
- package/dist/providers/types.js +10 -0
- package/dist/providers/types.js.map +1 -0
- package/dist/rules/api-client.d.ts +103 -0
- package/dist/rules/api-client.d.ts.map +1 -0
- package/dist/rules/api-client.js +241 -0
- package/dist/rules/api-client.js.map +1 -0
- package/dist/rules/index.d.ts +10 -0
- package/dist/rules/index.d.ts.map +1 -0
- package/dist/rules/index.js +10 -0
- package/dist/rules/index.js.map +1 -0
- package/dist/rules/loader.d.ts +116 -0
- package/dist/rules/loader.d.ts.map +1 -0
- package/dist/rules/loader.js +300 -0
- package/dist/rules/loader.js.map +1 -0
- package/dist/rules/rule-validator.d.ts +135 -0
- package/dist/rules/rule-validator.d.ts.map +1 -0
- package/dist/rules/rule-validator.js +239 -0
- package/dist/rules/rule-validator.js.map +1 -0
- package/dist/rules/types.d.ts +162 -0
- package/dist/rules/types.d.ts.map +1 -0
- package/dist/rules/types.js +16 -0
- package/dist/rules/types.js.map +1 -0
- package/dist/types/config.d.ts +171 -0
- package/dist/types/config.d.ts.map +1 -0
- package/dist/types/config.js +31 -0
- package/dist/types/config.js.map +1 -0
- package/dist/types/index.d.ts +8 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +8 -0
- package/dist/types/index.js.map +1 -0
- package/dist/types/tool.d.ts +156 -0
- package/dist/types/tool.d.ts.map +1 -0
- package/dist/types/tool.js +27 -0
- package/dist/types/tool.js.map +1 -0
- package/dist/utils/glob.d.ts +21 -0
- package/dist/utils/glob.d.ts.map +1 -0
- package/dist/utils/glob.js +147 -0
- package/dist/utils/glob.js.map +1 -0
- package/dist/utils/id.d.ts +28 -0
- package/dist/utils/id.d.ts.map +1 -0
- package/dist/utils/id.js +43 -0
- package/dist/utils/id.js.map +1 -0
- package/dist/utils/index.d.ts +9 -0
- package/dist/utils/index.d.ts.map +1 -0
- package/dist/utils/index.js +9 -0
- package/dist/utils/index.js.map +1 -0
- package/dist/utils/logger.d.ts +97 -0
- package/dist/utils/logger.d.ts.map +1 -0
- package/dist/utils/logger.js +153 -0
- package/dist/utils/logger.js.map +1 -0
- package/package.json +90 -0
|
@@ -0,0 +1,321 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Dataset loader for benchmark samples.
|
|
3
|
+
*
|
|
4
|
+
* Parses JSONL training data files and extracts benchmark samples
|
|
5
|
+
* with tool calls, rules, and expected decisions.
|
|
6
|
+
*
|
|
7
|
+
* @module benchmark/loader
|
|
8
|
+
*/
|
|
9
|
+
import { readFileSync, existsSync } from 'node:fs';
|
|
10
|
+
import { basename, dirname } from 'node:path';
|
|
11
|
+
import { glob } from '../utils/glob.js';
|
|
12
|
+
/**
 * Load benchmark samples from every JSONL file matching a glob pattern.
 *
 * Each non-blank line is parsed as JSON and converted into a sample. Lines
 * that fail to parse, or that throw during conversion, are skipped. The id
 * counter advances for every line that is valid JSON, even when that line is
 * later rejected as a sample.
 *
 * @param pattern - Glob pattern for JSONL files
 * @param maxSamples - Maximum samples to return (0 = all)
 * @param shuffle - Whether to shuffle samples before truncating
 * @param seed - Random seed forwarded to the shuffle
 * @returns The loaded, optionally shuffled and truncated, samples
 * @throws Error when no file matches the pattern or no valid sample is found
 */
export async function loadBenchmarkSamples(pattern, maxSamples = 0, shuffle = false, seed) {
    const matchedFiles = await glob(pattern);
    if (matchedFiles.length === 0) {
        throw new Error(`No files found matching pattern: ${pattern}`);
    }
    const collected = [];
    let nextId = 0;
    for (const filePath of matchedFiles) {
        // A path may have disappeared between globbing and reading.
        if (existsSync(filePath)) {
            const category = extractCategory(filePath);
            const rawLines = readFileSync(filePath, 'utf-8')
                .trim()
                .split('\n')
                .filter(rawLine => rawLine.trim());
            for (const rawLine of rawLines) {
                try {
                    const example = JSON.parse(rawLine);
                    const sample = parseTrainingExample(example, nextId++, filePath, category);
                    if (sample) {
                        collected.push(sample);
                    }
                }
                catch {
                    // Malformed lines are ignored on purpose (best-effort load).
                }
            }
        }
    }
    if (collected.length === 0) {
        throw new Error('No valid samples found in dataset');
    }
    const ordered = shuffle ? shuffleArray(collected, seed) : collected;
    const mustTruncate = maxSamples > 0 && ordered.length > maxSamples;
    return mustTruncate ? ordered.slice(0, maxSamples) : ordered;
}
|
|
58
|
+
/**
 * Convert one training example into a benchmark sample.
 *
 * The example must carry at least three messages, including one with role
 * "user" (tool call + rules) and one with role "assistant" (expected result).
 *
 * @param example - Parsed JSONL record with a `messages` array
 * @param id - Numeric id used to build the `sample-<id>` identifier
 * @param sourceFile - File the example was read from
 * @param category - Category label attached to the sample
 * @returns The sample, or null when any required part is missing or invalid
 */
function parseTrainingExample(example, id, sourceFile, category) {
    const messages = example.messages;
    if (!messages || messages.length < 3) {
        return null;
    }
    const firstWithRole = (role) => messages.find(message => message.role === role);
    const userMessage = firstWithRole('user');
    const assistantMessage = firstWithRole('assistant');
    if (!(userMessage && assistantMessage)) {
        return null;
    }
    // The user message carries the tool call and the rule set.
    const request = parseUserContent(userMessage.content);
    if (!request.tool || !request.rules) {
        return null;
    }
    // The assistant message carries the expected decision and weights.
    const expected = parseExpectedResponse(assistantMessage.content);
    if (!expected) {
        return null;
    }
    return {
        id: `sample-${id}`,
        tool: request.tool,
        arguments: request.arguments,
        rules: request.rules,
        expectedDecision: expected.decision,
        expectedPassWeight: expected.pass_weight,
        expectedBlockWeight: expected.block_weight,
        sourceFile,
        category,
    };
}
|
|
92
|
+
/**
 * Split user content into its "TOOL CALL:" and "RULES:" sections and parse
 * each one.
 *
 * @param content - Raw user message text
 * @returns `tool` (name or null), `arguments` (object, empty by default) and
 *   `rules` (parsed rule list, or null when there is no RULES section)
 */
function parseUserContent(content) {
    const toolCallHit = content.match(/TOOL CALL:\s*\n([\s\S]*?)(?=\nRULES:|$)/);
    const rulesHit = content.match(/RULES:\s*\n([\s\S]*?)$/);
    let tool = null;
    let args = {};
    if (toolCallHit) {
        const section = toolCallHit[1];
        // Tool name: first non-whitespace token after "tool:".
        const nameHit = /tool:\s*(\S+)/.exec(section);
        if (nameHit !== null) {
            tool = nameHit[1];
        }
        // Arguments: everything after the "arguments:" line (YAML-like).
        const argsHit = /arguments:\s*\n([\s\S]*)/.exec(section);
        if (argsHit !== null) {
            args = parseYamlLikeArgs(argsHit[1]);
        }
    }
    const rules = rulesHit ? parseRulesSection(rulesHit[1]) : null;
    return { tool, arguments: args, rules };
}
|
|
122
|
+
/**
 * Parse YAML-like `key: value` argument lines.
 *
 * Only lines indented by exactly two whitespace characters and starting with
 * a word key are recognised; every other line is ignored.
 *
 * @param content - Text following the "arguments:" line
 * @returns Object mapping each key to its parsed value
 */
function parseYamlLikeArgs(content) {
    const entryPattern = /^\s{2}(\w+):\s*(.*)$/;
    return content.split('\n').reduce((parsed, rawLine) => {
        const hit = entryPattern.exec(rawLine);
        if (hit !== null) {
            parsed[hit[1]] = parseValue(hit[2].trim());
        }
        return parsed;
    }, {});
}
|
|
137
|
+
/**
 * Parse a YAML-like scalar or inline array.
 *
 * Recognised forms, checked in this order:
 *  - quoted strings (`"x"`, `'x'`, including empty `""` / `''`) -> contents
 *  - `true` / `false` -> boolean
 *  - `null` / `~` -> null
 *  - anything `Number()` accepts (except the empty string) -> number
 *  - `[a, b, ...]` -> array whose items are parsed recursively
 *  - everything else -> the input string unchanged
 *
 * Array items are split on top-level commas only, so commas inside quoted
 * items or nested arrays no longer break the item apart.
 *
 * @param value - Trimmed raw value text
 * @returns The parsed value
 */
function parseValue(value) {
    // Quoted string: needs both an opening and a matching closing quote, so a
    // lone quote character is kept as-is instead of collapsing to ''.
    if (value.length >= 2) {
        const opener = value[0];
        if ((opener === '"' || opener === "'") && value.endsWith(opener)) {
            return value.slice(1, -1);
        }
    }
    // Boolean
    if (value === 'true')
        return true;
    if (value === 'false')
        return false;
    // Null
    if (value === 'null' || value === '~')
        return null;
    // Number (Number('') is 0, hence the explicit empty-string guard)
    const num = Number(value);
    if (value !== '' && !isNaN(num))
        return num;
    // Inline array
    if (value.startsWith('[') && value.endsWith(']')) {
        const inner = value.slice(1, -1);
        if (inner.trim() === '')
            return [];
        return splitArrayItems(inner).map(item => parseValue(item.trim()));
    }
    // Default to string
    return value;
}
/**
 * Split the inside of an inline array on top-level commas.
 *
 * A quote only opens a quoted run when it is the first non-blank character of
 * an item, so apostrophes inside bare words (`don't`) are left alone. When
 * quotes or brackets are unbalanced the text is malformed and the function
 * falls back to a plain comma split.
 */
function splitArrayItems(inner) {
    const items = [];
    let current = '';
    let quote = null;
    let depth = 0;
    for (const ch of inner) {
        if (quote !== null) {
            if (ch === quote)
                quote = null;
            current += ch;
            continue;
        }
        if ((ch === '"' || ch === "'") && current.trim() === '') {
            quote = ch;
        }
        else if (ch === '[') {
            depth += 1;
        }
        else if (ch === ']') {
            if (depth === 0)
                return inner.split(',');
            depth -= 1;
        }
        else if (ch === ',' && depth === 0) {
            items.push(current);
            current = '';
            continue;
        }
        current += ch;
    }
    if (quote !== null || depth !== 0)
        return inner.split(',');
    items.push(current);
    return items;
}
|
|
171
|
+
/**
 * Parse the RULES section into rule objects.
 *
 * The text is cut at every line that starts with "- id:"; each piece is
 * handed to the single-rule parser, and pieces that do not yield a valid rule
 * are dropped.
 *
 * @param content - Text following the "RULES:" line
 * @returns The successfully parsed rules, in input order
 */
function parseRulesSection(content) {
    return content
        .split(/^- id:/m)
        .filter(Boolean)
        // The split consumed the "- id:" marker, so put it back.
        .map(chunk => parseRuleBlock('- id:' + chunk))
        .filter(rule => Boolean(rule));
}
|
|
185
|
+
/**
 * Parse a single YAML-like rule block that starts with a "- id:" line.
 *
 * Recognised two-space-indented fields: `name`, `enabled`, `severity`,
 * `action`, `tools` (inline `[a, b]` or a block list of `- item` lines) and
 * `conditions` (a block list of `- field:` entries with `operator:` and
 * `value:` lines). A `condition_groups` header is recognised but its content
 * is not parsed.
 *
 * Fixes over the previous version:
 *  - section headers with no inline value (`  conditions:`, `  tools:`) are
 *    now matched, so conditions and block-style tool lists are collected;
 *  - CRLF line endings are accepted;
 *  - field values are trimmed, and tool names lose surrounding quotes;
 *  - `tools: []` yields an empty list instead of `['']`.
 *
 * @param block - Text of one rule, beginning with "- id:"
 * @returns The rule, or null when id, name, enabled, severity or action is
 *   missing
 */
function parseRuleBlock(block) {
    // Trim a scalar and strip one pair of matching surrounding quotes.
    const unquote = (text) => {
        const trimmed = text.trim();
        const opener = trimmed[0];
        const quoted = trimmed.length >= 2 &&
            (opener === '"' || opener === "'") &&
            trimmed.endsWith(opener);
        return quoted ? trimmed.slice(1, -1) : trimmed;
    };
    let id;
    let name;
    let enabled;
    let severity;
    let action;
    let tools;
    let conditions = [];
    let currentSection = null;
    let currentCondition = null;
    // Split on LF or CRLF: `.` in the patterns below never matches `\r`.
    for (const line of block.split(/\r?\n/)) {
        // Rule ID
        const idMatch = line.match(/^- id:\s*(.+)$/);
        if (idMatch) {
            id = idMatch[1].trim();
            continue;
        }
        // Top-level fields. The value may be empty so bare section headers
        // such as "  conditions:" are recognised.
        const fieldMatch = line.match(/^\s{2}(\w+):\s*(.*)$/);
        if (fieldMatch) {
            const key = fieldMatch[1];
            const value = fieldMatch[2].trim();
            switch (key) {
                case 'name':
                    if (value)
                        name = value;
                    break;
                case 'enabled':
                    if (value)
                        enabled = value === 'true';
                    break;
                case 'severity':
                    if (value)
                        severity = value;
                    break;
                case 'action':
                    if (value)
                        action = value;
                    break;
                case 'tools':
                    // Inline array: [tool1, tool2]
                    if (value.startsWith('[')) {
                        const inner = value.endsWith(']') ? value.slice(1, -1) : value.slice(1);
                        tools = inner.split(',').map(unquote).filter(item => item !== '');
                    }
                    currentSection = 'tools';
                    break;
                case 'conditions':
                    currentSection = 'conditions';
                    conditions = [];
                    break;
                case 'condition_groups':
                    currentSection = 'condition_groups';
                    break;
            }
            continue;
        }
        // Block-style tools list:  "    - tool_name"
        if (currentSection === 'tools') {
            const itemMatch = line.match(/^\s{2,6}-\s+(.+)$/);
            if (itemMatch) {
                const toolName = unquote(itemMatch[1]);
                if (toolName !== '') {
                    if (tools === undefined)
                        tools = [];
                    tools.push(toolName);
                }
            }
            continue;
        }
        // Condition entries
        if (currentSection === 'conditions') {
            const condFieldMatch = line.match(/^\s{4,6}- field:\s*(.+)$/);
            if (condFieldMatch) {
                // A new "- field:" line closes the condition being built.
                if (currentCondition !== null) {
                    conditions.push(currentCondition);
                }
                currentCondition = { field: condFieldMatch[1].trim() };
                continue;
            }
            const operatorMatch = line.match(/^\s{6,8}operator:\s*(.+)$/);
            if (operatorMatch) {
                if (currentCondition === null)
                    currentCondition = {};
                currentCondition.operator = operatorMatch[1].trim();
                continue;
            }
            const valueMatch = line.match(/^\s{6,8}value:\s*(.+)$/);
            if (valueMatch) {
                if (currentCondition === null)
                    currentCondition = {};
                currentCondition.value = parseValue(valueMatch[1].trim());
                continue;
            }
        }
    }
    // Flush the last condition
    if (currentCondition !== null) {
        conditions.push(currentCondition);
    }
    // Validate required fields
    if (!id || !name || enabled === undefined || !severity || !action) {
        return null;
    }
    return {
        id,
        name,
        enabled,
        severity,
        action,
        ...(tools !== undefined ? { tools } : {}),
        ...(conditions.length > 0 ? { conditions } : {}),
    };
}
|
|
270
|
+
/**
 * Extract the expected decision object from assistant content.
 *
 * The text between the first "{" and the last "}" is parsed as JSON. The
 * result is accepted only when `pass_weight` and `block_weight` are numbers
 * and `decision` is "pass" or "block".
 *
 * @param content - Assistant message text (may contain extra text around the JSON)
 * @returns The parsed object, or null when extraction, parsing or validation fails
 */
function parseExpectedResponse(content) {
    let parsed;
    try {
        // Greedy on purpose: spans from the first "{" to the last "}".
        const braces = content.match(/\{[\s\S]*\}/);
        if (braces === null) {
            return null;
        }
        parsed = JSON.parse(braces[0]);
    }
    catch {
        return null;
    }
    const weightsAreNumeric = typeof parsed.pass_weight === 'number' &&
        typeof parsed.block_weight === 'number';
    const decisionIsKnown = ['pass', 'block'].includes(parsed.decision);
    return weightsAreNumeric && decisionIsKnown ? parsed : null;
}
|
|
291
|
+
/**
 * Build a category label from a file path.
 *
 * @param filePath - Path to a dataset file
 * @returns `<parent directory name>/<file name without .jsonl>`
 */
function extractCategory(filePath) {
    const parentName = basename(dirname(filePath));
    const stem = basename(filePath, '.jsonl');
    return [parentName, stem].join('/');
}
|
|
299
|
+
/**
 * Return a shuffled copy of an array (Fisher-Yates); the input is not modified.
 *
 * @param array - Items to shuffle
 * @param seed - When given, a seeded generator is used so the order is
 *   reproducible; otherwise Math.random is used
 * @returns A new array containing the same items in shuffled order
 */
function shuffleArray(array, seed) {
    const shuffled = Array.from(array);
    const nextRandom = seed === undefined ? Math.random : seededRandom(seed);
    // Walk from the end; swap each slot with a random slot at or before it.
    for (let upper = shuffled.length - 1; upper >= 1; upper -= 1) {
        const pick = Math.floor(nextRandom() * (upper + 1));
        const held = shuffled[upper];
        shuffled[upper] = shuffled[pick];
        shuffled[pick] = held;
    }
    return shuffled;
}
|
|
311
|
+
/**
 * Create a seeded pseudo-random number generator (31-bit linear
 * congruential generator, multiplier 1103515245, increment 12345).
 *
 * The multiplication is done with Math.imul: a plain `state * 1103515245`
 * exceeds 2^53, so the low bits kept by the mask would be rounding noise.
 * The result is divided by 2^31 rather than 2^31 - 1, so the returned value
 * is always strictly below 1 and `Math.floor(r * n)` can never reach `n`.
 *
 * Note: sequences differ from those produced by the previous implementation
 * for the same seed.
 *
 * @param seed - Seed value; only its low 32 bits are used
 * @returns A function returning the next value in [0, 1)
 */
function seededRandom(seed) {
    let state = seed;
    return () => {
        state = (Math.imul(state, 1103515245) + 12345) & 0x7fffffff;
        return state / 0x80000000;
    };
}
|
|
321
|
+
//# sourceMappingURL=loader.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"loader.js","sourceRoot":"","sources":["../../src/benchmark/loader.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,EAAE,YAAY,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AACnD,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAC9C,OAAO,EAAE,IAAI,EAAE,MAAM,kBAAkB,CAAC;AA8BxC;;;;;;;GAOG;AACH,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,OAAe,EACf,aAAqB,CAAC,EACtB,UAAmB,KAAK,EACxB,IAAa;IAEb,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,CAAC;IAElC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,MAAM,IAAI,KAAK,CAAC,oCAAoC,OAAO,EAAE,CAAC,CAAC;IACjE,CAAC;IAED,MAAM,OAAO,GAAsB,EAAE,CAAC;IACtC,IAAI,QAAQ,GAAG,CAAC,CAAC;IAEjB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC;YAAE,SAAS;QAEhC,MAAM,QAAQ,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;QACvC,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;QAC5C,MAAM,KAAK,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC;QAErE,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,IAAI,CAAC;gBACH,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAoB,CAAC;gBACpD,MAAM,MAAM,GAAG,oBAAoB,CAAC,OAAO,EAAE,QAAQ,EAAE,EAAE,IAAI,EAAE,QAAQ,CAAC,CAAC;gBAEzE,IAAI,MAAM,EAAE,CAAC;oBACX,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;gBACvB,CAAC;YACH,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,uBAAuB;gBACvB,SAAS;YACX,CAAC;QACH,CAAC;IACH,CAAC;IAED,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;IACvD,CAAC;IAED,uBAAuB;IACvB,IAAI,MAAM,GAAG,OAAO,CAAC,CAAC,CAAC,YAAY,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC;IAE7D,6BAA6B;IAC7B,IAAI,UAAU,GAAG,CAAC,IAAI,MAAM,CAAC,MAAM,GAAG,UAAU,EAAE,CAAC;QACjD,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC;IACvC,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,SAAS,oBAAoB,CAC3B,OAAwB,EACxB,EAAU,EACV,UAAkB,EAClB,QAAgB;IAEhB,IAAI,CAAC,OAAO,CAAC,QAAQ,IAAI,OAAO,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACrD,OAAO,IAAI,CAAC;IACd,CAAC;IAED,MAAM,WAAW,GAAG,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC;IAClE,MAAM,gBAAgB,GAAG,OAAO,CA
AC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,WAAW,CAAC,CAAC;IAE5E,IAAI,CAAC,WAAW,IAAI,CAAC,gBAAgB,EAAE,CAAC;QACtC,OAAO,IAAI,CAAC;IACd,CAAC;IAED,oDAAoD;IACpD,MAAM,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,gBAAgB,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC;IAE/E,IAAI,CAAC,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;QACpB,OAAO,IAAI,CAAC;IACd,CAAC;IAED,0BAA0B;IAC1B,MAAM,QAAQ,GAAG,qBAAqB,CAAC,gBAAgB,CAAC,OAAO,CAAC,CAAC;IAEjE,IAAI,CAAC,QAAQ,EAAE,CAAC;QACd,OAAO,IAAI,CAAC;IACd,CAAC;IAED,OAAO;QACL,EAAE,EAAE,UAAU,EAAE,EAAE;QAClB,IAAI;QACJ,SAAS,EAAE,IAAI;QACf,KAAK;QACL,gBAAgB,EAAE,QAAQ,CAAC,QAAQ;QACnC,kBAAkB,EAAE,QAAQ,CAAC,WAAW;QACxC,mBAAmB,EAAE,QAAQ,CAAC,YAAY;QAC1C,UAAU;QACV,QAAQ;KACT,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,gBAAgB,CAAC,OAAe;IAKvC,MAAM,MAAM,GAIR;QACF,IAAI,EAAE,IAAI;QACV,SAAS,EAAE,EAAE;QACb,KAAK,EAAE,IAAI;KACZ,CAAC;IAEF,0CAA0C;IAC1C,MAAM,aAAa,GAAG,OAAO,CAAC,KAAK,CAAC,yCAAyC,CAAC,CAAC;IAC/E,MAAM,UAAU,GAAG,OAAO,CAAC,KAAK,CAAC,wBAAwB,CAAC,CAAC;IAE3D,IAAI,aAAa,EAAE,CAAC;QAClB,MAAM,WAAW,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC;QAErC,oBAAoB;QACpB,MAAM,aAAa,GAAG,WAAW,CAAC,KAAK,CAAC,eAAe,CAAC,CAAC;QACzD,IAAI,aAAa,EAAE,CAAC;YAClB,MAAM,CAAC,IAAI,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC;QACjC,CAAC;QAED,uCAAuC;QACvC,MAAM,SAAS,GAAG,WAAW,CAAC,KAAK,CAAC,0BAA0B,CAAC,CAAC;QAChE,IAAI,SAAS,EAAE,CAAC;YACd,MAAM,CAAC,SAAS,GAAG,iBAAiB,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC;QACrD,CAAC;IACH,CAAC;IAED,IAAI,UAAU,EAAE,CAAC;QACf,MAAM,CAAC,KAAK,GAAG,iBAAiB,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC;IAClD,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,SAAS,iBAAiB,CAAC,OAAe;IACxC,MAAM,IAAI,GAA4B,EAAE,CAAC;IACzC,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,sBAAsB,CAAC,CAAC;QACjD,IAAI,KAAK,EAAE,CAAC;YACV,MAAM,CAAC,EAAE,GAAG,EAAE,QAAQ,CAAC,GAAG,KAAK,CAAC;YAChC,IAAI,CAAC,GAAG,CAAC,GAAG,UAAU,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC;QAC1C,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;GAEG;AACH,SAAS,UAAU,CAAC,KAAa;IAC/B,eAAe;IACf,IAAI,KAAK,KAAK,IAAI
,IAAI,KAAK,KAAK,IAAI;QAAE,OAAO,EAAE,CAAC;IAEhD,gBAAgB;IAChB,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;QAC9C,CAAC,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC;QACnD,OAAO,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;IAC5B,CAAC;IAED,UAAU;IACV,IAAI,KAAK,KAAK,MAAM;QAAE,OAAO,IAAI,CAAC;IAClC,IAAI,KAAK,KAAK,OAAO;QAAE,OAAO,KAAK,CAAC;IAEpC,OAAO;IACP,IAAI,KAAK,KAAK,MAAM,IAAI,KAAK,KAAK,GAAG;QAAE,OAAO,IAAI,CAAC;IAEnD,SAAS;IACT,MAAM,GAAG,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC;IAC1B,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,KAAK,KAAK,EAAE;QAAE,OAAO,GAAG,CAAC;IAE5C,iBAAiB;IACjB,IAAI,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;QACjD,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QACjC,IAAI,KAAK,KAAK,EAAE;YAAE,OAAO,EAAE,CAAC;QAC5B,OAAO,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;IACzD,CAAC;IAED,oBAAoB;IACpB,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;GAEG;AACH,SAAS,iBAAiB,CAAC,OAAe;IACxC,MAAM,KAAK,GAAW,EAAE,CAAC;IACzB,MAAM,UAAU,GAAG,OAAO,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IAE5D,KAAK,MAAM,KAAK,IAAI,UAAU,EAAE,CAAC;QAC/B,MAAM,IAAI,GAAG,cAAc,CAAC,OAAO,GAAG,KAAK,CAAC,CAAC;QAC7C,IAAI,IAAI,EAAE,CAAC;YACT,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACnB,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;GAEG;AACH,SAAS,cAAc,CAAC,KAAa;IACnC,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAChC,MAAM,IAAI,GAAkB,EAAE,CAAC;IAC/B,IAAI,cAAc,GAAuD,IAAI,CAAC;IAC9E,IAAI,UAAU,GAAoB,EAAE,CAAC;IACrC,IAAI,gBAAgB,GAA2B,EAAE,CAAC;IAElD,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,UAAU;QACV,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC;QAC7C,IAAI,OAAO,EAAE,CAAC;YACZ,IAAI,CAAC,EAAE,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;YAC5B,SAAS;QACX,CAAC;QAED,gBAAgB;QAChB,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,sBAAsB,CAAC,CAAC;QACtD,IAAI,UAAU,EAAE,CAAC;YACf,MAAM,CAAC,EAAE,GAAG,EAAE,KAAK,CAAC,GAAG,UAAU,CAAC;YAClC,QAAQ,GAAG,EAAE,CAAC;gBACZ,KAAK,MA
AM;oBACT,IAAI,CAAC,IAAI,GAAG,KAAK,CAAC;oBAClB,MAAM;gBACR,KAAK,SAAS;oBACZ,IAAI,CAAC,OAAO,GAAG,KAAK,KAAK,MAAM,CAAC;oBAChC,MAAM;gBACR,KAAK,UAAU;oBACb,IAAI,CAAC,QAAQ,GAAG,KAAyB,CAAC;oBAC1C,MAAM;gBACR,KAAK,QAAQ;oBACX,IAAI,CAAC,MAAM,GAAG,KAAuB,CAAC;oBACtC,MAAM;gBACR,KAAK,OAAO;oBACV,qCAAqC;oBACrC,IAAI,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;wBAC1B,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;oBAChE,CAAC;oBACD,cAAc,GAAG,OAAO,CAAC;oBACzB,MAAM;gBACR,KAAK,YAAY;oBACf,cAAc,GAAG,YAAY,CAAC;oBAC9B,UAAU,GAAG,EAAE,CAAC;oBAChB,MAAM;gBACR,KAAK,kBAAkB;oBACrB,cAAc,GAAG,kBAAkB,CAAC;oBACpC,MAAM;YACV,CAAC;YACD,SAAS;QACX,CAAC;QAED,mBAAmB;QACnB,IAAI,cAAc,KAAK,YAAY,EAAE,CAAC;YACpC,MAAM,cAAc,GAAG,IAAI,CAAC,KAAK,CAAC,0BAA0B,CAAC,CAAC;YAC9D,IAAI,cAAc,EAAE,CAAC;gBACnB,IAAI,MAAM,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBAC7C,UAAU,CAAC,IAAI,CAAC,gBAAiC,CAAC,CAAC;gBACrD,CAAC;gBACD,gBAAgB,GAAG,EAAE,KAAK,EAAE,cAAc,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC;gBACvD,SAAS;YACX,CAAC;YAED,MAAM,aAAa,GAAG,IAAI,CAAC,KAAK,CAAC,2BAA2B,CAAC,CAAC;YAC9D,IAAI,aAAa,EAAE,CAAC;gBAClB,gBAAgB,CAAC,QAAQ,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC,IAAI,EAA+B,CAAC;gBACjF,SAAS;YACX,CAAC;YAED,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,wBAAwB,CAAC,CAAC;YACxD,IAAI,UAAU,EAAE,CAAC;gBACf,gBAAgB,CAAC,KAAK,GAAG,UAAU,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;gBAC1D,SAAS;YACX,CAAC;QACH,CAAC;IACH,CAAC;IAED,qBAAqB;IACrB,IAAI,MAAM,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC7C,UAAU,CAAC,IAAI,CAAC,gBAAiC,CAAC,CAAC;IACrD,CAAC;IAED,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC1B,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC;IAC/B,CAAC;IAED,2BAA2B;IAC3B,IAAI,CAAC,IAAI,CAAC,EAAE,IAAI,CAAC,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,OAAO,KAAK,SAAS,IAAI,CAAC,IAAI,CAAC,QAAQ,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;QAC3F,OAAO,IAAI,CAAC;IACd,CAAC;IAED,OAAO,IAAY,CAAC;AACtB,CAAC;AAED;;GAEG;AACH,SAAS,qBAAqB,CAAC,OAAe;IAC5C,IAAI,CAAC;QACH,oDAAoD;QACpD,MAAM,SAAS,GAAG
,OAAO,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;QAC/C,IAAI,CAAC,SAAS;YAAE,OAAO,IAAI,CAAC;QAE5B,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC;QAExC,IAAI,OAAO,MAAM,CAAC,WAAW,KAAK,QAAQ;YACtC,OAAO,MAAM,CAAC,YAAY,KAAK,QAAQ;YACvC,CAAC,MAAM,CAAC,QAAQ,KAAK,MAAM,IAAI,MAAM,CAAC,QAAQ,KAAK,OAAO,CAAC,EAAE,CAAC;YAChE,OAAO,IAAI,CAAC;QACd,CAAC;QAED,OAAO,MAA0B,CAAC;IACpC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAS,eAAe,CAAC,QAAgB;IACvC,MAAM,GAAG,GAAG,QAAQ,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC;IACxC,MAAM,IAAI,GAAG,QAAQ,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;IAC1C,OAAO,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC;AAC1B,CAAC;AAED;;GAEG;AACH,SAAS,YAAY,CAAI,KAAU,EAAE,IAAa;IAChD,MAAM,MAAM,GAAG,CAAC,GAAG,KAAK,CAAC,CAAC;IAC1B,IAAI,MAAM,GAAG,IAAI,KAAK,SAAS,CAAC,CAAC,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC;IAEnE,KAAK,IAAI,CAAC,GAAG,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAC3C,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;QACzC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;IAClD,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,SAAS,YAAY,CAAC,IAAY;IAChC,IAAI,KAAK,GAAG,IAAI,CAAC;IACjB,OAAO,GAAG,EAAE;QACV,KAAK,GAAG,CAAC,KAAK,GAAG,UAAU,GAAG,KAAK,CAAC,GAAG,UAAU,CAAC;QAClD,OAAO,KAAK,GAAG,UAAU,CAAC;IAC5B,CAAC,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Metrics calculation for benchmark results.
|
|
3
|
+
*
|
|
4
|
+
* @module benchmark/metrics
|
|
5
|
+
*/
|
|
6
|
+
import type { BenchmarkResult, ConfusionMatrix, LatencyStats, ClassificationMetrics, CategoryMetrics } from './types.js';
|
|
7
|
+
/**
|
|
8
|
+
* Calculate confusion matrix from benchmark results.
* Results whose `error` field is set are skipped, and so are results whose
* expected/actual decision pair is not a combination of "pass" and "block".
|
|
9
|
+
* Treats "block" as the positive class.
*
* @param results - Benchmark results to tally
* @returns Counts of true positives, true negatives, false positives and false negatives
|
|
10
|
+
*/
|
|
11
|
+
export declare function calculateConfusionMatrix(results: BenchmarkResult[]): ConfusionMatrix;
|
|
12
|
+
/**
|
|
13
|
+
* Calculate classification metrics from confusion matrix.
* Derives accuracy, precision, recall, F1 score, false-positive rate,
* false-negative rate and the Matthews correlation coefficient. Precision
* and recall are computed for the positive ("block") class. Any ratio whose
* denominator is zero is reported as 0.
*
* @param cm - Confusion matrix counts
* @returns The derived metrics
|
|
14
|
+
*/
|
|
15
|
+
export declare function calculateClassificationMetrics(cm: ConfusionMatrix): ClassificationMetrics;
|
|
16
|
+
/**
|
|
17
|
+
* Calculate latency statistics from benchmark results.
* Results whose `error` field is set are excluded. When no results remain,
* every statistic is 0. `stdDev` is the population standard deviation
* (divides by the number of results) and `totalMs` is the sum of latencies.
*
* @param results - Benchmark results providing `latencyMs`
* @returns min, max, mean, median, p95, p99, stdDev and totalMs
|
|
18
|
+
*/
|
|
19
|
+
export declare function calculateLatencyStats(results: BenchmarkResult[]): LatencyStats;
|
|
20
|
+
/**
|
|
21
|
+
* Calculate per-category metrics.
* NOTE(review): the implementation is not visible in this declaration file;
* presumably results are grouped by their sample's category - confirm.
*
* @param results - Benchmark results to aggregate
* @returns One metrics entry per category
|
|
22
|
+
*/
|
|
23
|
+
export declare function calculateCategoryMetrics(results: BenchmarkResult[]): CategoryMetrics[];
|
|
24
|
+
/**
|
|
25
|
+
* Calculate weight calibration metrics.
|
|
26
|
+
* Measures how well the model's confidence weights match decisions.
* NOTE(review): the implementation is not visible in this declaration file.
* The field names suggest mean pass/block weights split by decision, but
* whether that is the expected or the actual decision, and how
* `weightAccuracy` is defined, must be confirmed against the implementation.
*
* @param results - Benchmark results to analyse
* @returns Mean weights per decision plus an overall weight accuracy figure
|
|
27
|
+
*/
|
|
28
|
+
export declare function calculateWeightCalibration(results: BenchmarkResult[]): {
|
|
29
|
+
meanPassWeightForPass: number;
|
|
30
|
+
meanPassWeightForBlock: number;
|
|
31
|
+
meanBlockWeightForPass: number;
|
|
32
|
+
meanBlockWeightForBlock: number;
|
|
33
|
+
weightAccuracy: number;
|
|
34
|
+
};
|
|
35
|
+
//# sourceMappingURL=metrics.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"metrics.d.ts","sourceRoot":"","sources":["../../src/benchmark/metrics.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,KAAK,EACV,eAAe,EACf,eAAe,EACf,YAAY,EACZ,qBAAqB,EACrB,eAAe,EAChB,MAAM,YAAY,CAAC;AAEpB;;;GAGG;AACH,wBAAgB,wBAAwB,CAAC,OAAO,EAAE,eAAe,EAAE,GAAG,eAAe,CA0BpF;AAED;;GAEG;AACH,wBAAgB,8BAA8B,CAAC,EAAE,EAAE,eAAe,GAAG,qBAAqB,CAsCzF;AAED;;GAEG;AACH,wBAAgB,qBAAqB,CAAC,OAAO,EAAE,eAAe,EAAE,GAAG,YAAY,CA0C9E;AAqBD;;GAEG;AACH,wBAAgB,wBAAwB,CAAC,OAAO,EAAE,eAAe,EAAE,GAAG,eAAe,EAAE,CA8BtF;AAED;;;GAGG;AACH,wBAAgB,0BAA0B,CAAC,OAAO,EAAE,eAAe,EAAE,GAAG;IACtE,qBAAqB,EAAE,MAAM,CAAC;IAC9B,sBAAsB,EAAE,MAAM,CAAC;IAC/B,sBAAsB,EAAE,MAAM,CAAC;IAC/B,uBAAuB,EAAE,MAAM,CAAC;IAChC,cAAc,EAAE,MAAM,CAAC;CACxB,CAsCA"}
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Metrics calculation for benchmark results.
|
|
3
|
+
*
|
|
4
|
+
* @module benchmark/metrics
|
|
5
|
+
*/
|
|
6
|
+
/**
 * Tally a confusion matrix from benchmark results, with "block" as the
 * positive class.
 *
 * Results carrying an `error` are ignored, as are results whose expected or
 * actual decision is neither "pass" nor "block".
 *
 * @param results - Benchmark results (`sample.expectedDecision`, `actualDecision`, `error`)
 * @returns Counts of true/false positives and negatives
 */
export function calculateConfusionMatrix(results) {
    const counts = {
        truePositive: 0,
        trueNegative: 0,
        falsePositive: 0,
        falseNegative: 0,
    };
    for (const entry of results) {
        if (entry.error) {
            continue;
        }
        const expected = entry.sample.expectedDecision;
        const actual = entry.actualDecision;
        switch (expected) {
            case 'block':
                if (actual === 'block') {
                    counts.truePositive += 1;
                }
                else if (actual === 'pass') {
                    counts.falseNegative += 1;
                }
                break;
            case 'pass':
                if (actual === 'pass') {
                    counts.trueNegative += 1;
                }
                else if (actual === 'block') {
                    counts.falsePositive += 1;
                }
                break;
            default:
                // Unknown expected decision: not counted.
                break;
        }
    }
    return counts;
}
|
|
37
|
+
/**
 * Derive classification metrics from a confusion matrix.
 *
 * Precision and recall refer to the positive ("block") class. Every ratio
 * whose denominator is not greater than zero is reported as 0.
 *
 * @param cm - Confusion matrix counts
 * @returns accuracy, precision, recall, f1Score, falsePositiveRate,
 *   falseNegativeRate and mcc (Matthews correlation coefficient)
 */
export function calculateClassificationMetrics(cm) {
    const tp = cm.truePositive;
    const tn = cm.trueNegative;
    const fp = cm.falsePositive;
    const fn = cm.falseNegative;
    // Division that yields 0 instead of NaN/Infinity for an empty denominator.
    const safeRatio = (numerator, denominator) => (denominator > 0 ? numerator / denominator : 0);
    const precision = safeRatio(tp, tp + fp);
    const recall = safeRatio(tp, tp + fn);
    return {
        accuracy: safeRatio(tp + tn, tp + tn + fp + fn),
        precision,
        recall,
        f1Score: safeRatio(2 * (precision * recall), precision + recall),
        falsePositiveRate: safeRatio(fp, fp + tn),
        falseNegativeRate: safeRatio(fn, fn + tp),
        mcc: safeRatio((tp * tn) - (fp * fn), Math.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))),
    };
}
|
|
71
|
+
/**
 * Summarise the latencies of benchmark results.
 *
 * Results flagged with an error are excluded. When nothing remains, every
 * statistic is reported as 0.
 *
 * @param results - Benchmark results exposing `error` and `latencyMs`.
 * @returns min, max, mean, median, p95, p99, stdDev and totalMs.
 */
export function calculateLatencyStats(results) {
    // Collect the latencies of non-errored results, then order them ascending.
    const samples = [];
    for (const entry of results) {
        if (!entry.error) {
            samples.push(entry.latencyMs);
        }
    }
    samples.sort((x, y) => x - y);
    const count = samples.length;
    if (count === 0) {
        return { min: 0, max: 0, mean: 0, median: 0, p95: 0, p99: 0, stdDev: 0, totalMs: 0 };
    }
    let totalMs = 0;
    for (const value of samples) {
        totalMs = totalMs + value;
    }
    const mean = totalMs / count;
    // Standard deviation: the squared deviations are averaged over the full
    // sample count (divisor is `count`, not `count - 1`).
    let squaredDeviationSum = 0;
    for (const value of samples) {
        squaredDeviationSum = squaredDeviationSum + Math.pow(value - mean, 2);
    }
    const stdDev = Math.sqrt(squaredDeviationSum / count);
    return {
        min: samples[0],
        max: samples[count - 1],
        mean,
        median: percentile(samples, 50),
        p95: percentile(samples, 95),
        p99: percentile(samples, 99),
        stdDev,
        totalMs,
    };
}
|
|
113
|
+
/**
 * Percentile of an already-sorted numeric array, using linear interpolation
 * between the two nearest positions.
 *
 * @param sortedArray - Values in ascending order.
 * @param p - Percentile to read, expressed on a 0-100 scale.
 * @returns 0 for an empty array, the sole element for a one-element array,
 *   otherwise the (possibly interpolated) value at position
 *   `(p / 100) * (length - 1)`.
 */
function percentile(sortedArray, p) {
    const count = sortedArray.length;
    if (count === 0 || count === 1) {
        return count === 0 ? 0 : sortedArray[0];
    }
    // Fractional position of the requested percentile within the array.
    const rank = (p / 100) * (count - 1);
    const below = Math.floor(rank);
    const above = Math.ceil(rank);
    // The position falls exactly on an element: no interpolation needed.
    if (below === above) {
        return sortedArray[below];
    }
    const weight = rank - below;
    return sortedArray[below] * (1 - weight) + sortedArray[above] * weight;
}
|
|
130
|
+
/**
 * Break benchmark results down by sample category.
 *
 * Results whose sample has no category (null or undefined) are grouped under
 * "unknown". `sampleCount` is the size of the whole group, including results
 * flagged with an error.
 *
 * @param results - Benchmark results exposing `sample.category`.
 * @returns One entry per category (category, sampleCount, accuracy,
 *   confusionMatrix, latency), ordered by category name via `localeCompare`.
 */
export function calculateCategoryMetrics(results) {
    // Bucket the results under their category label.
    const groups = new Map();
    for (const entry of results) {
        const label = entry.sample.category ?? 'unknown';
        const bucket = groups.get(label);
        if (bucket) {
            bucket.push(entry);
        }
        else {
            groups.set(label, [entry]);
        }
    }
    // Build one summary row per bucket.
    const rows = Array.from(groups, ([category, members]) => {
        const confusionMatrix = calculateConfusionMatrix(members);
        const { accuracy } = calculateClassificationMetrics(confusionMatrix);
        const latency = calculateLatencyStats(members);
        return {
            category,
            sampleCount: members.length,
            accuracy,
            confusionMatrix,
            latency,
        };
    });
    rows.sort((left, right) => left.category.localeCompare(right.category));
    return rows;
}
|
|
159
|
+
/**
 * Calculate weight calibration metrics: how the reported pass/block weights
 * relate to the decisions that were actually made.
 *
 * Results flagged with an error are ignored. Means over an empty group and the
 * accuracy over zero valid results are reported as 0.
 *
 * @param results - Benchmark results exposing `error`, `actualDecision`,
 *   `actualPassWeight` and `actualBlockWeight`.
 * @returns Mean pass/block weight for "pass" decisions and for "block"
 *   decisions, plus `weightAccuracy`: the share of valid results whose larger
 *   weight names the decision taken (a tie counts as "block").
 */
export function calculateWeightCalibration(results) {
    // Per-decision running totals, filled in a single pass.
    const passGroup = { count: 0, passWeight: 0, blockWeight: 0 };
    const blockGroup = { count: 0, passWeight: 0, blockWeight: 0 };
    let validCount = 0;
    let agreeingCount = 0;
    for (const entry of results) {
        if (entry.error) {
            continue;
        }
        validCount += 1;
        // Pass wins only when its weight is strictly greater; otherwise block.
        const favoured = entry.actualPassWeight > entry.actualBlockWeight ? 'pass' : 'block';
        if (favoured === entry.actualDecision) {
            agreeingCount += 1;
        }
        let group = null;
        if (entry.actualDecision === 'pass') {
            group = passGroup;
        }
        else if (entry.actualDecision === 'block') {
            group = blockGroup;
        }
        // Decisions other than pass/block contribute to no mean.
        if (group !== null) {
            group.count += 1;
            group.passWeight = group.passWeight + entry.actualPassWeight;
            group.blockWeight = group.blockWeight + entry.actualBlockWeight;
        }
    }
    // Average that reports 0 for an empty group.
    const average = (total, count) => (count > 0 ? total / count : 0);
    return {
        meanPassWeightForPass: average(passGroup.passWeight, passGroup.count),
        meanPassWeightForBlock: average(blockGroup.passWeight, blockGroup.count),
        meanBlockWeightForPass: average(passGroup.blockWeight, passGroup.count),
        meanBlockWeightForBlock: average(blockGroup.blockWeight, blockGroup.count),
        weightAccuracy: average(agreeingCount, validCount),
    };
}
|
|
195
|
+
//# sourceMappingURL=metrics.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"metrics.js","sourceRoot":"","sources":["../../src/benchmark/metrics.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAUH;;;GAGG;AACH,MAAM,UAAU,wBAAwB,CAAC,OAA0B;IACjE,IAAI,EAAE,GAAG,CAAC,EAAE,EAAE,GAAG,CAAC,EAAE,EAAE,GAAG,CAAC,EAAE,EAAE,GAAG,CAAC,CAAC;IAEnC,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,IAAI,MAAM,CAAC,KAAK;YAAE,SAAS;QAE3B,MAAM,QAAQ,GAAG,MAAM,CAAC,MAAM,CAAC,gBAAgB,CAAC;QAChD,MAAM,MAAM,GAAG,MAAM,CAAC,cAAc,CAAC;QAErC,IAAI,QAAQ,KAAK,OAAO,IAAI,MAAM,KAAK,OAAO,EAAE,CAAC;YAC/C,EAAE,EAAE,CAAC;QACP,CAAC;aAAM,IAAI,QAAQ,KAAK,MAAM,IAAI,MAAM,KAAK,MAAM,EAAE,CAAC;YACpD,EAAE,EAAE,CAAC;QACP,CAAC;aAAM,IAAI,QAAQ,KAAK,MAAM,IAAI,MAAM,KAAK,OAAO,EAAE,CAAC;YACrD,EAAE,EAAE,CAAC;QACP,CAAC;aAAM,IAAI,QAAQ,KAAK,OAAO,IAAI,MAAM,KAAK,MAAM,EAAE,CAAC;YACrD,EAAE,EAAE,CAAC;QACP,CAAC;IACH,CAAC;IAED,OAAO;QACL,YAAY,EAAE,EAAE;QAChB,YAAY,EAAE,EAAE;QAChB,aAAa,EAAE,EAAE;QACjB,aAAa,EAAE,EAAE;KAClB,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,8BAA8B,CAAC,EAAmB;IAChE,MAAM,EAAE,YAAY,EAAE,EAAE,EAAE,YAAY,EAAE,EAAE,EAAE,aAAa,EAAE,EAAE,EAAE,aAAa,EAAE,EAAE,EAAE,GAAG,EAAE,CAAC;IACxF,MAAM,KAAK,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,CAAC;IAEhC,WAAW;IACX,MAAM,QAAQ,GAAG,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IAEnD,8BAA8B;IAC9B,MAAM,SAAS,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAErD,2BAA2B;IAC3B,MAAM,MAAM,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAElD,WAAW;IACX,MAAM,OAAO,GAAG,CAAC,SAAS,GAAG,MAAM,CAAC,GAAG,CAAC;QACtC,CAAC,CAAC,CAAC,GAAG,CAAC,SAAS,GAAG,MAAM,CAAC,GAAG,CAAC,SAAS,GAAG,MAAM,CAAC;QACjD,CAAC,CAAC,CAAC,CAAC;IAEN,sBAAsB;IACtB,MAAM,iBAAiB,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAE7D,sBAAsB;IACtB,MAAM,iBAAiB,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAE7D
,mCAAmC;IACnC,MAAM,YAAY,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,CAAC;IAC3C,MAAM,cAAc,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,EAAE,GAAG,EAAE,CAAC,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC;IAChF,MAAM,GAAG,GAAG,cAAc,GAAG,CAAC,CAAC,CAAC,CAAC,YAAY,GAAG,cAAc,CAAC,CAAC,CAAC,CAAC,CAAC;IAEnE,OAAO;QACL,QAAQ;QACR,SAAS;QACT,MAAM;QACN,OAAO;QACP,iBAAiB;QACjB,iBAAiB;QACjB,GAAG;KACJ,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,qBAAqB,CAAC,OAA0B;IAC9D,MAAM,SAAS,GAAG,OAAO;SACtB,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC;SACrB,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC;SACrB,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAEzB,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC3B,OAAO;YACL,GAAG,EAAE,CAAC;YACN,GAAG,EAAE,CAAC;YACN,IAAI,EAAE,CAAC;YACP,MAAM,EAAE,CAAC;YACT,GAAG,EAAE,CAAC;YACN,GAAG,EAAE,CAAC;YACN,MAAM,EAAE,CAAC;YACT,OAAO,EAAE,CAAC;SACX,CAAC;IACJ,CAAC;IAED,MAAM,GAAG,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;IACzB,MAAM,GAAG,GAAG,SAAS,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAC5C,MAAM,GAAG,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;IACjD,MAAM,IAAI,GAAG,GAAG,GAAG,SAAS,CAAC,MAAM,CAAC;IACpC,MAAM,MAAM,GAAG,UAAU,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;IACzC,MAAM,GAAG,GAAG,UAAU,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;IACtC,MAAM,GAAG,GAAG,UAAU,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;IAEtC,qBAAqB;IACrB,MAAM,YAAY,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IAC/D,MAAM,cAAc,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,SAAS,CAAC,MAAM,CAAC;IAClF,MAAM,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;IAEzC,OAAO;QACL,GAAG;QACH,GAAG;QACH,IAAI;QACJ,MAAM;QACN,GAAG;QACH,GAAG;QACH,MAAM;QACN,OAAO,EAAE,GAAG;KACb,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,UAAU,CAAC,WAAqB,EAAE,CAAS;IAClD,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IACvC,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,WAAW,CAAC,CAAC,
CAAC,CAAC;IAEpD,MAAM,KAAK,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,GAAG,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACnD,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IAChC,MAAM,KAAK,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAC/B,MAAM,QAAQ,GAAG,KAAK,GAAG,KAAK,CAAC;IAE/B,IAAI,KAAK,KAAK,KAAK,EAAE,CAAC;QACpB,OAAO,WAAW,CAAC,KAAK,CAAC,CAAC;IAC5B,CAAC;IAED,OAAO,WAAW,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,QAAQ,CAAC,GAAG,WAAW,CAAC,KAAK,CAAC,GAAG,QAAQ,CAAC;AAC7E,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,wBAAwB,CAAC,OAA0B;IACjE,4BAA4B;IAC5B,MAAM,UAAU,GAAG,IAAI,GAAG,EAA6B,CAAC;IAExD,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,MAAM,QAAQ,GAAG,MAAM,CAAC,MAAM,CAAC,QAAQ,IAAI,SAAS,CAAC;QACrD,MAAM,QAAQ,GAAG,UAAU,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;QAChD,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACtB,UAAU,CAAC,GAAG,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;IACrC,CAAC;IAED,sCAAsC;IACtC,MAAM,eAAe,GAAsB,EAAE,CAAC;IAE9C,KAAK,MAAM,CAAC,QAAQ,EAAE,eAAe,CAAC,IAAI,UAAU,EAAE,CAAC;QACrD,MAAM,eAAe,GAAG,wBAAwB,CAAC,eAAe,CAAC,CAAC;QAClE,MAAM,OAAO,GAAG,8BAA8B,CAAC,eAAe,CAAC,CAAC;QAChE,MAAM,OAAO,GAAG,qBAAqB,CAAC,eAAe,CAAC,CAAC;QAEvD,eAAe,CAAC,IAAI,CAAC;YACnB,QAAQ;YACR,WAAW,EAAE,eAAe,CAAC,MAAM;YACnC,QAAQ,EAAE,OAAO,CAAC,QAAQ;YAC1B,eAAe;YACf,OAAO;SACR,CAAC,CAAC;IACL,CAAC;IAED,wBAAwB;IACxB,OAAO,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC;AAC9E,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,0BAA0B,CAAC,OAA0B;IAOnE,MAAM,aAAa,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,cAAc,KAAK,MAAM,CAAC,CAAC;IACnF,MAAM,cAAc,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,cAAc,KAAK,OAAO,CAAC,CAAC;IAErF,MAAM,qBAAqB,GAAG,aAAa,CAAC,MAAM,GAAG,CAAC;QACpD,CAAC,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,gBAAgB,EAAE,CAAC,CAAC,GAAG,aAAa,CAAC,MAAM;QACtF,CAAC,CAAC,CAAC,CAAC;IAEN,MAAM,sBAAsB,GAAG,aAAa,CAAC,MAAM,GAAG,CAAC;QACrD,CAAC,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,iBAAiB,E
AAE,CAAC,CAAC,GAAG,aAAa,CAAC,MAAM;QACvF,CAAC,CAAC,CAAC,CAAC;IAEN,MAAM,sBAAsB,GAAG,cAAc,CAAC,MAAM,GAAG,CAAC;QACtD,CAAC,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,gBAAgB,EAAE,CAAC,CAAC,GAAG,cAAc,CAAC,MAAM;QACxF,CAAC,CAAC,CAAC,CAAC;IAEN,MAAM,uBAAuB,GAAG,cAAc,CAAC,MAAM,GAAG,CAAC;QACvD,CAAC,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,iBAAiB,EAAE,CAAC,CAAC,GAAG,cAAc,CAAC,MAAM;QACzF,CAAC,CAAC,CAAC,CAAC;IAEN,4DAA4D;IAC5D,MAAM,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;IACnD,MAAM,gBAAgB,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE;QAC/C,MAAM,SAAS,GAAG,CAAC,CAAC,gBAAgB,GAAG,CAAC,CAAC,iBAAiB,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC;QAC9E,OAAO,SAAS,KAAK,CAAC,CAAC,cAAc,CAAC;IACxC,CAAC,CAAC,CAAC,MAAM,CAAC;IAEV,MAAM,cAAc,GAAG,YAAY,CAAC,MAAM,GAAG,CAAC;QAC5C,CAAC,CAAC,gBAAgB,GAAG,YAAY,CAAC,MAAM;QACxC,CAAC,CAAC,CAAC,CAAC;IAEN,OAAO;QACL,qBAAqB;QACrB,sBAAsB;QACtB,sBAAsB;QACtB,uBAAuB;QACvB,cAAc;KACf,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Benchmark runner for evaluating Veto kernel performance.
|
|
3
|
+
*
|
|
4
|
+
* @module benchmark/runner
|
|
5
|
+
*/
|
|
6
|
+
import { type Logger } from '../utils/logger.js';
|
|
7
|
+
import type { BenchmarkResult, BenchmarkReport, BenchmarkConfig } from './types.js';
|
|
8
|
+
/**
 * Callback invoked with benchmark progress updates.
 *
 * Receives the numeric `completed` and `total` values, the `current` result
 * (or `null`), and a numeric `eta`.
 * NOTE(review): the unit of `eta` and the circumstances under which `current`
 * is `null` are not visible in this declaration — confirm against the runner.
 */
export type ProgressCallback = (
    completed: number,
    total: number,
    current: BenchmarkResult | null,
    eta: number,
) => void;
|
|
12
|
+
/**
 * Options accepted when running a benchmark.
 */
export interface BenchmarkRunnerOptions {
    /**
     * Benchmark configuration (required).
     */
    config: BenchmarkConfig;
    /**
     * Optional callback that receives progress updates.
     */
    onProgress?: ProgressCallback;
    /**
     * Optional logger instance.
     */
    logger?: Logger;
}
|
|
23
|
+
/**
|
|
24
|
+
* Run a benchmark evaluation.
|
|
25
|
+
*/
|
|
26
|
+
/**
 * Run a benchmark evaluation.
 *
 * @param options - Runner options: the required `config`, plus the optional
 *   `onProgress` callback and `logger`.
 * @returns A promise that resolves to the resulting `BenchmarkReport`.
 */
export declare function runBenchmark(options: BenchmarkRunnerOptions): Promise<BenchmarkReport>;
|
|
27
|
+
/**
|
|
28
|
+
* Format a benchmark report for console output.
|
|
29
|
+
*/
|
|
30
|
+
/**
 * Format a benchmark report for console output.
 *
 * @param report - The report to render.
 * @returns The formatted report as a string.
 */
export declare function formatReportConsole(report: BenchmarkReport): string;
|
|
31
|
+
/**
|
|
32
|
+
* Save benchmark report to JSON file.
|
|
33
|
+
*/
|
|
34
|
+
/**
 * Save a benchmark report to a JSON file.
 *
 * @param report - The report to save.
 * @param path - Path of the file to write.
 *   NOTE(review): how relative paths are resolved and whether an existing file
 *   is overwritten are not visible in this declaration — confirm against the
 *   implementation.
 */
export declare function saveReportJson(report: BenchmarkReport, path: string): void;
|
|
35
|
+
/**
|
|
36
|
+
* Create a default progress logger.
|
|
37
|
+
*/
|
|
38
|
+
/**
 * Create a default progress logger.
 *
 * @returns A `ProgressCallback`; presumably intended to be passed as
 *   `BenchmarkRunnerOptions.onProgress` — where it writes its output is not
 *   visible in this declaration.
 */
export declare function createProgressLogger(): ProgressCallback;
|
|
39
|
+
//# sourceMappingURL=runner.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"runner.d.ts","sourceRoot":"","sources":["../../src/benchmark/runner.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAKH,OAAO,EAAgB,KAAK,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAQ/D,OAAO,KAAK,EACV,eAAe,EACf,eAAe,EACf,eAAe,EAChB,MAAM,YAAY,CAAC;AAEpB;;GAEG;AACH,MAAM,MAAM,gBAAgB,GAAG,CAC7B,SAAS,EAAE,MAAM,EACjB,KAAK,EAAE,MAAM,EACb,OAAO,EAAE,eAAe,GAAG,IAAI,EAC/B,GAAG,EAAE,MAAM,KACR,IAAI,CAAC;AAEV;;GAEG;AACH,MAAM,WAAW,sBAAsB;IACrC,8BAA8B;IAC9B,MAAM,EAAE,eAAe,CAAC;IACxB,wBAAwB;IACxB,UAAU,CAAC,EAAE,gBAAgB,CAAC;IAC9B,sBAAsB;IACtB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED;;GAEG;AACH,wBAAsB,YAAY,CAAC,OAAO,EAAE,sBAAsB,GAAG,OAAO,CAAC,eAAe,CAAC,CA0J5F;AAED;;GAEG;AACH,wBAAgB,mBAAmB,CAAC,MAAM,EAAE,eAAe,GAAG,MAAM,CA8GnE;AAaD;;GAEG;AACH,wBAAgB,cAAc,CAAC,MAAM,EAAE,eAAe,EAAE,IAAI,EAAE,MAAM,GAAG,IAAI,CAE1E;AAED;;GAEG;AACH,wBAAgB,oBAAoB,IAAI,gBAAgB,CAkBvD"}
|