veto-sdk 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +209 -0
- package/dist/benchmark/cli.d.ts +22 -0
- package/dist/benchmark/cli.d.ts.map +1 -0
- package/dist/benchmark/cli.js +238 -0
- package/dist/benchmark/cli.js.map +1 -0
- package/dist/benchmark/index.d.ts +10 -0
- package/dist/benchmark/index.d.ts.map +1 -0
- package/dist/benchmark/index.js +10 -0
- package/dist/benchmark/index.js.map +1 -0
- package/dist/benchmark/loader.d.ts +19 -0
- package/dist/benchmark/loader.d.ts.map +1 -0
- package/dist/benchmark/loader.js +321 -0
- package/dist/benchmark/loader.js.map +1 -0
- package/dist/benchmark/metrics.d.ts +35 -0
- package/dist/benchmark/metrics.d.ts.map +1 -0
- package/dist/benchmark/metrics.js +195 -0
- package/dist/benchmark/metrics.js.map +1 -0
- package/dist/benchmark/runner.d.ts +39 -0
- package/dist/benchmark/runner.d.ts.map +1 -0
- package/dist/benchmark/runner.js +279 -0
- package/dist/benchmark/runner.js.map +1 -0
- package/dist/benchmark/types.d.ts +188 -0
- package/dist/benchmark/types.d.ts.map +1 -0
- package/dist/benchmark/types.js +24 -0
- package/dist/benchmark/types.js.map +1 -0
- package/dist/cli/bin.d.ts +8 -0
- package/dist/cli/bin.d.ts.map +1 -0
- package/dist/cli/bin.js +120 -0
- package/dist/cli/bin.js.map +1 -0
- package/dist/cli/config.d.ts +126 -0
- package/dist/cli/config.d.ts.map +1 -0
- package/dist/cli/config.js +137 -0
- package/dist/cli/config.js.map +1 -0
- package/dist/cli/index.d.ts +9 -0
- package/dist/cli/index.d.ts.map +1 -0
- package/dist/cli/index.js +9 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/cli/init.d.ts +64 -0
- package/dist/cli/init.d.ts.map +1 -0
- package/dist/cli/init.js +160 -0
- package/dist/cli/init.js.map +1 -0
- package/dist/cli/templates.d.ts +22 -0
- package/dist/cli/templates.d.ts.map +1 -0
- package/dist/cli/templates.js +132 -0
- package/dist/cli/templates.js.map +1 -0
- package/dist/core/history.d.ts +104 -0
- package/dist/core/history.d.ts.map +1 -0
- package/dist/core/history.js +148 -0
- package/dist/core/history.js.map +1 -0
- package/dist/core/index.d.ts +10 -0
- package/dist/core/index.d.ts.map +1 -0
- package/dist/core/index.js +10 -0
- package/dist/core/index.js.map +1 -0
- package/dist/core/interceptor.d.ts +96 -0
- package/dist/core/interceptor.d.ts.map +1 -0
- package/dist/core/interceptor.js +227 -0
- package/dist/core/interceptor.js.map +1 -0
- package/dist/core/validator.d.ts +107 -0
- package/dist/core/validator.d.ts.map +1 -0
- package/dist/core/validator.js +263 -0
- package/dist/core/validator.js.map +1 -0
- package/dist/core/veto.d.ts +265 -0
- package/dist/core/veto.d.ts.map +1 -0
- package/dist/core/veto.js +681 -0
- package/dist/core/veto.js.map +1 -0
- package/dist/index.d.ts +43 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +40 -0
- package/dist/index.js.map +1 -0
- package/dist/kernel/client.d.ts +82 -0
- package/dist/kernel/client.d.ts.map +1 -0
- package/dist/kernel/client.js +162 -0
- package/dist/kernel/client.js.map +1 -0
- package/dist/kernel/index.d.ts +9 -0
- package/dist/kernel/index.d.ts.map +1 -0
- package/dist/kernel/index.js +9 -0
- package/dist/kernel/index.js.map +1 -0
- package/dist/kernel/prompt.d.ts +27 -0
- package/dist/kernel/prompt.d.ts.map +1 -0
- package/dist/kernel/prompt.js +127 -0
- package/dist/kernel/prompt.js.map +1 -0
- package/dist/kernel/types.d.ts +85 -0
- package/dist/kernel/types.d.ts.map +1 -0
- package/dist/kernel/types.js +52 -0
- package/dist/kernel/types.js.map +1 -0
- package/dist/providers/adapters.d.ts +167 -0
- package/dist/providers/adapters.d.ts.map +1 -0
- package/dist/providers/adapters.js +244 -0
- package/dist/providers/adapters.js.map +1 -0
- package/dist/providers/index.d.ts +11 -0
- package/dist/providers/index.d.ts.map +1 -0
- package/dist/providers/index.js +11 -0
- package/dist/providers/index.js.map +1 -0
- package/dist/providers/types.d.ts +92 -0
- package/dist/providers/types.d.ts.map +1 -0
- package/dist/providers/types.js +10 -0
- package/dist/providers/types.js.map +1 -0
- package/dist/rules/api-client.d.ts +103 -0
- package/dist/rules/api-client.d.ts.map +1 -0
- package/dist/rules/api-client.js +241 -0
- package/dist/rules/api-client.js.map +1 -0
- package/dist/rules/index.d.ts +10 -0
- package/dist/rules/index.d.ts.map +1 -0
- package/dist/rules/index.js +10 -0
- package/dist/rules/index.js.map +1 -0
- package/dist/rules/loader.d.ts +116 -0
- package/dist/rules/loader.d.ts.map +1 -0
- package/dist/rules/loader.js +300 -0
- package/dist/rules/loader.js.map +1 -0
- package/dist/rules/rule-validator.d.ts +135 -0
- package/dist/rules/rule-validator.d.ts.map +1 -0
- package/dist/rules/rule-validator.js +239 -0
- package/dist/rules/rule-validator.js.map +1 -0
- package/dist/rules/types.d.ts +162 -0
- package/dist/rules/types.d.ts.map +1 -0
- package/dist/rules/types.js +16 -0
- package/dist/rules/types.js.map +1 -0
- package/dist/types/config.d.ts +171 -0
- package/dist/types/config.d.ts.map +1 -0
- package/dist/types/config.js +31 -0
- package/dist/types/config.js.map +1 -0
- package/dist/types/index.d.ts +8 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +8 -0
- package/dist/types/index.js.map +1 -0
- package/dist/types/tool.d.ts +156 -0
- package/dist/types/tool.d.ts.map +1 -0
- package/dist/types/tool.js +27 -0
- package/dist/types/tool.js.map +1 -0
- package/dist/utils/glob.d.ts +21 -0
- package/dist/utils/glob.d.ts.map +1 -0
- package/dist/utils/glob.js +147 -0
- package/dist/utils/glob.js.map +1 -0
- package/dist/utils/id.d.ts +28 -0
- package/dist/utils/id.d.ts.map +1 -0
- package/dist/utils/id.js +43 -0
- package/dist/utils/id.js.map +1 -0
- package/dist/utils/index.d.ts +9 -0
- package/dist/utils/index.d.ts.map +1 -0
- package/dist/utils/index.js +9 -0
- package/dist/utils/index.js.map +1 -0
- package/dist/utils/logger.d.ts +97 -0
- package/dist/utils/logger.d.ts.map +1 -0
- package/dist/utils/logger.js +153 -0
- package/dist/utils/logger.js.map +1 -0
- package/package.json +90 -0
|
@@ -0,0 +1,279 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Benchmark runner for evaluating Veto kernel performance.
|
|
3
|
+
*
|
|
4
|
+
* @module benchmark/runner
|
|
5
|
+
*/
|
|
6
|
+
import { writeFileSync } from 'node:fs';
|
|
7
|
+
import { KernelClient } from '../kernel/client.js';
|
|
8
|
+
import { createLogger } from '../utils/logger.js';
|
|
9
|
+
import { loadBenchmarkSamples } from './loader.js';
|
|
10
|
+
import { calculateConfusionMatrix, calculateClassificationMetrics, calculateLatencyStats, calculateCategoryMetrics, } from './metrics.js';
|
|
11
|
+
/**
 * Run a benchmark evaluation.
 *
 * Loads the samples selected by `options.config`, evaluates them against the
 * kernel strictly one at a time (each call is awaited before the next
 * starts), and aggregates the outcomes into a report object.
 *
 * A failing evaluation never aborts the run: the sample is recorded with
 * `correct: false` and `error` set to the error message, and only the first
 * three failures are logged in detail to keep the log readable.
 *
 * @param {object} options
 * @param {object} options.config - Benchmark configuration. Reads
 *   `datasetPath`, `maxSamples`, `shuffle`, `seed`, `includeResults` and the
 *   `kernel` settings (`baseUrl`, `model`, `temperature`, `maxTokens`,
 *   `timeout`).
 * @param {Function} [options.onProgress] - Called after every sample as
 *   `(completed, total, result, etaMs)`.
 * @param {object} [options.logger] - Logger to use; defaults to
 *   `createLogger('info')`.
 * @returns {Promise<object>} The benchmark report. `results` is only attached
 *   when `config.includeResults` is truthy, and `incorrectPredictions` is
 *   capped at 100 entries.
 */
export async function runBenchmark(options) {
    const { config, onProgress } = options;
    const logger = options.logger ?? createLogger('info');
    const startTime = new Date();
    logger.info('Starting benchmark', {
        datasetPath: config.datasetPath,
        maxSamples: config.maxSamples || 'all',
        model: config.kernel.model,
    });
    // Load samples
    logger.info('Loading benchmark samples...');
    const samples = await loadBenchmarkSamples(config.datasetPath, config.maxSamples, config.shuffle, config.seed);
    logger.info(`Loaded ${samples.length} samples`);
    const total = samples.length;
    // Create kernel client
    const kernelClient = new KernelClient({
        config: {
            baseUrl: config.kernel.baseUrl,
            model: config.kernel.model,
            temperature: config.kernel.temperature,
            maxTokens: config.kernel.maxTokens,
            timeout: config.kernel.timeout,
        },
        logger,
    });
    // Progress is logged every 10% of the dataset, but never more often than
    // every 100 samples (so runs shorter than 100 samples log no progress).
    const logInterval = Math.max(Math.floor(total / 10), 100);
    // Run evaluations
    const results = [];
    let correctCount = 0;
    let errorCount = 0;
    for (let i = 0; i < total; i++) {
        const sample = samples[i];
        const completed = i + 1;
        const sampleStart = Date.now();
        let result;
        try {
            const response = await kernelClient.evaluate({ tool: sample.tool, arguments: sample.arguments }, sample.rules);
            result = {
                sample,
                actualDecision: response.decision,
                actualPassWeight: response.pass_weight,
                actualBlockWeight: response.block_weight,
                reasoning: response.reasoning,
                matchedRules: response.matched_rules,
                correct: response.decision === sample.expectedDecision,
                latencyMs: Date.now() - sampleStart,
            };
        }
        catch (error) {
            errorCount++;
            const errorMessage = error instanceof Error ? error.message : String(error);
            // Log first few errors for debugging
            if (errorCount <= 3) {
                logger.error(`Evaluation error (${errorCount}):`, {
                    tool: sample.tool,
                    error: errorMessage,
                    stack: error instanceof Error ? error.stack : undefined,
                });
            }
            // NOTE(review): failed samples are recorded with decision 'pass';
            // confirm the metrics helpers skip results that have `error` set,
            // otherwise failures are counted as 'pass' predictions.
            result = {
                sample,
                actualDecision: 'pass', // Default on error
                actualPassWeight: 0,
                actualBlockWeight: 0,
                reasoning: '',
                correct: false,
                latencyMs: Date.now() - sampleStart,
                error: errorMessage,
            };
        }
        results.push(result);
        if (result.correct) {
            correctCount++;
        }
        // Estimate the remaining time from the average time per finished sample
        const elapsed = Date.now() - startTime.getTime();
        const eta = (elapsed / completed) * (total - completed);
        // Report progress
        if (onProgress) {
            onProgress(completed, total, result, eta);
        }
        if (completed % logInterval === 0) {
            const progress = (completed / total * 100).toFixed(1);
            const accuracySoFar = (correctCount / completed * 100).toFixed(2);
            logger.info(`Progress: ${progress}% (${completed}/${total}) - Accuracy: ${accuracySoFar}%`);
        }
    }
    const endTime = new Date();
    // Calculate metrics
    const confusionMatrix = calculateConfusionMatrix(results);
    const metrics = calculateClassificationMetrics(confusionMatrix);
    const latency = calculateLatencyStats(results);
    const categories = calculateCategoryMetrics(results);
    // Wrong answers only: evaluation failures are reported through errorCount
    const incorrectPredictions = results.filter(r => !r.correct && !r.error);
    // Build report
    const report = {
        startTime: startTime.toISOString(),
        endTime: endTime.toISOString(),
        durationMs: endTime.getTime() - startTime.getTime(),
        model: config.kernel.model,
        totalSamples: total,
        correctCount,
        incorrectCount: incorrectPredictions.length,
        errorCount,
        confusionMatrix,
        metrics,
        latency,
        categories,
        incorrectPredictions: incorrectPredictions.slice(0, 100), // Limit to 100 for report size
        config,
    };
    if (config.includeResults) {
        report.results = results;
    }
    logger.info('Benchmark complete', {
        totalSamples: report.totalSamples,
        accuracy: (metrics.accuracy * 100).toFixed(2) + '%',
        f1Score: metrics.f1Score.toFixed(4),
        meanLatency: latency.mean.toFixed(2) + 'ms',
        errors: errorCount,
    });
    return report;
}
|
|
142
|
+
/**
 * Format a benchmark report for console output.
 *
 * Renders the report as a sequence of fixed-width boxes (overview,
 * classification metrics, confusion matrix, latency, and - when present -
 * per-category accuracy, errors and the first five incorrect predictions).
 *
 * Every boxed line is produced by one helper that pads or clips to the box
 * width, so long model names, IDs or reasoning text cannot push the right
 * border out of alignment.
 *
 * @param {object} report - Report as returned by `runBenchmark`.
 * @returns {string} The multi-line report text, joined with `\n`.
 */
export function formatReportConsole(report) {
    // Characters between the two vertical borders; a full boxed line is
    // INNER_WIDTH + 2 characters wide.
    const INNER_WIDTH = 61;
    // Column at which field values start, so all values line up.
    const LABEL_WIDTH = 17;
    // Maximum number of reasoning characters shown per incorrect prediction.
    const REASONING_PREVIEW = 45;
    const horizontal = '─'.repeat(INNER_WIDTH);
    const banner = '═'.repeat(INNER_WIDTH + 2);
    const lines = [];
    // Center `text` inside `width` columns.
    const centered = (text, width) => {
        const left = Math.max(0, Math.floor((width - text.length) / 2));
        return `${' '.repeat(left)}${text}`.padEnd(width);
    };
    // One boxed line: pad short content, clip long content with an ellipsis.
    const row = (text = '') => {
        const content = ` ${text}`;
        const clipped = content.length > INNER_WIDTH
            ? `${content.slice(0, INNER_WIDTH - 3)}...`
            : content;
        return `│${clipped.padEnd(INNER_WIDTH)}│`;
    };
    // A "Label:   value" line with the value in a fixed column.
    const field = (label, value) => {
        const paddedLabel = `${label}:`.padEnd(LABEL_WIDTH);
        return row(`${paddedLabel}${value}`);
    };
    const percent = (fraction) => `${(fraction * 100).toFixed(2)}%`;
    const millis = (value) => `${value.toFixed(2)} ms`;
    const openBox = (title) => {
        lines.push(`┌${horizontal}┐`);
        lines.push(`│${centered(title, INNER_WIDTH)}│`);
        lines.push(`├${horizontal}┤`);
    };
    const closeBox = () => {
        lines.push(`└${horizontal}┘`);
    };
    lines.push('');
    lines.push(banner);
    lines.push(centered('VETO BENCHMARK REPORT', INNER_WIDTH + 2));
    lines.push(banner);
    lines.push('');
    // Overview
    // A zero-length run would otherwise print "Infinity" or "NaN" here.
    const throughput = report.durationMs > 0
        ? `${(report.totalSamples / (report.durationMs / 1000)).toFixed(2)} samples/sec`
        : 'n/a';
    openBox('OVERVIEW');
    lines.push(field('Model', report.model));
    lines.push(field('Total Samples', report.totalSamples));
    lines.push(field('Duration', formatDuration(report.durationMs)));
    lines.push(field('Throughput', throughput));
    closeBox();
    lines.push('');
    // Classification Metrics
    openBox('CLASSIFICATION METRICS');
    lines.push(field('Accuracy', percent(report.metrics.accuracy)));
    lines.push(field('Precision', percent(report.metrics.precision)));
    lines.push(field('Recall', percent(report.metrics.recall)));
    lines.push(field('F1 Score', report.metrics.f1Score.toFixed(4)));
    lines.push(field('MCC', report.metrics.mcc.toFixed(4)));
    lines.push(field('FP Rate', percent(report.metrics.falsePositiveRate)));
    lines.push(field('FN Rate', percent(report.metrics.falseNegativeRate)));
    closeBox();
    lines.push('');
    // Confusion Matrix: a 14-column row label followed by two 10-column counts
    const cm = report.confusionMatrix;
    const matrixRow = (label, first, second, note) => row(`${label.padEnd(14)}${String(first).padStart(10)}${String(second).padStart(10)}${note}`);
    openBox('CONFUSION MATRIX');
    lines.push(row(`${' '.repeat(20)}Predicted`));
    lines.push(matrixRow('', 'PASS', 'BLOCK', ''));
    lines.push(matrixRow('Actual PASS', cm.trueNegative, cm.falsePositive, '   (TN / FP)'));
    lines.push(matrixRow('Actual BLOCK', cm.falseNegative, cm.truePositive, '   (FN / TP)'));
    closeBox();
    lines.push('');
    // Latency Statistics
    openBox('LATENCY STATISTICS');
    lines.push(field('Min', millis(report.latency.min)));
    lines.push(field('Max', millis(report.latency.max)));
    lines.push(field('Mean', millis(report.latency.mean)));
    lines.push(field('Median', millis(report.latency.median)));
    lines.push(field('P95', millis(report.latency.p95)));
    lines.push(field('P99', millis(report.latency.p99)));
    lines.push(field('Std Dev', millis(report.latency.stdDev)));
    closeBox();
    lines.push('');
    // Per-Category Results
    if (report.categories.length > 0) {
        openBox('PER-CATEGORY ACCURACY');
        for (const cat of report.categories) {
            const catName = cat.category.length > 30
                ? cat.category.slice(0, 27) + '...'
                : cat.category;
            const accuracy = (cat.accuracy * 100).toFixed(1) + '%';
            lines.push(row(`${catName.padEnd(32)} ${accuracy.padStart(7)} (n=${cat.sampleCount})`));
        }
        closeBox();
        lines.push('');
    }
    // Error Summary
    if (report.errorCount > 0) {
        openBox('ERRORS');
        lines.push(field('Error Count', report.errorCount));
        lines.push(field('Error Rate', percent(report.errorCount / report.totalSamples)));
        closeBox();
        lines.push('');
    }
    // Sample Incorrect Predictions
    if (report.incorrectPredictions.length > 0) {
        openBox('SAMPLE INCORRECT PREDICTIONS (first 5)');
        for (const pred of report.incorrectPredictions.slice(0, 5)) {
            // Reasoning may be missing or span several lines; flatten it so it
            // stays on one boxed line, and only mark it as cut when it was cut.
            const reasoning = String(pred.reasoning ?? '').replace(/\s+/g, ' ').trim();
            const preview = reasoning.length > REASONING_PREVIEW
                ? `${reasoning.slice(0, REASONING_PREVIEW)}...`
                : reasoning;
            lines.push(row(`ID: ${pred.sample.id}`));
            lines.push(row(`Tool: ${pred.sample.tool}`));
            lines.push(row(`Expected: ${pred.sample.expectedDecision}, Actual: ${pred.actualDecision}`));
            lines.push(row(`Reasoning: ${preview}`));
            lines.push(row());
        }
        closeBox();
    }
    lines.push('');
    lines.push(banner);
    return lines.join('\n');
}
|
|
242
|
+
/**
 * Format duration in human-readable format.
 *
 * The value is rounded to whole milliseconds first and the unit is chosen
 * from the rounded value, so a rounding carry moves to the next unit instead
 * of producing text such as "1m 60s" or "1000ms".
 *
 * @param {number} ms - Duration in milliseconds; may be fractional (for
 *   example a computed ETA). Negative values are treated as 0.
 * @returns {string} `"<n>ms"` below one second, `"<s.s>s"` below one minute,
 *   `"<m>m <s>s"` otherwise, or `"n/a"` when `ms` is not a finite number.
 */
function formatDuration(ms) {
    if (!Number.isFinite(ms)) {
        return 'n/a';
    }
    const totalMs = Math.max(0, Math.round(ms));
    if (totalMs < 1000) {
        return `${totalMs}ms`;
    }
    // Work in tenths of a second so 59.95s+ rolls over to "1m 0s", not "60.0s".
    const tenths = Math.round(totalMs / 100);
    if (tenths < 600) {
        return `${(tenths / 10).toFixed(1)}s`;
    }
    // Round to whole seconds before splitting, so seconds never reach 60.
    const totalSeconds = Math.round(totalMs / 1000);
    const minutes = Math.floor(totalSeconds / 60);
    const seconds = totalSeconds % 60;
    return `${minutes}m ${seconds}s`;
}
|
|
254
|
+
/**
 * Save benchmark report to JSON file.
 *
 * The report is serialized with two-space indentation and written
 * synchronously as UTF-8, replacing any existing file at `path`.
 *
 * @param {object} report - Report object to serialize.
 * @param {string} path - Destination file path.
 */
export function saveReportJson(report, path) {
    const serialized = JSON.stringify(report, null, 2);
    writeFileSync(path, serialized, { encoding: 'utf-8' });
}
|
|
260
|
+
/**
 * Create a default progress logger.
 *
 * The returned callback redraws a single-line progress bar on stdout (using
 * a carriage return) each time the whole-number percentage changes, and ends
 * the line with a newline once `completed === total`.
 *
 * The percentage is clamped to 0-100 and an empty run (`total` of 0) is shown
 * as 0%, so out-of-range progress values cannot make the bar throw or print
 * "NaN%".
 *
 * @returns {Function} Callback `(completed, total, _current, eta)` where
 *   `eta` is the estimated remaining time in milliseconds.
 */
export function createProgressLogger() {
    // Bar length in characters: one cell per two percentage points.
    const BAR_WIDTH = 50;
    let lastPercent = -1;
    return (completed, total, _current, eta) => {
        const ratio = total > 0 ? completed / total : 0;
        const rawPercent = Number.isFinite(ratio) ? Math.floor(ratio * 100) : 0;
        const percent = Math.min(100, Math.max(0, rawPercent));
        // Only log on percentage change
        if (percent === lastPercent) {
            return;
        }
        lastPercent = percent;
        const filled = Math.floor(percent / 2);
        const bar = '█'.repeat(filled) + '░'.repeat(BAR_WIDTH - filled);
        const etaStr = formatDuration(eta);
        process.stdout.write(`\r[${bar}] ${percent}% (${completed}/${total}) ETA: ${etaStr} `);
        if (completed === total) {
            process.stdout.write('\n');
        }
    };
}
|
|
279
|
+
//# sourceMappingURL=runner.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"runner.js","sourceRoot":"","sources":["../../src/benchmark/runner.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,aAAa,EAAE,MAAM,SAAS,CAAC;AACxC,OAAO,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC;AAEnD,OAAO,EAAE,YAAY,EAAe,MAAM,oBAAoB,CAAC;AAC/D,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AACnD,OAAO,EACL,wBAAwB,EACxB,8BAA8B,EAC9B,qBAAqB,EACrB,wBAAwB,GACzB,MAAM,cAAc,CAAC;AA6BtB;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,YAAY,CAAC,OAA+B;IAChE,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,GAAG,OAAO,CAAC;IACvC,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,IAAI,YAAY,CAAC,MAAM,CAAC,CAAC;IAEtD,MAAM,SAAS,GAAG,IAAI,IAAI,EAAE,CAAC;IAC7B,MAAM,CAAC,IAAI,CAAC,oBAAoB,EAAE;QAChC,WAAW,EAAE,MAAM,CAAC,WAAW;QAC/B,UAAU,EAAE,MAAM,CAAC,UAAU,IAAI,KAAK;QACtC,KAAK,EAAE,MAAM,CAAC,MAAM,CAAC,KAAK;KAC3B,CAAC,CAAC;IAEH,eAAe;IACf,MAAM,CAAC,IAAI,CAAC,8BAA8B,CAAC,CAAC;IAC5C,MAAM,OAAO,GAAG,MAAM,oBAAoB,CACxC,MAAM,CAAC,WAAW,EAClB,MAAM,CAAC,UAAU,EACjB,MAAM,CAAC,OAAO,EACd,MAAM,CAAC,IAAI,CACZ,CAAC;IACF,MAAM,CAAC,IAAI,CAAC,UAAU,OAAO,CAAC,MAAM,UAAU,CAAC,CAAC;IAEhD,uBAAuB;IACvB,MAAM,YAAY,GAAiB;QACjC,OAAO,EAAE,MAAM,CAAC,MAAM,CAAC,OAAO;QAC9B,KAAK,EAAE,MAAM,CAAC,MAAM,CAAC,KAAK;QAC1B,WAAW,EAAE,MAAM,CAAC,MAAM,CAAC,WAAW;QACtC,SAAS,EAAE,MAAM,CAAC,MAAM,CAAC,SAAS;QAClC,OAAO,EAAE,MAAM,CAAC,MAAM,CAAC,OAAO;KAC/B,CAAC;IAEF,MAAM,YAAY,GAAG,IAAI,YAAY,CAAC;QACpC,MAAM,EAAE,YAAY;QACpB,MAAM;KACP,CAAC,CAAC;IAEH,kBAAkB;IAClB,MAAM,OAAO,GAAsB,EAAE,CAAC;IACtC,MAAM,UAAU,GAAa,EAAE,CAAC;IAChC,IAAI,UAAU,GAAG,CAAC,CAAC;IAEnB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACxC,MAAM,MAAM,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;QAC1B,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC/B,UAAU,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QAE7B,IAAI,MAAuB,CAAC;QAE5B,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG;gBACf,IAAI,EAAE,MAAM,CAAC,IAAI;gBACjB,SAAS,EAAE,MAAM,CAAC,SAAS;aAC5B,CAAC;YAEF,MAAM,QAAQ,GAAG,MAAM,YAAY,CAAC,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC;YAErE,MAAM,GAAG;gBACP,MAAM;gBACN,cAAc,EAAE,QAAQ,CAAC,QAAQ;gBACjC,gBAAgB,EAAE,QAAQ,CAAC,WAAW;gBACtC,iBAAiB,EAAE,QAAQ,CAAC,YAAY;gBACxC,
SAAS,EAAE,QAAQ,CAAC,SAAS;gBAC7B,YAAY,EAAE,QAAQ,CAAC,aAAa;gBACpC,OAAO,EAAE,QAAQ,CAAC,QAAQ,KAAK,MAAM,CAAC,gBAAgB;gBACtD,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,WAAW;aACpC,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,UAAU,EAAE,CAAC;YACb,MAAM,YAAY,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YAE5E,qCAAqC;YACrC,IAAI,UAAU,IAAI,CAAC,EAAE,CAAC;gBACpB,MAAM,CAAC,KAAK,CAAC,qBAAqB,UAAU,IAAI,EAAE;oBAChD,IAAI,EAAE,MAAM,CAAC,IAAI;oBACjB,KAAK,EAAE,YAAY;oBACnB,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;iBACxD,CAAC,CAAC;YACL,CAAC;YAED,MAAM,GAAG;gBACP,MAAM;gBACN,cAAc,EAAE,MAAM,EAAE,mBAAmB;gBAC3C,gBAAgB,EAAE,CAAC;gBACnB,iBAAiB,EAAE,CAAC;gBACpB,SAAS,EAAE,EAAE;gBACb,OAAO,EAAE,KAAK;gBACd,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,WAAW;gBACnC,KAAK,EAAE,YAAY;aACpB,CAAC;QACJ,CAAC;QAED,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAErB,gBAAgB;QAChB,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC,OAAO,EAAE,CAAC;QACjD,MAAM,gBAAgB,GAAG,OAAO,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QAC3C,MAAM,SAAS,GAAG,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QAC3C,MAAM,GAAG,GAAG,gBAAgB,GAAG,SAAS,CAAC;QAEzC,kBAAkB;QAClB,IAAI,UAAU,EAAE,CAAC;YACf,UAAU,CAAC,CAAC,GAAG,CAAC,EAAE,OAAO,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,CAAC,CAAC;QACjD,CAAC;QAED,qCAAqC;QACrC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,GAAG,EAAE,CAAC,EAAE,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;YACnE,MAAM,QAAQ,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;YAC7D,MAAM,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC;YAC3D,MAAM,aAAa,GAAG,CAAC,YAAY,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;YAChE,MAAM,CAAC,IAAI,CAAC,aAAa,QAAQ,MAAM,CAAC,GAAG,CAAC,IAAI,OAAO,CAAC,MAAM,iBAAiB,aAAa,GAAG,CAAC,CAAC;QACnG,CAAC;IACH,CAAC;IAED,MAAM,OAAO,GAAG,IAAI,IAAI,EAAE,CAAC;IAE3B,oBAAoB;IACpB,MAAM,eAAe,GAAG,wBAAwB,CAAC,OAAO,CAAC,CAAC;IAC1D,MAAM,OAAO,GAAG,8BAA8B,CAAC,eAAe,C
AAC,CAAC;IAChE,MAAM,OAAO,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;IAC/C,MAAM,UAAU,GAAG,wBAAwB,CAAC,OAAO,CAAC,CAAC;IAErD,yCAAyC;IACzC,MAAM,oBAAoB,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,OAAO,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;IAEzE,eAAe;IACf,MAAM,MAAM,GAAoB;QAC9B,SAAS,EAAE,SAAS,CAAC,WAAW,EAAE;QAClC,OAAO,EAAE,OAAO,CAAC,WAAW,EAAE;QAC9B,UAAU,EAAE,OAAO,CAAC,OAAO,EAAE,GAAG,SAAS,CAAC,OAAO,EAAE;QACnD,KAAK,EAAE,MAAM,CAAC,MAAM,CAAC,KAAK;QAC1B,YAAY,EAAE,OAAO,CAAC,MAAM;QAC5B,YAAY,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM;QACnD,cAAc,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,OAAO,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,MAAM;QAClE,UAAU;QACV,eAAe;QACf,OAAO;QACP,OAAO;QACP,UAAU;QACV,oBAAoB,EAAE,oBAAoB,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,+BAA+B;QACzF,MAAM;KACP,CAAC;IAEF,IAAI,MAAM,CAAC,cAAc,EAAE,CAAC;QAC1B,MAAM,CAAC,OAAO,GAAG,OAAO,CAAC;IAC3B,CAAC;IAED,MAAM,CAAC,IAAI,CAAC,oBAAoB,EAAE;QAChC,YAAY,EAAE,MAAM,CAAC,YAAY;QACjC,QAAQ,EAAE,CAAC,OAAO,CAAC,QAAQ,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,GAAG;QACnD,OAAO,EAAE,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC;QACnC,WAAW,EAAE,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,IAAI;QAC3C,MAAM,EAAE,UAAU;KACnB,CAAC,CAAC;IAEH,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,mBAAmB,CAAC,MAAuB;IACzD,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACf,KAAK,CAAC,IAAI,CAAC,iEAAiE,CAAC,CAAC;IAC9E,KAAK,CAAC,IAAI,CAAC,kEAAkE,CAAC,CAAC;IAC/E,KAAK,CAAC,IAAI,CAAC,iEAAiE,CAAC,CAAC;IAC9E,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAEf,WAAW;IACX,KAAK,CAAC,IAAI,CAAC,iEAAiE,CAAC,CAAC;IAC9E,KAAK,CAAC,IAAI,CAAC,iEAAiE,CAAC,CAAC;IAC9E,KAAK,CAAC,IAAI,CAAC,iEAAiE,CAAC,CAAC;IAC9E,KAAK,CAAC,IAAI,CAAC,qBAAqB,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC;IAC5D,KAAK,CAAC,IAAI,CAAC,qBAAqB,MAAM,CAAC,YAAY,CAAC,QAAQ,EAAE,CAAC,MAAM,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC;IAC9E,KAAK,CAAC,IAAI,CAAC,qBAAqB,cAAc,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC;IACjF,KAAK,CAAC,IAAI,CAAC,qBAAqB,CAAC,M
AAM,CAAC,YAAY,GAAG,CAAC,MAAM,CAAC,UAAU,GAAG,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC,gBAAgB,CAAC,CAAC;IAC1H,KAAK,CAAC,IAAI,CAAC,iEAAiE,CAAC,CAAC;IAC9E,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAEf,yBAAyB;IACzB,KAAK,CAAC,IAAI,CAAC,iEAAiE,CAAC,CAAC;IAC9E,KAAK,CAAC,IAAI,CAAC,iEAAiE,CAAC,CAAC;IAC9E,KAAK,CAAC,IAAI,CAAC,iEAAiE,CAAC,CAAC;IAC9E,KAAK,CAAC,IAAI,CAAC,qBAAqB,CAAC,MAAM,CAAC,OAAO,CAAC,QAAQ,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC;IAChG,KAAK,CAAC,IAAI,CAAC,qBAAqB,CAAC,MAAM,CAAC,OAAO,CAAC,SAAS,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC;IACjG,KAAK,CAAC,IAAI,CAAC,qBAAqB,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC;IAC9F,KAAK,CAAC,IAAI,CAAC,qBAAqB,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,MAAM,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC;IACtF,KAAK,CAAC,IAAI,CAAC,qBAAqB,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,MAAM,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC;IAClF,KAAK,CAAC,IAAI,CAAC,qBAAqB,CAAC,MAAM,CAAC,OAAO,CAAC,iBAAiB,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC;IACzG,KAAK,CAAC,IAAI,CAAC,qBAAqB,CAAC,MAAM,CAAC,OAAO,CAAC,iBAAiB,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC;IACzG,KAAK,CAAC,IAAI,CAAC,iEAAiE,CAAC,CAAC;IAC9E,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAEf,mBAAmB;IACnB,MAAM,EAAE,GAAG,MAAM,CAAC,eAAe,CAAC;IAClC,KAAK,CAAC,IAAI,CAAC,iEAAiE,CAAC,CAAC;IAC9E,KAAK,CAAC,IAAI,CAAC,iEAAiE,CAAC,CAAC;IAC9E,KAAK,CAAC,IAAI,CAAC,iEAAiE,CAAC,CAAC;IAC9E,KAAK,CAAC,IAAI,CAAC,iEAAiE,CAAC,CAAC;IAC9E,KAAK,CAAC,IAAI,CAAC,iEAAiE,CAAC,CAAC;IAC9E,KAAK,CAAC,IAAI,CAAC,wBAAwB,EAAE,CAAC,YAAY,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS,EAAE,CAAC,aAAa,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC,2BAA2B,CAAC,CAAC;IACtJ,KAAK,CAAC,IAAI,CAAC,wBAAwB,EAAE,CAAC,aAAa,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS,EAAE
,CAAC,YAAY,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC,2BAA2B,CAAC,CAAC;IACtJ,KAAK,CAAC,IAAI,CAAC,iEAAiE,CAAC,CAAC;IAC9E,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAEf,qBAAqB;IACrB,KAAK,CAAC,IAAI,CAAC,iEAAiE,CAAC,CAAC;IAC9E,KAAK,CAAC,IAAI,CAAC,iEAAiE,CAAC,CAAC;IAC9E,KAAK,CAAC,IAAI,CAAC,iEAAiE,CAAC,CAAC;IAC9E,KAAK,CAAC,IAAI,CAAC,qBAAqB,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC;IACrF,KAAK,CAAC,IAAI,CAAC,qBAAqB,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC;IACrF,KAAK,CAAC,IAAI,CAAC,qBAAqB,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC;IACtF,KAAK,CAAC,IAAI,CAAC,qBAAqB,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC;IACxF,KAAK,CAAC,IAAI,CAAC,qBAAqB,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC;IACrF,KAAK,CAAC,IAAI,CAAC,qBAAqB,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC;IACrF,KAAK,CAAC,IAAI,CAAC,qBAAqB,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC;IACxF,KAAK,CAAC,IAAI,CAAC,iEAAiE,CAAC,CAAC;IAC9E,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAEf,uBAAuB;IACvB,IAAI,MAAM,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACjC,KAAK,CAAC,IAAI,CAAC,iEAAiE,CAAC,CAAC;QAC9E,KAAK,CAAC,IAAI,CAAC,iEAAiE,CAAC,CAAC;QAC9E,KAAK,CAAC,IAAI,CAAC,iEAAiE,CAAC,CAAC;QAE9E,KAAK,MAAM,GAAG,IAAI,MAAM,CAAC,UAAU,EAAE,CAAC;YACpC,MAAM,OAAO,GAAG,GAAG,CAAC,QAAQ,CAAC,MAAM,GAAG,EAAE;gBACtC,CAAC,CAAC,GAAG,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,KAAK;gBACnC,CAAC,CAAC,GAAG,CAAC,QAAQ,CAAC;YACjB,MAAM,QAAQ,GAAG,CAAC,GAAG,CAAC,QAAQ,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC;YACvD,KAAK,CAAC,IAAI,CAAC,KAAK,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC,IAAI,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAC,OAAO,GAAG,CAAC,WAAW,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC;QACxG,CAAC;Q
AED,KAAK,CAAC,IAAI,CAAC,iEAAiE,CAAC,CAAC;QAC9E,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACjB,CAAC;IAED,gBAAgB;IAChB,IAAI,MAAM,CAAC,UAAU,GAAG,CAAC,EAAE,CAAC;QAC1B,KAAK,CAAC,IAAI,CAAC,iEAAiE,CAAC,CAAC;QAC9E,KAAK,CAAC,IAAI,CAAC,iEAAiE,CAAC,CAAC;QAC9E,KAAK,CAAC,IAAI,CAAC,iEAAiE,CAAC,CAAC;QAC9E,KAAK,CAAC,IAAI,CAAC,qBAAqB,MAAM,CAAC,UAAU,CAAC,QAAQ,EAAE,CAAC,MAAM,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC;QAC5E,KAAK,CAAC,IAAI,CAAC,qBAAqB,CAAC,CAAC,MAAM,CAAC,UAAU,GAAG,MAAM,CAAC,YAAY,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC;QAClH,KAAK,CAAC,IAAI,CAAC,iEAAiE,CAAC,CAAC;QAC9E,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACjB,CAAC;IAED,+BAA+B;IAC/B,IAAI,MAAM,CAAC,oBAAoB,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC3C,KAAK,CAAC,IAAI,CAAC,iEAAiE,CAAC,CAAC;QAC9E,KAAK,CAAC,IAAI,CAAC,iEAAiE,CAAC,CAAC;QAC9E,KAAK,CAAC,IAAI,CAAC,iEAAiE,CAAC,CAAC;QAE9E,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,oBAAoB,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC;YAC3D,KAAK,CAAC,IAAI,CAAC,SAAS,IAAI,CAAC,MAAM,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC;YACvD,KAAK,CAAC,IAAI,CAAC,aAAa,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,MAAM,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC;YAC7D,KAAK,CAAC,IAAI,CAAC,iBAAiB,IAAI,CAAC,MAAM,CAAC,gBAAgB,aAAa,IAAI,CAAC,cAAc,EAAE,CAAC,MAAM,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC;YAC7G,KAAK,CAAC,IAAI,CAAC,kBAAkB,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,KAAK,CAAC,MAAM,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC;YAChF,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC;QACnC,CAAC;QAED,KAAK,CAAC,IAAI,CAAC,iEAAiE,CAAC,CAAC;IAChF,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACf,KAAK,CAAC,IAAI,CAAC,iEAAiE,CAAC,CAAC;IAE9E,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED;;GAEG;AACH,SAAS,cAAc,CAAC,EAAU;IAChC,IAAI,EAAE,GAAG,IAAI;QAAE,OAAO,GAAG,EAAE,IAAI,CAAC;IAChC,IAAI,EAAE,GAAG,KAAK;QAAE,OAAO,GAAG,CAAC,EAAE,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC;IACpD,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,EAAE,GAAG,KAAK,CAAC,CAAC;IACvC,MAAM,OAAO,GAAG,CAAC,CAAC,EAAE,GAAG,KAAK,CAA
C,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;IACjD,OAAO,GAAG,OAAO,KAAK,OAAO,GAAG,CAAC;AACnC,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,cAAc,CAAC,MAAuB,EAAE,IAAY;IAClE,aAAa,CAAC,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;AAChE,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,oBAAoB;IAClC,IAAI,WAAW,GAAG,CAAC,CAAC,CAAC;IAErB,OAAO,CAAC,SAAS,EAAE,KAAK,EAAE,QAAQ,EAAE,GAAG,EAAE,EAAE;QACzC,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,SAAS,GAAG,KAAK,CAAC,GAAG,GAAG,CAAC,CAAC;QAEtD,gCAAgC;QAChC,IAAI,OAAO,KAAK,WAAW,EAAE,CAAC;YAC5B,WAAW,GAAG,OAAO,CAAC;YACtB,MAAM,GAAG,GAAG,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,GAAG,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,MAAM,CAAC,EAAE,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC;YAC3F,MAAM,MAAM,GAAG,cAAc,CAAC,GAAG,CAAC,CAAC;YACnC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,MAAM,GAAG,KAAK,OAAO,MAAM,SAAS,IAAI,KAAK,UAAU,MAAM,IAAI,CAAC,CAAC;YAExF,IAAI,SAAS,KAAK,KAAK,EAAE,CAAC;gBACxB,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YAC7B,CAAC;QACH,CAAC;IACH,CAAC,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Types for the Veto benchmark suite.
|
|
3
|
+
*
|
|
4
|
+
* @module benchmark/types
|
|
5
|
+
*/
|
|
6
|
+
/**
 * One labelled entry of the benchmark dataset: a tool call, the rules it is
 * evaluated against, and the outcome that is expected.
 */
export interface BenchmarkSample {
    /** Identifier that is unique to this sample. */
    id: string;
    /** Name of the tool being called. */
    tool: string;
    /** Arguments passed to the tool. */
    arguments: {
        [key: string]: unknown;
    };
    /** Rules the call is evaluated against. */
    rules: Array<import('../rules/types.js').Rule>;
    /** Decision the evaluation is expected to reach. */
    expectedDecision: 'block' | 'pass';
    /** Expected pass weight (approximate). */
    expectedPassWeight: number;
    /** Expected block weight (approximate). */
    expectedBlockWeight: number;
    /** Source file the sample came from. */
    sourceFile?: string;
    /** Category or domain the sample belongs to. */
    category?: string;
}
|
|
29
|
+
/**
 * Outcome of evaluating one benchmark sample.
 */
export interface BenchmarkResult {
    /** The sample that was evaluated. */
    sample: BenchmarkSample;
    /** Decision the kernel actually returned. */
    actualDecision: 'block' | 'pass';
    /** Pass weight the kernel actually returned. */
    actualPassWeight: number;
    /** Block weight the kernel actually returned. */
    actualBlockWeight: number;
    /** Reasoning reported by the kernel. */
    reasoning: string;
    /** Rules that matched (if the decision was block). */
    matchedRules?: Array<string>;
    /** Whether the decision was correct. */
    correct: boolean;
    /** Latency of the evaluation, in milliseconds. */
    latencyMs: number;
    /** Error description, present if the evaluation failed. */
    error?: string;
}
|
|
52
|
+
/**
 * Binary-classification confusion matrix, with "block" as the positive
 * class and "pass" as the negative class.
 */
export interface ConfusionMatrix {
    /**
     * True positives: block was expected and block was returned.
     */
    truePositive: number;
    /**
     * True negatives: pass was expected and pass was returned.
     */
    trueNegative: number;
    /**
     * False positives: pass was expected but block was returned.
     */
    falsePositive: number;
    /**
     * False negatives: block was expected but pass was returned.
     */
    falseNegative: number;
}
|
|
65
|
+
/**
 * Summary statistics over latencies; every value is in milliseconds.
 */
export interface LatencyStats {
    /** Smallest latency. */
    min: number;
    /** Largest latency. */
    max: number;
    /** Mean latency. */
    mean: number;
    /** Median latency. */
    median: number;
    /** 95th-percentile latency. */
    p95: number;
    /** 99th-percentile latency. */
    p99: number;
    /** Standard deviation of the latencies. */
    stdDev: number;
    /** Total time. */
    totalMs: number;
}
|
|
86
|
+
/**
 * Classification metrics computed from a confusion matrix
 * (TP/TN/FP/FN below refer to its four cells).
 */
export interface ClassificationMetrics {
    /** Overall accuracy. */
    accuracy: number;
    /** Precision for the block class: TP / (TP + FP). */
    precision: number;
    /** Recall for the block class: TP / (TP + FN). */
    recall: number;
    /** F1 score: the harmonic mean of precision and recall. */
    f1Score: number;
    /** False positive rate: FP / (FP + TN). */
    falsePositiveRate: number;
    /** False negative rate: FN / (FN + TP). */
    falseNegativeRate: number;
    /** Matthews Correlation Coefficient. */
    mcc: number;
}
|
|
105
|
+
/**
 * Metrics for the samples of a single category.
 */
export interface CategoryMetrics {
    /** Name of the category. */
    category: string;
    /** How many samples belong to the category. */
    sampleCount: number;
    /** Accuracy within the category. */
    accuracy: number;
    /** Confusion matrix restricted to the category. */
    confusionMatrix: ConfusionMatrix;
    /** Latency statistics restricted to the category. */
    latency: LatencyStats;
}
|
|
120
|
+
/**
 * Full report produced by a benchmark run.
 */
export interface BenchmarkReport {
    /** Timestamp at which the benchmark started. */
    startTime: string;
    /** Timestamp at which the benchmark ended. */
    endTime: string;
    /** Duration of the run, in milliseconds. */
    durationMs: number;
    /** Model used for inference. */
    model: string;
    /** Total number of samples evaluated. */
    totalSamples: number;
    /** Number of samples whose decision was correct. */
    correctCount: number;
    /** Number of samples whose decision was incorrect. */
    incorrectCount: number;
    /** Number of samples that errored. */
    errorCount: number;
    /** Confusion matrix over the whole run. */
    confusionMatrix: ConfusionMatrix;
    /** Classification metrics for the run. */
    metrics: ClassificationMetrics;
    /** Latency statistics for the run. */
    latency: LatencyStats;
    /** Breakdown of the metrics per category. */
    categories: Array<CategoryMetrics>;
    /** Individual results; optional because the list can be large. */
    results?: Array<BenchmarkResult>;
    /** Incorrect predictions, kept for analysis. */
    incorrectPredictions: Array<BenchmarkResult>;
    /** Configuration the run used. */
    config: BenchmarkConfig;
}
|
|
155
|
+
/**
 * Settings that control a benchmark run.
 */
export interface BenchmarkConfig {
    /** Glob pattern selecting the dataset files. */
    datasetPath: string;
    /** Upper bound on the number of samples to evaluate; 0 means all. */
    maxSamples: number;
    /** Whether the samples are shuffled. */
    shuffle: boolean;
    /** Random seed used for shuffling. */
    seed?: number;
    /** Whether the report includes the individual results. */
    includeResults: boolean;
    /** Kernel configuration. */
    kernel: {
        baseUrl: string;
        model: string;
        temperature: number;
        maxTokens: number;
        timeout: number;
    };
    /** Concurrency: the number of parallel requests. */
    concurrency: number;
    /** Output format. */
    outputFormat: 'both' | 'console' | 'json';
    /** Path of the output file (for JSON output). */
    outputPath?: string;
}
|
|
184
|
+
/**
 * Benchmark configuration used by default.
 */
export declare const DEFAULT_BENCHMARK_CONFIG: BenchmarkConfig;
|
|
188
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/benchmark/types.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,wCAAwC;IACxC,EAAE,EAAE,MAAM,CAAC;IACX,6BAA6B;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb,qBAAqB;IACrB,SAAS,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACnC,gCAAgC;IAChC,KAAK,EAAE,OAAO,mBAAmB,EAAE,IAAI,EAAE,CAAC;IAC1C,wBAAwB;IACxB,gBAAgB,EAAE,MAAM,GAAG,OAAO,CAAC;IACnC,yCAAyC;IACzC,kBAAkB,EAAE,MAAM,CAAC;IAC3B,0CAA0C;IAC1C,mBAAmB,EAAE,MAAM,CAAC;IAC5B,wCAAwC;IACxC,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,qCAAqC;IACrC,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,gCAAgC;IAChC,MAAM,EAAE,eAAe,CAAC;IACxB,sCAAsC;IACtC,cAAc,EAAE,MAAM,GAAG,OAAO,CAAC;IACjC,yBAAyB;IACzB,gBAAgB,EAAE,MAAM,CAAC;IACzB,0BAA0B;IAC1B,iBAAiB,EAAE,MAAM,CAAC;IAC1B,gCAAgC;IAChC,SAAS,EAAE,MAAM,CAAC;IAClB,+BAA+B;IAC/B,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;IACxB,uCAAuC;IACvC,OAAO,EAAE,OAAO,CAAC;IACjB,8BAA8B;IAC9B,SAAS,EAAE,MAAM,CAAC;IAClB,iCAAiC;IACjC,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,mDAAmD;IACnD,YAAY,EAAE,MAAM,CAAC;IACrB,iDAAiD;IACjD,YAAY,EAAE,MAAM,CAAC;IACrB,mDAAmD;IACnD,aAAa,EAAE,MAAM,CAAC;IACtB,mDAAmD;IACnD,aAAa,EAAE,MAAM,CAAC;CACvB;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,4BAA4B;IAC5B,GAAG,EAAE,MAAM,CAAC;IACZ,4BAA4B;IAC5B,GAAG,EAAE,MAAM,CAAC;IACZ,yBAAyB;IACzB,IAAI,EAAE,MAAM,CAAC;IACb,2BAA2B;IAC3B,MAAM,EAAE,MAAM,CAAC;IACf,oCAAoC;IACpC,GAAG,EAAE,MAAM,CAAC;IACZ,oCAAoC;IACpC,GAAG,EAAE,MAAM,CAAC;IACZ,+BAA+B;IAC/B,MAAM,EAAE,MAAM,CAAC;IACf,uBAAuB;IACvB,OAAO,EAAE,MAAM,CAAC;CACjB;AAED;;GAEG;AACH,MAAM,WAAW,qBAAqB;IACpC,uBAAuB;IACvB,QAAQ,EAAE,MAAM,CAAC;IACjB,iDAAiD;IACjD,SAAS,EAAE,MAAM,CAAC;IAClB,8CAA8C;IAC9C,MAAM,EAAE,MAAM,CAAC;IACf,uDAAuD;IACvD,OAAO,EAAE,MAAM,CAAC;IAChB,2CAA2C;IAC3C,iBAAiB,EAAE,MAAM,CAAC;IAC1B,2CAA2C;IAC3C,iBAAiB,EAAE,MAAM,CAAC;IAC1B,uCAAuC;IACvC,GAAG,EAAE,MAAM,CAAC;CACb;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,oBAAoB;IACpB,QAAQ,EAAE,MAAM,CAAC;IACjB,yCAAyC;IACzC,WAAW,EAAE,MAAM,CAAC;IACpB,iCAAiC;IACjC,QAAQ,EAAE,MAAM,CAAC
;IACjB,yCAAyC;IACzC,eAAe,EAAE,eAAe,CAAC;IACjC,sCAAsC;IACtC,OAAO,EAAE,YAAY,CAAC;CACvB;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,uCAAuC;IACvC,SAAS,EAAE,MAAM,CAAC;IAClB,qCAAqC;IACrC,OAAO,EAAE,MAAM,CAAC;IAChB,+BAA+B;IAC/B,UAAU,EAAE,MAAM,CAAC;IACnB,+BAA+B;IAC/B,KAAK,EAAE,MAAM,CAAC;IACd,8BAA8B;IAC9B,YAAY,EAAE,MAAM,CAAC;IACrB,oCAAoC;IACpC,YAAY,EAAE,MAAM,CAAC;IACrB,sCAAsC;IACtC,cAAc,EAAE,MAAM,CAAC;IACvB,2BAA2B;IAC3B,UAAU,EAAE,MAAM,CAAC;IACnB,+BAA+B;IAC/B,eAAe,EAAE,eAAe,CAAC;IACjC,6BAA6B;IAC7B,OAAO,EAAE,qBAAqB,CAAC;IAC/B,yBAAyB;IACzB,OAAO,EAAE,YAAY,CAAC;IACtB,6BAA6B;IAC7B,UAAU,EAAE,eAAe,EAAE,CAAC;IAC9B,kDAAkD;IAClD,OAAO,CAAC,EAAE,eAAe,EAAE,CAAC;IAC5B,yCAAyC;IACzC,oBAAoB,EAAE,eAAe,EAAE,CAAC;IACxC,yBAAyB;IACzB,MAAM,EAAE,eAAe,CAAC;CACzB;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,2CAA2C;IAC3C,WAAW,EAAE,MAAM,CAAC;IACpB,4CAA4C;IAC5C,UAAU,EAAE,MAAM,CAAC;IACnB,iCAAiC;IACjC,OAAO,EAAE,OAAO,CAAC;IACjB,gCAAgC;IAChC,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,sDAAsD;IACtD,cAAc,EAAE,OAAO,CAAC;IACxB,2BAA2B;IAC3B,MAAM,EAAE;QACN,OAAO,EAAE,MAAM,CAAC;QAChB,KAAK,EAAE,MAAM,CAAC;QACd,WAAW,EAAE,MAAM,CAAC;QACpB,SAAS,EAAE,MAAM,CAAC;QAClB,OAAO,EAAE,MAAM,CAAC;KACjB,CAAC;IACF,sCAAsC;IACtC,WAAW,EAAE,MAAM,CAAC;IACpB,oBAAoB;IACpB,YAAY,EAAE,SAAS,GAAG,MAAM,GAAG,MAAM,CAAC;IAC1C,kCAAkC;IAClC,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED;;GAEG;AACH,eAAO,MAAM,wBAAwB,EAAE,eActC,CAAC"}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Types for the Veto benchmark suite.
|
|
3
|
+
*
|
|
4
|
+
* @module benchmark/types
|
|
5
|
+
*/
|
|
6
|
+
// Kernel settings referenced by the default configuration below.
const defaultKernelSettings = {
    baseUrl: 'http://localhost:11434/v1',
    model: 'hf.co/ycaleb/veto-warden-4b-GGUF:Q4_K_M',
    temperature: 0.1,
    maxTokens: 256,
    timeout: 30000,
};
/**
 * Benchmark configuration used by default: every sample matched by the
 * dataset glob (maxSamples 0 = all), shuffled, evaluated one request at a
 * time, without individual results in the report, output format 'both'.
 */
export const DEFAULT_BENCHMARK_CONFIG = {
    datasetPath: 'data/batches/**/*.jsonl',
    maxSamples: 0,
    shuffle: true,
    includeResults: false,
    kernel: defaultKernelSettings,
    concurrency: 1,
    outputFormat: 'both',
};
|
|
24
|
+
//# sourceMappingURL=types.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../src/benchmark/types.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AA4LH;;GAEG;AACH,MAAM,CAAC,MAAM,wBAAwB,GAAoB;IACvD,WAAW,EAAE,yBAAyB;IACtC,UAAU,EAAE,CAAC;IACb,OAAO,EAAE,IAAI;IACb,cAAc,EAAE,KAAK;IACrB,MAAM,EAAE;QACN,OAAO,EAAE,2BAA2B;QACpC,KAAK,EAAE,yCAAyC;QAChD,WAAW,EAAE,GAAG;QAChB,SAAS,EAAE,GAAG;QACd,OAAO,EAAE,KAAK;KACf;IACD,WAAW,EAAE,CAAC;IACd,YAAY,EAAE,MAAM;CACrB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"bin.d.ts","sourceRoot":"","sources":["../../src/cli/bin.ts"],"names":[],"mappings":";AAEA;;;;GAIG"}
|
package/dist/cli/bin.js
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Veto CLI entry point.
|
|
4
|
+
*
|
|
5
|
+
* @module cli/bin
|
|
6
|
+
*/
|
|
7
|
+
import { init } from './init.js';
|
|
8
|
+
// Version reported by `veto version`; keep in sync with the published package version.
const VERSION = '1.0.0';
|
|
9
|
+
/**
|
|
10
|
+
* Print the CLI usage text (usage line, commands, options, and examples) with a single console.log call.
|
|
11
|
+
*/
|
|
12
|
+
function printHelp() {
|
|
13
|
+
console.log(`
|
|
14
|
+
Veto - AI Agent Tool Call Guardrail
|
|
15
|
+
|
|
16
|
+
Usage:
|
|
17
|
+
veto <command> [options]
|
|
18
|
+
|
|
19
|
+
Commands:
|
|
20
|
+
init Initialize Veto in the current directory
|
|
21
|
+
version Show version information
|
|
22
|
+
help Show this help message
|
|
23
|
+
|
|
24
|
+
Options:
|
|
25
|
+
--force, -f Force overwrite existing files (init)
|
|
26
|
+
--quiet, -q Suppress output
|
|
27
|
+
--help, -h Show help
|
|
28
|
+
|
|
29
|
+
Examples:
|
|
30
|
+
veto init Initialize Veto in current directory
|
|
31
|
+
veto init --force Reinitialize, overwriting existing files
|
|
32
|
+
`);
|
|
33
|
+
}
|
|
34
|
+
/**
 * Write the CLI version to stdout in the form `veto v<VERSION>`.
 */
function printVersion() {
    const banner = `veto v${VERSION}`;
    console.log(banner);
}
|
|
40
|
+
/**
 * Parse command line arguments.
 *
 * - Long options (`--name`) are recorded in `flags` under `name`.
 * - Short options may be bundled (`-fq`). Only `f`, `q` and `h` are
 *   recognized (mapped to `force`, `quiet` and `help`); other letters are
 *   ignored.
 * - A bare `--` ends option parsing: it is not recorded as a flag, and every
 *   later token is treated as positional even if it starts with `-`.
 * - The first positional token becomes the command; further positional
 *   tokens are ignored.
 *
 * @param {string[]} args - Raw arguments, e.g. `process.argv.slice(2)`.
 * @returns {{ command: string, flags: Object<string, boolean> }} The command
 *   (empty string when none was given) and the flags that were set.
 */
function parseArgs(args) {
    const shortFlagNames = new Map([
        ['f', 'force'],
        ['q', 'quiet'],
        ['h', 'help'],
    ]);
    const flags = {};
    let command = '';
    let optionsEnded = false;
    for (const arg of args) {
        if (!optionsEnded && arg === '--') {
            // End-of-options marker: do not store it as a flag named ''.
            optionsEnded = true;
            continue;
        }
        if (!optionsEnded && arg.startsWith('--')) {
            flags[arg.slice(2)] = true;
        }
        else if (!optionsEnded && arg.startsWith('-')) {
            for (const letter of arg.slice(1)) {
                const name = shortFlagNames.get(letter);
                if (name !== undefined) {
                    flags[name] = true;
                }
            }
        }
        else if (!command) {
            command = arg;
        }
    }
    return { command, flags };
}
|
|
73
|
+
/**
 * Main CLI entry point.
 *
 * Parses `process.argv`, then dispatches:
 * - `--help` / `help`: print the help text, exit code 0.
 * - `--version` / `version`: print the version, exit code 0.
 * - `init`: run `init` with the `force` and `quiet` flags; exit code 0 when
 *   the result reports success, otherwise 1.
 * - no command: print a short hint, exit code 0.
 * - anything else: report the unknown command on stderr, exit code 1.
 *
 * The exit status is set through `process.exitCode` and the function simply
 * returns, instead of calling `process.exit()`. `process.exit()` terminates
 * immediately and can drop output that has not been flushed yet when
 * stdout/stderr is a pipe.
 *
 * @returns {Promise<void>} Resolves once the selected command has finished.
 */
async function main() {
    const { command, flags } = parseArgs(process.argv.slice(2));
    // Handle help flag
    if (flags['help'] || command === 'help') {
        printHelp();
        process.exitCode = 0;
        return;
    }
    // Handle version flag or command
    if (flags['version'] || command === 'version') {
        printVersion();
        process.exitCode = 0;
        return;
    }
    // Handle commands
    switch (command) {
        case 'init': {
            const result = await init({
                force: flags['force'],
                quiet: flags['quiet'],
            });
            process.exitCode = result.success ? 0 : 1;
            return;
        }
        case '': {
            // No command provided
            console.log('Veto - AI Agent Tool Call Guardrail');
            console.log('');
            console.log('Run "veto help" for usage information.');
            console.log('Run "veto init" to initialize Veto in your project.');
            process.exitCode = 0;
            return;
        }
        default: {
            console.error(`Unknown command: ${command}`);
            console.error('Run "veto help" for usage information.');
            process.exitCode = 1;
        }
    }
}
|
|
115
|
+
// Run the CLI
main().catch((error) => {
    // A rejection value is not guaranteed to be an Error (it may be a string,
    // null, ...), so fall back to its string form when it has no message.
    const message = error?.message ?? String(error);
    console.error('Error:', message);
    process.exit(1);
});
|
|
120
|
+
//# sourceMappingURL=bin.js.map
|