evalsense 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +678 -0
- package/bin/evalsense.js +3 -0
- package/dist/chunk-5P7LNNO6.js +747 -0
- package/dist/chunk-5P7LNNO6.js.map +1 -0
- package/dist/chunk-BRPM6AB6.js +925 -0
- package/dist/chunk-BRPM6AB6.js.map +1 -0
- package/dist/chunk-HDJID3GC.cjs +779 -0
- package/dist/chunk-HDJID3GC.cjs.map +1 -0
- package/dist/chunk-Y23VHTD3.cjs +942 -0
- package/dist/chunk-Y23VHTD3.cjs.map +1 -0
- package/dist/cli.cjs +65 -0
- package/dist/cli.cjs.map +1 -0
- package/dist/cli.d.cts +1 -0
- package/dist/cli.d.ts +1 -0
- package/dist/cli.js +63 -0
- package/dist/cli.js.map +1 -0
- package/dist/index.cjs +1126 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +604 -0
- package/dist/index.d.ts +604 -0
- package/dist/index.js +1043 -0
- package/dist/index.js.map +1 -0
- package/dist/metrics/index.cjs +275 -0
- package/dist/metrics/index.cjs.map +1 -0
- package/dist/metrics/index.d.cts +299 -0
- package/dist/metrics/index.d.ts +299 -0
- package/dist/metrics/index.js +191 -0
- package/dist/metrics/index.js.map +1 -0
- package/dist/metrics/opinionated/index.cjs +24 -0
- package/dist/metrics/opinionated/index.cjs.map +1 -0
- package/dist/metrics/opinionated/index.d.cts +163 -0
- package/dist/metrics/opinionated/index.d.ts +163 -0
- package/dist/metrics/opinionated/index.js +3 -0
- package/dist/metrics/opinionated/index.js.map +1 -0
- package/dist/types-C71p0wzM.d.cts +265 -0
- package/dist/types-C71p0wzM.d.ts +265 -0
- package/package.json +91 -0
|
@@ -0,0 +1,747 @@
|
|
|
1
|
+
import { writeFileSync, existsSync } from 'fs';
|
|
2
|
+
import stringify from 'fast-json-stable-stringify';
|
|
3
|
+
import { glob } from 'glob';
|
|
4
|
+
import { resolve } from 'path';
|
|
5
|
+
import { pathToFileURL } from 'url';
|
|
6
|
+
|
|
7
|
+
// src/core/errors.ts
|
|
8
|
+
/**
 * Root of the EvalSense error hierarchy.
 *
 * Behaves like a plain `Error` whose `name` is "EvalSenseError"; the other
 * error classes in this file extend it.
 *
 * @param {string} message - Human-readable description of the problem.
 */
var EvalSenseError = class extends Error {
  constructor(message) {
    super(message);
    // Override the inherited "Error" name so the class shows up in output.
    this.name = "EvalSenseError";
  }
};
|
|
14
|
+
/**
 * Error carrying the details of a failed assertion.
 *
 * @param {string} message - Description of the failure.
 * @param {*} expected - Value stored on `expected`.
 * @param {*} actual - Value stored on `actual`.
 * @param {*} field - Value stored on `field`.
 */
var AssertionError = class extends EvalSenseError {
  expected;
  actual;
  field;
  constructor(message, expected, actual, field) {
    super(message);
    this.name = "AssertionError";
    // Fill the declared fields in declaration order.
    Object.assign(this, { expected, actual, field });
  }
};
|
|
26
|
+
/**
 * Error tied to a dataset.
 *
 * @param {string} message - Description of the problem.
 * @param {*} source - Value stored on `source`.
 */
var DatasetError = class extends EvalSenseError {
  source;
  constructor(message, source) {
    super(message);
    this.name = "DatasetError";
    Object.assign(this, { source });
  }
};
|
|
34
|
+
/**
 * Error describing an integrity problem, carrying the offending ids.
 *
 * @param {string} message - Description of the problem.
 * @param {*} missingIds - Value stored on `missingIds`.
 * @param {*} duplicateIds - Value stored on `duplicateIds`.
 */
var IntegrityError = class extends EvalSenseError {
  missingIds;
  duplicateIds;
  constructor(message, missingIds, duplicateIds) {
    super(message);
    this.name = "IntegrityError";
    Object.assign(this, { missingIds, duplicateIds });
  }
};
|
|
44
|
+
/**
 * Error for configuration problems; adds no data beyond the message.
 *
 * @param {string} message - Description of the problem.
 */
var ConfigurationError = class extends EvalSenseError {
  constructor(message) {
    super(message);
    // Only the name differs from the base class.
    this.name = "ConfigurationError";
  }
};
|
|
50
|
+
/**
 * Error raised around test execution, keeping the underlying failure.
 *
 * @param {string} message - Description of the problem.
 * @param {*} testName - Value stored on `testName`.
 * @param {*} originalError - The underlying thrown value, stored on
 *   `originalError`.
 */
var TestExecutionError = class extends EvalSenseError {
  testName;
  originalError;
  constructor(message, testName, originalError) {
    super(message);
    this.name = "TestExecutionError";
    Object.assign(this, { testName, originalError });
  }
};
|
|
60
|
+
|
|
61
|
+
// src/statistics/confusion-matrix.ts
|
|
62
|
+
/**
 * Builds a confusion matrix from two parallel arrays of labels.
 *
 * Labels are compared by their `String()` form and sorted with the default
 * string sort. `matrix[r][c]` counts pairs whose expected label is
 * `labels[r]` and whose actual label is `labels[c]`. A pair is ignored when
 * either side is `null` or `undefined`.
 *
 * @param {Array} actual - Observed values.
 * @param {Array} expected - Reference values, same length as `actual`.
 * @returns {{matrix: number[][], labels: string[], total: number}} The
 *   counts, the sorted labels, and the number of pairs counted.
 * @throws {Error} If the two arrays differ in length.
 */
function buildConfusionMatrix(actual, expected) {
  if (actual.length !== expected.length) {
    throw new Error(
      `Array length mismatch: actual has ${actual.length} elements, expected has ${expected.length}`
    );
  }

  const isPresent = (value) => value !== void 0 && value !== null;

  // Every distinct non-null label seen on either side, in sorted order.
  const labels = Array.from(
    new Set(
      [...actual, ...expected].filter(isPresent).map((value) => String(value))
    )
  ).sort();
  const positionOf = new Map(labels.map((label, position) => [label, position]));

  const matrix = Array.from(labels, () => new Array(labels.length).fill(0));
  let total = 0;
  for (let i = 0; i < actual.length; i++) {
    const seen = actual[i];
    const wanted = expected[i];
    if (!isPresent(seen) || !isPresent(wanted)) {
      continue;
    }
    total++;
    const col = positionOf.get(String(seen));
    const row = positionOf.get(String(wanted));
    if (row !== void 0 && col !== void 0) {
      matrix[row][col] += 1;
    }
  }

  return { matrix, labels, total };
}
|
|
99
|
+
/**
 * Reads one cell of a confusion matrix.
 *
 * @param {{matrix: number[][], labels: string[]}} cm - Confusion matrix.
 * @param {string} expectedLabel - Label selecting the row.
 * @param {string} actualLabel - Label selecting the column.
 * @returns {number} The cell count, or 0 when either label is unknown or
 *   the cell is missing.
 */
function getCount(cm, expectedLabel, actualLabel) {
  const row = cm.labels.indexOf(expectedLabel);
  const col = cm.labels.indexOf(actualLabel);
  const known = row !== -1 && col !== -1;
  return known ? (cm.matrix[row]?.[col] ?? 0) : 0;
}
|
|
107
|
+
/**
 * Counts pairs where both the expected and the actual label equal `label`.
 *
 * @param {{matrix: number[][], labels: string[]}} cm - Confusion matrix.
 * @param {string} label - Class of interest.
 * @returns {number} The diagonal cell for `label`, as read by `getCount`.
 */
function getTruePositives(cm, label) {
  const diagonalCount = getCount(cm, label, label);
  return diagonalCount;
}
|
|
110
|
+
/**
 * Counts pairs whose actual label is `label` but whose expected label is
 * something else (the label's column, minus the diagonal cell).
 *
 * @param {{matrix: number[][], labels: string[]}} cm - Confusion matrix.
 * @param {string} label - Class of interest.
 * @returns {number} The false-positive count, or 0 for an unknown label.
 */
function getFalsePositives(cm, label) {
  const col = cm.labels.indexOf(label);
  if (col < 0) {
    return 0;
  }
  let wronglyPredicted = 0;
  cm.labels.forEach((_, row) => {
    if (row !== col) {
      wronglyPredicted += cm.matrix[row]?.[col] ?? 0;
    }
  });
  return wronglyPredicted;
}
|
|
121
|
+
/**
 * Counts pairs whose expected label is `label` but whose actual label is
 * something else (the label's row, minus the diagonal cell).
 *
 * @param {{matrix: number[][], labels: string[]}} cm - Confusion matrix.
 * @param {string} label - Class of interest.
 * @returns {number} The false-negative count, or 0 for an unknown label.
 */
function getFalseNegatives(cm, label) {
  const row = cm.labels.indexOf(label);
  if (row < 0) {
    return 0;
  }
  let missed = 0;
  cm.labels.forEach((_, col) => {
    if (col !== row) {
      missed += cm.matrix[row]?.[col] ?? 0;
    }
  });
  return missed;
}
|
|
132
|
+
/**
 * Sums the whole row for `label`, i.e. the number of counted pairs whose
 * expected label is `label`.
 *
 * @param {{matrix: number[][], labels: string[]}} cm - Confusion matrix.
 * @param {string} label - Class of interest.
 * @returns {number} The row total, or 0 for an unknown label.
 */
function getSupport(cm, label) {
  const row = cm.labels.indexOf(label);
  if (row < 0) {
    return 0;
  }
  let rowTotal = 0;
  for (const col of cm.labels.keys()) {
    rowTotal += cm.matrix[row]?.[col] ?? 0;
  }
  return rowTotal;
}
|
|
141
|
+
/**
 * Renders a confusion matrix as a plain-text table.
 *
 * The first line holds the column labels; each following line starts with a
 * row label followed by that row's counts. The row-label column is at least
 * 8 characters wide, and every data column is wide enough for the longest
 * label or count.
 *
 * @param {{matrix: number[][], labels: string[]}} cm - Confusion matrix.
 * @returns {string} The table, with lines joined by "\n".
 */
function formatConfusionMatrix(cm) {
  const labelWidths = cm.labels.map((label) => label.length);
  const maxLabelLen = Math.max(...labelWidths, 8);

  const countWidths = cm.matrix.flat().map((count) => String(count).length);
  const colWidth = Math.max(...countWidths, maxLabelLen);

  const rightAlign = (cell) => String(cell).padStart(colWidth);

  const lines = [];
  // The header starts with blank padding over the row-label column.
  lines.push(" ".repeat(maxLabelLen + 2) + cm.labels.map(rightAlign).join(" "));
  cm.labels.forEach((label, rowIndex) => {
    const cells = cm.matrix[rowIndex].map(rightAlign).join(" ");
    lines.push(label.padEnd(maxLabelLen) + " " + cells);
  });
  return lines.join("\n");
}
|
|
154
|
+
/**
 * Serializes evaluation reports to JSON.
 */
var JsonReporter = class {
  /**
   * Formats a report as deterministic JSON.
   *
   * @param {object} report - Report to serialize.
   * @returns {string} JSON text, or "{}" when the serializer yields nothing.
   */
  format(report) {
    return stringify(this.toSerializable(report)) ?? "{}";
  }

  /**
   * Writes the formatted report to a file as UTF-8.
   *
   * @param {object} report - Report to serialize.
   * @param {string} path - Destination file path.
   */
  writeToFile(report, path) {
    writeFileSync(path, this.format(report), "utf-8");
  }

  /**
   * Copies the known fields of a report into plain objects.
   *
   * Errors are reduced to `{ name, message }`; properties not listed here
   * are dropped.
   *
   * @param {object} report - Report to convert.
   * @returns {object} A plain-object copy of the report.
   */
  toSerializable(report) {
    const toPlainError = (error) =>
      error ? { name: error.name, message: error.message } : void 0;

    const toPlainAssertion = (a) => ({
      type: a.type,
      passed: a.passed,
      message: a.message,
      expected: a.expected,
      actual: a.actual,
      field: a.field,
      class: a.class
    });

    const toPlainFieldMetrics = (fm) => {
      const { confusionMatrix } = fm.metrics;
      return {
        field: fm.field,
        binarized: fm.binarized,
        binarizeThreshold: fm.binarizeThreshold,
        metrics: {
          accuracy: fm.metrics.accuracy,
          perClass: fm.metrics.perClass,
          macroAvg: fm.metrics.macroAvg,
          weightedAvg: fm.metrics.weightedAvg,
          confusionMatrix: {
            labels: confusionMatrix.labels,
            matrix: confusionMatrix.matrix,
            total: confusionMatrix.total
          }
        }
      };
    };

    const toPlainTest = (test) => ({
      name: test.name,
      status: test.status,
      duration: test.duration,
      error: toPlainError(test.error),
      assertions: test.assertions.map((a) => toPlainAssertion(a)),
      fieldMetrics: test.fieldMetrics.map((fm) => toPlainFieldMetrics(fm))
    });

    const toPlainSuite = (suite) => ({
      name: suite.name,
      passed: suite.passed,
      failed: suite.failed,
      errors: suite.errors,
      skipped: suite.skipped,
      duration: suite.duration,
      tests: suite.tests.map((test) => toPlainTest(test))
    });

    return {
      version: report.version,
      timestamp: report.timestamp,
      summary: report.summary,
      suites: report.suites.map((suite) => toPlainSuite(suite)),
      integrity: report.integrity
    };
  }
};
|
|
223
|
+
/**
 * Parses JSON text back into a report object. No validation is performed.
 *
 * @param {string} json - JSON text.
 * @returns {*} Whatever `JSON.parse` produces for the input.
 * @throws {SyntaxError} If the text is not valid JSON.
 */
function parseReport(json) {
  return JSON.parse(json);
}
|
|
227
|
+
|
|
228
|
+
// src/report/console-reporter.ts
|
|
229
|
+
// ANSI escape sequences used by ConsoleReporter.color().
var colors = {
  reset: "\x1B[0m",
  bold: "\x1B[1m",
  dim: "\x1B[2m",
  red: "\x1B[31m",
  green: "\x1B[32m",
  yellow: "\x1B[33m",
  blue: "\x1B[34m",
  cyan: "\x1B[36m",
  gray: "\x1B[90m"
};

// One-character markers printed in front of each test name.
var symbols = {
  pass: "\u2713",
  // ✓
  fail: "\u2717",
  // ✗
  error: "!",
  skip: "-"
};

/**
 * Prints evaluation results to the console via `console.log`.
 */
var ConsoleReporter = class {
  useColors;

  /**
   * @param {boolean} [useColors=true] - Request ANSI colors. Colors are only
   *   used when stdout is also a TTY.
   */
  constructor(useColors = true) {
    // Node sets `isTTY` to true on TTY streams and leaves it undefined
    // otherwise, so comparing against `false` would never disable colors.
    this.useColors = useColors && Boolean(process.stdout.isTTY);
  }

  /**
   * Prints the run header.
   *
   * @param {number} fileCount - Number of eval files about to run.
   */
  printHeader(fileCount) {
    this.log("");
    // NOTE(review): the banner version is hard-coded and may be stale
    // relative to the published package version - confirm.
    this.log(this.color("bold", `EvalSense v0.1.0`));
    this.log(this.color("dim", `Running ${fileCount} eval file(s)...`));
    this.log("");
  }

  /**
   * Prints the full report: every suite, then the summary.
   *
   * @param {object} report - Report with `suites` and `summary`.
   */
  printReport(report) {
    for (const suite of report.suites) {
      this.printSuite(suite.name, suite.tests);
    }
    this.printSummary(report);
  }

  /**
   * Prints a suite's name followed by each of its test results.
   *
   * @param {string} name - Suite name.
   * @param {Array<object>} tests - Test results of the suite.
   */
  printSuite(name, tests) {
    this.log(this.color("bold", ` ${name}`));
    this.log("");
    for (const test of tests) {
      this.printTest(test);
    }
    this.log("");
  }

  /**
   * Prints a single test result: status line, field metrics, the test
   * error (if any), then the message of every failed assertion.
   *
   * @param {object} test - Test result.
   */
  printTest(test) {
    const symbol = this.getStatusSymbol(test.status);
    const statusColor = this.getStatusColor(test.status);
    const duration = this.color("dim", `(${test.duration}ms)`);
    this.log(` ${this.color(statusColor, symbol)} ${test.name} ${duration}`);
    for (const fm of test.fieldMetrics) {
      this.printFieldMetrics(fm);
    }
    if (test.error && test.status === "failed") {
      this.log(this.color("red", ` ${test.error.message}`));
    } else if (test.error && test.status === "error") {
      this.log(this.color("red", ` Error: ${test.error.message}`));
    }
    for (const assertion of test.assertions) {
      if (!assertion.passed) {
        this.log(this.color("red", ` ${assertion.message}`));
      }
    }
  }

  /**
   * Prints a one-line metrics summary for a field, plus a per-class
   * breakdown when there is more than one class.
   *
   * @param {object} fm - Field metrics entry.
   */
  printFieldMetrics(fm) {
    const { metrics, field, binarized, binarizeThreshold } = fm;
    const fieldLabel = binarized ? `${field} (binarized @ ${binarizeThreshold})` : field;
    this.log(
      this.color(
        "cyan",
        ` Field: ${fieldLabel} | Accuracy: ${this.pct(metrics.accuracy)} | F1: ${this.pct(metrics.macroAvg.f1)}`
      )
    );
    if (Object.keys(metrics.perClass).length > 1) {
      for (const [cls, classMetrics] of Object.entries(metrics.perClass)) {
        this.log(
          this.color(
            "dim",
            ` ${cls}: P=${this.pct(classMetrics.precision)} R=${this.pct(classMetrics.recall)} F1=${this.pct(classMetrics.f1)} (n=${classMetrics.support})`
          )
        );
      }
    }
  }

  /**
   * Prints the totals and the overall verdict.
   *
   * @param {object} report - Report whose `summary` is printed.
   */
  printSummary(report) {
    const { summary } = report;
    this.log(this.color("bold", " Summary"));
    this.log("");
    const passedStr = this.color("green", `${summary.passed} passed`);
    const failedStr = summary.failed > 0 ? this.color("red", `${summary.failed} failed`) : `${summary.failed} failed`;
    const errorsStr = summary.errors > 0 ? this.color("red", `${summary.errors} errors`) : `${summary.errors} errors`;
    const skippedStr = summary.skipped > 0 ? this.color("yellow", `${summary.skipped} skipped`) : `${summary.skipped} skipped`;
    this.log(` Tests: ${passedStr}, ${failedStr}, ${errorsStr}, ${skippedStr}`);
    this.log(` Suites: ${summary.totalSuites}`);
    this.log(` Duration: ${this.formatDuration(summary.duration)}`);
    this.log("");
    if (summary.failed === 0 && summary.errors === 0) {
      this.log(this.color("green", " All tests passed!"));
    } else {
      this.log(this.color("red", " Some tests failed."));
    }
    this.log("");
  }

  /**
   * Prints the confusion matrix of a field, one table line per log call.
   *
   * @param {object} fm - Field metrics entry.
   */
  printConfusionMatrix(fm) {
    this.log("");
    this.log(this.color("bold", ` Confusion Matrix: ${fm.field}`));
    this.log("");
    const matrixStr = formatConfusionMatrix(fm.metrics.confusionMatrix);
    for (const line of matrixStr.split("\n")) {
      this.log(` ${line}`);
    }
    this.log("");
  }

  /**
   * Formats a ratio as a percentage with one decimal place.
   *
   * @param {number} value - Ratio, e.g. 0.5.
   * @returns {string} E.g. "50.0%".
   */
  pct(value) {
    return `${(value * 100).toFixed(1)}%`;
  }

  /**
   * Formats a duration: milliseconds below one second, otherwise seconds
   * with two decimals.
   *
   * @param {number} ms - Duration in milliseconds.
   * @returns {string} E.g. "999ms" or "1.50s".
   */
  formatDuration(ms) {
    if (ms < 1e3) {
      return `${ms}ms`;
    }
    return `${(ms / 1e3).toFixed(2)}s`;
  }

  /**
   * Gets the marker for a test status.
   *
   * @param {string} status - "passed", "failed", "error" or "skipped".
   * @returns {string|undefined} The marker; undefined for any other status.
   */
  getStatusSymbol(status) {
    switch (status) {
      case "passed":
        return symbols.pass;
      case "failed":
        return symbols.fail;
      case "error":
        return symbols.error;
      case "skipped":
        return symbols.skip;
    }
  }

  /**
   * Gets the color name for a test status.
   *
   * @param {string} status - "passed", "failed", "error" or "skipped".
   * @returns {string|undefined} A key of `colors`; undefined for any other
   *   status.
   */
  getStatusColor(status) {
    switch (status) {
      case "passed":
        return "green";
      case "failed":
        return "red";
      case "error":
        return "red";
      case "skipped":
        return "yellow";
    }
  }

  /**
   * Wraps text in the named ANSI color when colors are enabled.
   *
   * @param {string} colorName - A key of `colors`.
   * @param {string} text - Text to wrap.
   * @returns {string} The wrapped text, or `text` unchanged when colors are
   *   off.
   */
  color(colorName, text) {
    if (!this.useColors) {
      return text;
    }
    return `${colors[colorName]}${text}${colors.reset}`;
  }

  /**
   * Logs a line.
   *
   * @param {string} message - Line to print.
   */
  log(message) {
    console.log(message);
  }
};
|
|
423
|
+
// Glob patterns searched when the caller supplies none.
var DEFAULT_PATTERNS = ["**/*.eval.js", "**/*.eval.ts", "**/*.eval.mjs"];

// Glob patterns excluded when the caller supplies none.
var DEFAULT_IGNORE = ["**/node_modules/**", "**/dist/**", "**/build/**", "**/.git/**"];

/**
 * Finds eval files by globbing each pattern in turn.
 *
 * @param {object} [options]
 * @param {string[]} [options.patterns=DEFAULT_PATTERNS] - Globs to match.
 * @param {string[]} [options.ignore=DEFAULT_IGNORE] - Globs to exclude.
 * @param {string} [options.cwd=process.cwd()] - Directory to search from.
 * @returns {Promise<string[]>} Absolute file paths, de-duplicated and
 *   sorted.
 */
async function discoverEvalFiles(options = {}) {
  const { patterns = DEFAULT_PATTERNS, ignore = DEFAULT_IGNORE, cwd = process.cwd() } = options;
  const globOptions = { cwd, ignore, absolute: true, nodir: true };

  // A Set drops files that match more than one pattern.
  const found = new Set();
  for (const pattern of patterns) {
    const matches = await glob(pattern, globOptions);
    for (const match of matches) {
      found.add(match);
    }
  }
  return Array.from(found).sort();
}
|
|
453
|
+
/**
 * Resolves a path to the eval files it designates.
 *
 * A file path yields just that file; any other existing path is searched
 * with `discoverEvalFiles`, using it as the working directory.
 *
 * @param {string} path - File or directory, relative to `process.cwd()`
 *   unless absolute.
 * @param {object} [options] - Forwarded to `discoverEvalFiles`; its `cwd`
 *   is overridden.
 * @returns {Promise<string[]>} Absolute file paths.
 * @throws {Error} If the path does not exist.
 */
async function discoverFromPath(path, options = {}) {
  const target = resolve(process.cwd(), path);
  if (existsSync(target) === false) {
    throw new Error(`Path does not exist: ${path}`);
  }
  const fs = await import('fs');
  const pointsAtFile = fs.statSync(target).isFile();
  return pointsAtFile ? [target] : discoverEvalFiles({ ...options, cwd: target });
}
|
|
468
|
+
/**
 * Keeps the files whose path contains `filter`, ignoring case.
 *
 * @param {string[]} files - Candidate file paths.
 * @param {string} [filter] - Substring to look for; when falsy the input
 *   array itself is returned.
 * @returns {string[]} The matching paths.
 */
function filterFiles(files, filter) {
  if (filter) {
    const needle = filter.toLowerCase();
    return files.filter((candidate) => candidate.toLowerCase().includes(needle));
  }
  return files;
}
|
|
475
|
+
|
|
476
|
+
// src/core/types.ts
|
|
477
|
+
// Named numeric exit codes.
var ExitCodes = {
  SUCCESS: 0,
  // Non-zero codes, one per failure category.
  ASSERTION_FAILURE: 1,
  INTEGRITY_FAILURE: 2,
  EXECUTION_ERROR: 3,
  CONFIGURATION_ERROR: 4
};
|
|
484
|
+
|
|
485
|
+
// src/core/context.ts
|
|
486
|
+
// Module-wide registry of suites plus the suite currently being defined.
var globalContext = createEmptyContext();

// Collector for the test that is currently executing; null between tests.
var currentTestState = null;

/**
 * Creates a context with no current suite and empty suite/result lists.
 *
 * @returns {{currentSuite: null, suites: Array, results: Array}}
 */
function createEmptyContext() {
  const context = { currentSuite: null, suites: [], results: [] };
  return context;
}

/**
 * @returns {object|null} The suite set by `setCurrentSuite`, or null.
 */
function getCurrentSuite() {
  const { currentSuite } = globalContext;
  return currentSuite;
}

/**
 * Sets the suite that `addTestToCurrentSuite` adds tests to.
 *
 * @param {object|null} suite - The suite to make current.
 */
function setCurrentSuite(suite) {
  Object.assign(globalContext, { currentSuite: suite });
}

/**
 * Appends a suite to the list of registered suites.
 *
 * @param {object} suite - The suite to register.
 */
function addSuite(suite) {
  const { suites } = globalContext;
  suites.push(suite);
}

/**
 * Appends a test to the current suite's `tests` list.
 *
 * @param {object} test - The test to add.
 * @throws {Error} If there is no current suite.
 */
function addTestToCurrentSuite(test) {
  const { currentSuite } = globalContext;
  if (currentSuite) {
    currentSuite.tests.push(test);
    return;
  }
  throw new Error("Cannot add test outside of a describe() block");
}

/**
 * @returns {Array} The live list of registered suites (not a copy).
 */
function getSuites() {
  const { suites } = globalContext;
  return suites;
}

/**
 * Starts a fresh collector for assertions and field metrics.
 */
function startTestExecution() {
  const freshState = { assertions: [], fieldMetrics: [] };
  currentTestState = freshState;
}

/**
 * Ends collection and returns what was gathered.
 *
 * @returns {{assertions: Array, fieldMetrics: Array}} The collected data;
 *   empty lists when no collection was active.
 */
function endTestExecution() {
  const finished = currentTestState ?? { assertions: [], fieldMetrics: [] };
  currentTestState = null;
  return finished;
}

/**
 * Records an assertion result; ignored when no test is executing.
 *
 * @param {object} result - The assertion result.
 */
function recordAssertion(result) {
  currentTestState?.assertions.push(result);
}

/**
 * Records field metrics; ignored when no test is executing.
 *
 * @param {object} metrics - The field metrics entry.
 */
function recordFieldMetrics(metrics) {
  currentTestState?.fieldMetrics.push(metrics);
}
|
|
534
|
+
|
|
535
|
+
// src/runner/executor.ts
|
|
536
|
+
/**
 * Loads every eval file, then runs all registered suites in order.
 *
 * Files are imported one at a time; the suites run are whatever
 * `getSuites()` returns once all imports have finished.
 *
 * @param {string[]} files - Paths of the eval files to import.
 * @param {object} [options] - Forwarded to `executeSuite`.
 * @param {boolean} [options.bail] - Stop after the first suite that reports
 *   a failure or an error.
 * @returns {Promise<object>} The report produced by `buildReport`.
 * @throws {TestExecutionError} If a file cannot be imported; the thrown
 *   value is kept on `originalError`.
 */
async function executeEvalFiles(files, options = {}) {
  const startTime = Date.now();

  for (const file of files) {
    try {
      await import(pathToFileURL(file).href);
    } catch (error) {
      throw new TestExecutionError(`Failed to load eval file: ${file}`, file, error);
    }
  }

  const suiteResults = [];
  for (const suite of getSuites()) {
    const result = await executeSuite(suite, options);
    suiteResults.push(result);
    // executeSuite bails on failures and errors alike, so the run-level
    // check has to look at both.
    if (options.bail && (result.failed > 0 || result.errors > 0)) {
      break;
    }
  }

  return buildReport(suiteResults, Date.now() - startTime);
}
|
|
558
|
+
/**
 * Runs one suite: `beforeAll` hooks, each test wrapped in `beforeEach` and
 * `afterEach` hooks, then `afterAll` hooks.
 *
 * - A failing `beforeAll` hook marks every test as "error" and returns
 *   immediately; no test and no `afterAll` hook runs.
 * - A test is skipped when its name does not contain `options.filter`
 *   (case-insensitive) or starts with "[SKIPPED]".
 * - A failing `beforeEach` hook marks that test as "error" once; the test
 *   body, the remaining `beforeEach` hooks and the `afterEach` hooks are
 *   not run for it.
 * - Errors thrown by `afterEach` and `afterAll` hooks are ignored.
 *
 * @param {object} suite - Suite with `name`, `tests` and optional hook
 *   arrays `beforeAll`, `beforeEach`, `afterEach`, `afterAll`.
 * @param {object} [options]
 * @param {string} [options.filter] - Substring a test name must contain.
 * @param {number} [options.timeout] - Per-test timeout, passed to
 *   `executeTest`.
 * @param {boolean} [options.bail] - Stop after the first failure or error.
 * @returns {Promise<object>} `{ name, tests, passed, failed, errors,
 *   skipped, duration }`.
 */
async function executeSuite(suite, options = {}) {
  const startTime = Date.now();
  const testResults = [];
  let passed = 0;
  let failed = 0;
  let errors = 0;
  let skipped = 0;

  // Builds the suite result from the counters as they stand right now.
  const summarize = () => ({
    name: suite.name,
    tests: testResults,
    passed,
    failed,
    errors,
    skipped,
    duration: Date.now() - startTime
  });

  // Records a test that never ran because a hook threw.
  const recordHookError = (testName, hookKind, thrown) => {
    const message = thrown instanceof Error ? thrown.message : String(thrown);
    testResults.push({
      name: testName,
      status: "error",
      assertions: [],
      fieldMetrics: [],
      duration: 0,
      error: new Error(`${hookKind} hook failed: ${message}`)
    });
    errors++;
  };

  for (const hook of suite.beforeAll ?? []) {
    try {
      await hook();
    } catch (error) {
      for (const test of suite.tests) {
        recordHookError(test.name, "beforeAll", error);
      }
      return summarize();
    }
  }

  for (const test of suite.tests) {
    const filteredOut = options.filter && !test.name.toLowerCase().includes(options.filter.toLowerCase());
    if (filteredOut || test.name.startsWith("[SKIPPED]")) {
      testResults.push({
        name: test.name,
        status: "skipped",
        assertions: [],
        fieldMetrics: [],
        duration: 0
      });
      skipped++;
      continue;
    }

    // A `continue` inside the hook loop would only move on to the next
    // hook, so the failure is carried out of the loop in a flag.
    let hookFailed = false;
    for (const hook of suite.beforeEach ?? []) {
      try {
        await hook();
      } catch (error) {
        recordHookError(test.name, "beforeEach", error);
        hookFailed = true;
        break;
      }
    }
    if (hookFailed) {
      if (options.bail) {
        break;
      }
      continue;
    }

    const result = await executeTest(test.name, test.fn, options.timeout);
    testResults.push(result);
    if (result.status === "passed") {
      passed++;
    } else if (result.status === "failed") {
      failed++;
    } else if (result.status === "error") {
      errors++;
    }

    for (const hook of suite.afterEach ?? []) {
      try {
        await hook();
      } catch {
        // Best effort: a failing afterEach hook does not change the result.
      }
    }

    if (options.bail && (failed > 0 || errors > 0)) {
      break;
    }
  }

  for (const hook of suite.afterAll ?? []) {
    try {
      await hook();
    } catch {
      // Best effort: a failing afterAll hook does not change the result.
    }
  }

  return summarize();
}
|
|
667
|
+
/**
 * Runs a single test function under a timeout and classifies the outcome.
 *
 * Assertions and field metrics recorded while the test runs are collected
 * through `startTestExecution` / `endTestExecution`.
 *
 * @param {string} name - Test name, copied into the result.
 * @param {Function} fn - Test body; may return a promise.
 * @param {number} [timeout=30000] - Milliseconds before the test is treated
 *   as timed out.
 * @returns {Promise<object>} `{ name, status, assertions, fieldMetrics,
 *   duration, error? }`. `status` is "passed" when `fn` settles without
 *   throwing, "failed" when it throws an `AssertionError`, and "error" for
 *   anything else, including the timeout.
 */
async function executeTest(name, fn, timeout = 3e4) {
  const startTime = Date.now();
  startTestExecution();
  let timer;
  try {
    await Promise.race([
      fn(),
      new Promise((_, reject) => {
        timer = setTimeout(() => reject(new Error(`Test timed out after ${timeout}ms`)), timeout);
      })
    ]);
    const { assertions, fieldMetrics } = endTestExecution();
    return {
      name,
      status: "passed",
      assertions,
      fieldMetrics,
      duration: Date.now() - startTime
    };
  } catch (error) {
    const { assertions, fieldMetrics } = endTestExecution();
    if (error instanceof AssertionError) {
      return {
        name,
        status: "failed",
        assertions,
        fieldMetrics,
        duration: Date.now() - startTime,
        error
      };
    }
    return {
      name,
      status: "error",
      assertions,
      fieldMetrics,
      duration: Date.now() - startTime,
      // Non-Error throwables are wrapped so `error.message` always exists.
      error: error instanceof Error ? error : new Error(String(error))
    };
  } finally {
    // Without this the pending timer keeps the process alive for the full
    // timeout after the test has already finished.
    clearTimeout(timer);
  }
}
|
|
707
|
+
/**
 * Aggregates suite results into a report.
 *
 * @param {Array<object>} suiteResults - Suite results, each with `tests`,
 *   `passed`, `failed`, `errors` and `skipped`.
 * @param {number} totalDuration - Duration stored in the summary.
 * @returns {object} `{ version, timestamp, suites, summary }`, where
 *   `timestamp` is the current time in ISO format and `suites` is the input
 *   array itself.
 */
function buildReport(suiteResults, totalDuration) {
  const totals = suiteResults.reduce(
    (sum, suite) => ({
      tests: sum.tests + suite.tests.length,
      passed: sum.passed + suite.passed,
      failed: sum.failed + suite.failed,
      errors: sum.errors + suite.errors,
      skipped: sum.skipped + suite.skipped
    }),
    { tests: 0, passed: 0, failed: 0, errors: 0, skipped: 0 }
  );

  const summary = {
    totalSuites: suiteResults.length,
    totalTests: totals.tests,
    passed: totals.passed,
    failed: totals.failed,
    errors: totals.errors,
    skipped: totals.skipped,
    duration: totalDuration
  };

  return {
    version: "1.0.0",
    timestamp: (/* @__PURE__ */ new Date()).toISOString(),
    suites: suiteResults,
    summary
  };
}
|
|
735
|
+
/**
 * Maps a report to an exit code. Errors take precedence over failures.
 *
 * @param {object} report - Report whose `summary.errors` and
 *   `summary.failed` are inspected.
 * @returns {number} `EXECUTION_ERROR` when any test errored, otherwise
 *   `ASSERTION_FAILURE` when any test failed, otherwise `SUCCESS`.
 */
function getExitCode(report) {
  const { errors, failed } = report.summary;
  if (errors > 0) {
    return ExitCodes.EXECUTION_ERROR;
  }
  return failed > 0 ? ExitCodes.ASSERTION_FAILURE : ExitCodes.SUCCESS;
}
|
|
744
|
+
|
|
745
|
+
export { AssertionError, ConfigurationError, ConsoleReporter, DatasetError, EvalSenseError, ExitCodes, IntegrityError, JsonReporter, TestExecutionError, addSuite, addTestToCurrentSuite, buildConfusionMatrix, discoverEvalFiles, discoverFromPath, executeEvalFiles, filterFiles, formatConfusionMatrix, getCurrentSuite, getExitCode, getFalseNegatives, getFalsePositives, getSupport, getTruePositives, parseReport, recordAssertion, recordFieldMetrics, setCurrentSuite };
|
|
746
|
+
//# sourceMappingURL=chunk-5P7LNNO6.js.map
|
|
747
|
+
//# sourceMappingURL=chunk-5P7LNNO6.js.map
|