axiom 0.34.1 → 0.35.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bin.cjs +1849 -1445
- package/dist/bin.cjs.map +1 -1
- package/dist/bin.js +4 -4
- package/dist/{chunk-AAEGYMAU.js → chunk-JRHHVAOI.js} +21 -6
- package/dist/chunk-JRHHVAOI.js.map +1 -0
- package/dist/chunk-TEYHYMEU.js +483 -0
- package/dist/chunk-TEYHYMEU.js.map +1 -0
- package/dist/{chunk-4G7GDQ6Q.js → chunk-W2A3Y5FZ.js} +145 -724
- package/dist/chunk-W2A3Y5FZ.js.map +1 -0
- package/dist/evals.cjs +2440 -2072
- package/dist/evals.cjs.map +1 -1
- package/dist/evals.js +6 -5
- package/dist/evals.js.map +1 -1
- package/dist/feedback.cjs +156 -0
- package/dist/feedback.cjs.map +1 -0
- package/dist/feedback.d.cts +65 -0
- package/dist/feedback.d.ts +65 -0
- package/dist/feedback.js +128 -0
- package/dist/feedback.js.map +1 -0
- package/dist/index.cjs +20 -5
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +1 -1
- package/dist/run-vitest-GI327YTG.js +141 -0
- package/dist/run-vitest-GI327YTG.js.map +1 -0
- package/package.json +13 -2
- package/dist/chunk-4G7GDQ6Q.js.map +0 -1
- package/dist/chunk-AAEGYMAU.js.map +0 -1
package/dist/bin.cjs
CHANGED
|
@@ -43,137 +43,13 @@ var init_cjs_shims = __esm({
|
|
|
43
43
|
}
|
|
44
44
|
});
|
|
45
45
|
|
|
46
|
-
// src/bin.ts
|
|
47
|
-
var bin_exports = {};
|
|
48
|
-
__export(bin_exports, {
|
|
49
|
-
program: () => program
|
|
50
|
-
});
|
|
51
|
-
module.exports = __toCommonJS(bin_exports);
|
|
52
|
-
init_cjs_shims();
|
|
53
|
-
var import_commander3 = require("commander");
|
|
54
|
-
|
|
55
|
-
// src/cli/commands/eval.command.ts
|
|
56
|
-
init_cjs_shims();
|
|
57
|
-
var import_commander = require("commander");
|
|
58
|
-
var import_nanoid = require("nanoid");
|
|
59
|
-
|
|
60
|
-
// src/evals/run-vitest.ts
|
|
61
|
-
init_cjs_shims();
|
|
62
|
-
|
|
63
|
-
// ../../node_modules/.pnpm/tinyrainbow@2.0.0/node_modules/tinyrainbow/dist/node.js
|
|
64
|
-
init_cjs_shims();
|
|
65
|
-
|
|
66
|
-
// ../../node_modules/.pnpm/tinyrainbow@2.0.0/node_modules/tinyrainbow/dist/chunk-BVHSVHOK.js
|
|
67
|
-
init_cjs_shims();
|
|
68
|
-
var f = {
|
|
69
|
-
reset: [0, 0],
|
|
70
|
-
bold: [1, 22, "\x1B[22m\x1B[1m"],
|
|
71
|
-
dim: [2, 22, "\x1B[22m\x1B[2m"],
|
|
72
|
-
italic: [3, 23],
|
|
73
|
-
underline: [4, 24],
|
|
74
|
-
inverse: [7, 27],
|
|
75
|
-
hidden: [8, 28],
|
|
76
|
-
strikethrough: [9, 29],
|
|
77
|
-
black: [30, 39],
|
|
78
|
-
red: [31, 39],
|
|
79
|
-
green: [32, 39],
|
|
80
|
-
yellow: [33, 39],
|
|
81
|
-
blue: [34, 39],
|
|
82
|
-
magenta: [35, 39],
|
|
83
|
-
cyan: [36, 39],
|
|
84
|
-
white: [37, 39],
|
|
85
|
-
gray: [90, 39],
|
|
86
|
-
bgBlack: [40, 49],
|
|
87
|
-
bgRed: [41, 49],
|
|
88
|
-
bgGreen: [42, 49],
|
|
89
|
-
bgYellow: [43, 49],
|
|
90
|
-
bgBlue: [44, 49],
|
|
91
|
-
bgMagenta: [45, 49],
|
|
92
|
-
bgCyan: [46, 49],
|
|
93
|
-
bgWhite: [47, 49],
|
|
94
|
-
blackBright: [90, 39],
|
|
95
|
-
redBright: [91, 39],
|
|
96
|
-
greenBright: [92, 39],
|
|
97
|
-
yellowBright: [93, 39],
|
|
98
|
-
blueBright: [94, 39],
|
|
99
|
-
magentaBright: [95, 39],
|
|
100
|
-
cyanBright: [96, 39],
|
|
101
|
-
whiteBright: [97, 39],
|
|
102
|
-
bgBlackBright: [100, 49],
|
|
103
|
-
bgRedBright: [101, 49],
|
|
104
|
-
bgGreenBright: [102, 49],
|
|
105
|
-
bgYellowBright: [103, 49],
|
|
106
|
-
bgBlueBright: [104, 49],
|
|
107
|
-
bgMagentaBright: [105, 49],
|
|
108
|
-
bgCyanBright: [106, 49],
|
|
109
|
-
bgWhiteBright: [107, 49]
|
|
110
|
-
};
|
|
111
|
-
var h = Object.entries(f);
|
|
112
|
-
function a(n) {
|
|
113
|
-
return String(n);
|
|
114
|
-
}
|
|
115
|
-
a.open = "";
|
|
116
|
-
a.close = "";
|
|
117
|
-
function C(n = false) {
|
|
118
|
-
let e = typeof process != "undefined" ? process : void 0, i = (e == null ? void 0 : e.env) || {}, g = (e == null ? void 0 : e.argv) || [];
|
|
119
|
-
return !("NO_COLOR" in i || g.includes("--no-color")) && ("FORCE_COLOR" in i || g.includes("--color") || (e == null ? void 0 : e.platform) === "win32" || n && i.TERM !== "dumb" || "CI" in i) || typeof window != "undefined" && !!window.chrome;
|
|
120
|
-
}
|
|
121
|
-
function p(n = false) {
|
|
122
|
-
let e = C(n), i = (r2, t, c, o) => {
|
|
123
|
-
let l = "", s2 = 0;
|
|
124
|
-
do
|
|
125
|
-
l += r2.substring(s2, o) + c, s2 = o + t.length, o = r2.indexOf(t, s2);
|
|
126
|
-
while (~o);
|
|
127
|
-
return l + r2.substring(s2);
|
|
128
|
-
}, g = (r2, t, c = r2) => {
|
|
129
|
-
let o = (l) => {
|
|
130
|
-
let s2 = String(l), b = s2.indexOf(t, r2.length);
|
|
131
|
-
return ~b ? r2 + i(s2, t, c, b) + t : r2 + s2 + t;
|
|
132
|
-
};
|
|
133
|
-
return o.open = r2, o.close = t, o;
|
|
134
|
-
}, u2 = {
|
|
135
|
-
isColorSupported: e
|
|
136
|
-
}, d = (r2) => `\x1B[${r2}m`;
|
|
137
|
-
for (let [r2, t] of h)
|
|
138
|
-
u2[r2] = e ? g(
|
|
139
|
-
d(t[0]),
|
|
140
|
-
d(t[1]),
|
|
141
|
-
t[2]
|
|
142
|
-
) : a;
|
|
143
|
-
return u2;
|
|
144
|
-
}
|
|
145
|
-
|
|
146
|
-
// ../../node_modules/.pnpm/tinyrainbow@2.0.0/node_modules/tinyrainbow/dist/node.js
|
|
147
|
-
var import_tty = require("tty");
|
|
148
|
-
var r = process.env.FORCE_TTY !== void 0 || (0, import_tty.isatty)(1);
|
|
149
|
-
var u = p(r);
|
|
150
|
-
|
|
151
|
-
// src/evals/run-vitest.ts
|
|
152
|
-
var import_node_path = require("path");
|
|
153
|
-
var import_node_fs = require("fs");
|
|
154
|
-
var import_node_os = require("os");
|
|
155
|
-
var import_node_path2 = __toESM(require("path"), 1);
|
|
156
|
-
var import_vite_tsconfig_paths = __toESM(require("vite-tsconfig-paths"), 1);
|
|
157
|
-
var import_node = require("vitest/node");
|
|
158
|
-
|
|
159
|
-
// src/evals/reporter.ts
|
|
160
|
-
init_cjs_shims();
|
|
161
|
-
|
|
162
|
-
// src/evals/context/storage.ts
|
|
163
|
-
init_cjs_shims();
|
|
164
|
-
var import_api9 = require("@opentelemetry/api");
|
|
165
|
-
|
|
166
46
|
// src/evals/context/manager.ts
|
|
167
|
-
init_cjs_shims();
|
|
168
|
-
var import_node_module = require("module");
|
|
169
|
-
var CONTEXT_MANAGER_SYMBOL = Symbol.for("axiom.context_manager");
|
|
170
47
|
function getGlobalContextManager() {
|
|
171
48
|
return globalThis[CONTEXT_MANAGER_SYMBOL];
|
|
172
49
|
}
|
|
173
50
|
function setGlobalContextManager(manager) {
|
|
174
51
|
globalThis[CONTEXT_MANAGER_SYMBOL] = manager;
|
|
175
52
|
}
|
|
176
|
-
var isNodeJS = typeof process !== "undefined" && !!process.versions?.node;
|
|
177
53
|
function getContextManager() {
|
|
178
54
|
const existing = getGlobalContextManager();
|
|
179
55
|
if (existing) return existing;
|
|
@@ -229,27 +105,34 @@ function createAsyncHook(_name) {
|
|
|
229
105
|
}
|
|
230
106
|
};
|
|
231
107
|
}
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
108
|
+
var import_node_module, CONTEXT_MANAGER_SYMBOL, isNodeJS;
|
|
109
|
+
var init_manager = __esm({
|
|
110
|
+
"src/evals/context/manager.ts"() {
|
|
111
|
+
"use strict";
|
|
112
|
+
init_cjs_shims();
|
|
113
|
+
import_node_module = require("module");
|
|
114
|
+
CONTEXT_MANAGER_SYMBOL = Symbol.for("axiom.context_manager");
|
|
115
|
+
isNodeJS = typeof process !== "undefined" && !!process.versions?.node;
|
|
116
|
+
}
|
|
117
|
+
});
|
|
235
118
|
|
|
236
119
|
// src/evals/context/global-flags.ts
|
|
237
|
-
init_cjs_shims();
|
|
238
|
-
var GLOBAL_OVERRIDES_SYMBOL = Symbol.for("axiom.global_flag_overrides");
|
|
239
120
|
function setRoot(val) {
|
|
240
121
|
globalThis[GLOBAL_OVERRIDES_SYMBOL] = val;
|
|
241
122
|
}
|
|
242
123
|
function setGlobalFlagOverrides(overrides2) {
|
|
243
124
|
setRoot(overrides2);
|
|
244
125
|
}
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
126
|
+
var GLOBAL_OVERRIDES_SYMBOL;
|
|
127
|
+
var init_global_flags = __esm({
|
|
128
|
+
"src/evals/context/global-flags.ts"() {
|
|
129
|
+
"use strict";
|
|
130
|
+
init_cjs_shims();
|
|
131
|
+
GLOBAL_OVERRIDES_SYMBOL = Symbol.for("axiom.global_flag_overrides");
|
|
132
|
+
}
|
|
133
|
+
});
|
|
249
134
|
|
|
250
135
|
// src/cli/utils/format-zod-errors.ts
|
|
251
|
-
init_cjs_shims();
|
|
252
|
-
var import_zod = require("zod");
|
|
253
136
|
function formatZodErrors(error) {
|
|
254
137
|
const issues = error.issues;
|
|
255
138
|
const messages = [];
|
|
@@ -345,13 +228,16 @@ function generateExampleForIssue(issue, path3) {
|
|
|
345
228
|
}
|
|
346
229
|
return null;
|
|
347
230
|
}
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
231
|
+
var import_zod;
|
|
232
|
+
var init_format_zod_errors = __esm({
|
|
233
|
+
"src/cli/utils/format-zod-errors.ts"() {
|
|
234
|
+
"use strict";
|
|
235
|
+
init_cjs_shims();
|
|
236
|
+
import_zod = require("zod");
|
|
237
|
+
}
|
|
238
|
+
});
|
|
352
239
|
|
|
353
240
|
// src/util/zod-internals.ts
|
|
354
|
-
init_cjs_shims();
|
|
355
241
|
function isZodV4Schema(schema) {
|
|
356
242
|
if (!schema || typeof schema !== "object") return false;
|
|
357
243
|
const s2 = schema;
|
|
@@ -381,20 +267,6 @@ function getDefRawType(def) {
|
|
|
381
267
|
if (raw == null) return void 0;
|
|
382
268
|
return typeof raw === "string" ? raw : String(raw);
|
|
383
269
|
}
|
|
384
|
-
var KNOWN_KINDS = /* @__PURE__ */ new Set([
|
|
385
|
-
"object",
|
|
386
|
-
"optional",
|
|
387
|
-
"default",
|
|
388
|
-
"nullable",
|
|
389
|
-
"readonly",
|
|
390
|
-
"prefault",
|
|
391
|
-
"nonoptional",
|
|
392
|
-
"catch",
|
|
393
|
-
"array",
|
|
394
|
-
"record",
|
|
395
|
-
"union",
|
|
396
|
-
"discriminatedunion"
|
|
397
|
-
]);
|
|
398
270
|
function getKind(schemaOrDef) {
|
|
399
271
|
const def = schemaOrDef && typeof schemaOrDef === "object" && "type" in schemaOrDef ? schemaOrDef : getDef(schemaOrDef);
|
|
400
272
|
const raw = getDefRawType(def);
|
|
@@ -429,16 +301,6 @@ function getDefaultValue(schema) {
|
|
|
429
301
|
const def = getDef(schema);
|
|
430
302
|
return def?.defaultValue;
|
|
431
303
|
}
|
|
432
|
-
var TRANSPARENT_WRAPPERS = [
|
|
433
|
-
"optional",
|
|
434
|
-
"nullable",
|
|
435
|
-
"default",
|
|
436
|
-
"readonly",
|
|
437
|
-
"prefault",
|
|
438
|
-
"nonoptional",
|
|
439
|
-
"catch"
|
|
440
|
-
// transparent for schema structure, but alters error behavior
|
|
441
|
-
];
|
|
442
304
|
function unwrapTransparent(schema) {
|
|
443
305
|
let current = schema;
|
|
444
306
|
for (let i = 0; i < 10; i++) {
|
|
@@ -454,6 +316,37 @@ function unwrapTransparent(schema) {
|
|
|
454
316
|
}
|
|
455
317
|
return current;
|
|
456
318
|
}
|
|
319
|
+
var KNOWN_KINDS, TRANSPARENT_WRAPPERS;
|
|
320
|
+
var init_zod_internals = __esm({
|
|
321
|
+
"src/util/zod-internals.ts"() {
|
|
322
|
+
"use strict";
|
|
323
|
+
init_cjs_shims();
|
|
324
|
+
KNOWN_KINDS = /* @__PURE__ */ new Set([
|
|
325
|
+
"object",
|
|
326
|
+
"optional",
|
|
327
|
+
"default",
|
|
328
|
+
"nullable",
|
|
329
|
+
"readonly",
|
|
330
|
+
"prefault",
|
|
331
|
+
"nonoptional",
|
|
332
|
+
"catch",
|
|
333
|
+
"array",
|
|
334
|
+
"record",
|
|
335
|
+
"union",
|
|
336
|
+
"discriminatedunion"
|
|
337
|
+
]);
|
|
338
|
+
TRANSPARENT_WRAPPERS = [
|
|
339
|
+
"optional",
|
|
340
|
+
"nullable",
|
|
341
|
+
"default",
|
|
342
|
+
"readonly",
|
|
343
|
+
"prefault",
|
|
344
|
+
"nonoptional",
|
|
345
|
+
"catch"
|
|
346
|
+
// transparent for schema structure, but alters error behavior
|
|
347
|
+
];
|
|
348
|
+
}
|
|
349
|
+
});
|
|
457
350
|
|
|
458
351
|
// src/util/dot-path.ts
|
|
459
352
|
function parsePath(path3) {
|
|
@@ -509,10 +402,17 @@ function isValidPath(schema, segments) {
|
|
|
509
402
|
}
|
|
510
403
|
return true;
|
|
511
404
|
}
|
|
405
|
+
var import_zod2;
|
|
406
|
+
var init_dot_path = __esm({
|
|
407
|
+
"src/util/dot-path.ts"() {
|
|
408
|
+
"use strict";
|
|
409
|
+
init_cjs_shims();
|
|
410
|
+
import_zod2 = require("zod");
|
|
411
|
+
init_zod_internals();
|
|
412
|
+
}
|
|
413
|
+
});
|
|
512
414
|
|
|
513
415
|
// src/util/deep-partial-schema.ts
|
|
514
|
-
init_cjs_shims();
|
|
515
|
-
var import_zod3 = require("zod");
|
|
516
416
|
function makeDeepPartial(schema) {
|
|
517
417
|
const shape = schema.shape;
|
|
518
418
|
const newShape = {};
|
|
@@ -562,180 +462,246 @@ function makeDeepPartialField(fieldSchema) {
|
|
|
562
462
|
}
|
|
563
463
|
return fieldSchema.optional();
|
|
564
464
|
}
|
|
465
|
+
var import_zod3;
|
|
466
|
+
var init_deep_partial_schema = __esm({
|
|
467
|
+
"src/util/deep-partial-schema.ts"() {
|
|
468
|
+
"use strict";
|
|
469
|
+
init_cjs_shims();
|
|
470
|
+
import_zod3 = require("zod");
|
|
471
|
+
init_zod_internals();
|
|
472
|
+
}
|
|
473
|
+
});
|
|
565
474
|
|
|
566
|
-
// src/
|
|
567
|
-
var
|
|
568
|
-
var
|
|
475
|
+
// src/validate-flags.ts
|
|
476
|
+
var import_zod4;
|
|
477
|
+
var init_validate_flags = __esm({
|
|
478
|
+
"src/validate-flags.ts"() {
|
|
479
|
+
"use strict";
|
|
480
|
+
init_cjs_shims();
|
|
481
|
+
import_zod4 = require("zod");
|
|
482
|
+
init_global_flags();
|
|
483
|
+
init_format_zod_errors();
|
|
484
|
+
init_dot_path();
|
|
485
|
+
init_deep_partial_schema();
|
|
486
|
+
init_zod_internals();
|
|
487
|
+
}
|
|
488
|
+
});
|
|
569
489
|
|
|
570
490
|
// src/otel/utils/to-otel-attribute.ts
|
|
571
|
-
|
|
572
|
-
var
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
init_cjs_shims();
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
var import_api6 = require("@opentelemetry/api");
|
|
491
|
+
var import_api;
|
|
492
|
+
var init_to_otel_attribute = __esm({
|
|
493
|
+
"src/otel/utils/to-otel-attribute.ts"() {
|
|
494
|
+
"use strict";
|
|
495
|
+
init_cjs_shims();
|
|
496
|
+
import_api = require("@opentelemetry/api");
|
|
497
|
+
}
|
|
498
|
+
});
|
|
580
499
|
|
|
581
500
|
// src/otel/withSpanBaggageKey.ts
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
init_cjs_shims();
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
// src/otel/semconv/attributes.ts
|
|
589
|
-
init_cjs_shims();
|
|
590
|
-
var import_semantic_conventions = require("@opentelemetry/semantic-conventions");
|
|
501
|
+
var init_withSpanBaggageKey = __esm({
|
|
502
|
+
"src/otel/withSpanBaggageKey.ts"() {
|
|
503
|
+
"use strict";
|
|
504
|
+
init_cjs_shims();
|
|
505
|
+
}
|
|
506
|
+
});
|
|
591
507
|
|
|
592
508
|
// src/otel/semconv/eval_proposal.ts
|
|
593
|
-
|
|
509
|
+
var init_eval_proposal = __esm({
|
|
510
|
+
"src/otel/semconv/eval_proposal.ts"() {
|
|
511
|
+
"use strict";
|
|
512
|
+
init_cjs_shims();
|
|
513
|
+
}
|
|
514
|
+
});
|
|
594
515
|
|
|
595
516
|
// src/otel/semconv/attributes.ts
|
|
596
|
-
var import_incubating
|
|
517
|
+
var import_semantic_conventions, import_incubating;
|
|
518
|
+
var init_attributes = __esm({
|
|
519
|
+
"src/otel/semconv/attributes.ts"() {
|
|
520
|
+
"use strict";
|
|
521
|
+
init_cjs_shims();
|
|
522
|
+
import_semantic_conventions = require("@opentelemetry/semantic-conventions");
|
|
523
|
+
init_eval_proposal();
|
|
524
|
+
import_incubating = require("@opentelemetry/semantic-conventions/incubating");
|
|
525
|
+
}
|
|
526
|
+
});
|
|
597
527
|
|
|
598
|
-
// src/
|
|
599
|
-
|
|
600
|
-
var
|
|
528
|
+
// src/schema.ts
|
|
529
|
+
var SCHEMA_VERSION, SCHEMA_BASE_URL, SCHEMA_URL;
|
|
530
|
+
var init_schema = __esm({
|
|
531
|
+
"src/schema.ts"() {
|
|
532
|
+
"use strict";
|
|
533
|
+
init_cjs_shims();
|
|
534
|
+
SCHEMA_VERSION = "0.0.2";
|
|
535
|
+
SCHEMA_BASE_URL = "https://axiom.co/ai/schemas/";
|
|
536
|
+
SCHEMA_URL = `${SCHEMA_BASE_URL}${SCHEMA_VERSION}`;
|
|
537
|
+
}
|
|
538
|
+
});
|
|
601
539
|
|
|
602
|
-
// src/otel/
|
|
603
|
-
|
|
604
|
-
var
|
|
540
|
+
// src/otel/startActiveSpan.ts
|
|
541
|
+
var import_api2;
|
|
542
|
+
var init_startActiveSpan = __esm({
|
|
543
|
+
"src/otel/startActiveSpan.ts"() {
|
|
544
|
+
"use strict";
|
|
545
|
+
init_cjs_shims();
|
|
546
|
+
import_api2 = require("@opentelemetry/api");
|
|
547
|
+
}
|
|
548
|
+
});
|
|
605
549
|
|
|
606
550
|
// package.json
|
|
607
|
-
var package_default
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
module: "./dist/index.js",
|
|
631
|
-
bin: {
|
|
632
|
-
axiom: "./dist/bin.js"
|
|
633
|
-
},
|
|
634
|
-
exports: {
|
|
635
|
-
"./ai": {
|
|
636
|
-
import: {
|
|
637
|
-
types: "./dist/index.d.ts",
|
|
638
|
-
default: "./dist/index.js"
|
|
551
|
+
var package_default;
|
|
552
|
+
var init_package = __esm({
|
|
553
|
+
"package.json"() {
|
|
554
|
+
package_default = {
|
|
555
|
+
name: "axiom",
|
|
556
|
+
version: "0.35.0",
|
|
557
|
+
type: "module",
|
|
558
|
+
author: "Axiom, Inc.",
|
|
559
|
+
contributors: [
|
|
560
|
+
"Islam Shehata <islam@axiom.co>",
|
|
561
|
+
"Chris Ehrlich <chris@axiom.co>",
|
|
562
|
+
"Gabriel de Andrade <gabriel@axiom.co>"
|
|
563
|
+
],
|
|
564
|
+
scripts: {
|
|
565
|
+
dev: "tsup --watch",
|
|
566
|
+
build: "tsup && chmod +x dist/bin.js",
|
|
567
|
+
format: "prettier --write .",
|
|
568
|
+
"format:check": "prettier --check .",
|
|
569
|
+
lint: "eslint './**/*.{js,ts}'",
|
|
570
|
+
typecheck: "tsc --noEmit",
|
|
571
|
+
test: "vitest run",
|
|
572
|
+
"test:watch": "vitest --watch",
|
|
573
|
+
publint: "npx publint"
|
|
639
574
|
},
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
"./ai/evals": {
|
|
646
|
-
import: {
|
|
647
|
-
types: "./dist/evals.d.ts",
|
|
648
|
-
default: "./dist/evals.js"
|
|
575
|
+
types: "./dist/index.d.ts",
|
|
576
|
+
main: "./dist/index.cjs",
|
|
577
|
+
module: "./dist/index.js",
|
|
578
|
+
bin: {
|
|
579
|
+
axiom: "./dist/bin.js"
|
|
649
580
|
},
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
581
|
+
exports: {
|
|
582
|
+
"./ai": {
|
|
583
|
+
import: {
|
|
584
|
+
types: "./dist/index.d.ts",
|
|
585
|
+
default: "./dist/index.js"
|
|
586
|
+
},
|
|
587
|
+
require: {
|
|
588
|
+
types: "./dist/index.d.cts",
|
|
589
|
+
default: "./dist/index.cjs"
|
|
590
|
+
}
|
|
591
|
+
},
|
|
592
|
+
"./ai/evals": {
|
|
593
|
+
import: {
|
|
594
|
+
types: "./dist/evals.d.ts",
|
|
595
|
+
default: "./dist/evals.js"
|
|
596
|
+
},
|
|
597
|
+
require: {
|
|
598
|
+
types: "./dist/evals.d.cts",
|
|
599
|
+
default: "./dist/evals.cjs"
|
|
600
|
+
}
|
|
601
|
+
},
|
|
602
|
+
"./ai/config": {
|
|
603
|
+
import: {
|
|
604
|
+
types: "./dist/config.d.ts",
|
|
605
|
+
default: "./dist/config.js"
|
|
606
|
+
},
|
|
607
|
+
require: {
|
|
608
|
+
types: "./dist/config.d.cts",
|
|
609
|
+
default: "./dist/config.cjs"
|
|
610
|
+
}
|
|
611
|
+
},
|
|
612
|
+
"./ai/feedback": {
|
|
613
|
+
import: {
|
|
614
|
+
types: "./dist/feedback.d.ts",
|
|
615
|
+
default: "./dist/feedback.js"
|
|
616
|
+
},
|
|
617
|
+
require: {
|
|
618
|
+
types: "./dist/feedback.d.cts",
|
|
619
|
+
default: "./dist/feedback.cjs"
|
|
620
|
+
}
|
|
621
|
+
}
|
|
659
622
|
},
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
],
|
|
729
|
-
packageManager: "pnpm@10.16.1"
|
|
730
|
-
};
|
|
623
|
+
keywords: [
|
|
624
|
+
"axiom",
|
|
625
|
+
"logging",
|
|
626
|
+
"ai",
|
|
627
|
+
"otel",
|
|
628
|
+
"opentelemetry"
|
|
629
|
+
],
|
|
630
|
+
repository: {
|
|
631
|
+
type: "git",
|
|
632
|
+
url: "git+https://github.com/axiomhq/ai.git",
|
|
633
|
+
directory: "packages/ai"
|
|
634
|
+
},
|
|
635
|
+
license: "MIT",
|
|
636
|
+
dependencies: {
|
|
637
|
+
"@next/env": "^15.4.2",
|
|
638
|
+
"@opentelemetry/auto-instrumentations-node": "^0.60.1",
|
|
639
|
+
"@opentelemetry/context-async-hooks": "^2.0.1",
|
|
640
|
+
"@opentelemetry/exporter-trace-otlp-http": "^0.202.0",
|
|
641
|
+
"@opentelemetry/resources": "^2.0.1",
|
|
642
|
+
"@opentelemetry/sdk-trace-node": "^2.0.1",
|
|
643
|
+
"@opentelemetry/semantic-conventions": "^1.37.0",
|
|
644
|
+
"@sinclair/typebox": "^0.34.37",
|
|
645
|
+
c12: "^2.0.4",
|
|
646
|
+
commander: "^14.0.0",
|
|
647
|
+
defu: "^6.1.4",
|
|
648
|
+
handlebars: "^4.7.8",
|
|
649
|
+
nanoid: "^5.1.5",
|
|
650
|
+
open: "^10.1.0",
|
|
651
|
+
"vite-tsconfig-paths": "^5.1.4",
|
|
652
|
+
vitest: "^4.0.0"
|
|
653
|
+
},
|
|
654
|
+
peerDependencies: {
|
|
655
|
+
"@opentelemetry/api": "^1.9.0",
|
|
656
|
+
zod: "^3.25.0 || ^4.0.0"
|
|
657
|
+
},
|
|
658
|
+
devDependencies: {
|
|
659
|
+
"@ai-sdk/anthropicv1": "npm:@ai-sdk/anthropic@^1.2.12",
|
|
660
|
+
"@ai-sdk/anthropicv2": "npm:@ai-sdk/anthropic@^2.0.44",
|
|
661
|
+
"@ai-sdk/openaiv1": "npm:@ai-sdk/openai@^1.3.24",
|
|
662
|
+
"@ai-sdk/openaiv2": "npm:@ai-sdk/openai@^2.0.67",
|
|
663
|
+
"@ai-sdk/providerv1": "npm:@ai-sdk/provider@^1.1.3",
|
|
664
|
+
"@ai-sdk/providerv2": "npm:@ai-sdk/provider@^2.0.0",
|
|
665
|
+
"@opentelemetry/api": "^1.9.0",
|
|
666
|
+
"@opentelemetry/core": "^2.0.1",
|
|
667
|
+
"@opentelemetry/sdk-trace-base": "^2.0.1",
|
|
668
|
+
"@opentelemetry/sdk-trace-node": "^2.0.1",
|
|
669
|
+
"@repo/eslint-config": "workspace:*",
|
|
670
|
+
"@types/node": "^22.15.29",
|
|
671
|
+
"@vitest/coverage-v8": "^4.0.0",
|
|
672
|
+
aiv4: "npm:ai@^4.3.19",
|
|
673
|
+
aiv5: "npm:ai@^5.0.93",
|
|
674
|
+
esbuild: "^0.25.8",
|
|
675
|
+
eslint: "catalog:",
|
|
676
|
+
msw: "^2.12.2",
|
|
677
|
+
prettier: "catalog:",
|
|
678
|
+
tinyrainbow: "^2.0.0",
|
|
679
|
+
tsup: "catalog:",
|
|
680
|
+
typescript: "catalog:",
|
|
681
|
+
vitest: "catalog:",
|
|
682
|
+
zod: "catalog:"
|
|
683
|
+
},
|
|
684
|
+
files: [
|
|
685
|
+
"dist"
|
|
686
|
+
],
|
|
687
|
+
packageManager: "pnpm@10.16.1"
|
|
688
|
+
};
|
|
689
|
+
}
|
|
690
|
+
});
|
|
731
691
|
|
|
732
692
|
// src/otel/utils/redaction.ts
|
|
733
|
-
|
|
734
|
-
var
|
|
735
|
-
|
|
693
|
+
var import_api3, AXIOM_AI_REDACTION_KEY;
|
|
694
|
+
var init_redaction = __esm({
|
|
695
|
+
"src/otel/utils/redaction.ts"() {
|
|
696
|
+
"use strict";
|
|
697
|
+
init_cjs_shims();
|
|
698
|
+
import_api3 = require("@opentelemetry/api");
|
|
699
|
+
init_withSpanBaggageKey();
|
|
700
|
+
AXIOM_AI_REDACTION_KEY = Symbol.for("__axiom_ai_redaction__");
|
|
701
|
+
}
|
|
702
|
+
});
|
|
736
703
|
|
|
737
704
|
// src/otel/initAxiomAI.ts
|
|
738
|
-
var AXIOM_AI_SCOPE_KEY = Symbol.for("__axiom_ai_scope__");
|
|
739
705
|
function extractTracerScope(tracer) {
|
|
740
706
|
const tracerAny = tracer;
|
|
741
707
|
const name = tracerAny._instrumentationScope?.name || tracerAny.instrumentationLibrary?.name || package_default.name;
|
|
@@ -758,51 +724,186 @@ function initAxiomAI(config) {
|
|
|
758
724
|
globalThis[AXIOM_AI_REDACTION_KEY] = config.redactionPolicy;
|
|
759
725
|
}
|
|
760
726
|
}
|
|
727
|
+
var import_api4, AXIOM_AI_SCOPE_KEY;
|
|
728
|
+
var init_initAxiomAI = __esm({
|
|
729
|
+
"src/otel/initAxiomAI.ts"() {
|
|
730
|
+
"use strict";
|
|
731
|
+
init_cjs_shims();
|
|
732
|
+
import_api4 = require("@opentelemetry/api");
|
|
733
|
+
init_package();
|
|
734
|
+
init_redaction();
|
|
735
|
+
AXIOM_AI_SCOPE_KEY = Symbol.for("__axiom_ai_scope__");
|
|
736
|
+
}
|
|
737
|
+
});
|
|
761
738
|
|
|
762
|
-
// src/
|
|
763
|
-
|
|
739
|
+
// src/otel/utils/wrapperUtils.ts
|
|
740
|
+
var import_api5;
|
|
741
|
+
var init_wrapperUtils = __esm({
|
|
742
|
+
"src/otel/utils/wrapperUtils.ts"() {
|
|
743
|
+
"use strict";
|
|
744
|
+
init_cjs_shims();
|
|
745
|
+
import_api5 = require("@opentelemetry/api");
|
|
746
|
+
init_attributes();
|
|
747
|
+
init_schema();
|
|
748
|
+
init_withSpanBaggageKey();
|
|
749
|
+
init_startActiveSpan();
|
|
750
|
+
init_initAxiomAI();
|
|
751
|
+
}
|
|
752
|
+
});
|
|
764
753
|
|
|
765
|
-
// src/
|
|
766
|
-
|
|
754
|
+
// src/util/name-validation-runtime.ts
|
|
755
|
+
var init_name_validation_runtime = __esm({
|
|
756
|
+
"src/util/name-validation-runtime.ts"() {
|
|
757
|
+
"use strict";
|
|
758
|
+
init_cjs_shims();
|
|
759
|
+
}
|
|
760
|
+
});
|
|
767
761
|
|
|
768
|
-
// src/otel/
|
|
769
|
-
|
|
762
|
+
// src/otel/withSpan.ts
|
|
763
|
+
var import_api6;
|
|
764
|
+
var init_withSpan = __esm({
|
|
765
|
+
"src/otel/withSpan.ts"() {
|
|
766
|
+
"use strict";
|
|
767
|
+
init_cjs_shims();
|
|
768
|
+
import_api6 = require("@opentelemetry/api");
|
|
769
|
+
init_withSpanBaggageKey();
|
|
770
|
+
init_wrapperUtils();
|
|
771
|
+
init_name_validation_runtime();
|
|
772
|
+
}
|
|
773
|
+
});
|
|
770
774
|
|
|
771
|
-
// src/otel/
|
|
772
|
-
|
|
773
|
-
|
|
775
|
+
// src/otel/utils/contentSanitizer.ts
|
|
776
|
+
var init_contentSanitizer = __esm({
|
|
777
|
+
"src/otel/utils/contentSanitizer.ts"() {
|
|
778
|
+
"use strict";
|
|
779
|
+
init_cjs_shims();
|
|
780
|
+
}
|
|
781
|
+
});
|
|
774
782
|
|
|
775
783
|
// src/otel/completionUtils.ts
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
init_cjs_shims();
|
|
784
|
+
var init_completionUtils = __esm({
|
|
785
|
+
"src/otel/completionUtils.ts"() {
|
|
786
|
+
"use strict";
|
|
787
|
+
init_cjs_shims();
|
|
788
|
+
init_contentSanitizer();
|
|
789
|
+
}
|
|
790
|
+
});
|
|
780
791
|
|
|
781
792
|
// src/util/promptUtils.ts
|
|
782
|
-
|
|
793
|
+
var init_promptUtils = __esm({
|
|
794
|
+
"src/util/promptUtils.ts"() {
|
|
795
|
+
"use strict";
|
|
796
|
+
init_cjs_shims();
|
|
797
|
+
}
|
|
798
|
+
});
|
|
783
799
|
|
|
784
800
|
// src/otel/utils/normalized.ts
|
|
785
|
-
|
|
801
|
+
var init_normalized = __esm({
|
|
802
|
+
"src/otel/utils/normalized.ts"() {
|
|
803
|
+
"use strict";
|
|
804
|
+
init_cjs_shims();
|
|
805
|
+
}
|
|
806
|
+
});
|
|
807
|
+
|
|
808
|
+
// src/util/currentUnixTime.ts
|
|
809
|
+
var init_currentUnixTime = __esm({
|
|
810
|
+
"src/util/currentUnixTime.ts"() {
|
|
811
|
+
"use strict";
|
|
812
|
+
init_cjs_shims();
|
|
813
|
+
}
|
|
814
|
+
});
|
|
786
815
|
|
|
787
816
|
// src/otel/streaming/aggregators.ts
|
|
788
|
-
|
|
817
|
+
var init_aggregators = __esm({
|
|
818
|
+
"src/otel/streaming/aggregators.ts"() {
|
|
819
|
+
"use strict";
|
|
820
|
+
init_cjs_shims();
|
|
821
|
+
init_currentUnixTime();
|
|
822
|
+
}
|
|
823
|
+
});
|
|
789
824
|
|
|
790
|
-
// src/
|
|
791
|
-
|
|
825
|
+
// src/otel/middleware.ts
|
|
826
|
+
var import_api7;
|
|
827
|
+
var init_middleware = __esm({
|
|
828
|
+
"src/otel/middleware.ts"() {
|
|
829
|
+
"use strict";
|
|
830
|
+
init_cjs_shims();
|
|
831
|
+
import_api7 = require("@opentelemetry/api");
|
|
832
|
+
init_attributes();
|
|
833
|
+
init_completionUtils();
|
|
834
|
+
init_promptUtils();
|
|
835
|
+
init_contentSanitizer();
|
|
836
|
+
init_wrapperUtils();
|
|
837
|
+
init_normalized();
|
|
838
|
+
init_aggregators();
|
|
839
|
+
init_redaction();
|
|
840
|
+
}
|
|
841
|
+
});
|
|
842
|
+
|
|
843
|
+
// src/otel/AxiomWrappedLanguageModelV1.ts
|
|
844
|
+
var init_AxiomWrappedLanguageModelV1 = __esm({
|
|
845
|
+
"src/otel/AxiomWrappedLanguageModelV1.ts"() {
|
|
846
|
+
"use strict";
|
|
847
|
+
init_cjs_shims();
|
|
848
|
+
init_middleware();
|
|
849
|
+
}
|
|
850
|
+
});
|
|
792
851
|
|
|
793
852
|
// src/otel/AxiomWrappedLanguageModelV2.ts
|
|
794
|
-
|
|
853
|
+
var init_AxiomWrappedLanguageModelV2 = __esm({
|
|
854
|
+
"src/otel/AxiomWrappedLanguageModelV2.ts"() {
|
|
855
|
+
"use strict";
|
|
856
|
+
init_cjs_shims();
|
|
857
|
+
init_middleware();
|
|
858
|
+
}
|
|
859
|
+
});
|
|
860
|
+
|
|
861
|
+
// src/otel/vercel.ts
|
|
862
|
+
var init_vercel = __esm({
|
|
863
|
+
"src/otel/vercel.ts"() {
|
|
864
|
+
"use strict";
|
|
865
|
+
init_cjs_shims();
|
|
866
|
+
init_AxiomWrappedLanguageModelV1();
|
|
867
|
+
init_AxiomWrappedLanguageModelV2();
|
|
868
|
+
}
|
|
869
|
+
});
|
|
870
|
+
|
|
871
|
+
// src/otel/index.ts
|
|
872
|
+
var init_otel = __esm({
|
|
873
|
+
"src/otel/index.ts"() {
|
|
874
|
+
"use strict";
|
|
875
|
+
init_cjs_shims();
|
|
876
|
+
init_withSpan();
|
|
877
|
+
init_vercel();
|
|
878
|
+
init_attributes();
|
|
879
|
+
}
|
|
880
|
+
});
|
|
881
|
+
|
|
882
|
+
// src/app-scope.ts
|
|
883
|
+
var import_api8, import_zod5;
|
|
884
|
+
var init_app_scope = __esm({
|
|
885
|
+
"src/app-scope.ts"() {
|
|
886
|
+
"use strict";
|
|
887
|
+
init_cjs_shims();
|
|
888
|
+
init_global_flags();
|
|
889
|
+
init_storage();
|
|
890
|
+
init_validate_flags();
|
|
891
|
+
init_dot_path();
|
|
892
|
+
init_zod_internals();
|
|
893
|
+
import_api8 = require("@opentelemetry/api");
|
|
894
|
+
import_zod5 = require("zod");
|
|
895
|
+
init_to_otel_attribute();
|
|
896
|
+
init_otel();
|
|
897
|
+
}
|
|
898
|
+
});
|
|
795
899
|
|
|
796
900
|
// src/evals/context/storage.ts
|
|
797
|
-
var CONFIG_SCOPE_SYMBOL = Symbol.for("axiom.eval.configScope");
|
|
798
|
-
var AXIOM_CONFIG_SYMBOL = Symbol.for("axiom.eval.config");
|
|
799
901
|
function getAxiomConfig() {
|
|
800
902
|
return globalThis[AXIOM_CONFIG_SYMBOL];
|
|
801
903
|
}
|
|
802
904
|
function setAxiomConfig(config) {
|
|
803
905
|
globalThis[AXIOM_CONFIG_SYMBOL] = config;
|
|
804
906
|
}
|
|
805
|
-
var EVAL_CONTEXT = createAsyncHook("eval-context");
|
|
806
907
|
function getEvalContext() {
|
|
807
908
|
const ctx = EVAL_CONTEXT.get();
|
|
808
909
|
if (!ctx) {
|
|
@@ -842,628 +943,297 @@ function withEvalContext(options = {}, fn) {
|
|
|
842
943
|
fn
|
|
843
944
|
);
|
|
844
945
|
}
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
return { text: "N/A", color: u.dim };
|
|
946
|
+
var import_api9, CONFIG_SCOPE_SYMBOL, AXIOM_CONFIG_SYMBOL, EVAL_CONTEXT;
|
|
947
|
+
var init_storage = __esm({
|
|
948
|
+
"src/evals/context/storage.ts"() {
|
|
949
|
+
"use strict";
|
|
950
|
+
init_cjs_shims();
|
|
951
|
+
import_api9 = require("@opentelemetry/api");
|
|
952
|
+
init_manager();
|
|
953
|
+
init_app_scope();
|
|
954
|
+
CONFIG_SCOPE_SYMBOL = Symbol.for("axiom.eval.configScope");
|
|
955
|
+
AXIOM_CONFIG_SYMBOL = Symbol.for("axiom.eval.config");
|
|
956
|
+
EVAL_CONTEXT = createAsyncHook("eval-context");
|
|
857
957
|
}
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
if (typeof value === "string") return value;
|
|
869
|
-
return JSON.stringify(value);
|
|
870
|
-
} catch {
|
|
871
|
-
return String(value);
|
|
958
|
+
});
|
|
959
|
+
|
|
960
|
+
// src/context.ts
|
|
961
|
+
function overrideFlags(partial) {
|
|
962
|
+
const current = getEvalContext();
|
|
963
|
+
if (!current) {
|
|
964
|
+
if (process.env.NODE_ENV !== "test") {
|
|
965
|
+
console.warn("overrideFlags called outside of evaluation context");
|
|
966
|
+
}
|
|
967
|
+
return;
|
|
872
968
|
}
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
}
|
|
879
|
-
|
|
880
|
-
const
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
u.bgCyan(u.black(` ${testSuite.project.name} `)),
|
|
884
|
-
u.bgBlue(u.black(` ${meta.evaluation.name}-${meta.evaluation.version} `)),
|
|
885
|
-
u.dim(`(${testSuite.children.size} cases)`)
|
|
886
|
-
);
|
|
887
|
-
logger(" ", u.dim(testSuite.module.moduleId.replace(cwd, "")));
|
|
888
|
-
}
|
|
889
|
-
function printBaselineNameAndVersion(testMeta, logger = console.log) {
|
|
890
|
-
if (testMeta.evaluation.baseline) {
|
|
891
|
-
logger(
|
|
892
|
-
" ",
|
|
893
|
-
" baseline ",
|
|
894
|
-
u.bgMagenta(
|
|
895
|
-
u.black(` ${testMeta.evaluation.baseline.name}-${testMeta.evaluation.baseline.version} `)
|
|
896
|
-
)
|
|
897
|
-
);
|
|
898
|
-
} else {
|
|
899
|
-
logger(" ", u.bgWhite(u.blackBright(" baseline: ")), "none");
|
|
969
|
+
const overlayContext = {
|
|
970
|
+
...current,
|
|
971
|
+
flags: { ...current.flags, ...partial },
|
|
972
|
+
// Merge for backwards compatibility
|
|
973
|
+
parent: current,
|
|
974
|
+
overrides: { ...partial }
|
|
975
|
+
};
|
|
976
|
+
const currentCtx = EVAL_CONTEXT.get();
|
|
977
|
+
if (currentCtx) {
|
|
978
|
+
Object.assign(currentCtx, overlayContext);
|
|
900
979
|
}
|
|
901
|
-
|
|
902
|
-
|
|
903
|
-
function printRuntimeFlags(testMeta, logger = console.log) {
|
|
904
|
-
if (testMeta.case.runtimeFlags && Object.keys(testMeta.case.runtimeFlags).length > 0) {
|
|
905
|
-
const entries = Object.entries(testMeta.case.runtimeFlags);
|
|
906
|
-
logger(" ", u.dim("runtime flags"));
|
|
907
|
-
for (const [k, v] of entries) {
|
|
908
|
-
switch (v.kind) {
|
|
909
|
-
case "replaced": {
|
|
910
|
-
const valText = truncate(stringify(v.value), 80);
|
|
911
|
-
const defText = truncate(stringify(v.default), 80);
|
|
912
|
-
logger(" ", `${k}: ${valText} (default: ${defText})`);
|
|
913
|
-
break;
|
|
914
|
-
}
|
|
915
|
-
case "introduced": {
|
|
916
|
-
const valText = truncate(stringify(v.value), 80);
|
|
917
|
-
logger(" ", `${k}: ${valText} (no default)`);
|
|
918
|
-
break;
|
|
919
|
-
}
|
|
920
|
-
}
|
|
921
|
-
}
|
|
980
|
+
for (const [key, value] of Object.entries(partial)) {
|
|
981
|
+
putOnSpan("flag", key, value);
|
|
922
982
|
}
|
|
923
983
|
}
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
|
|
984
|
+
// Module wrapper for "src/context.ts", registered through the bundler's
// __esm helper. Its body only runs the CommonJS shims and then the storage
// module initializer; the module's functions are declared at file scope.
var init_context = __esm({
  "src/context.ts"() {
    "use strict";
    init_cjs_shims();
    init_storage();
  }
});
|
|
991
|
+
|
|
992
|
+
// src/cli/utils/eval-context-runner.ts
|
|
993
|
+
/**
 * Runs `runFn` inside an eval context seeded with the given flag overrides.
 *
 * The overrides are first published via setGlobalFlagOverrides(), then used
 * as the `initialFlags` of withEvalContext(). Inside that context they are
 * additionally applied through overrideFlags() when at least one key is set.
 *
 * @param {object} overrides2 - Flag overrides keyed by flag name/path.
 * @param {Function} runFn - Callback executed inside the eval context.
 * @returns {Promise<*>} Whatever withEvalContext() yields for the callback,
 *   which itself returns the result of `runFn()`.
 */
async function runEvalWithContext(overrides2, runFn) {
  setGlobalFlagOverrides(overrides2);

  const runInsideContext = async () => {
    const overrideCount = Object.keys(overrides2).length;
    if (overrideCount > 0) {
      overrideFlags(overrides2);
    }
    return runFn();
  };

  return withEvalContext({ initialFlags: overrides2 }, runInsideContext);
}
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
|
|
1002
|
+
// Module wrapper for "src/cli/utils/eval-context-runner.ts", registered
// through the bundler's __esm helper. The body runs the CommonJS shims and
// then the initializers of the modules this one depends on, in this order:
// context, storage, global flags.
var init_eval_context_runner = __esm({
  "src/cli/utils/eval-context-runner.ts"() {
    "use strict";
    init_cjs_shims();
    init_context();
    init_storage();
    init_global_flags();
  }
});
|
|
1011
|
+
|
|
1012
|
+
// src/cli/utils/parse-flag-overrides.ts
|
|
1013
|
+
/**
 * Rejects the space-separated form of a flag override (`--flag.x value` or
 * `--flags-config file`), which must be written with `=` instead.
 *
 * The check only applies when the flag carried no inline value and another
 * token follows it. On a violation it prints the offending syntax plus the
 * suggested `=` form to stderr and calls `process.exit(1)`.
 *
 * @param {string} flagName - Flag key, used in the "flag" error messages.
 * @param {string|undefined} value - Inline value parsed from `=...`, if any.
 * @param {string|undefined} nextToken - The argv token after the flag, if any.
 * @param {string} flagType - "flag" for `--flag.*`, "config" for `--flags-config`.
 * @returns {void}
 */
function ensureNoSpaceSeparatedSyntax(flagName, value, nextToken, flagType) {
  // Nothing to verify when a value was given inline or no token follows.
  if (value !== void 0 || nextToken === void 0) {
    return;
  }

  switch (flagType) {
    case "flag": {
      // A following option or a boolean literal is tolerated after a bare
      // `--flag.x`; anything else is treated as a misplaced value.
      const isMisplacedValue = !(
        nextToken.startsWith("-") ||
        nextToken === "true" ||
        nextToken === "false"
      );
      if (isMisplacedValue) {
        console.error(`\u274C Invalid syntax: --flag.${flagName} ${nextToken}`);
        console.error(`\u{1F4A1} Use: --flag.${flagName}=${nextToken}`);
        process.exit(1);
      }
      break;
    }
    case "config": {
      if (!nextToken.startsWith("-")) {
        console.error(`\u274C Invalid syntax: --flags-config ${nextToken}`);
        console.error(`\u{1F4A1} Use: --flags-config=${nextToken}`);
        process.exit(1);
      }
      break;
    }
  }
}
|
|
941
|
-
function
|
|
942
|
-
|
|
943
|
-
|
|
944
|
-
if (keys.length === 0) {
|
|
945
|
-
return;
|
|
1026
|
+
function collectFlagValidationErrors(overrides2, flagSchema) {
|
|
1027
|
+
if (!flagSchema || Object.keys(overrides2).length === 0) {
|
|
1028
|
+
return { success: true, errors: [] };
|
|
946
1029
|
}
|
|
947
|
-
|
|
948
|
-
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
const
|
|
952
|
-
|
|
953
|
-
|
|
954
|
-
const coloredCurrent = hasError ? u.dim(paddedCurrent) : u.magentaBright(paddedCurrent);
|
|
955
|
-
const paddedName = k.padEnd(maxNameLength);
|
|
956
|
-
if (baselineCase?.scores[k]) {
|
|
957
|
-
const baselineScoreValue = baselineCase.scores[k].value;
|
|
958
|
-
const rawBaseline = formatPercentage(baselineScoreValue);
|
|
959
|
-
const paddedBaseline = rawBaseline.padStart(7);
|
|
960
|
-
const coloredBaseline = u.blueBright(paddedBaseline);
|
|
961
|
-
const { text: diffText, color: diffColor } = formatDiff(v, baselineScoreValue);
|
|
962
|
-
const paddedDiff = diffText.padStart(8);
|
|
963
|
-
logger(
|
|
964
|
-
` ${paddedName} ${coloredBaseline} \u2192 ${coloredCurrent} ${hasError ? u.dim("(scorer not run)") : u.dim("(") + diffColor(paddedDiff) + u.dim(")")}`
|
|
965
|
-
);
|
|
966
|
-
} else {
|
|
967
|
-
logger(` ${paddedName} ${coloredCurrent} ${hasError ? u.dim("(scorer not run)") : ""}`);
|
|
1030
|
+
assertZodV4(flagSchema, "flagSchema");
|
|
1031
|
+
const schema = flagSchema;
|
|
1032
|
+
const errors = [];
|
|
1033
|
+
for (const dotPath of Object.keys(overrides2)) {
|
|
1034
|
+
const segments = parsePath(dotPath);
|
|
1035
|
+
if (!isValidPath(schema, segments)) {
|
|
1036
|
+
errors.push({ type: "invalid_path", path: dotPath });
|
|
968
1037
|
}
|
|
969
|
-
}
|
|
1038
|
+
}
|
|
1039
|
+
if (errors.length > 0) {
|
|
1040
|
+
return { success: false, errors };
|
|
1041
|
+
}
|
|
1042
|
+
const nestedObject = dotNotationToNested(overrides2);
|
|
1043
|
+
const deepPartialSchema = makeDeepPartial(schema);
|
|
1044
|
+
const result = deepPartialSchema.safeParse(nestedObject);
|
|
1045
|
+
if (!result.success) {
|
|
1046
|
+
errors.push({ type: "invalid_value", zodError: result.error });
|
|
1047
|
+
}
|
|
1048
|
+
return { success: errors.length === 0, errors };
|
|
970
1049
|
}
|
|
971
|
-
function
|
|
972
|
-
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
|
|
982
|
-
});
|
|
1050
|
+
function printFlagValidationErrorsAndExit(errors) {
|
|
1051
|
+
console.error("\u274C Invalid CLI flags:");
|
|
1052
|
+
for (const error of errors) {
|
|
1053
|
+
if (error.type === "invalid_path") {
|
|
1054
|
+
console.error(` \u2022 flag '${error.path}': Invalid flag path`);
|
|
1055
|
+
} else {
|
|
1056
|
+
console.error(formatZodErrors(error.zodError));
|
|
1057
|
+
const examples = generateFlagExamples(error.zodError);
|
|
1058
|
+
if (examples.length > 0) {
|
|
1059
|
+
console.error("\n\u{1F4A1} Valid examples:");
|
|
1060
|
+
examples.forEach((example) => console.error(` ${example}`));
|
|
983
1061
|
}
|
|
984
|
-
}
|
|
1062
|
+
}
|
|
985
1063
|
}
|
|
1064
|
+
process.exit(1);
|
|
986
1065
|
}
|
|
987
|
-
function
|
|
988
|
-
const
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
return;
|
|
992
|
-
}
|
|
993
|
-
printTestCaseSuccessOrFailed(testMeta, ok, logger);
|
|
994
|
-
const fingerprint = getCaseFingerprint(testMeta.case.input, testMeta.case.expected);
|
|
995
|
-
const baselineCases = baselineCasesByFingerprint.get(fingerprint);
|
|
996
|
-
const baselineCase = baselineCases?.shift();
|
|
997
|
-
if (baselineCase) {
|
|
998
|
-
matchedIndices.add(baselineCase.index);
|
|
1066
|
+
function validateFlagOverrides(overrides2, flagSchema) {
|
|
1067
|
+
const result = collectFlagValidationErrors(overrides2, flagSchema);
|
|
1068
|
+
if (!result.success) {
|
|
1069
|
+
printFlagValidationErrorsAndExit(result.errors);
|
|
999
1070
|
}
|
|
1000
|
-
printTestCaseScores(testMeta, baselineCase, logger);
|
|
1001
|
-
printRuntimeFlags(testMeta, logger);
|
|
1002
|
-
printOutOfScopeFlags(testMeta, logger);
|
|
1003
1071
|
}
|
|
1004
|
-
function
|
|
1005
|
-
|
|
1006
|
-
if (
|
|
1007
|
-
|
|
1072
|
+
function coerceValue(raw) {
|
|
1073
|
+
if (raw === "true") return true;
|
|
1074
|
+
if (raw === "false") return false;
|
|
1075
|
+
const num = Number(raw);
|
|
1076
|
+
if (!Number.isNaN(num) && raw.trim() === num.toString()) {
|
|
1077
|
+
return num;
|
|
1008
1078
|
}
|
|
1009
|
-
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
" ",
|
|
1014
|
-
u.dim(
|
|
1015
|
-
`case ${orphanedCase.index}: ${truncate(orphanedCase.input, 50)} (score: ${truncate(
|
|
1016
|
-
JSON.stringify(orphanedCase.scores),
|
|
1017
|
-
50
|
|
1018
|
-
)})`
|
|
1019
|
-
)
|
|
1020
|
-
);
|
|
1021
|
-
const keys = Object.keys(orphanedCase.scores);
|
|
1022
|
-
if (keys.length > 0) {
|
|
1023
|
-
const maxNameLength = Math.max(...keys.map((k) => k.length));
|
|
1024
|
-
keys.forEach((k) => {
|
|
1025
|
-
const scoreData = orphanedCase.scores[k];
|
|
1026
|
-
const rawScore = formatPercentage(scoreData.value);
|
|
1027
|
-
const paddedName = k.padEnd(maxNameLength);
|
|
1028
|
-
const paddedScore = rawScore.padStart(7);
|
|
1029
|
-
logger(` ${paddedName} ${u.blueBright(paddedScore)}`);
|
|
1030
|
-
});
|
|
1031
|
-
}
|
|
1079
|
+
try {
|
|
1080
|
+
return JSON.parse(raw);
|
|
1081
|
+
} catch {
|
|
1082
|
+
return raw;
|
|
1032
1083
|
}
|
|
1033
1084
|
}
|
|
1034
|
-
|
|
1035
|
-
const
|
|
1036
|
-
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
logger("");
|
|
1043
|
-
logger(u.dim("Flag overrides: (none)"));
|
|
1044
|
-
logger("");
|
|
1045
|
-
return;
|
|
1046
|
-
}
|
|
1047
|
-
logger("");
|
|
1048
|
-
logger("Flag overrides:");
|
|
1049
|
-
for (const [key, value] of Object.entries(overrides2)) {
|
|
1050
|
-
const defaultValue = defaults[key];
|
|
1051
|
-
const valueStr = JSON.stringify(value);
|
|
1052
|
-
const defaultStr = defaultValue !== void 0 ? JSON.stringify(defaultValue) : "none";
|
|
1053
|
-
logger(` \u2022 ${key}: ${valueStr} ${u.dim(`(default: ${defaultStr})`)}`);
|
|
1054
|
-
}
|
|
1055
|
-
logger("");
|
|
1056
|
-
}
|
|
1057
|
-
function printSuiteBox({
|
|
1058
|
-
suite,
|
|
1059
|
-
scorerAverages,
|
|
1060
|
-
calculateBaselineScorerAverage: calculateBaselineScorerAverage2,
|
|
1061
|
-
flagDiff,
|
|
1062
|
-
logger = console.log
|
|
1063
|
-
}) {
|
|
1064
|
-
const filename = suite.file.split("/").pop();
|
|
1065
|
-
logger("\u250C\u2500");
|
|
1066
|
-
logger(`\u2502 ${u.blue(suite.name)} ${u.gray(`(${filename})`)}`);
|
|
1067
|
-
logger("\u251C\u2500");
|
|
1068
|
-
const scorerNames = Object.keys(scorerAverages);
|
|
1069
|
-
const maxNameLength = Math.max(...scorerNames.map((name) => name.length));
|
|
1070
|
-
const allCasesErrored = (scorerName) => {
|
|
1071
|
-
return suite.cases.every((caseData) => caseData.scores[scorerName]?.metadata?.error);
|
|
1072
|
-
};
|
|
1073
|
-
for (const scorerName of scorerNames) {
|
|
1074
|
-
const avg = scorerAverages[scorerName];
|
|
1075
|
-
const paddedName = scorerName.padEnd(maxNameLength);
|
|
1076
|
-
const hasAllErrors = allCasesErrored(scorerName);
|
|
1077
|
-
const baselineAvg = suite.baseline ? calculateBaselineScorerAverage2(suite.baseline, scorerName) : null;
|
|
1078
|
-
if (baselineAvg !== null) {
|
|
1079
|
-
const currentPercent = hasAllErrors ? u.dim("N/A") : formatPercentage(avg);
|
|
1080
|
-
const baselinePercent = formatPercentage(baselineAvg);
|
|
1081
|
-
const { text: diffText, color: diffColor } = formatDiff(avg, baselineAvg);
|
|
1082
|
-
const paddedBaseline = baselinePercent.padStart(7);
|
|
1083
|
-
const paddedCurrent = hasAllErrors ? currentPercent : currentPercent.padStart(7);
|
|
1084
|
-
const diffDisplay = hasAllErrors ? u.dim("all cases failed") : diffColor(diffText.padStart(8));
|
|
1085
|
-
logger(
|
|
1086
|
-
`\u2502 ${paddedName} ${u.blueBright(paddedBaseline)} \u2192 ${hasAllErrors ? paddedCurrent : u.magentaBright(paddedCurrent)} (${diffDisplay})`
|
|
1085
|
+
function loadConfigFile(path3) {
|
|
1086
|
+
const abs = (0, import_node_path.resolve)(process.cwd(), path3);
|
|
1087
|
+
try {
|
|
1088
|
+
const contents = (0, import_node_fs.readFileSync)(abs, "utf8");
|
|
1089
|
+
const parsed = JSON.parse(contents);
|
|
1090
|
+
if (typeof parsed !== "object" || Array.isArray(parsed) || parsed === null) {
|
|
1091
|
+
console.error(
|
|
1092
|
+
`\u274C Flags config must be a JSON object, got ${Array.isArray(parsed) ? "array" : typeof parsed}`
|
|
1087
1093
|
);
|
|
1088
|
-
|
|
1089
|
-
const currentPercent = hasAllErrors ? u.red("N/A (all cases failed)") : formatPercentage(avg);
|
|
1090
|
-
logger(`\u2502 \u2022 ${paddedName} ${currentPercent}`);
|
|
1094
|
+
process.exit(1);
|
|
1091
1095
|
}
|
|
1096
|
+
return parsed;
|
|
1097
|
+
} catch (err) {
|
|
1098
|
+
console.error(`\u274C Could not read or parse flags config "${path3}": ${err.message}`);
|
|
1099
|
+
process.exit(1);
|
|
1092
1100
|
}
|
|
1093
|
-
|
|
1094
|
-
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
|
|
1098
|
-
|
|
1099
|
-
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
|
|
1104
|
-
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
|
|
1108
|
-
|
|
1101
|
+
}
|
|
1102
|
+
function extractOverrides(argv) {
|
|
1103
|
+
const cleanedArgv2 = [];
|
|
1104
|
+
const overrides2 = {};
|
|
1105
|
+
let configPath = null;
|
|
1106
|
+
let hasCliFlags = false;
|
|
1107
|
+
let configPathCount = 0;
|
|
1108
|
+
for (let i = 0; i < argv.length; i++) {
|
|
1109
|
+
const token = argv[i];
|
|
1110
|
+
const configMatch = token.match(CONFIG_RE);
|
|
1111
|
+
const flagMatch = token.match(FLAG_RE);
|
|
1112
|
+
if (configMatch) {
|
|
1113
|
+
configPathCount++;
|
|
1114
|
+
if (configPathCount > 1) {
|
|
1115
|
+
console.error("\u274C Only one --flags-config can be supplied.");
|
|
1116
|
+
process.exit(1);
|
|
1109
1117
|
}
|
|
1110
|
-
|
|
1111
|
-
|
|
1118
|
+
const value = configMatch[1];
|
|
1119
|
+
const nextToken = argv.length > i + 1 ? argv[i + 1] : void 0;
|
|
1120
|
+
ensureNoSpaceSeparatedSyntax("flags-config", value, nextToken, "config");
|
|
1121
|
+
if (!value) {
|
|
1122
|
+
console.error("\u274C --flags-config requires a file path");
|
|
1123
|
+
console.error("\u{1F4A1} Use: --flags-config=path/to/config.json");
|
|
1124
|
+
process.exit(1);
|
|
1112
1125
|
}
|
|
1126
|
+
configPath = value;
|
|
1127
|
+
} else if (flagMatch) {
|
|
1128
|
+
hasCliFlags = true;
|
|
1129
|
+
const key = flagMatch[1];
|
|
1130
|
+
const value = flagMatch[2];
|
|
1131
|
+
const nextToken = argv.length > i + 1 ? argv[i + 1] : void 0;
|
|
1132
|
+
ensureNoSpaceSeparatedSyntax(key, value, nextToken, "flag");
|
|
1133
|
+
const finalValue = value === void 0 ? "true" : value;
|
|
1134
|
+
overrides2[key] = coerceValue(finalValue);
|
|
1135
|
+
} else {
|
|
1136
|
+
cleanedArgv2.push(token);
|
|
1113
1137
|
}
|
|
1114
1138
|
}
|
|
1115
|
-
if (
|
|
1116
|
-
|
|
1117
|
-
|
|
1118
|
-
|
|
1119
|
-
|
|
1120
|
-
|
|
1121
|
-
const lastStackTraceFnName = lastStackTraceFrame.split(" ").shift();
|
|
1122
|
-
const lastStackTraceFile = lastStackTraceFrame.split("/").pop()?.slice(0, -1);
|
|
1123
|
-
logger(
|
|
1124
|
-
`\u2502 \u2022 ${flag.flagPath} ${u.gray(`at ${lastStackTraceFnName} (${lastStackTraceFile})`)}`
|
|
1125
|
-
);
|
|
1126
|
-
}
|
|
1139
|
+
if (configPath && hasCliFlags) {
|
|
1140
|
+
console.error("\u274C Cannot use both --flags-config and --flag.* arguments together.");
|
|
1141
|
+
console.error("Choose one approach:");
|
|
1142
|
+
console.error(" \u2022 Config file: --flags-config=my-flags.json");
|
|
1143
|
+
console.error(" \u2022 CLI flags: --flag.temperature=0.9 --flag.model=gpt-4o");
|
|
1144
|
+
process.exit(1);
|
|
1127
1145
|
}
|
|
1128
|
-
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
-
const scorerTotals = {};
|
|
1132
|
-
for (const caseData of suite.cases) {
|
|
1133
|
-
for (const [scorerName, score] of Object.entries(caseData.scores)) {
|
|
1134
|
-
if (!scorerTotals[scorerName]) {
|
|
1135
|
-
scorerTotals[scorerName] = { sum: 0, count: 0 };
|
|
1136
|
-
}
|
|
1137
|
-
if (!score.metadata?.error) {
|
|
1138
|
-
scorerTotals[scorerName].sum += score.score || 0;
|
|
1139
|
-
scorerTotals[scorerName].count += 1;
|
|
1140
|
-
}
|
|
1141
|
-
}
|
|
1146
|
+
if (configPath) {
|
|
1147
|
+
const configOverrides = loadConfigFile(configPath);
|
|
1148
|
+
return { cleanedArgv: cleanedArgv2, overrides: configOverrides };
|
|
1142
1149
|
}
|
|
1143
|
-
|
|
1144
|
-
|
|
1145
|
-
|
|
1150
|
+
return { cleanedArgv: cleanedArgv2, overrides: overrides2 };
|
|
1151
|
+
}
|
|
1152
|
+
// Module-level bindings of "src/cli/utils/parse-flag-overrides.ts". They are
// declared here and assigned when the module wrapper below runs.
var import_zod6;
var import_node_fs;
var import_node_path;
var FLAG_RE;
var CONFIG_RE;

// Module wrapper registered through the bundler's __esm helper. The body
// loads dependencies and defines the argv-matching patterns, in source order.
var init_parse_flag_overrides = __esm({
  "src/cli/utils/parse-flag-overrides.ts"() {
    "use strict";
    init_cjs_shims();
    import_zod6 = require("zod");
    init_format_zod_errors();
    import_node_fs = require("fs");
    import_node_path = require("path");
    init_dot_path();
    init_deep_partial_schema();
    init_zod_internals();
    // Matches `--flag.<key>` with an optional `=<value>`:
    // group 1 is the key (one or more non-"=" characters), group 2 the value.
    FLAG_RE = /^--flag\.([^=]+)(?:=(.*))?$/;
    // Matches `--flags-config` with an optional `=<path>` captured in group 1.
    CONFIG_RE = /^--flags-config(?:=(.*))?$/;
  }
});
|
|
1168
|
+
|
|
1169
|
+
// src/cli/utils/glob-utils.ts
|
|
1170
|
+
/**
 * Reports whether a string contains at least one of the characters
 * `*`, `?`, `[`, `]`, `{`, `}` or `!`.
 *
 * @param {string} str - Text to inspect (e.g. a path or pattern argument).
 * @returns {boolean} `true` when any of those characters is present.
 */
function isGlob(str) {
  const globCharacters = /[*?[\]{}!]/;
  return globCharacters.test(str);
}
|
|
1149
|
-
|
|
1150
|
-
|
|
1151
|
-
|
|
1152
|
-
|
|
1153
|
-
scores.push(caseData.scores[scorerName].value);
|
|
1154
|
-
}
|
|
1173
|
+
// Module wrapper for "src/cli/utils/glob-utils.ts", registered through the
// bundler's __esm helper. The module has no state of its own to set up, so
// the body only runs the CommonJS shims.
var init_glob_utils = __esm({
  "src/cli/utils/glob-utils.ts"() {
    "use strict";
    init_cjs_shims();
  }
});
|
|
1179
|
+
|
|
1180
|
+
// src/util/errors.ts
|
|
1181
|
+
/**
 * Creates a `JSON.stringify` replacer that substitutes the string
 * "[Circular]" for any object that is already one of its own ancestors.
 *
 * Only the chain of objects currently being serialized is tracked, so an
 * object that is merely referenced more than once (siblings sharing a value,
 * the same object twice in an array) is serialized normally each time.
 * Because the chain is rebuilt from the holder on every call, the returned
 * replacer can also be reused across several `JSON.stringify` invocations.
 *
 * @returns {(this: unknown, key: string, value: unknown) => unknown}
 *   Replacer to pass as the second argument of `JSON.stringify`. It relies on
 *   `this` being the holder object, which `JSON.stringify` provides.
 */
function getCircularReplacer() {
  const ancestors = [];
  // A regular function (not an arrow) is required: the holder arrives as `this`.
  return function (_k, v) {
    if (typeof v !== "object" || v === null) {
      return v;
    }
    // Unwind to the holder of the current key so that `ancestors` contains
    // exactly the objects on the path from the root down to `v`'s parent.
    while (ancestors.length > 0 && ancestors[ancestors.length - 1] !== this) {
      ancestors.pop();
    }
    if (ancestors.includes(v)) {
      return "[Circular]";
    }
    ancestors.push(v);
    return v;
  };
}
|
|
1160
|
-
function
|
|
1161
|
-
|
|
1162
|
-
return
|
|
1191
|
+
function safeJson(x) {
|
|
1192
|
+
try {
|
|
1193
|
+
return JSON.stringify(x, getCircularReplacer());
|
|
1194
|
+
} catch {
|
|
1195
|
+
return String(x);
|
|
1163
1196
|
}
|
|
1164
|
-
|
|
1165
|
-
|
|
1166
|
-
|
|
1167
|
-
|
|
1168
|
-
|
|
1169
|
-
|
|
1170
|
-
|
|
1171
|
-
|
|
1172
|
-
|
|
1173
|
-
|
|
1174
|
-
|
|
1175
|
-
]);
|
|
1176
|
-
for (const key of allKeys) {
|
|
1177
|
-
const isInScope = suite.configFlags.some((pattern) => key.startsWith(pattern));
|
|
1178
|
-
if (!isInScope) continue;
|
|
1179
|
-
const currentValue = currentFlat[key];
|
|
1180
|
-
const baselineValue = baselineFlat[key];
|
|
1181
|
-
const defaultValue = defaultFlat[key];
|
|
1182
|
-
const currentStr = currentValue !== void 0 ? JSON.stringify(currentValue) : void 0;
|
|
1183
|
-
const baselineStr = baselineValue !== void 0 ? JSON.stringify(baselineValue) : void 0;
|
|
1184
|
-
const defaultStr = defaultValue !== void 0 ? JSON.stringify(defaultValue) : void 0;
|
|
1185
|
-
const diffFromBaseline = suite.baseline && currentStr !== baselineStr;
|
|
1186
|
-
const diffFromDefault = currentStr !== defaultStr;
|
|
1187
|
-
if (diffFromBaseline || diffFromDefault) {
|
|
1188
|
-
diffs.push({
|
|
1189
|
-
flag: key,
|
|
1190
|
-
current: currentStr,
|
|
1191
|
-
baseline: suite.baseline ? baselineStr : void 0,
|
|
1192
|
-
default: defaultStr
|
|
1193
|
-
});
|
|
1197
|
+
}
|
|
1198
|
+
function errorToString(err) {
|
|
1199
|
+
try {
|
|
1200
|
+
if (typeof err === "string") return err;
|
|
1201
|
+
if (err instanceof Error) {
|
|
1202
|
+
return err.stack ?? err.message;
|
|
1203
|
+
}
|
|
1204
|
+
if (typeof err === "object" && err !== null) {
|
|
1205
|
+
const msg = err.message;
|
|
1206
|
+
const json = safeJson(err);
|
|
1207
|
+
return msg ? `${msg} (${json})` : json;
|
|
1194
1208
|
}
|
|
1209
|
+
return String(err);
|
|
1210
|
+
} catch {
|
|
1211
|
+
return "[unserializable error]";
|
|
1195
1212
|
}
|
|
1196
|
-
return diffs;
|
|
1197
1213
|
}
|
|
1198
|
-
|
|
1199
|
-
|
|
1200
|
-
|
|
1201
|
-
|
|
1202
|
-
|
|
1203
|
-
|
|
1204
|
-
|
|
1205
|
-
|
|
1206
|
-
|
|
1207
|
-
|
|
1208
|
-
|
|
1209
|
-
const scorerAverages = calculateScorerAverages(suite);
|
|
1210
|
-
const flagDiff = calculateFlagDiff(suite);
|
|
1211
|
-
printSuiteBox({ suite, scorerAverages, calculateBaselineScorerAverage, flagDiff, logger });
|
|
1212
|
-
logger("");
|
|
1214
|
+
var AxiomCLIError;
|
|
1215
|
+
var init_errors = __esm({
|
|
1216
|
+
"src/util/errors.ts"() {
|
|
1217
|
+
"use strict";
|
|
1218
|
+
init_cjs_shims();
|
|
1219
|
+
AxiomCLIError = class extends Error {
|
|
1220
|
+
constructor(message) {
|
|
1221
|
+
super(message);
|
|
1222
|
+
this.name = "AxiomCLIError";
|
|
1223
|
+
}
|
|
1224
|
+
};
|
|
1213
1225
|
}
|
|
1214
|
-
|
|
1215
|
-
const orgId = suiteData[0]?.orgId;
|
|
1216
|
-
const anyRegistered = registrationStatus.some((s2) => s2.registered);
|
|
1217
|
-
const anyFailed = registrationStatus.some((s2) => !s2.registered);
|
|
1218
|
-
if (anyRegistered && orgId && config?.consoleEndpointUrl) {
|
|
1219
|
-
if (suiteData.length === 1) {
|
|
1220
|
-
const suite = suiteData[0];
|
|
1221
|
-
const baselineParam = suite.baseline?.traceId ? `?baselineId=${suite.baseline.traceId}` : "";
|
|
1222
|
-
logger("View eval result:");
|
|
1223
|
-
logger(
|
|
1224
|
-
`${config.consoleEndpointUrl}/${orgId}/ai-engineering/evaluations/${suite.name}/${suite.version}${baselineParam}`
|
|
1225
|
-
);
|
|
1226
|
-
} else {
|
|
1227
|
-
logger("View full report:");
|
|
1228
|
-
logger(`${config.consoleEndpointUrl}/${orgId}/ai-engineering/evaluations?runId=${runId}`);
|
|
1229
|
-
}
|
|
1230
|
-
} else if (isDebug) {
|
|
1231
|
-
logger(u.dim("Results not uploaded to Axiom (debug mode)"));
|
|
1232
|
-
} else {
|
|
1233
|
-
logger("Results not available in Axiom UI (registration failed)");
|
|
1234
|
-
}
|
|
1235
|
-
if (anyFailed) {
|
|
1236
|
-
logger("");
|
|
1237
|
-
for (const status of registrationStatus) {
|
|
1238
|
-
if (!status.registered) {
|
|
1239
|
-
logger(u.yellow(`\u26A0\uFE0F Warning: Failed to register "${status.name}" with Axiom`));
|
|
1240
|
-
if (status.error) {
|
|
1241
|
-
logger(u.dim(` Error: ${status.error}`));
|
|
1242
|
-
}
|
|
1243
|
-
logger(u.dim(` Results for this evaluation will not be available in the Axiom UI.`));
|
|
1244
|
-
}
|
|
1245
|
-
}
|
|
1246
|
-
}
|
|
1247
|
-
}
|
|
1248
|
-
|
|
1249
|
-
// src/config/resolver.ts
|
|
1250
|
-
init_cjs_shims();
|
|
1251
|
-
var buildConsoleUrl = (urlString) => {
|
|
1252
|
-
const url = new URL(urlString);
|
|
1253
|
-
return `${url.protocol}//app.${url.host.split("api.").at(-1)}`;
|
|
1254
|
-
};
|
|
1255
|
-
function resolveAxiomConnection(config, consoleUrlOverride) {
|
|
1256
|
-
const consoleEndpointUrl = consoleUrlOverride ?? buildConsoleUrl(config.eval.url);
|
|
1257
|
-
return {
|
|
1258
|
-
url: config.eval.url,
|
|
1259
|
-
consoleEndpointUrl,
|
|
1260
|
-
token: config.eval.token,
|
|
1261
|
-
dataset: config.eval.dataset,
|
|
1262
|
-
orgId: config.eval.orgId
|
|
1263
|
-
};
|
|
1264
|
-
}
|
|
1265
|
-
|
|
1266
|
-
// src/evals/reporter.ts
|
|
1267
|
-
var AxiomReporter = class {
|
|
1268
|
-
constructor() {
|
|
1269
|
-
__publicField(this, "startTime", 0);
|
|
1270
|
-
__publicField(this, "start", 0);
|
|
1271
|
-
__publicField(this, "_endOfRunConfigEnd");
|
|
1272
|
-
__publicField(this, "_suiteData", []);
|
|
1273
|
-
__publicField(this, "_printedFlagOverrides", false);
|
|
1274
|
-
__publicField(this, "_config");
|
|
1275
|
-
}
|
|
1276
|
-
onTestRunStart() {
|
|
1277
|
-
this.start = performance.now();
|
|
1278
|
-
this.startTime = (/* @__PURE__ */ new Date()).getTime();
|
|
1279
|
-
const config = getAxiomConfig();
|
|
1280
|
-
if (config) {
|
|
1281
|
-
this._config = resolveAxiomConnection(config, getConsoleUrl());
|
|
1282
|
-
}
|
|
1283
|
-
}
|
|
1284
|
-
async onTestSuiteReady(_testSuite) {
|
|
1285
|
-
const meta = _testSuite.meta();
|
|
1286
|
-
if (_testSuite.state() === "skipped" || !meta?.evaluation) {
|
|
1287
|
-
return;
|
|
1288
|
-
}
|
|
1289
|
-
if (!this._printedFlagOverrides) {
|
|
1290
|
-
const defaultsFromConfigEnd = meta.evaluation.configEnd?.flags ?? {};
|
|
1291
|
-
const overridesFromConfigEnd = meta.evaluation.configEnd?.overrides ?? {};
|
|
1292
|
-
if (Object.keys(overridesFromConfigEnd).length > 0) {
|
|
1293
|
-
printGlobalFlagOverrides(overridesFromConfigEnd, defaultsFromConfigEnd);
|
|
1294
|
-
}
|
|
1295
|
-
this._printedFlagOverrides = true;
|
|
1296
|
-
}
|
|
1297
|
-
if (meta.evaluation.configEnd && !this._endOfRunConfigEnd) {
|
|
1298
|
-
this._endOfRunConfigEnd = meta.evaluation.configEnd;
|
|
1299
|
-
}
|
|
1300
|
-
}
|
|
1301
|
-
onTestCaseReady(test) {
|
|
1302
|
-
const meta = test.meta();
|
|
1303
|
-
if (!meta.case) return;
|
|
1304
|
-
}
|
|
1305
|
-
async onTestSuiteResult(testSuite) {
|
|
1306
|
-
const meta = testSuite.meta();
|
|
1307
|
-
if (testSuite.state() === "skipped" || !meta?.evaluation) {
|
|
1308
|
-
return;
|
|
1309
|
-
}
|
|
1310
|
-
if (meta.evaluation.configEnd && !this._endOfRunConfigEnd) {
|
|
1311
|
-
this._endOfRunConfigEnd = meta.evaluation.configEnd;
|
|
1312
|
-
}
|
|
1313
|
-
const durationSeconds = Number((performance.now() - this.start) / 1e3).toFixed(2);
|
|
1314
|
-
const cases = [];
|
|
1315
|
-
for (const test of testSuite.children) {
|
|
1316
|
-
if (test.type !== "test") continue;
|
|
1317
|
-
const testMeta = test.meta();
|
|
1318
|
-
if (!testMeta?.case) continue;
|
|
1319
|
-
cases.push({
|
|
1320
|
-
index: testMeta.case.index,
|
|
1321
|
-
scores: testMeta.case.scores,
|
|
1322
|
-
outOfScopeFlags: testMeta.case.outOfScopeFlags,
|
|
1323
|
-
errors: testMeta.case.errors,
|
|
1324
|
-
runtimeFlags: testMeta.case.runtimeFlags
|
|
1325
|
-
});
|
|
1326
|
-
}
|
|
1327
|
-
const cwd = process.cwd();
|
|
1328
|
-
const relativePath = testSuite.module.moduleId.replace(cwd, "").replace(/^\//, "");
|
|
1329
|
-
let suiteBaseline = meta.evaluation.baseline;
|
|
1330
|
-
let flagConfig = meta.evaluation.flagConfig;
|
|
1331
|
-
if (meta.evaluation.configEnd) {
|
|
1332
|
-
const defaults = meta.evaluation.configEnd.flags ?? {};
|
|
1333
|
-
const overrides2 = meta.evaluation.configEnd.overrides ?? {};
|
|
1334
|
-
const defaultsFlat = flattenObject(defaults);
|
|
1335
|
-
const overridesFlat = flattenObject(overrides2);
|
|
1336
|
-
flagConfig = dotNotationToNested({ ...defaultsFlat, ...overridesFlat });
|
|
1337
|
-
}
|
|
1338
|
-
const defaultFlagConfig = meta.evaluation.configEnd?.flags;
|
|
1339
|
-
this._suiteData.push({
|
|
1340
|
-
version: meta.evaluation.version,
|
|
1341
|
-
name: meta.evaluation.name,
|
|
1342
|
-
file: relativePath,
|
|
1343
|
-
duration: durationSeconds + "s",
|
|
1344
|
-
baseline: suiteBaseline || null,
|
|
1345
|
-
configFlags: meta.evaluation.configFlags,
|
|
1346
|
-
flagConfig,
|
|
1347
|
-
defaultFlagConfig,
|
|
1348
|
-
runId: meta.evaluation.runId,
|
|
1349
|
-
orgId: meta.evaluation.orgId,
|
|
1350
|
-
cases,
|
|
1351
|
-
outOfScopeFlags: meta.evaluation.outOfScopeFlags,
|
|
1352
|
-
registrationStatus: meta.evaluation.registrationStatus
|
|
1353
|
-
});
|
|
1354
|
-
printEvalNameAndFileName(testSuite, meta);
|
|
1355
|
-
printBaselineNameAndVersion(meta);
|
|
1356
|
-
printTestCaseCountStartDuration(testSuite, this.startTime, durationSeconds);
|
|
1357
|
-
const matchedBaselineIndices = /* @__PURE__ */ new Set();
|
|
1358
|
-
const baselineCasesByFingerprint = /* @__PURE__ */ new Map();
|
|
1359
|
-
if (suiteBaseline) {
|
|
1360
|
-
for (const c of suiteBaseline.cases) {
|
|
1361
|
-
const fp = getCaseFingerprint(c.input, c.expected);
|
|
1362
|
-
const cases2 = baselineCasesByFingerprint.get(fp) || [];
|
|
1363
|
-
cases2.push(c);
|
|
1364
|
-
baselineCasesByFingerprint.set(fp, cases2);
|
|
1365
|
-
}
|
|
1366
|
-
}
|
|
1367
|
-
for (const test of testSuite.children) {
|
|
1368
|
-
if (test.type !== "test") continue;
|
|
1369
|
-
printCaseResult(test, baselineCasesByFingerprint, matchedBaselineIndices);
|
|
1370
|
-
}
|
|
1371
|
-
if (suiteBaseline) {
|
|
1372
|
-
printOrphanedBaselineCases(suiteBaseline, matchedBaselineIndices);
|
|
1373
|
-
}
|
|
1374
|
-
console.log("");
|
|
1375
|
-
}
|
|
1376
|
-
async onTestRunEnd(_testModules, _errors, _reason) {
|
|
1377
|
-
const shouldClear = !process.env.CI && process.stdout.isTTY !== false;
|
|
1378
|
-
if (shouldClear) {
|
|
1379
|
-
process.stdout.write("\x1B[2J\x1B[0f");
|
|
1380
|
-
}
|
|
1381
|
-
const registrationStatus = this._suiteData.map((suite) => ({
|
|
1382
|
-
name: suite.name,
|
|
1383
|
-
registered: suite.registrationStatus?.status === "success",
|
|
1384
|
-
error: suite.registrationStatus?.status === "failed" ? suite.registrationStatus.error : void 0
|
|
1385
|
-
}));
|
|
1386
|
-
const isDebug = process.env.AXIOM_DEBUG === "true";
|
|
1387
|
-
printFinalReport({
|
|
1388
|
-
suiteData: this._suiteData,
|
|
1389
|
-
config: this._config,
|
|
1390
|
-
registrationStatus,
|
|
1391
|
-
isDebug
|
|
1392
|
-
});
|
|
1393
|
-
}
|
|
1394
|
-
};
|
|
1395
|
-
|
|
1396
|
-
// src/evals/instrument.ts
|
|
1397
|
-
init_cjs_shims();
|
|
1398
|
-
var import_sdk_trace_node = require("@opentelemetry/sdk-trace-node");
|
|
1399
|
-
var import_resources = require("@opentelemetry/resources");
|
|
1400
|
-
var import_exporter_trace_otlp_http = require("@opentelemetry/exporter-trace-otlp-http");
|
|
1401
|
-
var import_api10 = require("@opentelemetry/api");
|
|
1402
|
-
|
|
1403
|
-
// src/util/errors.ts
|
|
1404
|
-
init_cjs_shims();
|
|
1405
|
-
var AxiomCLIError = class extends Error {
|
|
1406
|
-
constructor(message) {
|
|
1407
|
-
super(message);
|
|
1408
|
-
this.name = "AxiomCLIError";
|
|
1409
|
-
}
|
|
1410
|
-
};
|
|
1411
|
-
function getCircularReplacer() {
|
|
1412
|
-
const seen = /* @__PURE__ */ new WeakSet();
|
|
1413
|
-
return (_k, v) => {
|
|
1414
|
-
if (typeof v === "object" && v !== null) {
|
|
1415
|
-
if (seen.has(v)) return "[Circular]";
|
|
1416
|
-
seen.add(v);
|
|
1417
|
-
}
|
|
1418
|
-
return v;
|
|
1419
|
-
};
|
|
1420
|
-
}
|
|
1421
|
-
function safeJson(x) {
|
|
1422
|
-
try {
|
|
1423
|
-
return JSON.stringify(x, getCircularReplacer());
|
|
1424
|
-
} catch {
|
|
1425
|
-
return String(x);
|
|
1426
|
-
}
|
|
1427
|
-
}
|
|
1428
|
-
function errorToString(err) {
|
|
1429
|
-
try {
|
|
1430
|
-
if (typeof err === "string") return err;
|
|
1431
|
-
if (err instanceof Error) {
|
|
1432
|
-
return err.stack ?? err.message;
|
|
1433
|
-
}
|
|
1434
|
-
if (typeof err === "object" && err !== null) {
|
|
1435
|
-
const msg = err.message;
|
|
1436
|
-
const json = safeJson(err);
|
|
1437
|
-
return msg ? `${msg} (${json})` : json;
|
|
1438
|
-
}
|
|
1439
|
-
return String(err);
|
|
1440
|
-
} catch {
|
|
1441
|
-
return "[unserializable error]";
|
|
1442
|
-
}
|
|
1443
|
-
}
|
|
1444
|
-
|
|
1445
|
-
// src/config/loader.ts
|
|
1446
|
-
init_cjs_shims();
|
|
1447
|
-
var import_c12 = require("c12");
|
|
1448
|
-
var import_defu = require("defu");
|
|
1449
|
-
|
|
1450
|
-
// src/config/index.ts
|
|
1451
|
-
init_cjs_shims();
|
|
1452
|
-
var import_zod6 = require("zod");
|
|
1453
|
-
|
|
1454
|
-
// src/cli/auth/index.ts
|
|
1455
|
-
init_cjs_shims();
|
|
1226
|
+
});
|
|
1456
1227
|
|
|
1457
1228
|
// src/cli/auth/types.ts
|
|
1458
|
-
|
|
1229
|
+
// Module wrapper for "src/cli/auth/types.ts", registered through the
// bundler's __esm helper. Nothing from that module is emitted here beyond
// the CommonJS shim call.
var init_types = __esm({
  "src/cli/auth/types.ts"() {
    "use strict";
    init_cjs_shims();
  }
});
|
|
1459
1235
|
|
|
1460
1236
|
// src/cli/auth/config.ts
|
|
1461
|
-
init_cjs_shims();
|
|
1462
|
-
var import_fs = require("fs");
|
|
1463
|
-
var import_path = __toESM(require("path"), 1);
|
|
1464
|
-
var import_os = __toESM(require("os"), 1);
|
|
1465
|
-
var CONFIG_FILENAME = "config.json";
|
|
1466
|
-
var CONFIG_DIR_NAME = "axiom";
|
|
1467
1237
|
function getConfigDir() {
|
|
1468
1238
|
const platform = process.platform;
|
|
1469
1239
|
const homeDir = import_os.default.homedir();
|
|
@@ -1510,64 +1280,81 @@ function getActiveProfile(config) {
|
|
|
1510
1280
|
if (!profile) return null;
|
|
1511
1281
|
return profile;
|
|
1512
1282
|
}
|
|
1283
|
+
// Module-level bindings of "src/cli/auth/config.ts". They are declared here
// and assigned when the module wrapper below runs.
var import_fs;
var import_path;
var import_os;
var CONFIG_FILENAME;
var CONFIG_DIR_NAME;

// Module wrapper registered through the bundler's __esm helper. The body
// loads the Node built-ins this module uses and sets the file and directory
// names of the CLI auth config.
var init_config = __esm({
  "src/cli/auth/config.ts"() {
    "use strict";
    init_cjs_shims();
    import_fs = require("fs");
    import_path = __toESM(require("path"), 1);
    import_os = __toESM(require("os"), 1);
    CONFIG_FILENAME = "config.json";
    CONFIG_DIR_NAME = "axiom";
  }
});
|
|
1513
1295
|
|
|
1514
1296
|
// src/cli/auth/oauth.ts
|
|
1515
|
-
|
|
1516
|
-
var
|
|
1517
|
-
|
|
1518
|
-
|
|
1519
|
-
|
|
1520
|
-
|
|
1521
|
-
|
|
1522
|
-
|
|
1523
|
-
|
|
1524
|
-
|
|
1525
|
-
|
|
1526
|
-
|
|
1527
|
-
|
|
1528
|
-
|
|
1529
|
-
|
|
1530
|
-
|
|
1531
|
-
|
|
1532
|
-
|
|
1533
|
-
|
|
1534
|
-
|
|
1535
|
-
|
|
1536
|
-
|
|
1537
|
-
|
|
1538
|
-
|
|
1539
|
-
|
|
1540
|
-
|
|
1541
|
-
|
|
1542
|
-
|
|
1543
|
-
|
|
1544
|
-
|
|
1545
|
-
|
|
1546
|
-
|
|
1547
|
-
|
|
1548
|
-
|
|
1549
|
-
|
|
1550
|
-
|
|
1551
|
-
|
|
1552
|
-
|
|
1553
|
-
|
|
1554
|
-
|
|
1555
|
-
|
|
1556
|
-
|
|
1557
|
-
|
|
1558
|
-
|
|
1559
|
-
|
|
1560
|
-
|
|
1561
|
-
|
|
1562
|
-
|
|
1563
|
-
|
|
1564
|
-
|
|
1565
|
-
|
|
1297
|
+
var import_crypto, OAUTH_CLIENT_ID, OAUTH_AUTH_PATH, OAUTH_TOKEN_PATH, OAuth;
|
|
1298
|
+
var init_oauth = __esm({
|
|
1299
|
+
"src/cli/auth/oauth.ts"() {
|
|
1300
|
+
"use strict";
|
|
1301
|
+
init_cjs_shims();
|
|
1302
|
+
import_crypto = require("crypto");
|
|
1303
|
+
OAUTH_CLIENT_ID = "264d906a404efc209b027f6595e6b616";
|
|
1304
|
+
OAUTH_AUTH_PATH = "/oauth/authorize";
|
|
1305
|
+
OAUTH_TOKEN_PATH = "/oauth/token";
|
|
1306
|
+
OAuth = class {
|
|
1307
|
+
constructor(oauthBaseUrl) {
|
|
1308
|
+
this.oauthBaseUrl = oauthBaseUrl;
|
|
1309
|
+
}
|
|
1310
|
+
static generateCodeVerifier() {
|
|
1311
|
+
return (0, import_crypto.randomBytes)(32).toString("base64url");
|
|
1312
|
+
}
|
|
1313
|
+
static generateCodeChallenge(verifier) {
|
|
1314
|
+
return (0, import_crypto.createHash)("sha256").update(verifier).digest("base64url");
|
|
1315
|
+
}
|
|
1316
|
+
static generateState() {
|
|
1317
|
+
return (0, import_crypto.randomBytes)(16).toString("hex");
|
|
1318
|
+
}
|
|
1319
|
+
buildAuthUrl(params) {
|
|
1320
|
+
const url = new URL(OAUTH_AUTH_PATH, this.oauthBaseUrl);
|
|
1321
|
+
url.searchParams.set("client_id", OAUTH_CLIENT_ID);
|
|
1322
|
+
url.searchParams.set("redirect_uri", params.redirectUri);
|
|
1323
|
+
url.searchParams.set("response_type", "code");
|
|
1324
|
+
url.searchParams.set("state", params.state);
|
|
1325
|
+
url.searchParams.set("code_challenge", params.codeChallenge);
|
|
1326
|
+
url.searchParams.set("code_challenge_method", "S256");
|
|
1327
|
+
url.searchParams.set("scope", "*");
|
|
1328
|
+
return url.toString();
|
|
1329
|
+
}
|
|
1330
|
+
async exchangeCodeForToken(params) {
|
|
1331
|
+
const tokenUrl = new URL(OAUTH_TOKEN_PATH, this.oauthBaseUrl);
|
|
1332
|
+
const body = new URLSearchParams({
|
|
1333
|
+
grant_type: "authorization_code",
|
|
1334
|
+
client_id: OAUTH_CLIENT_ID,
|
|
1335
|
+
code: params.code,
|
|
1336
|
+
redirect_uri: params.redirectUri,
|
|
1337
|
+
code_verifier: params.codeVerifier
|
|
1338
|
+
});
|
|
1339
|
+
const response = await fetch(tokenUrl.toString(), {
|
|
1340
|
+
method: "POST",
|
|
1341
|
+
headers: {
|
|
1342
|
+
"Content-Type": "application/x-www-form-urlencoded"
|
|
1343
|
+
},
|
|
1344
|
+
body: body.toString()
|
|
1345
|
+
});
|
|
1346
|
+
if (!response.ok) {
|
|
1347
|
+
const errorText = await response.text();
|
|
1348
|
+
throw new Error(`Token exchange failed: ${response.status} ${errorText}`);
|
|
1349
|
+
}
|
|
1350
|
+
const data = await response.json();
|
|
1351
|
+
return data.access_token;
|
|
1352
|
+
}
|
|
1353
|
+
};
|
|
1566
1354
|
}
|
|
1567
|
-
};
|
|
1355
|
+
});
|
|
1568
1356
|
|
|
1569
1357
|
// src/cli/auth/api.ts
|
|
1570
|
-
init_cjs_shims();
|
|
1571
1358
|
async function fetchOrganizations(token, apiBaseUrl) {
|
|
1572
1359
|
const response = await fetch(`${apiBaseUrl}/v2/orgs`, {
|
|
1573
1360
|
headers: {
|
|
@@ -1593,10 +1380,15 @@ async function verifyToken(token, orgId, apiBaseUrl) {
|
|
|
1593
1380
|
});
|
|
1594
1381
|
return response.ok;
|
|
1595
1382
|
}
|
|
1383
|
+
var init_api = __esm({
|
|
1384
|
+
"src/cli/auth/api.ts"() {
|
|
1385
|
+
"use strict";
|
|
1386
|
+
init_cjs_shims();
|
|
1387
|
+
init_errors();
|
|
1388
|
+
}
|
|
1389
|
+
});
|
|
1596
1390
|
|
|
1597
1391
|
// src/cli/auth/callback-server.ts
|
|
1598
|
-
init_cjs_shims();
|
|
1599
|
-
var import_http = __toESM(require("http"), 1);
|
|
1600
1392
|
function escapeHtml(text) {
|
|
1601
1393
|
const map = {
|
|
1602
1394
|
"&": "&",
|
|
@@ -1608,9 +1400,6 @@ function escapeHtml(text) {
|
|
|
1608
1400
|
};
|
|
1609
1401
|
return text.replace(/[&<>"']/g, (m2) => map[m2] || m2);
|
|
1610
1402
|
}
|
|
1611
|
-
var SVG_LOGO = `<svg width="124" height="24" viewBox="0 0 124 24" fill="none" xmlns="http://www.w3.org/2000/svg" class="logo">
|
|
1612
|
-
<path d="M42.9919 16.8116H36.3696L35.5537 19.1572C35.4209 19.539 34.9714 19.8513 34.5548 19.8513H30.9779C30.5614 19.8513 30.3459 19.5449 30.499 19.1703L36.9816 3.31644C37.1346 2.9419 37.6009 2.63546 38.0174 2.63546H41.3718C41.7883 2.63546 42.2541 2.94207 42.4067 3.3168L48.8634 19.17C49.016 19.5447 48.8 19.8513 48.3835 19.8513H44.8067C44.3901 19.8513 43.9406 19.539 43.8078 19.1572L42.9919 16.8116ZM41.8232 13.4223L39.6807 7.18148L37.5383 13.4223H41.8232ZM64.1105 19.8513C63.694 19.8513 63.1767 19.5694 62.9611 19.2247L59.9029 14.3369L56.8447 19.2247C56.6291 19.5694 56.1119 19.8513 55.6953 19.8513H51.3392C50.9227 19.8513 50.7754 19.5801 51.0119 19.2486L56.8978 11.0013L51.5315 3.24414C51.2999 2.90937 51.4513 2.63546 51.8679 2.63546H55.89C56.3066 2.63546 56.8268 2.91563 57.0461 3.25807L59.9029 7.71959L62.7343 3.25955C62.9522 2.9163 63.4713 2.63546 63.8879 2.63546H67.938C68.3545 2.63546 68.5048 2.90868 68.272 3.24261L62.8801 10.9743L68.7935 19.2489C69.0303 19.5802 68.8832 19.8513 68.4666 19.8513H64.1105ZM76.525 19.119C76.525 19.5218 76.1841 19.8513 75.7675 19.8513H72.5522C72.1356 19.8513 71.7947 19.5218 71.7947 19.119V3.36771C71.7947 2.96498 72.1356 2.63546 72.5522 2.63546H75.7675C76.1841 2.63546 76.525 2.96498 76.525 3.36771V19.119ZM79.3736 11.1896C79.3736 6.18625 83.2688 2.15134 89.2511 2.15134C95.2334 2.15134 99.1289 6.18625 99.1289 11.1896C99.1289 16.2199 95.2334 20.2548 89.2511 20.2548C83.2688 20.2548 79.3736 16.2199 79.3736 11.1896ZM94.2873 11.1896C94.2873 8.58038 92.3953 6.21307 89.2511 6.21307C86.1349 6.21307 84.2149 8.58038 84.2149 11.1896C84.2149 13.7989 86.1349 16.2199 89.2511 16.2199C92.3675 16.2199 94.2873 13.7989 94.2873 11.1896ZM119.318 19.8513C118.902 19.8513 118.517 19.5245 118.463 19.1251L117.225 9.89847L113.65 19.1652C113.505 19.5426 113.045 19.8513 112.628 19.8513H111.055C110.638 19.8513 110.179 19.5423 110.034 19.1645L106.457 9.81766L105.218 19.125C105.165 19.5245 104.781 19.8513 104.364 19.8513H101.344C100.927 19.8513 100.633 19.5249 100.69 
19.1259L102.932 3.3609C102.988 2.96191 103.376 2.63546 103.792 2.63546H106.59C107.007 2.63546 107.469 2.94351 107.617 3.32002L111.827 14.041L116.064 3.31949C116.213 2.94327 116.676 2.63546 117.092 2.63546H119.89C120.307 2.63546 120.694 2.96191 120.751 3.3609L122.993 19.1259C123.05 19.5249 122.755 19.8513 122.339 19.8513H119.318ZM23.9616 15.6531L18.8054 6.97021C18.5689 6.57115 17.9863 6.24465 17.5106 6.24465H14.2915C13.5433 6.24465 13.2365 5.73171 13.6097 5.1048L15.375 2.13986C15.5151 1.90455 15.5148 1.61487 15.3743 1.37981C15.2337 1.14476 14.9741 1 14.6933 1H10.2025C9.72681 1 9.14291 1.32577 8.90491 1.72395L0.17865 16.3217C-0.0594434 16.7199 -0.0595348 17.3715 0.178285 17.7698L2.42362 21.5297C2.79777 22.1561 3.41129 22.1569 3.78699 21.5313L5.54143 18.6103C5.91722 17.9847 6.53065 17.9854 6.9048 18.6119L8.49538 21.2754C8.7332 21.6737 9.317 21.9995 9.79273 21.9995H20.1698C20.6455 21.9995 21.2293 21.6737 21.4672 21.2754L23.959 17.1028C24.1968 16.7045 24.198 16.0521 23.9616 15.6531ZM16.9981 15.2352C17.3699 15.8629 17.0619 16.3765 16.3136 16.3765H8.24192C7.49372 16.3765 7.1876 15.864 7.56175 15.2375L11.6007 8.47417C11.9748 7.84772 12.5869 7.84774 12.9611 8.47421L16.9981 15.2352Z" fill="#121224"/>
|
|
1613
|
-
</svg>`;
|
|
1614
1403
|
function renderCallbackPage(error) {
|
|
1615
1404
|
const errorClass = error ? ' class="error"' : "";
|
|
1616
1405
|
const errorMessage = error ? escapeHtml(error) : "";
|
|
@@ -1757,10 +1546,19 @@ async function waitForCallback(server, expectedState) {
|
|
|
1757
1546
|
});
|
|
1758
1547
|
});
|
|
1759
1548
|
}
|
|
1549
|
+
var import_http, SVG_LOGO;
|
|
1550
|
+
var init_callback_server = __esm({
|
|
1551
|
+
"src/cli/auth/callback-server.ts"() {
|
|
1552
|
+
"use strict";
|
|
1553
|
+
init_cjs_shims();
|
|
1554
|
+
import_http = __toESM(require("http"), 1);
|
|
1555
|
+
SVG_LOGO = `<svg width="124" height="24" viewBox="0 0 124 24" fill="none" xmlns="http://www.w3.org/2000/svg" class="logo">
|
|
1556
|
+
<path d="M42.9919 16.8116H36.3696L35.5537 19.1572C35.4209 19.539 34.9714 19.8513 34.5548 19.8513H30.9779C30.5614 19.8513 30.3459 19.5449 30.499 19.1703L36.9816 3.31644C37.1346 2.9419 37.6009 2.63546 38.0174 2.63546H41.3718C41.7883 2.63546 42.2541 2.94207 42.4067 3.3168L48.8634 19.17C49.016 19.5447 48.8 19.8513 48.3835 19.8513H44.8067C44.3901 19.8513 43.9406 19.539 43.8078 19.1572L42.9919 16.8116ZM41.8232 13.4223L39.6807 7.18148L37.5383 13.4223H41.8232ZM64.1105 19.8513C63.694 19.8513 63.1767 19.5694 62.9611 19.2247L59.9029 14.3369L56.8447 19.2247C56.6291 19.5694 56.1119 19.8513 55.6953 19.8513H51.3392C50.9227 19.8513 50.7754 19.5801 51.0119 19.2486L56.8978 11.0013L51.5315 3.24414C51.2999 2.90937 51.4513 2.63546 51.8679 2.63546H55.89C56.3066 2.63546 56.8268 2.91563 57.0461 3.25807L59.9029 7.71959L62.7343 3.25955C62.9522 2.9163 63.4713 2.63546 63.8879 2.63546H67.938C68.3545 2.63546 68.5048 2.90868 68.272 3.24261L62.8801 10.9743L68.7935 19.2489C69.0303 19.5802 68.8832 19.8513 68.4666 19.8513H64.1105ZM76.525 19.119C76.525 19.5218 76.1841 19.8513 75.7675 19.8513H72.5522C72.1356 19.8513 71.7947 19.5218 71.7947 19.119V3.36771C71.7947 2.96498 72.1356 2.63546 72.5522 2.63546H75.7675C76.1841 2.63546 76.525 2.96498 76.525 3.36771V19.119ZM79.3736 11.1896C79.3736 6.18625 83.2688 2.15134 89.2511 2.15134C95.2334 2.15134 99.1289 6.18625 99.1289 11.1896C99.1289 16.2199 95.2334 20.2548 89.2511 20.2548C83.2688 20.2548 79.3736 16.2199 79.3736 11.1896ZM94.2873 11.1896C94.2873 8.58038 92.3953 6.21307 89.2511 6.21307C86.1349 6.21307 84.2149 8.58038 84.2149 11.1896C84.2149 13.7989 86.1349 16.2199 89.2511 16.2199C92.3675 16.2199 94.2873 13.7989 94.2873 11.1896ZM119.318 19.8513C118.902 19.8513 118.517 19.5245 118.463 19.1251L117.225 9.89847L113.65 19.1652C113.505 19.5426 113.045 19.8513 112.628 19.8513H111.055C110.638 19.8513 110.179 19.5423 110.034 19.1645L106.457 9.81766L105.218 19.125C105.165 19.5245 104.781 19.8513 104.364 19.8513H101.344C100.927 19.8513 100.633 19.5249 100.69 
19.1259L102.932 3.3609C102.988 2.96191 103.376 2.63546 103.792 2.63546H106.59C107.007 2.63546 107.469 2.94351 107.617 3.32002L111.827 14.041L116.064 3.31949C116.213 2.94327 116.676 2.63546 117.092 2.63546H119.89C120.307 2.63546 120.694 2.96191 120.751 3.3609L122.993 19.1259C123.05 19.5249 122.755 19.8513 122.339 19.8513H119.318ZM23.9616 15.6531L18.8054 6.97021C18.5689 6.57115 17.9863 6.24465 17.5106 6.24465H14.2915C13.5433 6.24465 13.2365 5.73171 13.6097 5.1048L15.375 2.13986C15.5151 1.90455 15.5148 1.61487 15.3743 1.37981C15.2337 1.14476 14.9741 1 14.6933 1H10.2025C9.72681 1 9.14291 1.32577 8.90491 1.72395L0.17865 16.3217C-0.0594434 16.7199 -0.0595348 17.3715 0.178285 17.7698L2.42362 21.5297C2.79777 22.1561 3.41129 22.1569 3.78699 21.5313L5.54143 18.6103C5.91722 17.9847 6.53065 17.9854 6.9048 18.6119L8.49538 21.2754C8.7332 21.6737 9.317 21.9995 9.79273 21.9995H20.1698C20.6455 21.9995 21.2293 21.6737 21.4672 21.2754L23.959 17.1028C24.1968 16.7045 24.198 16.0521 23.9616 15.6531ZM16.9981 15.2352C17.3699 15.8629 17.0619 16.3765 16.3136 16.3765H8.24192C7.49372 16.3765 7.1876 15.864 7.56175 15.2375L11.6007 8.47417C11.9748 7.84772 12.5869 7.84774 12.9611 8.47421L16.9981 15.2352Z" fill="#121224"/>
|
|
1557
|
+
</svg>`;
|
|
1558
|
+
}
|
|
1559
|
+
});
|
|
1760
1560
|
|
|
1761
1561
|
// src/cli/auth/global-auth.ts
|
|
1762
|
-
init_cjs_shims();
|
|
1763
|
-
var authContext = null;
|
|
1764
1562
|
function getAuthContext() {
|
|
1765
1563
|
return authContext;
|
|
1766
1564
|
}
|
|
@@ -1776,9 +1574,31 @@ async function setupGlobalAuth() {
|
|
|
1776
1574
|
}
|
|
1777
1575
|
return authContext;
|
|
1778
1576
|
}
|
|
1577
|
+
var authContext;
|
|
1578
|
+
var init_global_auth = __esm({
|
|
1579
|
+
"src/cli/auth/global-auth.ts"() {
|
|
1580
|
+
"use strict";
|
|
1581
|
+
init_cjs_shims();
|
|
1582
|
+
init_config();
|
|
1583
|
+
authContext = null;
|
|
1584
|
+
}
|
|
1585
|
+
});
|
|
1586
|
+
|
|
1587
|
+
// src/cli/auth/index.ts
|
|
1588
|
+
var init_auth = __esm({
|
|
1589
|
+
"src/cli/auth/index.ts"() {
|
|
1590
|
+
"use strict";
|
|
1591
|
+
init_cjs_shims();
|
|
1592
|
+
init_types();
|
|
1593
|
+
init_config();
|
|
1594
|
+
init_oauth();
|
|
1595
|
+
init_api();
|
|
1596
|
+
init_callback_server();
|
|
1597
|
+
init_global_auth();
|
|
1598
|
+
}
|
|
1599
|
+
});
|
|
1779
1600
|
|
|
1780
1601
|
// src/config/index.ts
|
|
1781
|
-
var DEFAULT_EVAL_INCLUDE = ["**/*.eval.{ts,js,mts,mjs,cts,cjs}"];
|
|
1782
1602
|
function createPartialDefaults() {
|
|
1783
1603
|
let token;
|
|
1784
1604
|
let url;
|
|
@@ -1841,6 +1661,17 @@ function validateConfig(config) {
|
|
|
1841
1661
|
}
|
|
1842
1662
|
return config;
|
|
1843
1663
|
}
|
|
1664
|
+
var import_zod7, DEFAULT_EVAL_INCLUDE;
|
|
1665
|
+
var init_config2 = __esm({
|
|
1666
|
+
"src/config/index.ts"() {
|
|
1667
|
+
"use strict";
|
|
1668
|
+
init_cjs_shims();
|
|
1669
|
+
import_zod7 = require("zod");
|
|
1670
|
+
init_errors();
|
|
1671
|
+
init_auth();
|
|
1672
|
+
DEFAULT_EVAL_INCLUDE = ["**/*.eval.{ts,js,mts,mjs,cts,cjs}"];
|
|
1673
|
+
}
|
|
1674
|
+
});
|
|
1844
1675
|
|
|
1845
1676
|
// src/config/loader.ts
|
|
1846
1677
|
function customMerger(target, source) {
|
|
@@ -1880,446 +1711,967 @@ async function loadConfig(cwd = process.cwd()) {
|
|
|
1880
1711
|
throw new AxiomCLIError(`Failed to load config file: ${errorToString(error)}`);
|
|
1881
1712
|
}
|
|
1882
1713
|
}
|
|
1714
|
+
var import_c12, import_defu;
|
|
1715
|
+
var init_loader = __esm({
|
|
1716
|
+
"src/config/loader.ts"() {
|
|
1717
|
+
"use strict";
|
|
1718
|
+
init_cjs_shims();
|
|
1719
|
+
import_c12 = require("c12");
|
|
1720
|
+
import_defu = require("defu");
|
|
1721
|
+
init_config2();
|
|
1722
|
+
init_errors();
|
|
1723
|
+
}
|
|
1724
|
+
});
|
|
1883
1725
|
|
|
1884
|
-
//
|
|
1885
|
-
|
|
1886
|
-
|
|
1887
|
-
|
|
1888
|
-
|
|
1889
|
-
|
|
1890
|
-
|
|
1891
|
-
|
|
1892
|
-
|
|
1726
|
+
// ../../node_modules/.pnpm/tinyrainbow@2.0.0/node_modules/tinyrainbow/dist/chunk-BVHSVHOK.js
|
|
1727
|
+
function a(n) {
|
|
1728
|
+
return String(n);
|
|
1729
|
+
}
|
|
1730
|
+
function C(n = false) {
|
|
1731
|
+
let e = typeof process != "undefined" ? process : void 0, i = (e == null ? void 0 : e.env) || {}, g = (e == null ? void 0 : e.argv) || [];
|
|
1732
|
+
return !("NO_COLOR" in i || g.includes("--no-color")) && ("FORCE_COLOR" in i || g.includes("--color") || (e == null ? void 0 : e.platform) === "win32" || n && i.TERM !== "dumb" || "CI" in i) || typeof window != "undefined" && !!window.chrome;
|
|
1733
|
+
}
|
|
1734
|
+
function p(n = false) {
|
|
1735
|
+
let e = C(n), i = (r2, t, c, o) => {
|
|
1736
|
+
let l = "", s2 = 0;
|
|
1737
|
+
do
|
|
1738
|
+
l += r2.substring(s2, o) + c, s2 = o + t.length, o = r2.indexOf(t, s2);
|
|
1739
|
+
while (~o);
|
|
1740
|
+
return l + r2.substring(s2);
|
|
1741
|
+
}, g = (r2, t, c = r2) => {
|
|
1742
|
+
let o = (l) => {
|
|
1743
|
+
let s2 = String(l), b = s2.indexOf(t, r2.length);
|
|
1744
|
+
return ~b ? r2 + i(s2, t, c, b) + t : r2 + s2 + t;
|
|
1745
|
+
};
|
|
1746
|
+
return o.open = r2, o.close = t, o;
|
|
1747
|
+
}, u2 = {
|
|
1748
|
+
isColorSupported: e
|
|
1749
|
+
}, d = (r2) => `\x1B[${r2}m`;
|
|
1750
|
+
for (let [r2, t] of h)
|
|
1751
|
+
u2[r2] = e ? g(
|
|
1752
|
+
d(t[0]),
|
|
1753
|
+
d(t[1]),
|
|
1754
|
+
t[2]
|
|
1755
|
+
) : a;
|
|
1756
|
+
return u2;
|
|
1757
|
+
}
|
|
1758
|
+
var f, h;
|
|
1759
|
+
var init_chunk_BVHSVHOK = __esm({
|
|
1760
|
+
"../../node_modules/.pnpm/tinyrainbow@2.0.0/node_modules/tinyrainbow/dist/chunk-BVHSVHOK.js"() {
|
|
1761
|
+
"use strict";
|
|
1762
|
+
init_cjs_shims();
|
|
1763
|
+
f = {
|
|
1764
|
+
reset: [0, 0],
|
|
1765
|
+
bold: [1, 22, "\x1B[22m\x1B[1m"],
|
|
1766
|
+
dim: [2, 22, "\x1B[22m\x1B[2m"],
|
|
1767
|
+
italic: [3, 23],
|
|
1768
|
+
underline: [4, 24],
|
|
1769
|
+
inverse: [7, 27],
|
|
1770
|
+
hidden: [8, 28],
|
|
1771
|
+
strikethrough: [9, 29],
|
|
1772
|
+
black: [30, 39],
|
|
1773
|
+
red: [31, 39],
|
|
1774
|
+
green: [32, 39],
|
|
1775
|
+
yellow: [33, 39],
|
|
1776
|
+
blue: [34, 39],
|
|
1777
|
+
magenta: [35, 39],
|
|
1778
|
+
cyan: [36, 39],
|
|
1779
|
+
white: [37, 39],
|
|
1780
|
+
gray: [90, 39],
|
|
1781
|
+
bgBlack: [40, 49],
|
|
1782
|
+
bgRed: [41, 49],
|
|
1783
|
+
bgGreen: [42, 49],
|
|
1784
|
+
bgYellow: [43, 49],
|
|
1785
|
+
bgBlue: [44, 49],
|
|
1786
|
+
bgMagenta: [45, 49],
|
|
1787
|
+
bgCyan: [46, 49],
|
|
1788
|
+
bgWhite: [47, 49],
|
|
1789
|
+
blackBright: [90, 39],
|
|
1790
|
+
redBright: [91, 39],
|
|
1791
|
+
greenBright: [92, 39],
|
|
1792
|
+
yellowBright: [93, 39],
|
|
1793
|
+
blueBright: [94, 39],
|
|
1794
|
+
magentaBright: [95, 39],
|
|
1795
|
+
cyanBright: [96, 39],
|
|
1796
|
+
whiteBright: [97, 39],
|
|
1797
|
+
bgBlackBright: [100, 49],
|
|
1798
|
+
bgRedBright: [101, 49],
|
|
1799
|
+
bgGreenBright: [102, 49],
|
|
1800
|
+
bgYellowBright: [103, 49],
|
|
1801
|
+
bgBlueBright: [104, 49],
|
|
1802
|
+
bgMagentaBright: [105, 49],
|
|
1803
|
+
bgCyanBright: [106, 49],
|
|
1804
|
+
bgWhiteBright: [107, 49]
|
|
1805
|
+
};
|
|
1806
|
+
h = Object.entries(f);
|
|
1807
|
+
a.open = "";
|
|
1808
|
+
a.close = "";
|
|
1893
1809
|
}
|
|
1894
|
-
|
|
1895
|
-
|
|
1896
|
-
|
|
1897
|
-
|
|
1898
|
-
|
|
1899
|
-
|
|
1900
|
-
|
|
1810
|
+
});
|
|
1811
|
+
|
|
1812
|
+
// ../../node_modules/.pnpm/tinyrainbow@2.0.0/node_modules/tinyrainbow/dist/node.js
|
|
1813
|
+
var import_tty, r, u;
|
|
1814
|
+
var init_node = __esm({
|
|
1815
|
+
"../../node_modules/.pnpm/tinyrainbow@2.0.0/node_modules/tinyrainbow/dist/node.js"() {
|
|
1816
|
+
"use strict";
|
|
1817
|
+
init_cjs_shims();
|
|
1818
|
+
init_chunk_BVHSVHOK();
|
|
1819
|
+
import_tty = require("tty");
|
|
1820
|
+
r = process.env.FORCE_TTY !== void 0 || (0, import_tty.isatty)(1);
|
|
1821
|
+
u = p(r);
|
|
1822
|
+
}
|
|
1823
|
+
});
|
|
1824
|
+
|
|
1825
|
+
// src/evals/reporter.console-utils.ts
|
|
1826
|
+
function formatPercentage(value) {
|
|
1827
|
+
if (!Number.isFinite(value)) {
|
|
1828
|
+
return "N/A";
|
|
1901
1829
|
}
|
|
1830
|
+
return Number(value * 100).toFixed(2) + "%";
|
|
1902
1831
|
}
|
|
1903
|
-
|
|
1832
|
+
function formatDiff(current, baseline) {
|
|
1833
|
+
if (!Number.isFinite(current) || !Number.isFinite(baseline)) {
|
|
1834
|
+
return { text: "N/A", color: u.dim };
|
|
1835
|
+
}
|
|
1836
|
+
const diff = current - baseline;
|
|
1837
|
+
const diffText = (diff >= 0 ? "+" : "") + formatPercentage(diff);
|
|
1838
|
+
const color = diff > 0 ? u.green : diff < 0 ? u.red : u.dim;
|
|
1839
|
+
return { text: diffText, color };
|
|
1840
|
+
}
|
|
1841
|
+
function truncate(str, max) {
|
|
1842
|
+
return str.length > max ? str.slice(0, max) + "\u2026" : str;
|
|
1843
|
+
}
|
|
1844
|
+
function stringify(value) {
|
|
1904
1845
|
try {
|
|
1905
|
-
|
|
1906
|
-
|
|
1907
|
-
|
|
1846
|
+
if (typeof value === "string") return value;
|
|
1847
|
+
return JSON.stringify(value);
|
|
1848
|
+
} catch {
|
|
1849
|
+
return String(value);
|
|
1908
1850
|
}
|
|
1909
1851
|
}
|
|
1910
|
-
function
|
|
1911
|
-
const
|
|
1912
|
-
|
|
1913
|
-
|
|
1914
|
-
|
|
1915
|
-
|
|
1916
|
-
|
|
1852
|
+
function getCaseFingerprint(input, expected) {
|
|
1853
|
+
const inputStr = typeof input === "string" ? input : JSON.stringify(input);
|
|
1854
|
+
const expectedStr = typeof expected === "string" ? expected : JSON.stringify(expected);
|
|
1855
|
+
return JSON.stringify({ input: inputStr, expected: expectedStr });
|
|
1856
|
+
}
|
|
1857
|
+
function printEvalNameAndFileName(testSuite, meta, logger = console.log) {
|
|
1858
|
+
const cwd = process.cwd();
|
|
1859
|
+
logger(
|
|
1860
|
+
" ",
|
|
1861
|
+
u.bgCyan(u.black(` ${testSuite.project.name} `)),
|
|
1862
|
+
u.bgBlue(u.black(` ${meta.evaluation.name}-${meta.evaluation.version} `)),
|
|
1863
|
+
u.dim(`(${testSuite.children.size} cases)`)
|
|
1864
|
+
);
|
|
1865
|
+
logger(" ", u.dim(testSuite.module.moduleId.replace(cwd, "")));
|
|
1866
|
+
}
|
|
1867
|
+
function printBaselineNameAndVersion(testMeta, logger = console.log) {
|
|
1868
|
+
if (testMeta.evaluation.baseline) {
|
|
1869
|
+
logger(
|
|
1870
|
+
" ",
|
|
1871
|
+
" baseline ",
|
|
1872
|
+
u.bgMagenta(
|
|
1873
|
+
u.black(` ${testMeta.evaluation.baseline.name}-${testMeta.evaluation.baseline.version} `)
|
|
1874
|
+
)
|
|
1875
|
+
);
|
|
1876
|
+
} else {
|
|
1877
|
+
logger(" ", u.bgWhite(u.blackBright(" baseline: ")), "none");
|
|
1917
1878
|
}
|
|
1918
|
-
|
|
1919
|
-
|
|
1920
|
-
|
|
1921
|
-
|
|
1922
|
-
|
|
1923
|
-
|
|
1924
|
-
|
|
1925
|
-
|
|
1926
|
-
|
|
1927
|
-
|
|
1928
|
-
|
|
1929
|
-
|
|
1930
|
-
|
|
1931
|
-
|
|
1932
|
-
|
|
1933
|
-
|
|
1934
|
-
|
|
1935
|
-
|
|
1879
|
+
logger("");
|
|
1880
|
+
}
|
|
1881
|
+
function printRuntimeFlags(testMeta, logger = console.log) {
|
|
1882
|
+
if (testMeta.case.runtimeFlags && Object.keys(testMeta.case.runtimeFlags).length > 0) {
|
|
1883
|
+
const entries = Object.entries(testMeta.case.runtimeFlags);
|
|
1884
|
+
logger(" ", u.dim("runtime flags"));
|
|
1885
|
+
for (const [k, v] of entries) {
|
|
1886
|
+
switch (v.kind) {
|
|
1887
|
+
case "replaced": {
|
|
1888
|
+
const valText = truncate(stringify(v.value), 80);
|
|
1889
|
+
const defText = truncate(stringify(v.default), 80);
|
|
1890
|
+
logger(" ", `${k}: ${valText} (default: ${defText})`);
|
|
1891
|
+
break;
|
|
1892
|
+
}
|
|
1893
|
+
case "introduced": {
|
|
1894
|
+
const valText = truncate(stringify(v.value), 80);
|
|
1895
|
+
logger(" ", `${k}: ${valText} (no default)`);
|
|
1896
|
+
break;
|
|
1897
|
+
}
|
|
1898
|
+
}
|
|
1899
|
+
}
|
|
1900
|
+
}
|
|
1901
|
+
}
|
|
1902
|
+
function printTestCaseCountStartDuration(testSuite, startTime, duration, logger = console.log) {
|
|
1903
|
+
logger(" ");
|
|
1904
|
+
logger(" ", u.dim("Cases"), testSuite.children.size);
|
|
1905
|
+
logger(" ", u.dim("Start at"), new Date(startTime).toTimeString());
|
|
1906
|
+
logger(" ", u.dim("Duration"), `${duration}s`);
|
|
1907
|
+
}
|
|
1908
|
+
function printTestCaseSuccessOrFailed(testMeta, ok, logger = console.log) {
|
|
1909
|
+
const index = testMeta.case.index;
|
|
1910
|
+
if (ok) {
|
|
1911
|
+
logger(" ", u.yellow(` \u2714 case ${index}:`));
|
|
1912
|
+
} else {
|
|
1913
|
+
logger(" ", u.red(` \u2716 case ${index}: failed`));
|
|
1914
|
+
for (const e of testMeta.case.errors ?? []) {
|
|
1915
|
+
logger("", e.message);
|
|
1916
|
+
}
|
|
1917
|
+
}
|
|
1918
|
+
}
|
|
1919
|
+
function printTestCaseScores(testMeta, baselineCase, logger = console.log) {
|
|
1920
|
+
const scores = testMeta.case.scores;
|
|
1921
|
+
const keys = Object.keys(scores);
|
|
1922
|
+
if (keys.length === 0) {
|
|
1923
|
+
return;
|
|
1924
|
+
}
|
|
1925
|
+
const maxNameLength = Math.max(...keys.map((k) => k.length));
|
|
1926
|
+
keys.forEach((k) => {
|
|
1927
|
+
const scoreData = scores[k];
|
|
1928
|
+
const hasError = scoreData.metadata?.error;
|
|
1929
|
+
const v = scoreData.score ? scoreData.score : 0;
|
|
1930
|
+
const rawCurrent = hasError ? "N/A" : formatPercentage(v);
|
|
1931
|
+
const paddedCurrent = rawCurrent.padStart(7);
|
|
1932
|
+
const coloredCurrent = hasError ? u.dim(paddedCurrent) : u.magentaBright(paddedCurrent);
|
|
1933
|
+
const paddedName = k.padEnd(maxNameLength);
|
|
1934
|
+
if (baselineCase?.scores[k]) {
|
|
1935
|
+
const baselineScoreValue = baselineCase.scores[k].value;
|
|
1936
|
+
const rawBaseline = formatPercentage(baselineScoreValue);
|
|
1937
|
+
const paddedBaseline = rawBaseline.padStart(7);
|
|
1938
|
+
const coloredBaseline = u.blueBright(paddedBaseline);
|
|
1939
|
+
const { text: diffText, color: diffColor } = formatDiff(v, baselineScoreValue);
|
|
1940
|
+
const paddedDiff = diffText.padStart(8);
|
|
1941
|
+
logger(
|
|
1942
|
+
` ${paddedName} ${coloredBaseline} \u2192 ${coloredCurrent} ${hasError ? u.dim("(scorer not run)") : u.dim("(") + diffColor(paddedDiff) + u.dim(")")}`
|
|
1943
|
+
);
|
|
1944
|
+
} else {
|
|
1945
|
+
logger(` ${paddedName} ${coloredCurrent} ${hasError ? u.dim("(scorer not run)") : ""}`);
|
|
1946
|
+
}
|
|
1936
1947
|
});
|
|
1937
|
-
axiomTracer = axiomProvider.getTracer("axiom", "0.34.1");
|
|
1938
1948
|
}
|
|
1939
|
-
|
|
1940
|
-
if (
|
|
1949
|
+
function printOutOfScopeFlags(testMeta, logger = console.log) {
|
|
1950
|
+
if (testMeta.case.outOfScopeFlags && testMeta.case.outOfScopeFlags.length > 0) {
|
|
1951
|
+
const pickedFlagsText = testMeta.case.pickedFlags ? `(picked: ${testMeta.case.pickedFlags.map((f2) => `'${f2}'`).join(", ")})` : "(none)";
|
|
1952
|
+
logger(" ", u.yellow(`\u26A0 Out-of-scope flags: ${pickedFlagsText}`));
|
|
1953
|
+
testMeta.case.outOfScopeFlags.forEach((flag) => {
|
|
1954
|
+
const timeStr = new Date(flag.accessedAt).toLocaleTimeString();
|
|
1955
|
+
logger(" ", `${flag.flagPath} (at ${timeStr})`);
|
|
1956
|
+
if (flag.stackTrace && flag.stackTrace.length > 0) {
|
|
1957
|
+
flag.stackTrace.forEach((frame, i) => {
|
|
1958
|
+
const prefix = i === flag.stackTrace.length - 1 ? " \u2514\u2500" : " \u251C\u2500";
|
|
1959
|
+
logger(" ", u.dim(`${prefix} ${frame}`));
|
|
1960
|
+
});
|
|
1961
|
+
}
|
|
1962
|
+
});
|
|
1963
|
+
}
|
|
1964
|
+
}
|
|
1965
|
+
function printCaseResult(test, baselineCasesByFingerprint, matchedIndices, logger = console.log) {
|
|
1966
|
+
const ok = test.ok();
|
|
1967
|
+
const testMeta = test.meta();
|
|
1968
|
+
if (!testMeta?.case) {
|
|
1941
1969
|
return;
|
|
1942
1970
|
}
|
|
1943
|
-
|
|
1944
|
-
|
|
1971
|
+
printTestCaseSuccessOrFailed(testMeta, ok, logger);
|
|
1972
|
+
const fingerprint = getCaseFingerprint(testMeta.case.input, testMeta.case.expected);
|
|
1973
|
+
const baselineCases = baselineCasesByFingerprint.get(fingerprint);
|
|
1974
|
+
const baselineCase = baselineCases?.shift();
|
|
1975
|
+
if (baselineCase) {
|
|
1976
|
+
matchedIndices.add(baselineCase.index);
|
|
1977
|
+
}
|
|
1978
|
+
printTestCaseScores(testMeta, baselineCase, logger);
|
|
1979
|
+
printRuntimeFlags(testMeta, logger);
|
|
1980
|
+
printOutOfScopeFlags(testMeta, logger);
|
|
1981
|
+
}
|
|
1982
|
+
function printOrphanedBaselineCases(baseline, matchedIndices, logger = console.log) {
|
|
1983
|
+
const orphanedCases = baseline.cases.filter((c) => !matchedIndices.has(c.index));
|
|
1984
|
+
if (orphanedCases.length === 0) {
|
|
1945
1985
|
return;
|
|
1946
1986
|
}
|
|
1947
|
-
|
|
1948
|
-
|
|
1949
|
-
|
|
1950
|
-
|
|
1951
|
-
|
|
1952
|
-
|
|
1953
|
-
|
|
1954
|
-
|
|
1955
|
-
|
|
1956
|
-
|
|
1957
|
-
|
|
1958
|
-
|
|
1959
|
-
|
|
1960
|
-
|
|
1961
|
-
|
|
1962
|
-
|
|
1987
|
+
logger("");
|
|
1988
|
+
logger(" ", u.yellow("Orphaned baseline cases:"));
|
|
1989
|
+
for (const orphanedCase of orphanedCases) {
|
|
1990
|
+
logger(
|
|
1991
|
+
" ",
|
|
1992
|
+
u.dim(
|
|
1993
|
+
`case ${orphanedCase.index}: ${truncate(orphanedCase.input, 50)} (score: ${truncate(
|
|
1994
|
+
JSON.stringify(orphanedCase.scores),
|
|
1995
|
+
50
|
|
1996
|
+
)})`
|
|
1997
|
+
)
|
|
1998
|
+
);
|
|
1999
|
+
const keys = Object.keys(orphanedCase.scores);
|
|
2000
|
+
if (keys.length > 0) {
|
|
2001
|
+
const maxNameLength = Math.max(...keys.map((k) => k.length));
|
|
2002
|
+
keys.forEach((k) => {
|
|
2003
|
+
const scoreData = orphanedCase.scores[k];
|
|
2004
|
+
const rawScore = formatPercentage(scoreData.value);
|
|
2005
|
+
const paddedName = k.padEnd(maxNameLength);
|
|
2006
|
+
const paddedScore = rawScore.padStart(7);
|
|
2007
|
+
logger(` ${paddedName} ${u.blueBright(paddedScore)}`);
|
|
1963
2008
|
});
|
|
1964
|
-
userProvider = hookResult?.provider ?? userProvider;
|
|
1965
2009
|
}
|
|
1966
|
-
|
|
1967
|
-
|
|
1968
|
-
|
|
1969
|
-
|
|
1970
|
-
|
|
2010
|
+
}
|
|
2011
|
+
}
|
|
2012
|
+
function printGlobalFlagOverrides(overrides2, defaults, logger = console.log) {
|
|
2013
|
+
if (Object.keys(overrides2).length === 0) {
|
|
2014
|
+
logger("");
|
|
2015
|
+
logger(u.dim("Flag overrides: (none)"));
|
|
2016
|
+
logger("");
|
|
2017
|
+
return;
|
|
2018
|
+
}
|
|
2019
|
+
logger("");
|
|
2020
|
+
logger("Flag overrides:");
|
|
2021
|
+
for (const [key, value] of Object.entries(overrides2)) {
|
|
2022
|
+
const defaultValue = defaults[key];
|
|
2023
|
+
const valueStr = JSON.stringify(value);
|
|
2024
|
+
const defaultStr = defaultValue !== void 0 ? JSON.stringify(defaultValue) : "none";
|
|
2025
|
+
logger(` \u2022 ${key}: ${valueStr} ${u.dim(`(default: ${defaultStr})`)}`);
|
|
2026
|
+
}
|
|
2027
|
+
logger("");
|
|
2028
|
+
}
|
|
2029
|
+
function printSuiteBox({
|
|
2030
|
+
suite,
|
|
2031
|
+
scorerAverages,
|
|
2032
|
+
calculateBaselineScorerAverage: calculateBaselineScorerAverage2,
|
|
2033
|
+
flagDiff,
|
|
2034
|
+
logger = console.log
|
|
2035
|
+
}) {
|
|
2036
|
+
const filename = suite.file.split("/").pop();
|
|
2037
|
+
logger("\u250C\u2500");
|
|
2038
|
+
logger(`\u2502 ${u.blue(suite.name)} ${u.gray(`(${filename})`)}`);
|
|
2039
|
+
logger("\u251C\u2500");
|
|
2040
|
+
const scorerNames = Object.keys(scorerAverages);
|
|
2041
|
+
const maxNameLength = Math.max(...scorerNames.map((name) => name.length));
|
|
2042
|
+
const allCasesErrored = (scorerName) => {
|
|
2043
|
+
return suite.cases.every((caseData) => caseData.scores[scorerName]?.metadata?.error);
|
|
2044
|
+
};
|
|
2045
|
+
for (const scorerName of scorerNames) {
|
|
2046
|
+
const avg = scorerAverages[scorerName];
|
|
2047
|
+
const paddedName = scorerName.padEnd(maxNameLength);
|
|
2048
|
+
const hasAllErrors = allCasesErrored(scorerName);
|
|
2049
|
+
const baselineAvg = suite.baseline ? calculateBaselineScorerAverage2(suite.baseline, scorerName) : null;
|
|
2050
|
+
if (baselineAvg !== null) {
|
|
2051
|
+
const currentPercent = hasAllErrors ? u.dim("N/A") : formatPercentage(avg);
|
|
2052
|
+
const baselinePercent = formatPercentage(baselineAvg);
|
|
2053
|
+
const { text: diffText, color: diffColor } = formatDiff(avg, baselineAvg);
|
|
2054
|
+
const paddedBaseline = baselinePercent.padStart(7);
|
|
2055
|
+
const paddedCurrent = hasAllErrors ? currentPercent : currentPercent.padStart(7);
|
|
2056
|
+
const diffDisplay = hasAllErrors ? u.dim("all cases failed") : diffColor(diffText.padStart(8));
|
|
2057
|
+
logger(
|
|
2058
|
+
`\u2502 ${paddedName} ${u.blueBright(paddedBaseline)} \u2192 ${hasAllErrors ? paddedCurrent : u.magentaBright(paddedCurrent)} (${diffDisplay})`
|
|
2059
|
+
);
|
|
2060
|
+
} else {
|
|
2061
|
+
const currentPercent = hasAllErrors ? u.red("N/A (all cases failed)") : formatPercentage(avg);
|
|
2062
|
+
logger(`\u2502 \u2022 ${paddedName} ${currentPercent}`);
|
|
2063
|
+
}
|
|
2064
|
+
}
|
|
2065
|
+
logger("\u251C\u2500");
|
|
2066
|
+
if (suite.baseline) {
|
|
2067
|
+
const baselineTimestamp = suite.baseline.runAt ? reporterDate(new Date(suite.baseline.runAt)) : "unknown time";
|
|
2068
|
+
logger(
|
|
2069
|
+
`\u2502 Baseline: ${suite.baseline.name}-${suite.baseline.version} ${u.gray(`(${baselineTimestamp})`)}`
|
|
2070
|
+
);
|
|
2071
|
+
} else {
|
|
2072
|
+
logger(`\u2502 Baseline: ${u.gray("(none)")}`);
|
|
2073
|
+
}
|
|
2074
|
+
const hasConfigChanges = flagDiff.length > 0;
|
|
2075
|
+
logger("\u2502 Config changes:", hasConfigChanges ? "" : u.gray("(none)"));
|
|
2076
|
+
if (hasConfigChanges) {
|
|
2077
|
+
for (const { flag, current, baseline, default: defaultVal } of flagDiff) {
|
|
2078
|
+
logger(`\u2502 \u2022 ${flag}: ${current ?? "<not set>"}`);
|
|
2079
|
+
if (defaultVal !== void 0) {
|
|
2080
|
+
logger(`\u2502 ${u.gray(`default: ${defaultVal}`)}`);
|
|
2081
|
+
}
|
|
2082
|
+
if (suite.baseline) {
|
|
2083
|
+
logger(`\u2502 ${u.gray(`baseline: ${baseline ?? "<not set>"}`)}`);
|
|
1971
2084
|
}
|
|
1972
2085
|
}
|
|
1973
|
-
initialized = true;
|
|
1974
|
-
})();
|
|
1975
|
-
try {
|
|
1976
|
-
await initializationPromise;
|
|
1977
|
-
} finally {
|
|
1978
|
-
initializationPromise = null;
|
|
1979
2086
|
}
|
|
2087
|
+
if (suite.outOfScopeFlags && suite.outOfScopeFlags.length > 0) {
|
|
2088
|
+
const pickedFlagsText = suite.configFlags && suite.configFlags.length > 0 ? suite.configFlags.map((f2) => `'${f2}'`).join(", ") : "none";
|
|
2089
|
+
logger("\u2502");
|
|
2090
|
+
logger(`\u2502 ${u.yellow("\u26A0 Out-of-scope flags")} ${u.gray(`(picked: ${pickedFlagsText})`)}:`);
|
|
2091
|
+
for (const flag of suite.outOfScopeFlags) {
|
|
2092
|
+
const lastStackTraceFrame = flag.stackTrace[0];
|
|
2093
|
+
const lastStackTraceFnName = lastStackTraceFrame.split(" ").shift();
|
|
2094
|
+
const lastStackTraceFile = lastStackTraceFrame.split("/").pop()?.slice(0, -1);
|
|
2095
|
+
logger(
|
|
2096
|
+
`\u2502 \u2022 ${flag.flagPath} ${u.gray(`at ${lastStackTraceFnName} (${lastStackTraceFile})`)}`
|
|
2097
|
+
);
|
|
2098
|
+
}
|
|
2099
|
+
}
|
|
2100
|
+
logger("\u2514\u2500");
|
|
1980
2101
|
}
|
|
1981
|
-
|
|
1982
|
-
|
|
1983
|
-
|
|
2102
|
+
/**
 * Compute the mean score of every scorer across the cases of a suite.
 *
 * A score whose `metadata.error` is truthy is left out of the mean, but its
 * scorer is still reported; a scorer with no usable scores averages to 0.
 *
 * @param {{ cases: Array<{ scores?: Record<string, any> }> }} suite
 * @returns {Record<string, number>} Scorer name mapped to its average score.
 */
function calculateScorerAverages(suite) {
  // A Map (instead of a plain object) keeps scorer names such as
  // "constructor" or "__proto__" from resolving to inherited properties.
  const scorerTotals = new Map();
  for (const caseData of suite.cases) {
    // Tolerate a case that carries no `scores` object instead of throwing.
    for (const [scorerName, score] of Object.entries(caseData.scores ?? {})) {
      let totals = scorerTotals.get(scorerName);
      if (!totals) {
        totals = { sum: 0, count: 0 };
        scorerTotals.set(scorerName, totals);
      }
      if (!score.metadata?.error) {
        totals.sum += score.score || 0;
        totals.count += 1;
      }
    }
  }
  const averages = {};
  for (const [scorerName, totals] of scorerTotals) {
    averages[scorerName] = totals.count > 0 ? totals.sum / totals.count : 0;
  }
  return averages;
}
|
|
2121
|
+
/**
 * Average the baseline `value` recorded for one scorer.
 *
 * @param {{ cases: Array<{ scores: Record<string, { value: number }> }> }} baseline
 * @param {string} scorerName Scorer whose baseline values are averaged.
 * @returns {number | null} The mean value, or `null` when no baseline case
 *   has an entry for the scorer.
 */
function calculateBaselineScorerAverage(baseline, scorerName) {
  let total = 0;
  let matched = 0;
  for (const { scores } of baseline.cases) {
    const entry = scores[scorerName];
    if (!entry) continue;
    total += entry.value;
    matched += 1;
  }
  return matched === 0 ? null : total / matched;
}
|
|
2132
|
+
/**
 * List the in-scope flags whose current value differs from the default
 * config or, when the suite has a baseline, from the baseline config.
 *
 * A flag is in scope when its flattened key starts with one of the entries
 * in `suite.configFlags`. Values are reported as JSON strings; an unset
 * value is reported as `undefined`.
 *
 * @param {object} suite Suite data (`configFlags`, `flagConfig`,
 *   `defaultFlagConfig`, optional `baseline.flagConfig`).
 * @returns {Array<{ flag: string, current?: string, baseline?: string, default?: string }>}
 */
function calculateFlagDiff(suite) {
  const scopePatterns = suite.configFlags;
  if (!scopePatterns || scopePatterns.length === 0) {
    return [];
  }

  const hasBaseline = Boolean(suite.baseline);
  const serialize = (value) => (value === undefined ? undefined : JSON.stringify(value));

  const currentFlat = flattenObject(suite.flagConfig || {});
  const baselineFlat = flattenObject(suite.baseline?.flagConfig || {});
  const defaultFlat = flattenObject(suite.defaultFlagConfig || {});

  // Union of every flattened key, in current -> baseline -> default order.
  const candidateKeys = new Set(
    Object.keys(currentFlat).concat(Object.keys(baselineFlat), Object.keys(defaultFlat))
  );

  const diffs = [];
  for (const key of candidateKeys) {
    if (!scopePatterns.some((pattern) => key.startsWith(pattern))) continue;

    const current = serialize(currentFlat[key]);
    const baseline = serialize(baselineFlat[key]);
    const fallback = serialize(defaultFlat[key]);

    const matchesDefault = current === fallback;
    const matchesBaseline = !hasBaseline || current === baseline;
    if (matchesDefault && matchesBaseline) continue;

    diffs.push({
      flag: key,
      current,
      baseline: hasBaseline ? baseline : undefined,
      default: fallback
    });
  }
  return diffs;
}
|
|
2170
|
+
/**
 * Print the end-of-run report: one box per suite, then either a link to the
 * results in the Axiom console or a note explaining why there is none, and
 * finally a warning for every suite whose registration did not succeed.
 *
 * @param {object} params
 * @param {Array<object>} params.suiteData Collected data, one entry per suite.
 * @param {object} [params.config] Resolved connection; `consoleEndpointUrl` is used for links.
 * @param {Array<{ name: string, registered: boolean, error?: unknown }>} params.registrationStatus
 * @param {boolean} params.isDebug Selects the "debug mode" note when no link can be shown.
 * @param {(...args: any[]) => void} [params.logger=console.log] Output sink.
 */
function printFinalReport({
  suiteData,
  config,
  registrationStatus,
  isDebug,
  logger = console.log
}) {
  const blankLine = () => logger("");

  blankLine();
  logger(u.bgBlue(u.white(" FINAL EVALUATION REPORT ")));
  blankLine();

  for (const suite of suiteData) {
    printSuiteBox({
      suite,
      scorerAverages: calculateScorerAverages(suite),
      calculateBaselineScorerAverage,
      flagDiff: calculateFlagDiff(suite),
      logger
    });
    blankLine();
  }

  // Run-level identifiers are read from the first suite.
  const firstSuite = suiteData[0];
  const runId = firstSuite?.runId;
  const orgId = firstSuite?.orgId;

  const anyRegistered = registrationStatus.some((entry) => entry.registered);
  const unregistered = registrationStatus.filter((entry) => !entry.registered);

  if (anyRegistered && orgId && config?.consoleEndpointUrl) {
    const evaluationsUrl = `${config.consoleEndpointUrl}/${orgId}/ai-engineering/evaluations`;
    if (suiteData.length === 1) {
      const baselineParam = firstSuite.baseline?.traceId ? `?baselineId=${firstSuite.baseline.traceId}` : "";
      logger("View eval result:");
      logger(`${evaluationsUrl}/${firstSuite.name}/${firstSuite.version}${baselineParam}`);
    } else {
      logger("View full report:");
      logger(`${evaluationsUrl}?runId=${runId}`);
    }
  } else if (isDebug) {
    logger(u.dim("Results not uploaded to Axiom (debug mode)"));
  } else {
    logger("Results not available in Axiom UI (registration failed)");
  }

  if (unregistered.length === 0) {
    return;
  }
  blankLine();
  for (const status of unregistered) {
    logger(u.yellow(`\u26A0\uFE0F Warning: Failed to register "${status.name}" with Axiom`));
    if (status.error) {
      logger(u.dim(` Error: ${status.error}`));
    }
    logger(u.dim(` Results for this evaluation will not be available in the Axiom UI.`));
  }
}
|
|
2220
|
+
var reporterDate;
var init_reporter_console_utils = __esm({
  "src/evals/reporter.console-utils.ts"() {
    "use strict";
    init_cjs_shims();
    init_node();
    init_dot_path();
    /**
     * Format a Date as "YYYY-MM-DD, HH:MM UTC" using its UTC fields.
     * @param {Date} d
     * @returns {string}
     */
    reporterDate = (d) => {
      const twoDigits = (part) => String(part).padStart(2, "0");
      const day = d.toISOString().slice(0, 10);
      return `${day}, ${twoDigits(d.getUTCHours())}:${twoDigits(d.getUTCMinutes())} UTC`;
    };
  }
});
|
|
2129
2235
|
|
|
2130
|
-
// src/
|
|
2131
|
-
|
|
2132
|
-
|
|
2133
|
-
|
|
2134
|
-
|
|
2135
|
-
|
|
2136
|
-
|
|
2137
|
-
|
|
2138
|
-
|
|
2139
|
-
}
|
|
2140
|
-
const overlayContext = {
|
|
2141
|
-
...current,
|
|
2142
|
-
flags: { ...current.flags, ...partial },
|
|
2143
|
-
// Merge for backwards compatibility
|
|
2144
|
-
parent: current,
|
|
2145
|
-
overrides: { ...partial }
|
|
2236
|
+
// src/config/resolver.ts
|
|
2237
|
+
/**
 * Extract the connection settings from `config.eval` and attach the console
 * URL to use for links.
 *
 * @param {{ eval: { url: string, token?: string, dataset?: string, orgId?: string } }} config
 * @param {string} [consoleUrlOverride] Used as the console URL when not
 *   null/undefined; otherwise the URL is derived from `config.eval.url`.
 * @returns {{ url: string, consoleEndpointUrl: string, token: any, dataset: any, orgId: any }}
 */
function resolveAxiomConnection(config, consoleUrlOverride) {
  const { url, token, dataset, orgId } = config.eval;
  return {
    url,
    consoleEndpointUrl: consoleUrlOverride ?? buildConsoleUrl(url),
    token,
    dataset,
    orgId
  };
}
|
|
2187
|
-
|
|
2188
|
-
|
|
2189
|
-
|
|
2247
|
+
var buildConsoleUrl;
var init_resolver = __esm({
  "src/config/resolver.ts"() {
    "use strict";
    init_cjs_shims();
    /**
     * Derive the console ("app.") URL from an API URL: keep the protocol,
     * take whatever follows the last "api." in the host (or the whole host
     * when "api." does not occur) and prefix it with "app.".
     * @param {string} urlString
     * @returns {string}
     */
    buildConsoleUrl = (urlString) => {
      const { protocol, host } = new URL(urlString);
      const marker = "api.";
      const markerAt = host.lastIndexOf(marker);
      const baseHost = markerAt === -1 ? host : host.slice(markerAt + marker.length);
      return `${protocol}//app.${baseHost}`;
    };
  }
});
|
|
2258
|
+
|
|
2259
|
+
// src/evals/reporter.ts
|
|
2260
|
+
var AxiomReporter;
var init_reporter = __esm({
  "src/evals/reporter.ts"() {
    "use strict";
    init_cjs_shims();
    init_storage();
    init_reporter_console_utils();
    init_resolver();
    init_eval_command();
    init_dot_path();
    /**
     * Test-run reporter that prints per-suite results as suites finish and
     * collects suite data for the final report printed at the end of the run.
     */
    AxiomReporter = class {
      constructor() {
        __publicField(this, "startTime", 0);
        __publicField(this, "start", 0);
        __publicField(this, "_endOfRunConfigEnd");
        __publicField(this, "_suiteData", []);
        __publicField(this, "_printedFlagOverrides", false);
        __publicField(this, "_config");
      }

      /** Record the run start and resolve the connection when a config is stored. */
      onTestRunStart() {
        this.start = performance.now();
        this.startTime = Date.now();
        const storedConfig = getAxiomConfig();
        if (storedConfig) {
          this._config = resolveAxiomConnection(storedConfig, getConsoleUrl());
        }
      }

      /** Print global flag overrides once and remember the first `configEnd` seen. */
      async onTestSuiteReady(_testSuite) {
        const meta = _testSuite.meta();
        const skipped = _testSuite.state() === "skipped";
        if (skipped || !meta?.evaluation) {
          return;
        }
        const { configEnd } = meta.evaluation;
        if (!this._printedFlagOverrides) {
          const defaults = configEnd?.flags ?? {};
          const overrides2 = configEnd?.overrides ?? {};
          if (Object.keys(overrides2).length > 0) {
            printGlobalFlagOverrides(overrides2, defaults);
          }
          this._printedFlagOverrides = true;
        }
        if (configEnd && !this._endOfRunConfigEnd) {
          this._endOfRunConfigEnd = configEnd;
        }
      }

      /** Reads the test's meta; takes no further action. */
      onTestCaseReady(test) {
        if (!test.meta().case) return;
      }

      /** Store the finished suite's data and print its per-case results. */
      async onTestSuiteResult(testSuite) {
        const meta = testSuite.meta();
        const skipped = testSuite.state() === "skipped";
        if (skipped || !meta?.evaluation) {
          return;
        }
        const evaluation = meta.evaluation;
        const { configEnd } = evaluation;
        if (configEnd && !this._endOfRunConfigEnd) {
          this._endOfRunConfigEnd = configEnd;
        }

        // Elapsed time is measured from the start of the whole run.
        const elapsedMs = performance.now() - this.start;
        const durationSeconds = Number(elapsedMs / 1e3).toFixed(2);

        const cases = [];
        for (const child of testSuite.children) {
          if (child.type !== "test") continue;
          const caseMeta = child.meta()?.case;
          if (!caseMeta) continue;
          const { index, scores, outOfScopeFlags, errors, runtimeFlags } = caseMeta;
          cases.push({ index, scores, outOfScopeFlags, errors, runtimeFlags });
        }

        const relativePath = testSuite.module.moduleId.replace(process.cwd(), "").replace(/^\//, "");
        const suiteBaseline = evaluation.baseline;

        // With a configEnd, the effective config is defaults overlaid by overrides.
        const flagConfig = configEnd
          ? dotNotationToNested({
              ...flattenObject(configEnd.flags ?? {}),
              ...flattenObject(configEnd.overrides ?? {})
            })
          : evaluation.flagConfig;
        const defaultFlagConfig = configEnd?.flags;

        this._suiteData.push({
          version: evaluation.version,
          name: evaluation.name,
          file: relativePath,
          duration: `${durationSeconds}s`,
          baseline: suiteBaseline || null,
          configFlags: evaluation.configFlags,
          flagConfig,
          defaultFlagConfig,
          runId: evaluation.runId,
          orgId: evaluation.orgId,
          cases,
          outOfScopeFlags: evaluation.outOfScopeFlags,
          registrationStatus: evaluation.registrationStatus
        });

        printEvalNameAndFileName(testSuite, meta);
        printBaselineNameAndVersion(meta);
        printTestCaseCountStartDuration(testSuite, this.startTime, durationSeconds);

        // Group baseline cases by fingerprint of (input, expected).
        const matchedBaselineIndices = /* @__PURE__ */ new Set();
        const baselineCasesByFingerprint = /* @__PURE__ */ new Map();
        if (suiteBaseline) {
          for (const baselineCase of suiteBaseline.cases) {
            const fingerprint = getCaseFingerprint(baselineCase.input, baselineCase.expected);
            const bucket = baselineCasesByFingerprint.get(fingerprint);
            if (bucket) {
              bucket.push(baselineCase);
            } else {
              baselineCasesByFingerprint.set(fingerprint, [baselineCase]);
            }
          }
        }

        for (const child of testSuite.children) {
          if (child.type !== "test") continue;
          printCaseResult(child, baselineCasesByFingerprint, matchedBaselineIndices);
        }
        if (suiteBaseline) {
          printOrphanedBaselineCases(suiteBaseline, matchedBaselineIndices);
        }
        console.log("");
      }

      /** Optionally clear the terminal, then print the final report. */
      async onTestRunEnd(_testModules, _errors, _reason) {
        const inCi = Boolean(process.env.CI);
        if (!inCi && process.stdout.isTTY !== false) {
          process.stdout.write("\x1B[2J\x1B[0f");
        }
        const registrationStatus = this._suiteData.map(({ name, registrationStatus: outcome }) => ({
          name,
          registered: outcome?.status === "success",
          error: outcome?.status === "failed" ? outcome.error : void 0
        }));
        printFinalReport({
          suiteData: this._suiteData,
          config: this._config,
          registrationStatus,
          isDebug: process.env.AXIOM_DEBUG === "true"
        });
      }
    };
  }
});
|
|
2400
|
+
|
|
2401
|
+
// src/evals/instrument.ts
|
|
2402
|
+
/**
 * Return the instrumentation hook to run: the one already on the config if
 * present, otherwise the one found by reloading the config from the current
 * working directory (`null` when that config has none).
 *
 * @param {{ eval: { instrumentation?: Function | null } }} config
 * @returns {Promise<Function | null>}
 * @throws {AxiomCLIError} When reloading the config fails.
 */
async function resolveInstrumentationHook(config) {
  const existingHook = config.eval.instrumentation;
  if (existingHook) {
    return existingHook;
  }
  try {
    const reloaded = await loadConfig(process.cwd());
    return reloaded.config.eval.instrumentation ?? null;
  } catch (error) {
    const reason = errorToString(error);
    throw new AxiomCLIError(
      `Failed to reload instrumentation from config: ${reason}`
    );
  }
}
|
|
2211
|
-
function
|
|
2212
|
-
|
|
2213
|
-
|
|
2214
|
-
|
|
2215
|
-
|
|
2216
|
-
} else {
|
|
2217
|
-
console.error(formatZodErrors(error.zodError));
|
|
2218
|
-
const examples = generateFlagExamples(error.zodError);
|
|
2219
|
-
if (examples.length > 0) {
|
|
2220
|
-
console.error("\n\u{1F4A1} Valid examples:");
|
|
2221
|
-
examples.forEach((example) => console.error(` ${example}`));
|
|
2222
|
-
}
|
|
2223
|
-
}
|
|
2415
|
+
/**
 * Invoke an instrumentation hook and return what it resolves to.
 *
 * @param {(options: object) => any} hook
 * @param {object} options Passed to the hook unchanged.
 * @returns {Promise<any>}
 * @throws {AxiomCLIError} When the hook throws or rejects.
 */
async function runInstrumentationHook(hook, options) {
  let outcome;
  try {
    outcome = await hook(options);
  } catch (error) {
    const reason = errorToString(error);
    throw new AxiomCLIError(`Failed to execute instrumentation hook: ${reason}`);
  }
  return outcome;
}
|
|
2227
|
-
function
|
|
2228
|
-
const
|
|
2229
|
-
|
|
2230
|
-
|
|
2422
|
+
/**
 * Build the tracer provider used for eval traces and store it, together
 * with its tracer, in the module-level `axiomProvider` / `axiomTracer`.
 *
 * Spans are exported over OTLP/HTTP to `${connection.url}/v1/traces` through
 * a batch span processor. This function does not register the provider.
 *
 * @param {{ url: string, dataset: string, orgId?: string, token?: string }} connection
 */
function setupEvalProvider(connection) {
  const { dataset, orgId, token, url } = connection;
  const { OTLPTraceExporter } = import_exporter_trace_otlp_http;
  const { BatchSpanProcessor, NodeTracerProvider } = import_sdk_trace_node;
  const { resourceFromAttributes } = import_resources;

  const headers = { "X-Axiom-Dataset": dataset };
  if (orgId) {
    headers["X-AXIOM-ORG-ID"] = orgId;
  }
  if (token) {
    headers.Authorization = `Bearer ${token}`;
  }

  const exporter = new OTLPTraceExporter({
    url: `${url}/v1/traces`,
    headers,
    concurrencyLimit: 10
  });
  const processor = new BatchSpanProcessor(exporter, {
    maxQueueSize: 2048,
    maxExportBatchSize: 512,
    scheduledDelayMillis: 5e3,
    exportTimeoutMillis: 3e4
  });
  const resource = resourceFromAttributes({
    ["service.name"]: "axiom",
    ["service.version"]: "0.35.0"
  });

  axiomProvider = new NodeTracerProvider({
    resource,
    spanProcessors: [processor]
  });
  axiomTracer = axiomProvider.getTracer("axiom", "0.35.0");
}
|
|
2233
|
-
function
|
|
2234
|
-
if (
|
|
2235
|
-
|
|
2236
|
-
const num = Number(raw);
|
|
2237
|
-
if (!Number.isNaN(num) && raw.trim() === num.toString()) {
|
|
2238
|
-
return num;
|
|
2451
|
+
/**
 * Initialise eval instrumentation once.
 *
 * Returns immediately when already initialised and joins an initialisation
 * that is in flight. When `config.enabled` is false only a tracer from the
 * global OpenTelemetry API is stored. Otherwise the user's instrumentation
 * hook (if any) is run and the eval provider is set up; without a hook the
 * eval provider is also registered and handed to `initAxiomAI`.
 *
 * @param {{ enabled: boolean, config: object }} config
 * @returns {Promise<void>}
 */
async function initInstrumentation(config) {
  if (initialized) {
    return;
  }
  if (initializationPromise) {
    await initializationPromise;
    return;
  }

  const bootstrap = async () => {
    if (!config.enabled) {
      axiomTracer = import_api10.trace.getTracer("axiom", "0.35.0");
      initialized = true;
      return;
    }

    const axiomConfig = config.config;
    const connection = resolveAxiomConnection(axiomConfig);
    const hook = await resolveInstrumentationHook(axiomConfig);

    if (hook) {
      // Store the resolved hook back on the config object.
      axiomConfig.eval.instrumentation = hook;
      const { dataset, token, url, orgId } = connection;
      const hookResult = await runInstrumentationHook(hook, { dataset, token, url, orgId });
      userProvider = hookResult?.provider ?? userProvider;
    }

    setupEvalProvider(connection);

    if (!hook) {
      axiomProvider?.register();
      if (axiomTracer) {
        initAxiomAI({ tracer: axiomTracer });
      }
    }
    initialized = true;
  };

  initializationPromise = bootstrap();
  try {
    await initializationPromise;
  } finally {
    // Cleared on success and failure alike.
    initializationPromise = null;
  }
}
|
|
2263
|
-
|
|
2264
|
-
|
|
2265
|
-
|
|
2266
|
-
|
|
2267
|
-
|
|
2268
|
-
|
|
2269
|
-
|
|
2270
|
-
|
|
2271
|
-
|
|
2272
|
-
|
|
2273
|
-
|
|
2274
|
-
|
|
2275
|
-
|
|
2276
|
-
|
|
2277
|
-
|
|
2493
|
+
var import_sdk_trace_node, import_resources, import_exporter_trace_otlp_http, import_api10, axiomProvider, axiomTracer, userProvider, initializationPromise, initialized, flush;
var init_instrument = __esm({
  "src/evals/instrument.ts"() {
    "use strict";
    init_cjs_shims();
    import_sdk_trace_node = require("@opentelemetry/sdk-trace-node");
    import_resources = require("@opentelemetry/resources");
    import_exporter_trace_otlp_http = require("@opentelemetry/exporter-trace-otlp-http");
    import_api10 = require("@opentelemetry/api");
    init_initAxiomAI();
    init_resolver();
    init_errors();
    init_loader();
    initializationPromise = null;
    initialized = false;
    /**
     * Flush pending spans: wait for any in-flight initialisation, then
     * force-flush the eval provider plus the user-supplied and global
     * providers that expose `forceFlush`. Failures from the latter two are
     * logged as warnings; a failure from the eval provider rejects.
     */
    flush = async () => {
      if (initializationPromise) {
        await initializationPromise;
      }

      const pending = [];
      if (axiomProvider) {
        pending.push(axiomProvider.forceFlush());
      }

      // A Set so a provider that is both user-supplied and global is flushed once.
      const candidateProviders = new Set(
        [userProvider, import_api10.trace.getTracerProvider()].filter(Boolean)
      );
      const warnOnFailure = (error) => {
        console.warn("[AxiomAI] Failed to flush tracer provider:", errorToString(error));
      };
      for (const provider of candidateProviders) {
        const flushFn = provider.forceFlush;
        if (typeof flushFn !== "function") continue;
        pending.push(flushFn.call(provider).catch(warnOnFailure));
      }

      if (pending.length > 0) {
        await Promise.all(pending);
      }
    };
  }
});
|
|
2313
2540
|
|
|
2314
|
-
// src/
|
|
2315
|
-
|
|
2316
|
-
|
|
2317
|
-
|
|
2318
|
-
}
|
|
2541
|
+
// src/evals/run-vitest.ts
|
|
2542
|
+
var run_vitest_exports = {};
|
|
2543
|
+
__export(run_vitest_exports, {
|
|
2544
|
+
runVitest: () => runVitest
|
|
2545
|
+
});
|
|
2546
|
+
var import_node_path2, import_node_fs2, import_node_os, import_node_path3, import_vite_tsconfig_paths, import_node, printCollectedEvals, runVitest;
|
|
2547
|
+
var init_run_vitest = __esm({
|
|
2548
|
+
"src/evals/run-vitest.ts"() {
|
|
2549
|
+
"use strict";
|
|
2550
|
+
init_cjs_shims();
|
|
2551
|
+
init_node();
|
|
2552
|
+
import_node_path2 = require("path");
|
|
2553
|
+
import_node_fs2 = require("fs");
|
|
2554
|
+
import_node_os = require("os");
|
|
2555
|
+
import_node_path3 = __toESM(require("path"), 1);
|
|
2556
|
+
import_vite_tsconfig_paths = __toESM(require("vite-tsconfig-paths"), 1);
|
|
2557
|
+
import_node = require("vitest/node");
|
|
2558
|
+
init_reporter();
|
|
2559
|
+
init_instrument();
|
|
2560
|
+
init_storage();
|
|
2561
|
+
printCollectedEvals = (result, rootDir) => {
|
|
2562
|
+
if (!result.testModules || result.testModules.length === 0) {
|
|
2563
|
+
console.log(u.yellow("\nNo evaluations found\n"));
|
|
2564
|
+
return;
|
|
2565
|
+
}
|
|
2566
|
+
console.log(u.bold("\nFound evaluations:\n"));
|
|
2567
|
+
let totalEvals = 0;
|
|
2568
|
+
let totalCases = 0;
|
|
2569
|
+
for (const module2 of result.testModules) {
|
|
2570
|
+
const relativePath = import_node_path3.default.relative(rootDir, module2.moduleId);
|
|
2571
|
+
for (const suite of module2.children.suites()) {
|
|
2572
|
+
totalEvals++;
|
|
2573
|
+
const caseCount = suite.children.size;
|
|
2574
|
+
totalCases += caseCount;
|
|
2575
|
+
console.log(u.green(`\u2713 ${suite.name} (${caseCount} cases)`));
|
|
2576
|
+
console.log(u.dim(` ${relativePath}`));
|
|
2577
|
+
console.log("");
|
|
2578
|
+
}
|
|
2579
|
+
}
|
|
2580
|
+
console.log(u.bold(`Total: ${totalEvals} evaluations, ${totalCases} test cases
|
|
2581
|
+
`));
|
|
2582
|
+
};
|
|
2583
|
+
runVitest = async (dir, opts) => {
|
|
2584
|
+
setAxiomConfig(opts.config);
|
|
2585
|
+
await initInstrumentation({
|
|
2586
|
+
enabled: !opts.debug && !opts.list,
|
|
2587
|
+
config: opts.config
|
|
2588
|
+
});
|
|
2589
|
+
const providedConfig = {
|
|
2590
|
+
...opts.config,
|
|
2591
|
+
eval: {
|
|
2592
|
+
...opts.config.eval,
|
|
2593
|
+
// These can't be serialized, so we need to remove them
|
|
2594
|
+
instrumentation: null,
|
|
2595
|
+
flagSchema: null
|
|
2596
|
+
}
|
|
2597
|
+
};
|
|
2598
|
+
if (opts.debug) {
|
|
2599
|
+
console.log(u.bgWhite(u.blackBright(" Debug mode enabled ")));
|
|
2600
|
+
}
|
|
2601
|
+
const tmpDir = (0, import_node_path2.join)((0, import_node_os.tmpdir)(), "axiom-eval", opts.runId);
|
|
2602
|
+
(0, import_node_fs2.mkdirSync)(tmpDir, { recursive: true });
|
|
2603
|
+
const nameRegistryFile = (0, import_node_path2.join)(tmpDir, "names.jsonl");
|
|
2604
|
+
const abortFile = (0, import_node_path2.join)(tmpDir, "abort.txt");
|
|
2605
|
+
(0, import_node_fs2.writeFileSync)(nameRegistryFile, "", "utf8");
|
|
2606
|
+
if ((0, import_node_fs2.existsSync)(abortFile)) {
|
|
2607
|
+
(0, import_node_fs2.unlinkSync)(abortFile);
|
|
2608
|
+
}
|
|
2609
|
+
process.env.AXIOM_NAME_REGISTRY_FILE = nameRegistryFile;
|
|
2610
|
+
process.env.AXIOM_ABORT_FILE = abortFile;
|
|
2611
|
+
if (opts.list) {
|
|
2612
|
+
console.log(u.bgWhite(u.blackBright(" List mode ")));
|
|
2613
|
+
}
|
|
2614
|
+
const vi = await (0, import_node.createVitest)(
|
|
2615
|
+
"test",
|
|
2616
|
+
{
|
|
2617
|
+
root: dir ? dir : process.cwd(),
|
|
2618
|
+
mode: "test",
|
|
2619
|
+
include: opts.include,
|
|
2620
|
+
exclude: opts.exclude,
|
|
2621
|
+
testNamePattern: opts.testNamePattern,
|
|
2622
|
+
reporters: ["verbose", new AxiomReporter()],
|
|
2623
|
+
environment: "node",
|
|
2624
|
+
browser: void 0,
|
|
2625
|
+
watch: opts.watch,
|
|
2626
|
+
setupFiles: [],
|
|
2627
|
+
// ignore user vitest.config.ts etc
|
|
2628
|
+
name: "axiom:eval",
|
|
2629
|
+
printConsoleTrace: true,
|
|
2630
|
+
silent: false,
|
|
2631
|
+
disableConsoleIntercept: true,
|
|
2632
|
+
testTimeout: opts.config?.eval?.timeoutMs || 6e4,
|
|
2633
|
+
globals: true,
|
|
2634
|
+
runner: (0, import_node_path2.resolve)(__dirname, "evals", "custom-runner.js"),
|
|
2635
|
+
provide: {
|
|
2636
|
+
baseline: opts.baseline,
|
|
2637
|
+
debug: opts.debug,
|
|
2638
|
+
list: opts.list,
|
|
2639
|
+
overrides: opts.overrides,
|
|
2640
|
+
axiomConfig: providedConfig,
|
|
2641
|
+
runId: opts.runId,
|
|
2642
|
+
consoleUrl: opts.consoleUrl
|
|
2643
|
+
}
|
|
2644
|
+
},
|
|
2645
|
+
{
|
|
2646
|
+
plugins: [(0, import_vite_tsconfig_paths.default)({ root: dir || process.cwd() })]
|
|
2647
|
+
}
|
|
2648
|
+
);
|
|
2649
|
+
if (opts.list) {
|
|
2650
|
+
const result = await vi.collect();
|
|
2651
|
+
printCollectedEvals(result, dir || process.cwd());
|
|
2652
|
+
await vi.close();
|
|
2653
|
+
process.exit(0);
|
|
2654
|
+
}
|
|
2655
|
+
await vi.start();
|
|
2656
|
+
if ((0, import_node_fs2.existsSync)(abortFile)) {
|
|
2657
|
+
const message = (0, import_node_fs2.readFileSync)(abortFile, "utf8");
|
|
2658
|
+
console.error("\n" + message);
|
|
2659
|
+
await vi.close();
|
|
2660
|
+
process.exit(1);
|
|
2661
|
+
}
|
|
2662
|
+
const dispose = (0, import_node.registerConsoleShortcuts)(vi, process.stdin, process.stdout);
|
|
2663
|
+
if (!vi.shouldKeepServer()) {
|
|
2664
|
+
dispose();
|
|
2665
|
+
await flush();
|
|
2666
|
+
await vi.close();
|
|
2667
|
+
process.exit(0);
|
|
2668
|
+
}
|
|
2669
|
+
await flush();
|
|
2670
|
+
};
|
|
2671
|
+
}
|
|
2672
|
+
});
|
|
2319
2673
|
|
|
2320
2674
|
// src/cli/commands/eval.command.ts
|
|
2321
|
-
var createRunId = (0, import_nanoid.customAlphabet)("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ", 10);
|
|
2322
|
-
var consoleUrl;
|
|
2323
2675
|
function getConsoleUrl() {
|
|
2324
2676
|
return consoleUrl;
|
|
2325
2677
|
}
|
|
@@ -2344,93 +2696,136 @@ function getDefaultOrgId(value) {
|
|
|
2344
2696
|
const authContext2 = getAuthContext();
|
|
2345
2697
|
return authContext2?.orgId ?? process.env.AXIOM_ORG_ID;
|
|
2346
2698
|
}
|
|
2347
|
-
var
|
|
2348
|
-
|
|
2349
|
-
|
|
2350
|
-
|
|
2351
|
-
|
|
2352
|
-
|
|
2353
|
-
|
|
2354
|
-
|
|
2355
|
-
|
|
2356
|
-
|
|
2357
|
-
|
|
2358
|
-
|
|
2359
|
-
|
|
2360
|
-
|
|
2361
|
-
|
|
2362
|
-
|
|
2363
|
-
|
|
2364
|
-
|
|
2365
|
-
|
|
2366
|
-
|
|
2367
|
-
|
|
2368
|
-
|
|
2369
|
-
|
|
2370
|
-
|
|
2371
|
-
...options.dataset && { dataset: options.dataset },
|
|
2372
|
-
...options.orgId && { orgId: options.orgId }
|
|
2373
|
-
}
|
|
2374
|
-
};
|
|
2375
|
-
if (isGlobPattern) {
|
|
2376
|
-
include = [target];
|
|
2377
|
-
} else {
|
|
2699
|
+
var import_commander, import_nanoid, import_node_fs3, createRunId, consoleUrl, loadEvalCommand;
|
|
2700
|
+
var init_eval_command = __esm({
|
|
2701
|
+
"src/cli/commands/eval.command.ts"() {
|
|
2702
|
+
"use strict";
|
|
2703
|
+
init_cjs_shims();
|
|
2704
|
+
import_commander = require("commander");
|
|
2705
|
+
import_nanoid = require("nanoid");
|
|
2706
|
+
import_node_fs3 = require("fs");
|
|
2707
|
+
init_eval_context_runner();
|
|
2708
|
+
init_parse_flag_overrides();
|
|
2709
|
+
init_glob_utils();
|
|
2710
|
+
init_loader();
|
|
2711
|
+
init_errors();
|
|
2712
|
+
init_global_auth();
|
|
2713
|
+
init_node();
|
|
2714
|
+
createRunId = (0, import_nanoid.customAlphabet)("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ", 10);
|
|
2715
|
+
loadEvalCommand = (program2, flagOverrides = {}) => {
|
|
2716
|
+
return program2.addCommand(
|
|
2717
|
+
new import_commander.Command("eval").description("run evals locally").addArgument(
|
|
2718
|
+
new import_commander.Argument("[target]", "file, directory, glob pattern, or eval name").default(
|
|
2719
|
+
".",
|
|
2720
|
+
"any *.eval.ts file in current directory"
|
|
2721
|
+
)
|
|
2722
|
+
).option("-w, --watch true", "keep server running and watch for changes", false).option("-t, --token <TOKEN>", "axiom token", getDefaultToken).option("-d, --dataset <DATASET>", "axiom dataset name", process.env.AXIOM_DATASET).option("-u, --url <AXIOM URL>", "axiom url", getDefaultUrl).option("-o, --org-id <ORG ID>", "axiom organization id", getDefaultOrgId).option("-b, --baseline <BASELINE ID>", "id of baseline evaluation to compare against").option("--debug", "run locally without any network operations", false).option("--list", "list evaluations and test cases without running them", false).addOption(new import_commander.Option("-c, --console-url <URL>", "console url override").hideHelp()).action(async (target, options) => {
|
|
2378
2723
|
try {
|
|
2379
|
-
|
|
2380
|
-
|
|
2381
|
-
|
|
2382
|
-
|
|
2724
|
+
if (options.debug) {
|
|
2725
|
+
process.env.AXIOM_DEBUG = "true";
|
|
2726
|
+
}
|
|
2727
|
+
let include = [];
|
|
2728
|
+
let exclude;
|
|
2729
|
+
let testNamePattern;
|
|
2730
|
+
const isGlobPattern = isGlob(target);
|
|
2731
|
+
const { config: loadedConfig } = await loadConfig(".");
|
|
2732
|
+
validateFlagOverrides(flagOverrides, loadedConfig.eval.flagSchema);
|
|
2733
|
+
const config = {
|
|
2734
|
+
...loadedConfig,
|
|
2735
|
+
eval: {
|
|
2736
|
+
...loadedConfig.eval,
|
|
2737
|
+
...options.token && { token: options.token },
|
|
2738
|
+
...options.url && { url: options.url },
|
|
2739
|
+
...options.dataset && { dataset: options.dataset },
|
|
2740
|
+
...options.orgId && { orgId: options.orgId }
|
|
2741
|
+
}
|
|
2742
|
+
};
|
|
2743
|
+
if (isGlobPattern) {
|
|
2383
2744
|
include = [target];
|
|
2745
|
+
} else {
|
|
2746
|
+
try {
|
|
2747
|
+
const stat = (0, import_node_fs3.lstatSync)(target);
|
|
2748
|
+
if (stat.isDirectory()) {
|
|
2749
|
+
include = config?.eval?.include || [];
|
|
2750
|
+
} else {
|
|
2751
|
+
include = [target];
|
|
2752
|
+
}
|
|
2753
|
+
} catch {
|
|
2754
|
+
testNamePattern = new RegExp(target, "i");
|
|
2755
|
+
include = config?.eval?.include || [];
|
|
2756
|
+
}
|
|
2384
2757
|
}
|
|
2385
|
-
|
|
2386
|
-
|
|
2387
|
-
|
|
2388
|
-
|
|
2389
|
-
|
|
2390
|
-
|
|
2391
|
-
|
|
2392
|
-
|
|
2393
|
-
|
|
2394
|
-
|
|
2395
|
-
|
|
2396
|
-
|
|
2397
|
-
|
|
2398
|
-
|
|
2399
|
-
|
|
2400
|
-
|
|
2401
|
-
|
|
2402
|
-
|
|
2403
|
-
|
|
2404
|
-
|
|
2405
|
-
|
|
2406
|
-
|
|
2407
|
-
|
|
2408
|
-
|
|
2409
|
-
|
|
2410
|
-
|
|
2411
|
-
|
|
2412
|
-
|
|
2413
|
-
|
|
2414
|
-
|
|
2415
|
-
|
|
2416
|
-
|
|
2417
|
-
|
|
2418
|
-
|
|
2758
|
+
exclude = config?.eval?.exclude;
|
|
2759
|
+
if (!config?.eval?.instrumentation) {
|
|
2760
|
+
console.warn(
|
|
2761
|
+
u.yellow(
|
|
2762
|
+
"\u26A0 App instrumentation (`eval.instrumentation` in `axiom.config.ts`) not configured. Using default provider."
|
|
2763
|
+
)
|
|
2764
|
+
);
|
|
2765
|
+
console.log("");
|
|
2766
|
+
}
|
|
2767
|
+
const runId = createRunId();
|
|
2768
|
+
consoleUrl = options.consoleUrl;
|
|
2769
|
+
let runVitestModule;
|
|
2770
|
+
try {
|
|
2771
|
+
runVitestModule = await Promise.resolve().then(() => (init_run_vitest(), run_vitest_exports));
|
|
2772
|
+
} catch (err) {
|
|
2773
|
+
if (err && typeof err === "object" && "code" in err && (err.code === "ERR_MODULE_NOT_FOUND" || err.code === "MODULE_NOT_FOUND")) {
|
|
2774
|
+
throw new AxiomCLIError("Failed to load vitest.");
|
|
2775
|
+
}
|
|
2776
|
+
throw err;
|
|
2777
|
+
}
|
|
2778
|
+
const { runVitest: runVitest2 } = runVitestModule;
|
|
2779
|
+
await runEvalWithContext(flagOverrides, async () => {
|
|
2780
|
+
return runVitest2(".", {
|
|
2781
|
+
watch: options.watch,
|
|
2782
|
+
baseline: options.baseline,
|
|
2783
|
+
include,
|
|
2784
|
+
exclude,
|
|
2785
|
+
testNamePattern,
|
|
2786
|
+
debug: options.debug,
|
|
2787
|
+
list: options.list,
|
|
2788
|
+
overrides: flagOverrides,
|
|
2789
|
+
config,
|
|
2790
|
+
runId,
|
|
2791
|
+
consoleUrl: options.consoleUrl
|
|
2792
|
+
});
|
|
2793
|
+
});
|
|
2794
|
+
} catch (error) {
|
|
2795
|
+
if (error instanceof AxiomCLIError) {
|
|
2796
|
+
console.error(`
|
|
2419
2797
|
\u274C ${error.message}
|
|
2420
2798
|
`);
|
|
2421
|
-
|
|
2422
|
-
|
|
2423
|
-
|
|
2424
|
-
|
|
2425
|
-
|
|
2426
|
-
|
|
2427
|
-
};
|
|
2799
|
+
process.exit(1);
|
|
2800
|
+
}
|
|
2801
|
+
throw error;
|
|
2802
|
+
}
|
|
2803
|
+
})
|
|
2804
|
+
);
|
|
2805
|
+
};
|
|
2806
|
+
}
|
|
2807
|
+
});
|
|
2808
|
+
|
|
2809
|
+
// src/bin.ts
|
|
2810
|
+
var bin_exports = {};
|
|
2811
|
+
__export(bin_exports, {
|
|
2812
|
+
program: () => program
|
|
2813
|
+
});
|
|
2814
|
+
module.exports = __toCommonJS(bin_exports);
|
|
2815
|
+
init_cjs_shims();
|
|
2816
|
+
var import_commander3 = require("commander");
|
|
2817
|
+
init_eval_command();
|
|
2428
2818
|
|
|
2429
2819
|
// src/cli/commands/auth.command.ts
|
|
2430
2820
|
init_cjs_shims();
|
|
2431
2821
|
|
|
2432
2822
|
// src/cli/commands/auth-login.command.ts
|
|
2433
2823
|
init_cjs_shims();
|
|
2824
|
+
init_oauth();
|
|
2825
|
+
init_callback_server();
|
|
2826
|
+
init_config();
|
|
2827
|
+
init_api();
|
|
2828
|
+
init_errors();
|
|
2434
2829
|
var BASE_HOSTNAME = "axiom.co";
|
|
2435
2830
|
var getApiUrl = (hostname) => {
|
|
2436
2831
|
return `https://api.${hostname}`;
|
|
@@ -2583,6 +2978,8 @@ function loadAuthLoginCommand(auth, root) {
|
|
|
2583
2978
|
|
|
2584
2979
|
// src/cli/commands/auth-logout.command.ts
|
|
2585
2980
|
init_cjs_shims();
|
|
2981
|
+
init_config();
|
|
2982
|
+
init_errors();
|
|
2586
2983
|
async function logoutCommand(alias) {
|
|
2587
2984
|
const config = await loadGlobalConfig();
|
|
2588
2985
|
const profileToRemove = alias || config.active_profile;
|
|
@@ -2628,6 +3025,9 @@ function loadAuthLogoutCommand(auth, root) {
|
|
|
2628
3025
|
|
|
2629
3026
|
// src/cli/commands/auth-status.command.ts
|
|
2630
3027
|
init_cjs_shims();
|
|
3028
|
+
init_config();
|
|
3029
|
+
init_api();
|
|
3030
|
+
init_errors();
|
|
2631
3031
|
async function statusCommand() {
|
|
2632
3032
|
const config = await loadGlobalConfig();
|
|
2633
3033
|
if (Object.keys(config.profiles).length === 0) {
|
|
@@ -2693,6 +3093,8 @@ function loadAuthStatusCommand(auth, program2) {
|
|
|
2693
3093
|
|
|
2694
3094
|
// src/cli/commands/auth-switch.command.ts
|
|
2695
3095
|
init_cjs_shims();
|
|
3096
|
+
init_config();
|
|
3097
|
+
init_errors();
|
|
2696
3098
|
async function promptSelect2(message, choices) {
|
|
2697
3099
|
console.log(`
|
|
2698
3100
|
${message}`);
|
|
@@ -2788,6 +3190,8 @@ function loadAuthCommand(program2) {
|
|
|
2788
3190
|
}
|
|
2789
3191
|
|
|
2790
3192
|
// src/bin.ts
|
|
3193
|
+
init_parse_flag_overrides();
|
|
3194
|
+
init_global_auth();
|
|
2791
3195
|
var import_env = __toESM(require("@next/env"), 1);
|
|
2792
3196
|
|
|
2793
3197
|
// src/cli/commands/version.command.ts
|
|
@@ -2796,7 +3200,7 @@ var import_commander2 = require("commander");
|
|
|
2796
3200
|
var loadVersionCommand = (program2) => {
|
|
2797
3201
|
return program2.addCommand(
|
|
2798
3202
|
new import_commander2.Command("version").description("cli version").action(() => {
|
|
2799
|
-
console.log("0.
|
|
3203
|
+
console.log("0.35.0");
|
|
2800
3204
|
})
|
|
2801
3205
|
);
|
|
2802
3206
|
};
|
|
@@ -2806,7 +3210,7 @@ var { loadEnvConfig } = import_env.default;
|
|
|
2806
3210
|
loadEnvConfig(process.cwd());
|
|
2807
3211
|
var { cleanedArgv, overrides } = extractOverrides(process.argv.slice(2));
|
|
2808
3212
|
var program = new import_commander3.Command();
|
|
2809
|
-
program.name("axiom").description("Axiom's CLI to manage your objects and run evals").version("0.
|
|
3213
|
+
program.name("axiom").description("Axiom's CLI to manage your objects and run evals").version("0.35.0");
|
|
2810
3214
|
program.hook("preAction", async (_, actionCommand) => {
|
|
2811
3215
|
const commandName = actionCommand.name();
|
|
2812
3216
|
const parentCommand = actionCommand.parent;
|