axiom 0.34.0 → 0.34.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/bin.cjs CHANGED
@@ -43,137 +43,13 @@ var init_cjs_shims = __esm({
43
43
  }
44
44
  });
45
45
 
46
- // src/bin.ts
47
- var bin_exports = {};
48
- __export(bin_exports, {
49
- program: () => program
50
- });
51
- module.exports = __toCommonJS(bin_exports);
52
- init_cjs_shims();
53
- var import_commander3 = require("commander");
54
-
55
- // src/cli/commands/eval.command.ts
56
- init_cjs_shims();
57
- var import_commander = require("commander");
58
- var import_nanoid = require("nanoid");
59
-
60
- // src/evals/run-vitest.ts
61
- init_cjs_shims();
62
-
63
- // ../../node_modules/.pnpm/tinyrainbow@2.0.0/node_modules/tinyrainbow/dist/node.js
64
- init_cjs_shims();
65
-
66
- // ../../node_modules/.pnpm/tinyrainbow@2.0.0/node_modules/tinyrainbow/dist/chunk-BVHSVHOK.js
67
- init_cjs_shims();
68
- var f = {
69
- reset: [0, 0],
70
- bold: [1, 22, "\x1B[22m\x1B[1m"],
71
- dim: [2, 22, "\x1B[22m\x1B[2m"],
72
- italic: [3, 23],
73
- underline: [4, 24],
74
- inverse: [7, 27],
75
- hidden: [8, 28],
76
- strikethrough: [9, 29],
77
- black: [30, 39],
78
- red: [31, 39],
79
- green: [32, 39],
80
- yellow: [33, 39],
81
- blue: [34, 39],
82
- magenta: [35, 39],
83
- cyan: [36, 39],
84
- white: [37, 39],
85
- gray: [90, 39],
86
- bgBlack: [40, 49],
87
- bgRed: [41, 49],
88
- bgGreen: [42, 49],
89
- bgYellow: [43, 49],
90
- bgBlue: [44, 49],
91
- bgMagenta: [45, 49],
92
- bgCyan: [46, 49],
93
- bgWhite: [47, 49],
94
- blackBright: [90, 39],
95
- redBright: [91, 39],
96
- greenBright: [92, 39],
97
- yellowBright: [93, 39],
98
- blueBright: [94, 39],
99
- magentaBright: [95, 39],
100
- cyanBright: [96, 39],
101
- whiteBright: [97, 39],
102
- bgBlackBright: [100, 49],
103
- bgRedBright: [101, 49],
104
- bgGreenBright: [102, 49],
105
- bgYellowBright: [103, 49],
106
- bgBlueBright: [104, 49],
107
- bgMagentaBright: [105, 49],
108
- bgCyanBright: [106, 49],
109
- bgWhiteBright: [107, 49]
110
- };
111
- var h = Object.entries(f);
112
- function a(n) {
113
- return String(n);
114
- }
115
- a.open = "";
116
- a.close = "";
117
- function C(n = false) {
118
- let e = typeof process != "undefined" ? process : void 0, i = (e == null ? void 0 : e.env) || {}, g = (e == null ? void 0 : e.argv) || [];
119
- return !("NO_COLOR" in i || g.includes("--no-color")) && ("FORCE_COLOR" in i || g.includes("--color") || (e == null ? void 0 : e.platform) === "win32" || n && i.TERM !== "dumb" || "CI" in i) || typeof window != "undefined" && !!window.chrome;
120
- }
121
- function p(n = false) {
122
- let e = C(n), i = (r2, t, c, o) => {
123
- let l = "", s2 = 0;
124
- do
125
- l += r2.substring(s2, o) + c, s2 = o + t.length, o = r2.indexOf(t, s2);
126
- while (~o);
127
- return l + r2.substring(s2);
128
- }, g = (r2, t, c = r2) => {
129
- let o = (l) => {
130
- let s2 = String(l), b = s2.indexOf(t, r2.length);
131
- return ~b ? r2 + i(s2, t, c, b) + t : r2 + s2 + t;
132
- };
133
- return o.open = r2, o.close = t, o;
134
- }, u2 = {
135
- isColorSupported: e
136
- }, d = (r2) => `\x1B[${r2}m`;
137
- for (let [r2, t] of h)
138
- u2[r2] = e ? g(
139
- d(t[0]),
140
- d(t[1]),
141
- t[2]
142
- ) : a;
143
- return u2;
144
- }
145
-
146
- // ../../node_modules/.pnpm/tinyrainbow@2.0.0/node_modules/tinyrainbow/dist/node.js
147
- var import_tty = require("tty");
148
- var r = process.env.FORCE_TTY !== void 0 || (0, import_tty.isatty)(1);
149
- var u = p(r);
150
-
151
- // src/evals/run-vitest.ts
152
- var import_node_path = require("path");
153
- var import_node_fs = require("fs");
154
- var import_node_os = require("os");
155
- var import_node_path2 = __toESM(require("path"), 1);
156
- var import_vite_tsconfig_paths = __toESM(require("vite-tsconfig-paths"), 1);
157
- var import_node = require("vitest/node");
158
-
159
- // src/evals/reporter.ts
160
- init_cjs_shims();
161
-
162
- // src/evals/context/storage.ts
163
- init_cjs_shims();
164
- var import_api9 = require("@opentelemetry/api");
165
-
166
46
  // src/evals/context/manager.ts
167
- init_cjs_shims();
168
- var import_node_module = require("module");
169
- var CONTEXT_MANAGER_SYMBOL = Symbol.for("axiom.context_manager");
170
47
  function getGlobalContextManager() {
171
48
  return globalThis[CONTEXT_MANAGER_SYMBOL];
172
49
  }
173
50
  function setGlobalContextManager(manager) {
174
51
  globalThis[CONTEXT_MANAGER_SYMBOL] = manager;
175
52
  }
176
- var isNodeJS = typeof process !== "undefined" && !!process.versions?.node;
177
53
  function getContextManager() {
178
54
  const existing = getGlobalContextManager();
179
55
  if (existing) return existing;
@@ -229,27 +105,34 @@ function createAsyncHook(_name) {
229
105
  }
230
106
  };
231
107
  }
232
-
233
- // src/app-scope.ts
234
- init_cjs_shims();
108
+ var import_node_module, CONTEXT_MANAGER_SYMBOL, isNodeJS;
109
+ var init_manager = __esm({
110
+ "src/evals/context/manager.ts"() {
111
+ "use strict";
112
+ init_cjs_shims();
113
+ import_node_module = require("module");
114
+ CONTEXT_MANAGER_SYMBOL = Symbol.for("axiom.context_manager");
115
+ isNodeJS = typeof process !== "undefined" && !!process.versions?.node;
116
+ }
117
+ });
235
118
 
236
119
  // src/evals/context/global-flags.ts
237
- init_cjs_shims();
238
- var GLOBAL_OVERRIDES_SYMBOL = Symbol.for("axiom.global_flag_overrides");
239
120
  function setRoot(val) {
240
121
  globalThis[GLOBAL_OVERRIDES_SYMBOL] = val;
241
122
  }
242
123
  function setGlobalFlagOverrides(overrides2) {
243
124
  setRoot(overrides2);
244
125
  }
245
-
246
- // src/validate-flags.ts
247
- init_cjs_shims();
248
- var import_zod4 = require("zod");
126
+ var GLOBAL_OVERRIDES_SYMBOL;
127
+ var init_global_flags = __esm({
128
+ "src/evals/context/global-flags.ts"() {
129
+ "use strict";
130
+ init_cjs_shims();
131
+ GLOBAL_OVERRIDES_SYMBOL = Symbol.for("axiom.global_flag_overrides");
132
+ }
133
+ });
249
134
 
250
135
  // src/cli/utils/format-zod-errors.ts
251
- init_cjs_shims();
252
- var import_zod = require("zod");
253
136
  function formatZodErrors(error) {
254
137
  const issues = error.issues;
255
138
  const messages = [];
@@ -345,13 +228,16 @@ function generateExampleForIssue(issue, path3) {
345
228
  }
346
229
  return null;
347
230
  }
348
-
349
- // src/util/dot-path.ts
350
- init_cjs_shims();
351
- var import_zod2 = require("zod");
231
+ var import_zod;
232
+ var init_format_zod_errors = __esm({
233
+ "src/cli/utils/format-zod-errors.ts"() {
234
+ "use strict";
235
+ init_cjs_shims();
236
+ import_zod = require("zod");
237
+ }
238
+ });
352
239
 
353
240
  // src/util/zod-internals.ts
354
- init_cjs_shims();
355
241
  function isZodV4Schema(schema) {
356
242
  if (!schema || typeof schema !== "object") return false;
357
243
  const s2 = schema;
@@ -381,20 +267,6 @@ function getDefRawType(def) {
381
267
  if (raw == null) return void 0;
382
268
  return typeof raw === "string" ? raw : String(raw);
383
269
  }
384
- var KNOWN_KINDS = /* @__PURE__ */ new Set([
385
- "object",
386
- "optional",
387
- "default",
388
- "nullable",
389
- "readonly",
390
- "prefault",
391
- "nonoptional",
392
- "catch",
393
- "array",
394
- "record",
395
- "union",
396
- "discriminatedunion"
397
- ]);
398
270
  function getKind(schemaOrDef) {
399
271
  const def = schemaOrDef && typeof schemaOrDef === "object" && "type" in schemaOrDef ? schemaOrDef : getDef(schemaOrDef);
400
272
  const raw = getDefRawType(def);
@@ -429,16 +301,6 @@ function getDefaultValue(schema) {
429
301
  const def = getDef(schema);
430
302
  return def?.defaultValue;
431
303
  }
432
- var TRANSPARENT_WRAPPERS = [
433
- "optional",
434
- "nullable",
435
- "default",
436
- "readonly",
437
- "prefault",
438
- "nonoptional",
439
- "catch"
440
- // transparent for schema structure, but alters error behavior
441
- ];
442
304
  function unwrapTransparent(schema) {
443
305
  let current = schema;
444
306
  for (let i = 0; i < 10; i++) {
@@ -454,6 +316,37 @@ function unwrapTransparent(schema) {
454
316
  }
455
317
  return current;
456
318
  }
319
+ var KNOWN_KINDS, TRANSPARENT_WRAPPERS;
320
+ var init_zod_internals = __esm({
321
+ "src/util/zod-internals.ts"() {
322
+ "use strict";
323
+ init_cjs_shims();
324
+ KNOWN_KINDS = /* @__PURE__ */ new Set([
325
+ "object",
326
+ "optional",
327
+ "default",
328
+ "nullable",
329
+ "readonly",
330
+ "prefault",
331
+ "nonoptional",
332
+ "catch",
333
+ "array",
334
+ "record",
335
+ "union",
336
+ "discriminatedunion"
337
+ ]);
338
+ TRANSPARENT_WRAPPERS = [
339
+ "optional",
340
+ "nullable",
341
+ "default",
342
+ "readonly",
343
+ "prefault",
344
+ "nonoptional",
345
+ "catch"
346
+ // transparent for schema structure, but alters error behavior
347
+ ];
348
+ }
349
+ });
457
350
 
458
351
  // src/util/dot-path.ts
459
352
  function parsePath(path3) {
@@ -509,10 +402,17 @@ function isValidPath(schema, segments) {
509
402
  }
510
403
  return true;
511
404
  }
405
+ var import_zod2;
406
+ var init_dot_path = __esm({
407
+ "src/util/dot-path.ts"() {
408
+ "use strict";
409
+ init_cjs_shims();
410
+ import_zod2 = require("zod");
411
+ init_zod_internals();
412
+ }
413
+ });
512
414
 
513
415
  // src/util/deep-partial-schema.ts
514
- init_cjs_shims();
515
- var import_zod3 = require("zod");
516
416
  function makeDeepPartial(schema) {
517
417
  const shape = schema.shape;
518
418
  const newShape = {};
@@ -562,180 +462,224 @@ function makeDeepPartialField(fieldSchema) {
562
462
  }
563
463
  return fieldSchema.optional();
564
464
  }
465
+ var import_zod3;
466
+ var init_deep_partial_schema = __esm({
467
+ "src/util/deep-partial-schema.ts"() {
468
+ "use strict";
469
+ init_cjs_shims();
470
+ import_zod3 = require("zod");
471
+ init_zod_internals();
472
+ }
473
+ });
565
474
 
566
- // src/app-scope.ts
567
- var import_api8 = require("@opentelemetry/api");
568
- var import_zod5 = require("zod");
475
+ // src/validate-flags.ts
476
+ var import_zod4;
477
+ var init_validate_flags = __esm({
478
+ "src/validate-flags.ts"() {
479
+ "use strict";
480
+ init_cjs_shims();
481
+ import_zod4 = require("zod");
482
+ init_global_flags();
483
+ init_format_zod_errors();
484
+ init_dot_path();
485
+ init_deep_partial_schema();
486
+ init_zod_internals();
487
+ }
488
+ });
569
489
 
570
490
  // src/otel/utils/to-otel-attribute.ts
571
- init_cjs_shims();
572
- var import_api = require("@opentelemetry/api");
573
-
574
- // src/otel/index.ts
575
- init_cjs_shims();
576
-
577
- // src/otel/withSpan.ts
578
- init_cjs_shims();
579
- var import_api6 = require("@opentelemetry/api");
491
+ var import_api;
492
+ var init_to_otel_attribute = __esm({
493
+ "src/otel/utils/to-otel-attribute.ts"() {
494
+ "use strict";
495
+ init_cjs_shims();
496
+ import_api = require("@opentelemetry/api");
497
+ }
498
+ });
580
499
 
581
500
  // src/otel/withSpanBaggageKey.ts
582
- init_cjs_shims();
583
-
584
- // src/otel/utils/wrapperUtils.ts
585
- init_cjs_shims();
586
- var import_api5 = require("@opentelemetry/api");
587
-
588
- // src/otel/semconv/attributes.ts
589
- init_cjs_shims();
590
- var import_semantic_conventions = require("@opentelemetry/semantic-conventions");
501
+ var init_withSpanBaggageKey = __esm({
502
+ "src/otel/withSpanBaggageKey.ts"() {
503
+ "use strict";
504
+ init_cjs_shims();
505
+ }
506
+ });
591
507
 
592
508
  // src/otel/semconv/eval_proposal.ts
593
- init_cjs_shims();
509
+ var init_eval_proposal = __esm({
510
+ "src/otel/semconv/eval_proposal.ts"() {
511
+ "use strict";
512
+ init_cjs_shims();
513
+ }
514
+ });
594
515
 
595
516
  // src/otel/semconv/attributes.ts
596
- var import_incubating = require("@opentelemetry/semantic-conventions/incubating");
517
+ var import_semantic_conventions, import_incubating;
518
+ var init_attributes = __esm({
519
+ "src/otel/semconv/attributes.ts"() {
520
+ "use strict";
521
+ init_cjs_shims();
522
+ import_semantic_conventions = require("@opentelemetry/semantic-conventions");
523
+ init_eval_proposal();
524
+ import_incubating = require("@opentelemetry/semantic-conventions/incubating");
525
+ }
526
+ });
597
527
 
598
528
  // src/otel/startActiveSpan.ts
599
- init_cjs_shims();
600
- var import_api2 = require("@opentelemetry/api");
601
-
602
- // src/otel/initAxiomAI.ts
603
- init_cjs_shims();
604
- var import_api4 = require("@opentelemetry/api");
529
+ var import_api2;
530
+ var init_startActiveSpan = __esm({
531
+ "src/otel/startActiveSpan.ts"() {
532
+ "use strict";
533
+ init_cjs_shims();
534
+ import_api2 = require("@opentelemetry/api");
535
+ }
536
+ });
605
537
 
606
538
  // package.json
607
- var package_default = {
608
- name: "axiom",
609
- version: "0.34.0",
610
- type: "module",
611
- author: "Axiom, Inc.",
612
- contributors: [
613
- "Islam Shehata <islam@axiom.co>",
614
- "Chris Ehrlich <chris@axiom.co>",
615
- "Gabriel de Andrade <gabriel@axiom.co>"
616
- ],
617
- scripts: {
618
- dev: "tsup --watch",
619
- build: "tsup && chmod +x dist/bin.js",
620
- format: "prettier --write .",
621
- "format:check": "prettier --check .",
622
- lint: "eslint './**/*.{js,ts}'",
623
- typecheck: "tsc --noEmit",
624
- test: "vitest run",
625
- "test:watch": "vitest --watch",
626
- publint: "npx publint"
627
- },
628
- types: "./dist/index.d.ts",
629
- main: "./dist/index.cjs",
630
- module: "./dist/index.js",
631
- bin: {
632
- axiom: "./dist/bin.js"
633
- },
634
- exports: {
635
- "./ai": {
636
- import: {
637
- types: "./dist/index.d.ts",
638
- default: "./dist/index.js"
539
+ var package_default;
540
+ var init_package = __esm({
541
+ "package.json"() {
542
+ package_default = {
543
+ name: "axiom",
544
+ version: "0.34.2",
545
+ type: "module",
546
+ author: "Axiom, Inc.",
547
+ contributors: [
548
+ "Islam Shehata <islam@axiom.co>",
549
+ "Chris Ehrlich <chris@axiom.co>",
550
+ "Gabriel de Andrade <gabriel@axiom.co>"
551
+ ],
552
+ scripts: {
553
+ dev: "tsup --watch",
554
+ build: "tsup && chmod +x dist/bin.js",
555
+ format: "prettier --write .",
556
+ "format:check": "prettier --check .",
557
+ lint: "eslint './**/*.{js,ts}'",
558
+ typecheck: "tsc --noEmit",
559
+ test: "vitest run",
560
+ "test:watch": "vitest --watch",
561
+ publint: "npx publint"
639
562
  },
640
- require: {
641
- types: "./dist/index.d.cts",
642
- default: "./dist/index.cjs"
643
- }
644
- },
645
- "./ai/evals": {
646
- import: {
647
- types: "./dist/evals.d.ts",
648
- default: "./dist/evals.js"
563
+ types: "./dist/index.d.ts",
564
+ main: "./dist/index.cjs",
565
+ module: "./dist/index.js",
566
+ bin: {
567
+ axiom: "./dist/bin.js"
649
568
  },
650
- require: {
651
- types: "./dist/evals.d.cts",
652
- default: "./dist/evals.cjs"
653
- }
654
- },
655
- "./ai/config": {
656
- import: {
657
- types: "./dist/config.d.ts",
658
- default: "./dist/config.js"
569
+ exports: {
570
+ "./ai": {
571
+ import: {
572
+ types: "./dist/index.d.ts",
573
+ default: "./dist/index.js"
574
+ },
575
+ require: {
576
+ types: "./dist/index.d.cts",
577
+ default: "./dist/index.cjs"
578
+ }
579
+ },
580
+ "./ai/evals": {
581
+ import: {
582
+ types: "./dist/evals.d.ts",
583
+ default: "./dist/evals.js"
584
+ },
585
+ require: {
586
+ types: "./dist/evals.d.cts",
587
+ default: "./dist/evals.cjs"
588
+ }
589
+ },
590
+ "./ai/config": {
591
+ import: {
592
+ types: "./dist/config.d.ts",
593
+ default: "./dist/config.js"
594
+ },
595
+ require: {
596
+ types: "./dist/config.d.cts",
597
+ default: "./dist/config.cjs"
598
+ }
599
+ }
659
600
  },
660
- require: {
661
- types: "./dist/config.d.cts",
662
- default: "./dist/config.cjs"
663
- }
664
- }
665
- },
666
- keywords: [
667
- "axiom",
668
- "logging",
669
- "ai",
670
- "otel",
671
- "opentelemetry"
672
- ],
673
- repository: {
674
- type: "git",
675
- url: "git+https://github.com/axiomhq/ai.git",
676
- directory: "packages/ai"
677
- },
678
- license: "MIT",
679
- dependencies: {
680
- "@next/env": "^15.4.2",
681
- "@opentelemetry/auto-instrumentations-node": "^0.60.1",
682
- "@opentelemetry/context-async-hooks": "^2.0.1",
683
- "@opentelemetry/exporter-trace-otlp-http": "^0.202.0",
684
- "@opentelemetry/resources": "^2.0.1",
685
- "@opentelemetry/sdk-trace-node": "^2.0.1",
686
- "@opentelemetry/semantic-conventions": "^1.37.0",
687
- "@sinclair/typebox": "^0.34.37",
688
- c12: "^2.0.4",
689
- commander: "^14.0.0",
690
- defu: "^6.1.4",
691
- handlebars: "^4.7.8",
692
- nanoid: "^5.1.5",
693
- open: "^10.1.0",
694
- "vite-tsconfig-paths": "^5.1.4"
695
- },
696
- peerDependencies: {
697
- "@opentelemetry/api": "^1.9.0",
698
- zod: "^3.25.0 || ^4.0.0"
699
- },
700
- devDependencies: {
701
- "@ai-sdk/anthropicv1": "npm:@ai-sdk/anthropic@^1.2.12",
702
- "@ai-sdk/anthropicv2": "npm:@ai-sdk/anthropic@^2.0.44",
703
- "@ai-sdk/openaiv1": "npm:@ai-sdk/openai@^1.3.24",
704
- "@ai-sdk/openaiv2": "npm:@ai-sdk/openai@^2.0.67",
705
- "@ai-sdk/providerv1": "npm:@ai-sdk/provider@^1.1.3",
706
- "@ai-sdk/providerv2": "npm:@ai-sdk/provider@^2.0.0",
707
- "@opentelemetry/api": "^1.9.0",
708
- "@opentelemetry/core": "^2.0.1",
709
- "@opentelemetry/sdk-trace-base": "^2.0.1",
710
- "@opentelemetry/sdk-trace-node": "^2.0.1",
711
- "@repo/eslint-config": "workspace:*",
712
- "@types/node": "^22.15.29",
713
- "@vitest/coverage-v8": "^4.0.0",
714
- aiv4: "npm:ai@^4.3.19",
715
- aiv5: "npm:ai@^5.0.93",
716
- esbuild: "^0.25.8",
717
- eslint: "catalog:",
718
- msw: "^2.12.2",
719
- prettier: "catalog:",
720
- tinyrainbow: "^2.0.0",
721
- tsup: "catalog:",
722
- typescript: "catalog:",
723
- vitest: "catalog:",
724
- zod: "catalog:"
725
- },
726
- files: [
727
- "dist"
728
- ],
729
- packageManager: "pnpm@10.16.1"
730
- };
601
+ keywords: [
602
+ "axiom",
603
+ "logging",
604
+ "ai",
605
+ "otel",
606
+ "opentelemetry"
607
+ ],
608
+ repository: {
609
+ type: "git",
610
+ url: "git+https://github.com/axiomhq/ai.git",
611
+ directory: "packages/ai"
612
+ },
613
+ license: "MIT",
614
+ dependencies: {
615
+ "@next/env": "^15.4.2",
616
+ "@opentelemetry/auto-instrumentations-node": "^0.60.1",
617
+ "@opentelemetry/context-async-hooks": "^2.0.1",
618
+ "@opentelemetry/exporter-trace-otlp-http": "^0.202.0",
619
+ "@opentelemetry/resources": "^2.0.1",
620
+ "@opentelemetry/sdk-trace-node": "^2.0.1",
621
+ "@opentelemetry/semantic-conventions": "^1.37.0",
622
+ "@sinclair/typebox": "^0.34.37",
623
+ c12: "^2.0.4",
624
+ commander: "^14.0.0",
625
+ defu: "^6.1.4",
626
+ handlebars: "^4.7.8",
627
+ nanoid: "^5.1.5",
628
+ open: "^10.1.0",
629
+ "vite-tsconfig-paths": "^5.1.4",
630
+ vitest: "^4.0.0"
631
+ },
632
+ peerDependencies: {
633
+ "@opentelemetry/api": "^1.9.0",
634
+ zod: "^3.25.0 || ^4.0.0"
635
+ },
636
+ devDependencies: {
637
+ "@ai-sdk/anthropicv1": "npm:@ai-sdk/anthropic@^1.2.12",
638
+ "@ai-sdk/anthropicv2": "npm:@ai-sdk/anthropic@^2.0.44",
639
+ "@ai-sdk/openaiv1": "npm:@ai-sdk/openai@^1.3.24",
640
+ "@ai-sdk/openaiv2": "npm:@ai-sdk/openai@^2.0.67",
641
+ "@ai-sdk/providerv1": "npm:@ai-sdk/provider@^1.1.3",
642
+ "@ai-sdk/providerv2": "npm:@ai-sdk/provider@^2.0.0",
643
+ "@opentelemetry/api": "^1.9.0",
644
+ "@opentelemetry/core": "^2.0.1",
645
+ "@opentelemetry/sdk-trace-base": "^2.0.1",
646
+ "@opentelemetry/sdk-trace-node": "^2.0.1",
647
+ "@repo/eslint-config": "workspace:*",
648
+ "@types/node": "^22.15.29",
649
+ "@vitest/coverage-v8": "^4.0.0",
650
+ aiv4: "npm:ai@^4.3.19",
651
+ aiv5: "npm:ai@^5.0.93",
652
+ esbuild: "^0.25.8",
653
+ eslint: "catalog:",
654
+ msw: "^2.12.2",
655
+ prettier: "catalog:",
656
+ tinyrainbow: "^2.0.0",
657
+ tsup: "catalog:",
658
+ typescript: "catalog:",
659
+ vitest: "catalog:",
660
+ zod: "catalog:"
661
+ },
662
+ files: [
663
+ "dist"
664
+ ],
665
+ packageManager: "pnpm@10.16.1"
666
+ };
667
+ }
668
+ });
731
669
 
732
670
  // src/otel/utils/redaction.ts
733
- init_cjs_shims();
734
- var import_api3 = require("@opentelemetry/api");
735
- var AXIOM_AI_REDACTION_KEY = Symbol.for("__axiom_ai_redaction__");
671
+ var import_api3, AXIOM_AI_REDACTION_KEY;
672
+ var init_redaction = __esm({
673
+ "src/otel/utils/redaction.ts"() {
674
+ "use strict";
675
+ init_cjs_shims();
676
+ import_api3 = require("@opentelemetry/api");
677
+ init_withSpanBaggageKey();
678
+ AXIOM_AI_REDACTION_KEY = Symbol.for("__axiom_ai_redaction__");
679
+ }
680
+ });
736
681
 
737
682
  // src/otel/initAxiomAI.ts
738
- var AXIOM_AI_SCOPE_KEY = Symbol.for("__axiom_ai_scope__");
739
683
  function extractTracerScope(tracer) {
740
684
  const tracerAny = tracer;
741
685
  const name = tracerAny._instrumentationScope?.name || tracerAny.instrumentationLibrary?.name || package_default.name;
@@ -758,51 +702,185 @@ function initAxiomAI(config) {
758
702
  globalThis[AXIOM_AI_REDACTION_KEY] = config.redactionPolicy;
759
703
  }
760
704
  }
705
+ var import_api4, AXIOM_AI_SCOPE_KEY;
706
+ var init_initAxiomAI = __esm({
707
+ "src/otel/initAxiomAI.ts"() {
708
+ "use strict";
709
+ init_cjs_shims();
710
+ import_api4 = require("@opentelemetry/api");
711
+ init_package();
712
+ init_redaction();
713
+ AXIOM_AI_SCOPE_KEY = Symbol.for("__axiom_ai_scope__");
714
+ }
715
+ });
761
716
 
762
- // src/util/name-validation-runtime.ts
763
- init_cjs_shims();
717
+ // src/otel/utils/wrapperUtils.ts
718
+ var import_api5;
719
+ var init_wrapperUtils = __esm({
720
+ "src/otel/utils/wrapperUtils.ts"() {
721
+ "use strict";
722
+ init_cjs_shims();
723
+ import_api5 = require("@opentelemetry/api");
724
+ init_attributes();
725
+ init_withSpanBaggageKey();
726
+ init_startActiveSpan();
727
+ init_initAxiomAI();
728
+ }
729
+ });
764
730
 
765
- // src/otel/vercel.ts
766
- init_cjs_shims();
731
+ // src/util/name-validation-runtime.ts
732
+ var init_name_validation_runtime = __esm({
733
+ "src/util/name-validation-runtime.ts"() {
734
+ "use strict";
735
+ init_cjs_shims();
736
+ }
737
+ });
767
738
 
768
- // src/otel/AxiomWrappedLanguageModelV1.ts
769
- init_cjs_shims();
739
+ // src/otel/withSpan.ts
740
+ var import_api6;
741
+ var init_withSpan = __esm({
742
+ "src/otel/withSpan.ts"() {
743
+ "use strict";
744
+ init_cjs_shims();
745
+ import_api6 = require("@opentelemetry/api");
746
+ init_withSpanBaggageKey();
747
+ init_wrapperUtils();
748
+ init_name_validation_runtime();
749
+ }
750
+ });
770
751
 
771
- // src/otel/middleware.ts
772
- init_cjs_shims();
773
- var import_api7 = require("@opentelemetry/api");
752
+ // src/otel/utils/contentSanitizer.ts
753
+ var init_contentSanitizer = __esm({
754
+ "src/otel/utils/contentSanitizer.ts"() {
755
+ "use strict";
756
+ init_cjs_shims();
757
+ }
758
+ });
774
759
 
775
760
  // src/otel/completionUtils.ts
776
- init_cjs_shims();
777
-
778
- // src/otel/utils/contentSanitizer.ts
779
- init_cjs_shims();
761
+ var init_completionUtils = __esm({
762
+ "src/otel/completionUtils.ts"() {
763
+ "use strict";
764
+ init_cjs_shims();
765
+ init_contentSanitizer();
766
+ }
767
+ });
780
768
 
781
769
  // src/util/promptUtils.ts
782
- init_cjs_shims();
770
+ var init_promptUtils = __esm({
771
+ "src/util/promptUtils.ts"() {
772
+ "use strict";
773
+ init_cjs_shims();
774
+ }
775
+ });
783
776
 
784
777
  // src/otel/utils/normalized.ts
785
- init_cjs_shims();
778
+ var init_normalized = __esm({
779
+ "src/otel/utils/normalized.ts"() {
780
+ "use strict";
781
+ init_cjs_shims();
782
+ }
783
+ });
784
+
785
+ // src/util/currentUnixTime.ts
786
+ var init_currentUnixTime = __esm({
787
+ "src/util/currentUnixTime.ts"() {
788
+ "use strict";
789
+ init_cjs_shims();
790
+ }
791
+ });
786
792
 
787
793
  // src/otel/streaming/aggregators.ts
788
- init_cjs_shims();
794
+ var init_aggregators = __esm({
795
+ "src/otel/streaming/aggregators.ts"() {
796
+ "use strict";
797
+ init_cjs_shims();
798
+ init_currentUnixTime();
799
+ }
800
+ });
789
801
 
790
- // src/util/currentUnixTime.ts
791
- init_cjs_shims();
802
+ // src/otel/middleware.ts
803
+ var import_api7;
804
+ var init_middleware = __esm({
805
+ "src/otel/middleware.ts"() {
806
+ "use strict";
807
+ init_cjs_shims();
808
+ import_api7 = require("@opentelemetry/api");
809
+ init_attributes();
810
+ init_completionUtils();
811
+ init_promptUtils();
812
+ init_contentSanitizer();
813
+ init_wrapperUtils();
814
+ init_normalized();
815
+ init_aggregators();
816
+ init_redaction();
817
+ }
818
+ });
819
+
820
+ // src/otel/AxiomWrappedLanguageModelV1.ts
821
+ var init_AxiomWrappedLanguageModelV1 = __esm({
822
+ "src/otel/AxiomWrappedLanguageModelV1.ts"() {
823
+ "use strict";
824
+ init_cjs_shims();
825
+ init_middleware();
826
+ }
827
+ });
792
828
 
793
829
  // src/otel/AxiomWrappedLanguageModelV2.ts
794
- init_cjs_shims();
830
+ var init_AxiomWrappedLanguageModelV2 = __esm({
831
+ "src/otel/AxiomWrappedLanguageModelV2.ts"() {
832
+ "use strict";
833
+ init_cjs_shims();
834
+ init_middleware();
835
+ }
836
+ });
837
+
838
+ // src/otel/vercel.ts
839
+ var init_vercel = __esm({
840
+ "src/otel/vercel.ts"() {
841
+ "use strict";
842
+ init_cjs_shims();
843
+ init_AxiomWrappedLanguageModelV1();
844
+ init_AxiomWrappedLanguageModelV2();
845
+ }
846
+ });
847
+
848
+ // src/otel/index.ts
849
+ var init_otel = __esm({
850
+ "src/otel/index.ts"() {
851
+ "use strict";
852
+ init_cjs_shims();
853
+ init_withSpan();
854
+ init_vercel();
855
+ init_attributes();
856
+ }
857
+ });
858
+
859
+ // src/app-scope.ts
860
+ var import_api8, import_zod5;
861
+ var init_app_scope = __esm({
862
+ "src/app-scope.ts"() {
863
+ "use strict";
864
+ init_cjs_shims();
865
+ init_global_flags();
866
+ init_storage();
867
+ init_validate_flags();
868
+ init_dot_path();
869
+ init_zod_internals();
870
+ import_api8 = require("@opentelemetry/api");
871
+ import_zod5 = require("zod");
872
+ init_to_otel_attribute();
873
+ init_otel();
874
+ }
875
+ });
795
876
 
796
877
  // src/evals/context/storage.ts
797
- var CONFIG_SCOPE_SYMBOL = Symbol.for("axiom.eval.configScope");
798
- var AXIOM_CONFIG_SYMBOL = Symbol.for("axiom.eval.config");
799
878
  function getAxiomConfig() {
800
879
  return globalThis[AXIOM_CONFIG_SYMBOL];
801
880
  }
802
881
  function setAxiomConfig(config) {
803
882
  globalThis[AXIOM_CONFIG_SYMBOL] = config;
804
883
  }
805
- var EVAL_CONTEXT = createAsyncHook("eval-context");
806
884
  function getEvalContext() {
807
885
  const ctx = EVAL_CONTEXT.get();
808
886
  if (!ctx) {
@@ -842,885 +920,429 @@ function withEvalContext(options = {}, fn) {
842
920
  fn
843
921
  );
844
922
  }
845
-
846
- // src/evals/reporter.console-utils.ts
847
- init_cjs_shims();
848
-
849
- // src/util/deep-equal.ts
850
- init_cjs_shims();
851
- function deepEqual(data, other) {
852
- if (data === other) {
853
- return true;
923
+ var import_api9, CONFIG_SCOPE_SYMBOL, AXIOM_CONFIG_SYMBOL, EVAL_CONTEXT;
924
+ var init_storage = __esm({
925
+ "src/evals/context/storage.ts"() {
926
+ "use strict";
927
+ init_cjs_shims();
928
+ import_api9 = require("@opentelemetry/api");
929
+ init_manager();
930
+ init_app_scope();
931
+ CONFIG_SCOPE_SYMBOL = Symbol.for("axiom.eval.configScope");
932
+ AXIOM_CONFIG_SYMBOL = Symbol.for("axiom.eval.config");
933
+ EVAL_CONTEXT = createAsyncHook("eval-context");
854
934
  }
855
- if (Object.is(data, other)) {
856
- return true;
935
+ });
936
+
937
+ // src/context.ts
938
+ function overrideFlags(partial) {
939
+ const current = getEvalContext();
940
+ if (!current) {
941
+ if (process.env.NODE_ENV !== "test") {
942
+ console.warn("overrideFlags called outside of evaluation context");
943
+ }
944
+ return;
857
945
  }
858
- if (typeof data !== "object" || typeof other !== "object") {
859
- return false;
860
- }
861
- if (data === null || other === null) {
862
- return false;
863
- }
864
- if (Object.getPrototypeOf(data) !== Object.getPrototypeOf(other)) {
865
- return false;
866
- }
867
- if (Array.isArray(data)) {
868
- return isDeepEqualArrays(data, other);
869
- }
870
- if (data instanceof Map) {
871
- return isDeepEqualMaps(data, other);
872
- }
873
- if (data instanceof Set) {
874
- return isDeepEqualSets(data, other);
875
- }
876
- if (data instanceof Date) {
877
- return data.getTime() === other.getTime();
946
+ const overlayContext = {
947
+ ...current,
948
+ flags: { ...current.flags, ...partial },
949
+ // Merge for backwards compatibility
950
+ parent: current,
951
+ overrides: { ...partial }
952
+ };
953
+ const currentCtx = EVAL_CONTEXT.get();
954
+ if (currentCtx) {
955
+ Object.assign(currentCtx, overlayContext);
878
956
  }
879
- if (data instanceof RegExp) {
880
- return data.toString() === other.toString();
957
+ for (const [key, value] of Object.entries(partial)) {
958
+ putOnSpan("flag", key, value);
881
959
  }
882
- if (Object.keys(data).length !== Object.keys(other).length) {
883
- return false;
960
+ }
961
+ var init_context = __esm({
962
+ "src/context.ts"() {
963
+ "use strict";
964
+ init_cjs_shims();
965
+ init_storage();
884
966
  }
885
- for (const [key, value] of Object.entries(data)) {
886
- if (!(key in other)) {
887
- return false;
888
- }
889
- if (!deepEqual(
890
- value,
891
- // @ts-expect-error [ts7053] - We already checked that `other` has `key`
892
- other[key]
893
- )) {
894
- return false;
967
+ });
968
+
969
+ // src/cli/utils/eval-context-runner.ts
970
+ async function runEvalWithContext(overrides2, runFn) {
971
+ setGlobalFlagOverrides(overrides2);
972
+ return withEvalContext({ initialFlags: overrides2 }, async () => {
973
+ if (Object.keys(overrides2).length > 0) {
974
+ overrideFlags(overrides2);
895
975
  }
896
- }
897
- return true;
976
+ return runFn();
977
+ });
898
978
  }
899
- function isDeepEqualArrays(data, other) {
900
- if (data.length !== other.length) {
901
- return false;
979
+ var init_eval_context_runner = __esm({
980
+ "src/cli/utils/eval-context-runner.ts"() {
981
+ "use strict";
982
+ init_cjs_shims();
983
+ init_context();
984
+ init_storage();
985
+ init_global_flags();
902
986
  }
903
- for (const [index, item] of data.entries()) {
904
- if (!deepEqual(item, other[index])) {
905
- return false;
987
+ });
988
+
989
+ // src/cli/utils/parse-flag-overrides.ts
990
+ function ensureNoSpaceSeparatedSyntax(flagName, value, nextToken, flagType) {
991
+ if (value === void 0 && nextToken !== void 0) {
992
+ if (flagType === "flag" && !nextToken.startsWith("-") && nextToken !== "true" && nextToken !== "false") {
993
+ console.error(`\u274C Invalid syntax: --flag.${flagName} ${nextToken}`);
994
+ console.error(`\u{1F4A1} Use: --flag.${flagName}=${nextToken}`);
995
+ process.exit(1);
996
+ } else if (flagType === "config" && !nextToken.startsWith("-")) {
997
+ console.error(`\u274C Invalid syntax: --flags-config ${nextToken}`);
998
+ console.error(`\u{1F4A1} Use: --flags-config=${nextToken}`);
999
+ process.exit(1);
906
1000
  }
907
1001
  }
908
- return true;
909
1002
  }
910
- function isDeepEqualMaps(data, other) {
911
- if (data.size !== other.size) {
912
- return false;
1003
+ function collectFlagValidationErrors(overrides2, flagSchema) {
1004
+ if (!flagSchema || Object.keys(overrides2).length === 0) {
1005
+ return { success: true, errors: [] };
913
1006
  }
914
- for (const [key, value] of data.entries()) {
915
- if (!other.has(key)) {
916
- return false;
917
- }
918
- if (!deepEqual(value, other.get(key))) {
919
- return false;
1007
+ assertZodV4(flagSchema, "flagSchema");
1008
+ const schema = flagSchema;
1009
+ const errors = [];
1010
+ for (const dotPath of Object.keys(overrides2)) {
1011
+ const segments = parsePath(dotPath);
1012
+ if (!isValidPath(schema, segments)) {
1013
+ errors.push({ type: "invalid_path", path: dotPath });
920
1014
  }
921
1015
  }
922
- return true;
1016
+ if (errors.length > 0) {
1017
+ return { success: false, errors };
1018
+ }
1019
+ const nestedObject = dotNotationToNested(overrides2);
1020
+ const deepPartialSchema = makeDeepPartial(schema);
1021
+ const result = deepPartialSchema.safeParse(nestedObject);
1022
+ if (!result.success) {
1023
+ errors.push({ type: "invalid_value", zodError: result.error });
1024
+ }
1025
+ return { success: errors.length === 0, errors };
923
1026
  }
924
- function isDeepEqualSets(data, other) {
925
- if (data.size !== other.size) {
926
- return false;
927
- }
928
- const otherCopy = [...other];
929
- for (const dataItem of data) {
930
- let isFound = false;
931
- for (const [index, otherItem] of otherCopy.entries()) {
932
- if (deepEqual(dataItem, otherItem)) {
933
- isFound = true;
934
- otherCopy.splice(index, 1);
935
- break;
1027
+ function printFlagValidationErrorsAndExit(errors) {
1028
+ console.error("\u274C Invalid CLI flags:");
1029
+ for (const error of errors) {
1030
+ if (error.type === "invalid_path") {
1031
+ console.error(` \u2022 flag '${error.path}': Invalid flag path`);
1032
+ } else {
1033
+ console.error(formatZodErrors(error.zodError));
1034
+ const examples = generateFlagExamples(error.zodError);
1035
+ if (examples.length > 0) {
1036
+ console.error("\n\u{1F4A1} Valid examples:");
1037
+ examples.forEach((example) => console.error(` ${example}`));
936
1038
  }
937
1039
  }
938
- if (!isFound) {
939
- return false;
940
- }
941
1040
  }
942
- return true;
1041
+ process.exit(1);
943
1042
  }
944
-
945
- // src/evals/reporter.console-utils.ts
946
- function formatPercentage(value) {
947
- if (!Number.isFinite(value)) {
948
- return "N/A";
1043
+ function validateFlagOverrides(overrides2, flagSchema) {
1044
+ const result = collectFlagValidationErrors(overrides2, flagSchema);
1045
+ if (!result.success) {
1046
+ printFlagValidationErrorsAndExit(result.errors);
949
1047
  }
950
- return Number(value * 100).toFixed(2) + "%";
951
1048
  }
952
- function formatDiff(current, baseline) {
953
- if (!Number.isFinite(current) || !Number.isFinite(baseline)) {
954
- return { text: "N/A", color: u.dim };
1049
+ function coerceValue(raw) {
1050
+ if (raw === "true") return true;
1051
+ if (raw === "false") return false;
1052
+ const num = Number(raw);
1053
+ if (!Number.isNaN(num) && raw.trim() === num.toString()) {
1054
+ return num;
955
1055
  }
956
- const diff = current - baseline;
957
- const diffText = (diff >= 0 ? "+" : "") + formatPercentage(diff);
958
- const color = diff > 0 ? u.green : diff < 0 ? u.red : u.dim;
959
- return { text: diffText, color };
960
- }
961
- function truncate(str, max) {
962
- return str.length > max ? str.slice(0, max) + "\u2026" : str;
963
- }
964
- function stringify(value) {
965
1056
  try {
966
- if (typeof value === "string") return value;
967
- return JSON.stringify(value);
1057
+ return JSON.parse(raw);
968
1058
  } catch {
969
- return String(value);
1059
+ return raw;
970
1060
  }
971
1061
  }
972
- function getCaseFingerprint(input, expected) {
973
- const inputStr = typeof input === "string" ? input : JSON.stringify(input);
974
- const expectedStr = typeof expected === "string" ? expected : JSON.stringify(expected);
975
- return JSON.stringify({ input: inputStr, expected: expectedStr });
976
- }
977
- function printEvalNameAndFileName(testSuite, meta, logger = console.log) {
978
- const cwd = process.cwd();
979
- logger(
980
- " ",
981
- u.bgCyan(u.black(` ${testSuite.project.name} `)),
982
- u.bgBlue(u.black(` ${meta.evaluation.name}-${meta.evaluation.version} `)),
983
- u.dim(`(${testSuite.children.size} cases)`)
984
- );
985
- logger(" ", u.dim(testSuite.module.moduleId.replace(cwd, "")));
986
- }
987
- function printBaselineNameAndVersion(testMeta, logger = console.log) {
988
- if (testMeta.evaluation.baseline) {
989
- logger(
990
- " ",
991
- " baseline ",
992
- u.bgMagenta(
993
- u.black(` ${testMeta.evaluation.baseline.name}-${testMeta.evaluation.baseline.version} `)
994
- )
995
- );
996
- } else {
997
- logger(" ", u.bgWhite(u.blackBright(" baseline: ")), "none");
1062
+ function loadConfigFile(path3) {
1063
+ const abs = (0, import_node_path.resolve)(process.cwd(), path3);
1064
+ try {
1065
+ const contents = (0, import_node_fs.readFileSync)(abs, "utf8");
1066
+ const parsed = JSON.parse(contents);
1067
+ if (typeof parsed !== "object" || Array.isArray(parsed) || parsed === null) {
1068
+ console.error(
1069
+ `\u274C Flags config must be a JSON object, got ${Array.isArray(parsed) ? "array" : typeof parsed}`
1070
+ );
1071
+ process.exit(1);
1072
+ }
1073
+ return parsed;
1074
+ } catch (err) {
1075
+ console.error(`\u274C Could not read or parse flags config "${path3}": ${err.message}`);
1076
+ process.exit(1);
998
1077
  }
999
- logger("");
1000
1078
  }
1001
- function printRuntimeFlags(testMeta, logger = console.log) {
1002
- if (testMeta.case.runtimeFlags && Object.keys(testMeta.case.runtimeFlags).length > 0) {
1003
- const entries = Object.entries(testMeta.case.runtimeFlags);
1004
- logger(" ", u.dim("runtime flags"));
1005
- for (const [k, v] of entries) {
1006
- switch (v.kind) {
1007
- case "replaced": {
1008
- const valText = truncate(stringify(v.value), 80);
1009
- const defText = truncate(stringify(v.default), 80);
1010
- logger(" ", `${k}: ${valText} (default: ${defText})`);
1011
- break;
1012
- }
1013
- case "introduced": {
1014
- const valText = truncate(stringify(v.value), 80);
1015
- logger(" ", `${k}: ${valText} (no default)`);
1016
- break;
1017
- }
1079
+ function extractOverrides(argv) {
1080
+ const cleanedArgv2 = [];
1081
+ const overrides2 = {};
1082
+ let configPath = null;
1083
+ let hasCliFlags = false;
1084
+ let configPathCount = 0;
1085
+ for (let i = 0; i < argv.length; i++) {
1086
+ const token = argv[i];
1087
+ const configMatch = token.match(CONFIG_RE);
1088
+ const flagMatch = token.match(FLAG_RE);
1089
+ if (configMatch) {
1090
+ configPathCount++;
1091
+ if (configPathCount > 1) {
1092
+ console.error("\u274C Only one --flags-config can be supplied.");
1093
+ process.exit(1);
1094
+ }
1095
+ const value = configMatch[1];
1096
+ const nextToken = argv.length > i + 1 ? argv[i + 1] : void 0;
1097
+ ensureNoSpaceSeparatedSyntax("flags-config", value, nextToken, "config");
1098
+ if (!value) {
1099
+ console.error("\u274C --flags-config requires a file path");
1100
+ console.error("\u{1F4A1} Use: --flags-config=path/to/config.json");
1101
+ process.exit(1);
1018
1102
  }
1103
+ configPath = value;
1104
+ } else if (flagMatch) {
1105
+ hasCliFlags = true;
1106
+ const key = flagMatch[1];
1107
+ const value = flagMatch[2];
1108
+ const nextToken = argv.length > i + 1 ? argv[i + 1] : void 0;
1109
+ ensureNoSpaceSeparatedSyntax(key, value, nextToken, "flag");
1110
+ const finalValue = value === void 0 ? "true" : value;
1111
+ overrides2[key] = coerceValue(finalValue);
1112
+ } else {
1113
+ cleanedArgv2.push(token);
1019
1114
  }
1020
1115
  }
1116
+ if (configPath && hasCliFlags) {
1117
+ console.error("\u274C Cannot use both --flags-config and --flag.* arguments together.");
1118
+ console.error("Choose one approach:");
1119
+ console.error(" \u2022 Config file: --flags-config=my-flags.json");
1120
+ console.error(" \u2022 CLI flags: --flag.temperature=0.9 --flag.model=gpt-4o");
1121
+ process.exit(1);
1122
+ }
1123
+ if (configPath) {
1124
+ const configOverrides = loadConfigFile(configPath);
1125
+ return { cleanedArgv: cleanedArgv2, overrides: configOverrides };
1126
+ }
1127
+ return { cleanedArgv: cleanedArgv2, overrides: overrides2 };
1021
1128
  }
1022
- function printTestCaseCountStartDuration(testSuite, startTime, duration, logger = console.log) {
1023
- logger(" ");
1024
- logger(" ", u.dim("Cases"), testSuite.children.size);
1025
- logger(" ", u.dim("Start at"), new Date(startTime).toTimeString());
1026
- logger(" ", u.dim("Duration"), `${duration}s`);
1027
- }
1028
- function printTestCaseSuccessOrFailed(testMeta, ok, logger = console.log) {
1029
- const index = testMeta.case.index;
1030
- if (ok) {
1031
- logger(" ", u.yellow(` \u2714 case ${index}:`));
1032
- } else {
1033
- logger(" ", u.red(` \u2716 case ${index}: failed`));
1034
- for (const e of testMeta.case.errors ?? []) {
1035
- logger("", e.message);
1036
- }
1129
+ var import_zod6, import_node_fs, import_node_path, FLAG_RE, CONFIG_RE;
1130
+ var init_parse_flag_overrides = __esm({
1131
+ "src/cli/utils/parse-flag-overrides.ts"() {
1132
+ "use strict";
1133
+ init_cjs_shims();
1134
+ import_zod6 = require("zod");
1135
+ init_format_zod_errors();
1136
+ import_node_fs = require("fs");
1137
+ import_node_path = require("path");
1138
+ init_dot_path();
1139
+ init_deep_partial_schema();
1140
+ init_zod_internals();
1141
+ FLAG_RE = /^--flag\.([^=]+)(?:=(.*))?$/;
1142
+ CONFIG_RE = /^--flags-config(?:=(.*))?$/;
1037
1143
  }
1144
+ });
1145
+
1146
+ // src/cli/utils/glob-utils.ts
1147
+ function isGlob(str) {
1148
+ return /[*?[\]{}!]/.test(str);
1038
1149
  }
1039
- function printTestCaseScores(testMeta, baselineCase, logger = console.log) {
1040
- const scores = testMeta.case.scores;
1041
- const keys = Object.keys(scores);
1042
- if (keys.length === 0) {
1043
- return;
1150
+ var init_glob_utils = __esm({
1151
+ "src/cli/utils/glob-utils.ts"() {
1152
+ "use strict";
1153
+ init_cjs_shims();
1044
1154
  }
1045
- const maxNameLength = Math.max(...keys.map((k) => k.length));
1046
- keys.forEach((k) => {
1047
- const scoreData = scores[k];
1048
- const hasError = scoreData.metadata?.error;
1049
- const v = scoreData.score ? scoreData.score : 0;
1050
- const rawCurrent = hasError ? "N/A" : formatPercentage(v);
1051
- const paddedCurrent = rawCurrent.padStart(7);
1052
- const coloredCurrent = hasError ? u.dim(paddedCurrent) : u.magentaBright(paddedCurrent);
1053
- const paddedName = k.padEnd(maxNameLength);
1054
- if (baselineCase?.scores[k]) {
1055
- const baselineScoreValue = baselineCase.scores[k].value;
1056
- const rawBaseline = formatPercentage(baselineScoreValue);
1057
- const paddedBaseline = rawBaseline.padStart(7);
1058
- const coloredBaseline = u.blueBright(paddedBaseline);
1059
- const { text: diffText, color: diffColor } = formatDiff(v, baselineScoreValue);
1060
- const paddedDiff = diffText.padStart(8);
1061
- logger(
1062
- ` ${paddedName} ${coloredBaseline} \u2192 ${coloredCurrent} ${hasError ? u.dim("(scorer not run)") : u.dim("(") + diffColor(paddedDiff) + u.dim(")")}`
1063
- );
1064
- } else {
1065
- logger(` ${paddedName} ${coloredCurrent} ${hasError ? u.dim("(scorer not run)") : ""}`);
1155
+ });
1156
+
1157
+ // src/util/errors.ts
1158
+ function getCircularReplacer() {
1159
+ const seen = /* @__PURE__ */ new WeakSet();
1160
+ return (_k, v) => {
1161
+ if (typeof v === "object" && v !== null) {
1162
+ if (seen.has(v)) return "[Circular]";
1163
+ seen.add(v);
1066
1164
  }
1067
- });
1165
+ return v;
1166
+ };
1068
1167
  }
1069
- function printOutOfScopeFlags(testMeta, logger = console.log) {
1070
- if (testMeta.case.outOfScopeFlags && testMeta.case.outOfScopeFlags.length > 0) {
1071
- const pickedFlagsText = testMeta.case.pickedFlags ? `(picked: ${testMeta.case.pickedFlags.map((f2) => `'${f2}'`).join(", ")})` : "(none)";
1072
- logger(" ", u.yellow(`\u26A0 Out-of-scope flags: ${pickedFlagsText}`));
1073
- testMeta.case.outOfScopeFlags.forEach((flag) => {
1074
- const timeStr = new Date(flag.accessedAt).toLocaleTimeString();
1075
- logger(" ", `${flag.flagPath} (at ${timeStr})`);
1076
- if (flag.stackTrace && flag.stackTrace.length > 0) {
1077
- flag.stackTrace.forEach((frame, i) => {
1078
- const prefix = i === flag.stackTrace.length - 1 ? " \u2514\u2500" : " \u251C\u2500";
1079
- logger(" ", u.dim(`${prefix} ${frame}`));
1080
- });
1081
- }
1082
- });
1168
+ function safeJson(x) {
1169
+ try {
1170
+ return JSON.stringify(x, getCircularReplacer());
1171
+ } catch {
1172
+ return String(x);
1083
1173
  }
1084
1174
  }
1085
- function printCaseResult(test, baselineCasesByFingerprint, matchedIndices, logger = console.log) {
1086
- const ok = test.ok();
1087
- const testMeta = test.meta();
1088
- if (!testMeta?.case) {
1089
- return;
1090
- }
1091
- printTestCaseSuccessOrFailed(testMeta, ok, logger);
1092
- const fingerprint = getCaseFingerprint(testMeta.case.input, testMeta.case.expected);
1093
- const baselineCases = baselineCasesByFingerprint.get(fingerprint);
1094
- const baselineCase = baselineCases?.shift();
1095
- if (baselineCase) {
1096
- matchedIndices.add(baselineCase.index);
1175
+ function errorToString(err) {
1176
+ try {
1177
+ if (typeof err === "string") return err;
1178
+ if (err instanceof Error) {
1179
+ return err.stack ?? err.message;
1180
+ }
1181
+ if (typeof err === "object" && err !== null) {
1182
+ const msg = err.message;
1183
+ const json = safeJson(err);
1184
+ return msg ? `${msg} (${json})` : json;
1185
+ }
1186
+ return String(err);
1187
+ } catch {
1188
+ return "[unserializable error]";
1097
1189
  }
1098
- printTestCaseScores(testMeta, baselineCase, logger);
1099
- printRuntimeFlags(testMeta, logger);
1100
- printOutOfScopeFlags(testMeta, logger);
1101
1190
  }
1102
- function printOrphanedBaselineCases(baseline, matchedIndices, logger = console.log) {
1103
- const orphanedCases = baseline.cases.filter((c) => !matchedIndices.has(c.index));
1104
- if (orphanedCases.length === 0) {
1105
- return;
1191
+ var AxiomCLIError;
1192
+ var init_errors = __esm({
1193
+ "src/util/errors.ts"() {
1194
+ "use strict";
1195
+ init_cjs_shims();
1196
+ AxiomCLIError = class extends Error {
1197
+ constructor(message) {
1198
+ super(message);
1199
+ this.name = "AxiomCLIError";
1200
+ }
1201
+ };
1106
1202
  }
1107
- logger("");
1108
- logger(" ", u.yellow("Orphaned baseline cases:"));
1109
- for (const orphanedCase of orphanedCases) {
1110
- logger(
1111
- " ",
1112
- u.dim(
1113
- `case ${orphanedCase.index}: ${truncate(orphanedCase.input, 50)} (score: ${truncate(
1114
- JSON.stringify(orphanedCase.scores),
1115
- 50
1116
- )})`
1117
- )
1118
- );
1119
- const keys = Object.keys(orphanedCase.scores);
1120
- if (keys.length > 0) {
1121
- const maxNameLength = Math.max(...keys.map((k) => k.length));
1122
- keys.forEach((k) => {
1123
- const scoreData = orphanedCase.scores[k];
1124
- const rawScore = formatPercentage(scoreData.value);
1125
- const paddedName = k.padEnd(maxNameLength);
1126
- const paddedScore = rawScore.padStart(7);
1127
- logger(` ${paddedName} ${u.blueBright(paddedScore)}`);
1128
- });
1129
- }
1203
+ });
1204
+
1205
+ // src/cli/auth/types.ts
1206
+ var init_types = __esm({
1207
+ "src/cli/auth/types.ts"() {
1208
+ "use strict";
1209
+ init_cjs_shims();
1130
1210
  }
1131
- }
1132
- function printConfigHeader(logger = console.log) {
1133
- logger("");
1134
- logger(" ", u.bgWhite(u.blackBright(" Config ")));
1135
- }
1136
- function printConfigEnd(configEnd, logger = console.log) {
1137
- printConfigHeader(logger);
1138
- maybePrintFlags(configEnd, logger);
1139
- }
1140
- function maybePrintFlags(configEnd, logger = console.log) {
1141
- const defaults = configEnd?.flags ?? {};
1142
- const overrides2 = configEnd?.overrides ?? {};
1143
- const defaultKeys = Object.keys(defaults);
1144
- const overrideKeys = Object.keys(overrides2);
1145
- const allKeys = Array.from(/* @__PURE__ */ new Set([...defaultKeys, ...overrideKeys])).sort();
1146
- if (allKeys.length === 0) {
1147
- return;
1211
+ });
1212
+
1213
+ // src/cli/auth/config.ts
1214
+ function getConfigDir() {
1215
+ const platform = process.platform;
1216
+ const homeDir = import_os.default.homedir();
1217
+ const xdgConfigHome = process.env.XDG_CONFIG_HOME;
1218
+ if (xdgConfigHome) {
1219
+ return import_path.default.join(xdgConfigHome, CONFIG_DIR_NAME);
1148
1220
  }
1149
- for (const key of allKeys) {
1150
- const hasDefault = key in defaults;
1151
- const hasOverride = key in overrides2;
1152
- if (hasDefault && hasOverride) {
1153
- const defVal = defaults[key];
1154
- const ovVal = overrides2[key];
1155
- const changed = !deepEqual(ovVal, defVal);
1156
- const ovText = truncate(stringify(ovVal), 80);
1157
- const defText = truncate(stringify(defVal), 80);
1158
- if (changed) {
1159
- logger(" ", `${key}: ${ovText} ${u.dim(`(overridden by CLI, original: ${defText})`)}`);
1160
- } else {
1161
- logger(" ", `${key}: ${defText}`);
1162
- }
1163
- } else if (hasOverride) {
1164
- const ovText = truncate(stringify(overrides2[key]), 80);
1165
- logger(" ", `${key}: ${ovText} ${u.dim("(added by CLI)")}`);
1166
- } else if (hasDefault) {
1167
- const defText = truncate(stringify(defaults[key]), 80);
1168
- logger(" ", `${key}: ${defText}`);
1221
+ if (platform === "win32") {
1222
+ const appData = process.env.APPDATA;
1223
+ if (appData) {
1224
+ return import_path.default.join(appData, CONFIG_DIR_NAME);
1169
1225
  }
1226
+ return import_path.default.join(homeDir, "AppData", "Roaming", CONFIG_DIR_NAME);
1170
1227
  }
1171
- logger("");
1228
+ return import_path.default.join(homeDir, ".config", CONFIG_DIR_NAME);
1172
1229
  }
1173
- var reporterDate = (d) => {
1174
- const date = d.toISOString().slice(0, 10);
1175
- const hours = d.getUTCHours().toString().padStart(2, "0");
1176
- const minutes = d.getUTCMinutes().toString().padStart(2, "0");
1177
- return `${date}, ${hours}:${minutes} UTC`;
1178
- };
1179
- function printGlobalFlagOverrides(overrides2, defaults, logger = console.log) {
1180
- if (Object.keys(overrides2).length === 0) {
1181
- logger("");
1182
- logger(u.dim("Flag overrides: (none)"));
1183
- logger("");
1184
- return;
1185
- }
1186
- logger("");
1187
- logger("Flag overrides:");
1188
- for (const [key, value] of Object.entries(overrides2)) {
1189
- const defaultValue = defaults[key];
1190
- const valueStr = JSON.stringify(value);
1191
- const defaultStr = defaultValue !== void 0 ? JSON.stringify(defaultValue) : "none";
1192
- logger(` \u2022 ${key}: ${valueStr} ${u.dim(`(default: ${defaultStr})`)}`);
1193
- }
1194
- logger("");
1230
+ function getGlobalConfigPath() {
1231
+ return import_path.default.join(getConfigDir(), CONFIG_FILENAME);
1195
1232
  }
1196
- function printSuiteBox({
1197
- suite,
1198
- scorerAverages,
1199
- calculateBaselineScorerAverage: calculateBaselineScorerAverage2,
1200
- flagDiff,
1201
- logger = console.log
1202
- }) {
1203
- const filename = suite.file.split("/").pop();
1204
- logger("\u250C\u2500");
1205
- logger(`\u2502 ${u.blue(suite.name)} ${u.gray(`(${filename})`)}`);
1206
- logger("\u251C\u2500");
1207
- const scorerNames = Object.keys(scorerAverages);
1208
- const maxNameLength = Math.max(...scorerNames.map((name) => name.length));
1209
- const allCasesErrored = (scorerName) => {
1210
- return suite.cases.every((caseData) => caseData.scores[scorerName]?.metadata?.error);
1211
- };
1212
- for (const scorerName of scorerNames) {
1213
- const avg = scorerAverages[scorerName];
1214
- const paddedName = scorerName.padEnd(maxNameLength);
1215
- const hasAllErrors = allCasesErrored(scorerName);
1216
- const baselineAvg = suite.baseline ? calculateBaselineScorerAverage2(suite.baseline, scorerName) : null;
1217
- if (baselineAvg !== null) {
1218
- const currentPercent = hasAllErrors ? u.dim("N/A") : formatPercentage(avg);
1219
- const baselinePercent = formatPercentage(baselineAvg);
1220
- const { text: diffText, color: diffColor } = formatDiff(avg, baselineAvg);
1221
- const paddedBaseline = baselinePercent.padStart(7);
1222
- const paddedCurrent = hasAllErrors ? currentPercent : currentPercent.padStart(7);
1223
- const diffDisplay = hasAllErrors ? u.dim("all cases failed") : diffColor(diffText.padStart(8));
1224
- logger(
1225
- `\u2502 ${paddedName} ${u.blueBright(paddedBaseline)} \u2192 ${hasAllErrors ? paddedCurrent : u.magentaBright(paddedCurrent)} (${diffDisplay})`
1226
- );
1227
- } else {
1228
- const currentPercent = hasAllErrors ? u.red("N/A (all cases failed)") : formatPercentage(avg);
1229
- logger(`\u2502 \u2022 ${paddedName} ${currentPercent}`);
1233
+ async function loadGlobalConfig() {
1234
+ const configPath = getGlobalConfigPath();
1235
+ try {
1236
+ const content = await import_fs.promises.readFile(configPath, "utf-8");
1237
+ return JSON.parse(content);
1238
+ } catch (error) {
1239
+ if (error.code === "ENOENT") {
1240
+ return { profiles: {} };
1230
1241
  }
1242
+ throw error;
1231
1243
  }
1232
- logger("\u251C\u2500");
1233
- if (suite.baseline) {
1234
- const baselineTimestamp = suite.baseline.runAt ? reporterDate(new Date(suite.baseline.runAt)) : "unknown time";
1235
- logger(
1236
- `\u2502 Baseline: ${suite.baseline.name}-${suite.baseline.version} ${u.gray(`(${baselineTimestamp})`)}`
1237
- );
1238
- } else {
1239
- logger(`\u2502 Baseline: ${u.gray("(none)")}`);
1244
+ }
1245
+ async function saveGlobalConfig(config) {
1246
+ const configPath = getGlobalConfigPath();
1247
+ const configDir = import_path.default.dirname(configPath);
1248
+ const content = JSON.stringify(config, null, 2);
1249
+ await import_fs.promises.mkdir(configDir, { recursive: true, mode: 448 });
1250
+ await import_fs.promises.writeFile(configPath, content, "utf-8");
1251
+ await import_fs.promises.chmod(configPath, 384);
1252
+ }
1253
+ function getActiveProfile(config) {
1254
+ const profileName = config.active_profile;
1255
+ if (!profileName) return null;
1256
+ const profile = config.profiles[profileName];
1257
+ if (!profile) return null;
1258
+ return profile;
1259
+ }
1260
+ var import_fs, import_path, import_os, CONFIG_FILENAME, CONFIG_DIR_NAME;
1261
+ var init_config = __esm({
1262
+ "src/cli/auth/config.ts"() {
1263
+ "use strict";
1264
+ init_cjs_shims();
1265
+ import_fs = require("fs");
1266
+ import_path = __toESM(require("path"), 1);
1267
+ import_os = __toESM(require("os"), 1);
1268
+ CONFIG_FILENAME = "config.json";
1269
+ CONFIG_DIR_NAME = "axiom";
1240
1270
  }
1241
- const hasConfigChanges = flagDiff.length > 0;
1242
- logger("\u2502 Config changes:", hasConfigChanges ? "" : u.gray("(none)"));
1243
- if (hasConfigChanges) {
1244
- for (const { flag, current, baseline, default: defaultVal } of flagDiff) {
1245
- logger(`\u2502 \u2022 ${flag}: ${current ?? "<not set>"}`);
1246
- if (defaultVal !== void 0) {
1247
- logger(`\u2502 ${u.gray(`default: ${defaultVal}`)}`);
1271
+ });
1272
+
1273
+ // src/cli/auth/oauth.ts
1274
+ var import_crypto, OAUTH_CLIENT_ID, OAUTH_AUTH_PATH, OAUTH_TOKEN_PATH, OAuth;
1275
+ var init_oauth = __esm({
1276
+ "src/cli/auth/oauth.ts"() {
1277
+ "use strict";
1278
+ init_cjs_shims();
1279
+ import_crypto = require("crypto");
1280
+ OAUTH_CLIENT_ID = "264d906a404efc209b027f6595e6b616";
1281
+ OAUTH_AUTH_PATH = "/oauth/authorize";
1282
+ OAUTH_TOKEN_PATH = "/oauth/token";
1283
+ OAuth = class {
1284
+ constructor(oauthBaseUrl) {
1285
+ this.oauthBaseUrl = oauthBaseUrl;
1248
1286
  }
1249
- if (suite.baseline) {
1250
- logger(`\u2502 ${u.gray(`baseline: ${baseline ?? "<not set>"}`)}`);
1287
+ static generateCodeVerifier() {
1288
+ return (0, import_crypto.randomBytes)(32).toString("base64url");
1289
+ }
1290
+ static generateCodeChallenge(verifier) {
1291
+ return (0, import_crypto.createHash)("sha256").update(verifier).digest("base64url");
1292
+ }
1293
+ static generateState() {
1294
+ return (0, import_crypto.randomBytes)(16).toString("hex");
1295
+ }
1296
+ buildAuthUrl(params) {
1297
+ const url = new URL(OAUTH_AUTH_PATH, this.oauthBaseUrl);
1298
+ url.searchParams.set("client_id", OAUTH_CLIENT_ID);
1299
+ url.searchParams.set("redirect_uri", params.redirectUri);
1300
+ url.searchParams.set("response_type", "code");
1301
+ url.searchParams.set("state", params.state);
1302
+ url.searchParams.set("code_challenge", params.codeChallenge);
1303
+ url.searchParams.set("code_challenge_method", "S256");
1304
+ url.searchParams.set("scope", "*");
1305
+ return url.toString();
1306
+ }
1307
+ async exchangeCodeForToken(params) {
1308
+ const tokenUrl = new URL(OAUTH_TOKEN_PATH, this.oauthBaseUrl);
1309
+ const body = new URLSearchParams({
1310
+ grant_type: "authorization_code",
1311
+ client_id: OAUTH_CLIENT_ID,
1312
+ code: params.code,
1313
+ redirect_uri: params.redirectUri,
1314
+ code_verifier: params.codeVerifier
1315
+ });
1316
+ const response = await fetch(tokenUrl.toString(), {
1317
+ method: "POST",
1318
+ headers: {
1319
+ "Content-Type": "application/x-www-form-urlencoded"
1320
+ },
1321
+ body: body.toString()
1322
+ });
1323
+ if (!response.ok) {
1324
+ const errorText = await response.text();
1325
+ throw new Error(`Token exchange failed: ${response.status} ${errorText}`);
1326
+ }
1327
+ const data = await response.json();
1328
+ return data.access_token;
1251
1329
  }
1330
+ };
1331
+ }
1332
+ });
1333
+
1334
+ // src/cli/auth/api.ts
1335
+ async function fetchOrganizations(token, apiBaseUrl) {
1336
+ const response = await fetch(`${apiBaseUrl}/v2/orgs`, {
1337
+ headers: {
1338
+ Authorization: `Bearer ${token}`,
1339
+ "Content-Type": "application/json"
1252
1340
  }
1253
- }
1254
- if (suite.outOfScopeFlags && suite.outOfScopeFlags.length > 0) {
1255
- const pickedFlagsText = suite.configFlags && suite.configFlags.length > 0 ? suite.configFlags.map((f2) => `'${f2}'`).join(", ") : "none";
1256
- logger("\u2502");
1257
- logger(`\u2502 ${u.yellow("\u26A0 Out-of-scope flags")} ${u.gray(`(picked: ${pickedFlagsText})`)}:`);
1258
- for (const flag of suite.outOfScopeFlags) {
1259
- const lastStackTraceFrame = flag.stackTrace[0];
1260
- const lastStackTraceFnName = lastStackTraceFrame.split(" ").shift();
1261
- const lastStackTraceFile = lastStackTraceFrame.split("/").pop()?.slice(0, -1);
1262
- logger(
1263
- `\u2502 \u2022 ${flag.flagPath} ${u.gray(`at ${lastStackTraceFnName} (${lastStackTraceFile})`)}`
1264
- );
1265
- }
1266
- }
1267
- logger("\u2514\u2500");
1268
- }
1269
- function calculateScorerAverages(suite) {
1270
- const scorerTotals = {};
1271
- for (const caseData of suite.cases) {
1272
- for (const [scorerName, score] of Object.entries(caseData.scores)) {
1273
- if (!scorerTotals[scorerName]) {
1274
- scorerTotals[scorerName] = { sum: 0, count: 0 };
1275
- }
1276
- if (!score.metadata?.error) {
1277
- scorerTotals[scorerName].sum += score.score || 0;
1278
- scorerTotals[scorerName].count += 1;
1279
- }
1280
- }
1281
- }
1282
- const averages = {};
1283
- for (const [scorerName, totals] of Object.entries(scorerTotals)) {
1284
- averages[scorerName] = totals.count > 0 ? totals.sum / totals.count : 0;
1285
- }
1286
- return averages;
1287
- }
1288
- function calculateBaselineScorerAverage(baseline, scorerName) {
1289
- const scores = [];
1290
- for (const caseData of baseline.cases) {
1291
- if (caseData.scores[scorerName]) {
1292
- scores.push(caseData.scores[scorerName].value);
1293
- }
1294
- }
1295
- if (scores.length === 0) return null;
1296
- const sum = scores.reduce((acc, val) => acc + val, 0);
1297
- return sum / scores.length;
1298
- }
1299
- function calculateFlagDiff(suite) {
1300
- if (!suite.configFlags || suite.configFlags.length === 0) {
1301
- return [];
1302
- }
1303
- const diffs = [];
1304
- const currentConfig = suite.flagConfig || {};
1305
- const baselineConfig = suite.baseline?.flagConfig || {};
1306
- const defaultConfig = suite.defaultFlagConfig || {};
1307
- const currentFlat = flattenObject(currentConfig);
1308
- const baselineFlat = flattenObject(baselineConfig);
1309
- const defaultFlat = flattenObject(defaultConfig);
1310
- const allKeys = /* @__PURE__ */ new Set([
1311
- ...Object.keys(currentFlat),
1312
- ...Object.keys(baselineFlat),
1313
- ...Object.keys(defaultFlat)
1314
- ]);
1315
- for (const key of allKeys) {
1316
- const isInScope = suite.configFlags.some((pattern) => key.startsWith(pattern));
1317
- if (!isInScope) continue;
1318
- const currentValue = currentFlat[key];
1319
- const baselineValue = baselineFlat[key];
1320
- const defaultValue = defaultFlat[key];
1321
- const currentStr = currentValue !== void 0 ? JSON.stringify(currentValue) : void 0;
1322
- const baselineStr = baselineValue !== void 0 ? JSON.stringify(baselineValue) : void 0;
1323
- const defaultStr = defaultValue !== void 0 ? JSON.stringify(defaultValue) : void 0;
1324
- const diffFromBaseline = suite.baseline && currentStr !== baselineStr;
1325
- const diffFromDefault = currentStr !== defaultStr;
1326
- if (diffFromBaseline || diffFromDefault) {
1327
- diffs.push({
1328
- flag: key,
1329
- current: currentStr,
1330
- baseline: suite.baseline ? baselineStr : void 0,
1331
- default: defaultStr
1332
- });
1333
- }
1334
- }
1335
- return diffs;
1336
- }
1337
- function printFinalReport({
1338
- suiteData,
1339
- config,
1340
- registrationStatus,
1341
- isDebug,
1342
- logger = console.log
1343
- }) {
1344
- logger("");
1345
- logger(u.bgBlue(u.white(" FINAL EVALUATION REPORT ")));
1346
- logger("");
1347
- for (const suite of suiteData) {
1348
- const scorerAverages = calculateScorerAverages(suite);
1349
- const flagDiff = calculateFlagDiff(suite);
1350
- printSuiteBox({ suite, scorerAverages, calculateBaselineScorerAverage, flagDiff, logger });
1351
- logger("");
1352
- }
1353
- const runId = suiteData[0]?.runId;
1354
- const orgId = suiteData[0]?.orgId;
1355
- const anyRegistered = registrationStatus.some((s2) => s2.registered);
1356
- const anyFailed = registrationStatus.some((s2) => !s2.registered);
1357
- if (anyRegistered && orgId && config?.consoleEndpointUrl) {
1358
- if (suiteData.length === 1) {
1359
- const suite = suiteData[0];
1360
- const baselineParam = suite.baseline?.traceId ? `?baselineId=${suite.baseline.traceId}` : "";
1361
- logger("View eval result:");
1362
- logger(
1363
- `${config.consoleEndpointUrl}/${orgId}/ai-engineering/evaluations/${suite.name}/${suite.version}${baselineParam}`
1364
- );
1365
- } else {
1366
- logger("View full report:");
1367
- logger(`${config.consoleEndpointUrl}/${orgId}/ai-engineering/evaluations?runId=${runId}`);
1368
- }
1369
- } else if (isDebug) {
1370
- logger(u.dim("Results not uploaded to Axiom (debug mode)"));
1371
- } else {
1372
- logger("Results not available in Axiom UI (registration failed)");
1373
- }
1374
- if (anyFailed) {
1375
- logger("");
1376
- for (const status of registrationStatus) {
1377
- if (!status.registered) {
1378
- logger(u.yellow(`\u26A0\uFE0F Warning: Failed to register "${status.name}" with Axiom`));
1379
- if (status.error) {
1380
- logger(u.dim(` Error: ${status.error}`));
1381
- }
1382
- logger(u.dim(` Results for this evaluation will not be available in the Axiom UI.`));
1383
- }
1384
- }
1385
- }
1386
- }
1387
-
1388
- // src/config/resolver.ts
1389
- init_cjs_shims();
1390
- var buildConsoleUrl = (urlString) => {
1391
- const url = new URL(urlString);
1392
- return `${url.protocol}//app.${url.host.split("api.").at(-1)}`;
1393
- };
1394
- function resolveAxiomConnection(config, consoleUrlOverride) {
1395
- const consoleEndpointUrl = consoleUrlOverride ?? buildConsoleUrl(config.eval.url);
1396
- return {
1397
- url: config.eval.url,
1398
- consoleEndpointUrl,
1399
- token: config.eval.token,
1400
- dataset: config.eval.dataset,
1401
- orgId: config.eval.orgId
1402
- };
1403
- }
1404
-
1405
- // src/evals/reporter.ts
1406
- var AxiomReporter = class {
1407
- constructor() {
1408
- __publicField(this, "startTime", 0);
1409
- __publicField(this, "start", 0);
1410
- __publicField(this, "_endOfRunConfigEnd");
1411
- __publicField(this, "_suiteData", []);
1412
- __publicField(this, "_printedFlagOverrides", false);
1413
- __publicField(this, "_config");
1414
- }
1415
- onTestRunStart() {
1416
- this.start = performance.now();
1417
- this.startTime = (/* @__PURE__ */ new Date()).getTime();
1418
- const config = getAxiomConfig();
1419
- if (config) {
1420
- this._config = resolveAxiomConnection(config, getConsoleUrl());
1421
- }
1422
- }
1423
- async onTestSuiteReady(_testSuite) {
1424
- const meta = _testSuite.meta();
1425
- if (_testSuite.state() === "skipped" || !meta?.evaluation) {
1426
- return;
1427
- }
1428
- if (!this._printedFlagOverrides) {
1429
- const defaultsFromConfigEnd = meta.evaluation.configEnd?.flags ?? {};
1430
- const overridesFromConfigEnd = meta.evaluation.configEnd?.overrides ?? {};
1431
- if (Object.keys(overridesFromConfigEnd).length > 0) {
1432
- printGlobalFlagOverrides(overridesFromConfigEnd, defaultsFromConfigEnd);
1433
- }
1434
- this._printedFlagOverrides = true;
1435
- }
1436
- if (meta.evaluation.configEnd && !this._endOfRunConfigEnd) {
1437
- this._endOfRunConfigEnd = meta.evaluation.configEnd;
1438
- }
1439
- }
1440
- onTestCaseReady(test) {
1441
- const meta = test.meta();
1442
- if (!meta.case) return;
1443
- }
1444
- async onTestSuiteResult(testSuite) {
1445
- const meta = testSuite.meta();
1446
- if (testSuite.state() === "skipped" || !meta?.evaluation) {
1447
- return;
1448
- }
1449
- if (meta.evaluation.configEnd && !this._endOfRunConfigEnd) {
1450
- this._endOfRunConfigEnd = meta.evaluation.configEnd;
1451
- }
1452
- const durationSeconds = Number((performance.now() - this.start) / 1e3).toFixed(2);
1453
- const cases = [];
1454
- for (const test of testSuite.children) {
1455
- if (test.type !== "test") continue;
1456
- const testMeta = test.meta();
1457
- if (!testMeta?.case) continue;
1458
- cases.push({
1459
- index: testMeta.case.index,
1460
- scores: testMeta.case.scores,
1461
- outOfScopeFlags: testMeta.case.outOfScopeFlags,
1462
- errors: testMeta.case.errors,
1463
- runtimeFlags: testMeta.case.runtimeFlags
1464
- });
1465
- }
1466
- const cwd = process.cwd();
1467
- const relativePath = testSuite.module.moduleId.replace(cwd, "").replace(/^\//, "");
1468
- let suiteBaseline = meta.evaluation.baseline;
1469
- let flagConfig = meta.evaluation.flagConfig;
1470
- if (meta.evaluation.configEnd) {
1471
- const defaults = meta.evaluation.configEnd.flags ?? {};
1472
- const overrides2 = meta.evaluation.configEnd.overrides ?? {};
1473
- const defaultsFlat = flattenObject(defaults);
1474
- const overridesFlat = flattenObject(overrides2);
1475
- flagConfig = dotNotationToNested({ ...defaultsFlat, ...overridesFlat });
1476
- }
1477
- const defaultFlagConfig = meta.evaluation.configEnd?.flags;
1478
- this._suiteData.push({
1479
- version: meta.evaluation.version,
1480
- name: meta.evaluation.name,
1481
- file: relativePath,
1482
- duration: durationSeconds + "s",
1483
- baseline: suiteBaseline || null,
1484
- configFlags: meta.evaluation.configFlags,
1485
- flagConfig,
1486
- defaultFlagConfig,
1487
- runId: meta.evaluation.runId,
1488
- orgId: meta.evaluation.orgId,
1489
- cases,
1490
- outOfScopeFlags: meta.evaluation.outOfScopeFlags,
1491
- registrationStatus: meta.evaluation.registrationStatus
1492
- });
1493
- printEvalNameAndFileName(testSuite, meta);
1494
- printBaselineNameAndVersion(meta);
1495
- printTestCaseCountStartDuration(testSuite, this.startTime, durationSeconds);
1496
- const matchedBaselineIndices = /* @__PURE__ */ new Set();
1497
- const baselineCasesByFingerprint = /* @__PURE__ */ new Map();
1498
- if (suiteBaseline) {
1499
- for (const c of suiteBaseline.cases) {
1500
- const fp = getCaseFingerprint(c.input, c.expected);
1501
- const cases2 = baselineCasesByFingerprint.get(fp) || [];
1502
- cases2.push(c);
1503
- baselineCasesByFingerprint.set(fp, cases2);
1504
- }
1505
- }
1506
- for (const test of testSuite.children) {
1507
- if (test.type !== "test") continue;
1508
- printCaseResult(test, baselineCasesByFingerprint, matchedBaselineIndices);
1509
- }
1510
- if (suiteBaseline) {
1511
- printOrphanedBaselineCases(suiteBaseline, matchedBaselineIndices);
1512
- }
1513
- console.log("");
1514
- }
1515
- async onTestRunEnd(_testModules, _errors, _reason) {
1516
- const shouldClear = !process.env.CI && process.stdout.isTTY !== false;
1517
- if (shouldClear) {
1518
- process.stdout.write("\x1B[2J\x1B[0f");
1519
- }
1520
- const registrationStatus = this._suiteData.map((suite) => ({
1521
- name: suite.name,
1522
- registered: suite.registrationStatus?.status === "success",
1523
- error: suite.registrationStatus?.status === "failed" ? suite.registrationStatus.error : void 0
1524
- }));
1525
- const DEBUG = process.env.AXIOM_DEBUG === "true";
1526
- printFinalReport({
1527
- suiteData: this._suiteData,
1528
- config: this._config,
1529
- registrationStatus,
1530
- isDebug: DEBUG
1531
- });
1532
- if (DEBUG && this._endOfRunConfigEnd) {
1533
- printConfigEnd(this._endOfRunConfigEnd);
1534
- }
1535
- }
1536
- };
1537
-
1538
- // src/evals/instrument.ts
1539
- init_cjs_shims();
1540
- var import_sdk_trace_node = require("@opentelemetry/sdk-trace-node");
1541
- var import_resources = require("@opentelemetry/resources");
1542
- var import_exporter_trace_otlp_http = require("@opentelemetry/exporter-trace-otlp-http");
1543
- var import_api10 = require("@opentelemetry/api");
1544
-
1545
- // src/util/errors.ts
1546
- init_cjs_shims();
1547
- var AxiomCLIError = class extends Error {
1548
- constructor(message) {
1549
- super(message);
1550
- this.name = "AxiomCLIError";
1551
- }
1552
- };
1553
- function getCircularReplacer() {
1554
- const seen = /* @__PURE__ */ new WeakSet();
1555
- return (_k, v) => {
1556
- if (typeof v === "object" && v !== null) {
1557
- if (seen.has(v)) return "[Circular]";
1558
- seen.add(v);
1559
- }
1560
- return v;
1561
- };
1562
- }
1563
- function safeJson(x) {
1564
- try {
1565
- return JSON.stringify(x, getCircularReplacer());
1566
- } catch {
1567
- return String(x);
1568
- }
1569
- }
1570
- function errorToString(err) {
1571
- try {
1572
- if (typeof err === "string") return err;
1573
- if (err instanceof Error) {
1574
- return err.stack ?? err.message;
1575
- }
1576
- if (typeof err === "object" && err !== null) {
1577
- const msg = err.message;
1578
- const json = safeJson(err);
1579
- return msg ? `${msg} (${json})` : json;
1580
- }
1581
- return String(err);
1582
- } catch {
1583
- return "[unserializable error]";
1584
- }
1585
- }
1586
-
1587
- // src/config/loader.ts
1588
- init_cjs_shims();
1589
- var import_c12 = require("c12");
1590
- var import_defu = require("defu");
1591
-
1592
- // src/config/index.ts
1593
- init_cjs_shims();
1594
- var import_zod6 = require("zod");
1595
-
1596
- // src/cli/auth/index.ts
1597
- init_cjs_shims();
1598
-
1599
- // src/cli/auth/types.ts
1600
- init_cjs_shims();
1601
-
1602
- // src/cli/auth/config.ts
1603
- init_cjs_shims();
1604
- var import_fs = require("fs");
1605
- var import_path = __toESM(require("path"), 1);
1606
- var import_os = __toESM(require("os"), 1);
1607
- var CONFIG_FILENAME = "config.json";
1608
- var CONFIG_DIR_NAME = "axiom";
1609
- function getConfigDir() {
1610
- const platform = process.platform;
1611
- const homeDir = import_os.default.homedir();
1612
- const xdgConfigHome = process.env.XDG_CONFIG_HOME;
1613
- if (xdgConfigHome) {
1614
- return import_path.default.join(xdgConfigHome, CONFIG_DIR_NAME);
1615
- }
1616
- if (platform === "win32") {
1617
- const appData = process.env.APPDATA;
1618
- if (appData) {
1619
- return import_path.default.join(appData, CONFIG_DIR_NAME);
1620
- }
1621
- return import_path.default.join(homeDir, "AppData", "Roaming", CONFIG_DIR_NAME);
1622
- }
1623
- return import_path.default.join(homeDir, ".config", CONFIG_DIR_NAME);
1624
- }
1625
- function getGlobalConfigPath() {
1626
- return import_path.default.join(getConfigDir(), CONFIG_FILENAME);
1627
- }
1628
- async function loadGlobalConfig() {
1629
- const configPath = getGlobalConfigPath();
1630
- try {
1631
- const content = await import_fs.promises.readFile(configPath, "utf-8");
1632
- return JSON.parse(content);
1633
- } catch (error) {
1634
- if (error.code === "ENOENT") {
1635
- return { profiles: {} };
1636
- }
1637
- throw error;
1638
- }
1639
- }
1640
- async function saveGlobalConfig(config) {
1641
- const configPath = getGlobalConfigPath();
1642
- const configDir = import_path.default.dirname(configPath);
1643
- const content = JSON.stringify(config, null, 2);
1644
- await import_fs.promises.mkdir(configDir, { recursive: true, mode: 448 });
1645
- await import_fs.promises.writeFile(configPath, content, "utf-8");
1646
- await import_fs.promises.chmod(configPath, 384);
1647
- }
1648
- function getActiveProfile(config) {
1649
- const profileName = config.active_profile;
1650
- if (!profileName) return null;
1651
- const profile = config.profiles[profileName];
1652
- if (!profile) return null;
1653
- return profile;
1654
- }
1655
-
1656
- // src/cli/auth/oauth.ts
1657
- init_cjs_shims();
1658
- var import_crypto = require("crypto");
1659
- var OAUTH_CLIENT_ID = "264d906a404efc209b027f6595e6b616";
1660
- var OAUTH_AUTH_PATH = "/oauth/authorize";
1661
- var OAUTH_TOKEN_PATH = "/oauth/token";
1662
- var OAuth = class {
1663
- constructor(oauthBaseUrl) {
1664
- this.oauthBaseUrl = oauthBaseUrl;
1665
- }
1666
- static generateCodeVerifier() {
1667
- return (0, import_crypto.randomBytes)(32).toString("base64url");
1668
- }
1669
- static generateCodeChallenge(verifier) {
1670
- return (0, import_crypto.createHash)("sha256").update(verifier).digest("base64url");
1671
- }
1672
- static generateState() {
1673
- return (0, import_crypto.randomBytes)(16).toString("hex");
1674
- }
1675
- buildAuthUrl(params) {
1676
- const url = new URL(OAUTH_AUTH_PATH, this.oauthBaseUrl);
1677
- url.searchParams.set("client_id", OAUTH_CLIENT_ID);
1678
- url.searchParams.set("redirect_uri", params.redirectUri);
1679
- url.searchParams.set("response_type", "code");
1680
- url.searchParams.set("state", params.state);
1681
- url.searchParams.set("code_challenge", params.codeChallenge);
1682
- url.searchParams.set("code_challenge_method", "S256");
1683
- url.searchParams.set("scope", "*");
1684
- return url.toString();
1685
- }
1686
- async exchangeCodeForToken(params) {
1687
- const tokenUrl = new URL(OAUTH_TOKEN_PATH, this.oauthBaseUrl);
1688
- const body = new URLSearchParams({
1689
- grant_type: "authorization_code",
1690
- client_id: OAUTH_CLIENT_ID,
1691
- code: params.code,
1692
- redirect_uri: params.redirectUri,
1693
- code_verifier: params.codeVerifier
1694
- });
1695
- const response = await fetch(tokenUrl.toString(), {
1696
- method: "POST",
1697
- headers: {
1698
- "Content-Type": "application/x-www-form-urlencoded"
1699
- },
1700
- body: body.toString()
1701
- });
1702
- if (!response.ok) {
1703
- const errorText = await response.text();
1704
- throw new Error(`Token exchange failed: ${response.status} ${errorText}`);
1705
- }
1706
- const data = await response.json();
1707
- return data.access_token;
1708
- }
1709
- };
1710
-
1711
- // src/cli/auth/api.ts
1712
- init_cjs_shims();
1713
- async function fetchOrganizations(token, apiBaseUrl) {
1714
- const response = await fetch(`${apiBaseUrl}/v2/orgs`, {
1715
- headers: {
1716
- Authorization: `Bearer ${token}`,
1717
- "Content-Type": "application/json"
1718
- }
1719
- });
1720
- if (!response.ok) {
1721
- throw new AxiomCLIError(
1722
- `Failed to fetch organizations: ${response.status} ${response.statusText}`
1723
- );
1341
+ });
1342
+ if (!response.ok) {
1343
+ throw new AxiomCLIError(
1344
+ `Failed to fetch organizations: ${response.status} ${response.statusText}`
1345
+ );
1724
1346
  }
1725
1347
  const data = await response.json();
1726
1348
  return data;
@@ -1735,10 +1357,15 @@ async function verifyToken(token, orgId, apiBaseUrl) {
1735
1357
  });
1736
1358
  return response.ok;
1737
1359
  }
1360
+ var init_api = __esm({
1361
+ "src/cli/auth/api.ts"() {
1362
+ "use strict";
1363
+ init_cjs_shims();
1364
+ init_errors();
1365
+ }
1366
+ });
1738
1367
 
1739
1368
  // src/cli/auth/callback-server.ts
1740
- init_cjs_shims();
1741
- var import_http = __toESM(require("http"), 1);
1742
1369
  function escapeHtml(text) {
1743
1370
  const map = {
1744
1371
  "&": "&amp;",
@@ -1750,9 +1377,6 @@ function escapeHtml(text) {
1750
1377
  };
1751
1378
  return text.replace(/[&<>"']/g, (m2) => map[m2] || m2);
1752
1379
  }
1753
- var SVG_LOGO = `<svg width="124" height="24" viewBox="0 0 124 24" fill="none" xmlns="http://www.w3.org/2000/svg" class="logo">
1754
- <path d="M42.9919 16.8116H36.3696L35.5537 19.1572C35.4209 19.539 34.9714 19.8513 34.5548 19.8513H30.9779C30.5614 19.8513 30.3459 19.5449 30.499 19.1703L36.9816 3.31644C37.1346 2.9419 37.6009 2.63546 38.0174 2.63546H41.3718C41.7883 2.63546 42.2541 2.94207 42.4067 3.3168L48.8634 19.17C49.016 19.5447 48.8 19.8513 48.3835 19.8513H44.8067C44.3901 19.8513 43.9406 19.539 43.8078 19.1572L42.9919 16.8116ZM41.8232 13.4223L39.6807 7.18148L37.5383 13.4223H41.8232ZM64.1105 19.8513C63.694 19.8513 63.1767 19.5694 62.9611 19.2247L59.9029 14.3369L56.8447 19.2247C56.6291 19.5694 56.1119 19.8513 55.6953 19.8513H51.3392C50.9227 19.8513 50.7754 19.5801 51.0119 19.2486L56.8978 11.0013L51.5315 3.24414C51.2999 2.90937 51.4513 2.63546 51.8679 2.63546H55.89C56.3066 2.63546 56.8268 2.91563 57.0461 3.25807L59.9029 7.71959L62.7343 3.25955C62.9522 2.9163 63.4713 2.63546 63.8879 2.63546H67.938C68.3545 2.63546 68.5048 2.90868 68.272 3.24261L62.8801 10.9743L68.7935 19.2489C69.0303 19.5802 68.8832 19.8513 68.4666 19.8513H64.1105ZM76.525 19.119C76.525 19.5218 76.1841 19.8513 75.7675 19.8513H72.5522C72.1356 19.8513 71.7947 19.5218 71.7947 19.119V3.36771C71.7947 2.96498 72.1356 2.63546 72.5522 2.63546H75.7675C76.1841 2.63546 76.525 2.96498 76.525 3.36771V19.119ZM79.3736 11.1896C79.3736 6.18625 83.2688 2.15134 89.2511 2.15134C95.2334 2.15134 99.1289 6.18625 99.1289 11.1896C99.1289 16.2199 95.2334 20.2548 89.2511 20.2548C83.2688 20.2548 79.3736 16.2199 79.3736 11.1896ZM94.2873 11.1896C94.2873 8.58038 92.3953 6.21307 89.2511 6.21307C86.1349 6.21307 84.2149 8.58038 84.2149 11.1896C84.2149 13.7989 86.1349 16.2199 89.2511 16.2199C92.3675 16.2199 94.2873 13.7989 94.2873 11.1896ZM119.318 19.8513C118.902 19.8513 118.517 19.5245 118.463 19.1251L117.225 9.89847L113.65 19.1652C113.505 19.5426 113.045 19.8513 112.628 19.8513H111.055C110.638 19.8513 110.179 19.5423 110.034 19.1645L106.457 9.81766L105.218 19.125C105.165 19.5245 104.781 19.8513 104.364 19.8513H101.344C100.927 19.8513 100.633 19.5249 100.69 19.1259L102.932 3.3609C102.988 2.96191 103.376 2.63546 103.792 2.63546H106.59C107.007 2.63546 107.469 2.94351 107.617 3.32002L111.827 14.041L116.064 3.31949C116.213 2.94327 116.676 2.63546 117.092 2.63546H119.89C120.307 2.63546 120.694 2.96191 120.751 3.3609L122.993 19.1259C123.05 19.5249 122.755 19.8513 122.339 19.8513H119.318ZM23.9616 15.6531L18.8054 6.97021C18.5689 6.57115 17.9863 6.24465 17.5106 6.24465H14.2915C13.5433 6.24465 13.2365 5.73171 13.6097 5.1048L15.375 2.13986C15.5151 1.90455 15.5148 1.61487 15.3743 1.37981C15.2337 1.14476 14.9741 1 14.6933 1H10.2025C9.72681 1 9.14291 1.32577 8.90491 1.72395L0.17865 16.3217C-0.0594434 16.7199 -0.0595348 17.3715 0.178285 17.7698L2.42362 21.5297C2.79777 22.1561 3.41129 22.1569 3.78699 21.5313L5.54143 18.6103C5.91722 17.9847 6.53065 17.9854 6.9048 18.6119L8.49538 21.2754C8.7332 21.6737 9.317 21.9995 9.79273 21.9995H20.1698C20.6455 21.9995 21.2293 21.6737 21.4672 21.2754L23.959 17.1028C24.1968 16.7045 24.198 16.0521 23.9616 15.6531ZM16.9981 15.2352C17.3699 15.8629 17.0619 16.3765 16.3136 16.3765H8.24192C7.49372 16.3765 7.1876 15.864 7.56175 15.2375L11.6007 8.47417C11.9748 7.84772 12.5869 7.84774 12.9611 8.47421L16.9981 15.2352Z" fill="#121224"/>
1755
- </svg>`;
1756
1380
  function renderCallbackPage(error) {
1757
1381
  const errorClass = error ? ' class="error"' : "";
1758
1382
  const errorMessage = error ? escapeHtml(error) : "";
@@ -1899,10 +1523,19 @@ async function waitForCallback(server, expectedState) {
1899
1523
  });
1900
1524
  });
1901
1525
  }
1526
+ var import_http, SVG_LOGO;
1527
+ var init_callback_server = __esm({
1528
+ "src/cli/auth/callback-server.ts"() {
1529
+ "use strict";
1530
+ init_cjs_shims();
1531
+ import_http = __toESM(require("http"), 1);
1532
+ SVG_LOGO = `<svg width="124" height="24" viewBox="0 0 124 24" fill="none" xmlns="http://www.w3.org/2000/svg" class="logo">
1533
+ <path d="M42.9919 16.8116H36.3696L35.5537 19.1572C35.4209 19.539 34.9714 19.8513 34.5548 19.8513H30.9779C30.5614 19.8513 30.3459 19.5449 30.499 19.1703L36.9816 3.31644C37.1346 2.9419 37.6009 2.63546 38.0174 2.63546H41.3718C41.7883 2.63546 42.2541 2.94207 42.4067 3.3168L48.8634 19.17C49.016 19.5447 48.8 19.8513 48.3835 19.8513H44.8067C44.3901 19.8513 43.9406 19.539 43.8078 19.1572L42.9919 16.8116ZM41.8232 13.4223L39.6807 7.18148L37.5383 13.4223H41.8232ZM64.1105 19.8513C63.694 19.8513 63.1767 19.5694 62.9611 19.2247L59.9029 14.3369L56.8447 19.2247C56.6291 19.5694 56.1119 19.8513 55.6953 19.8513H51.3392C50.9227 19.8513 50.7754 19.5801 51.0119 19.2486L56.8978 11.0013L51.5315 3.24414C51.2999 2.90937 51.4513 2.63546 51.8679 2.63546H55.89C56.3066 2.63546 56.8268 2.91563 57.0461 3.25807L59.9029 7.71959L62.7343 3.25955C62.9522 2.9163 63.4713 2.63546 63.8879 2.63546H67.938C68.3545 2.63546 68.5048 2.90868 68.272 3.24261L62.8801 10.9743L68.7935 19.2489C69.0303 19.5802 68.8832 19.8513 68.4666 19.8513H64.1105ZM76.525 19.119C76.525 19.5218 76.1841 19.8513 75.7675 19.8513H72.5522C72.1356 19.8513 71.7947 19.5218 71.7947 19.119V3.36771C71.7947 2.96498 72.1356 2.63546 72.5522 2.63546H75.7675C76.1841 2.63546 76.525 2.96498 76.525 3.36771V19.119ZM79.3736 11.1896C79.3736 6.18625 83.2688 2.15134 89.2511 2.15134C95.2334 2.15134 99.1289 6.18625 99.1289 11.1896C99.1289 16.2199 95.2334 20.2548 89.2511 20.2548C83.2688 20.2548 79.3736 16.2199 79.3736 11.1896ZM94.2873 11.1896C94.2873 8.58038 92.3953 6.21307 89.2511 6.21307C86.1349 6.21307 84.2149 8.58038 84.2149 11.1896C84.2149 13.7989 86.1349 16.2199 89.2511 16.2199C92.3675 16.2199 94.2873 13.7989 94.2873 11.1896ZM119.318 19.8513C118.902 19.8513 118.517 19.5245 118.463 19.1251L117.225 9.89847L113.65 19.1652C113.505 19.5426 113.045 19.8513 112.628 19.8513H111.055C110.638 19.8513 110.179 19.5423 110.034 19.1645L106.457 9.81766L105.218 19.125C105.165 19.5245 104.781 19.8513 104.364 19.8513H101.344C100.927 19.8513 100.633 19.5249 100.69 19.1259L102.932 3.3609C102.988 2.96191 103.376 2.63546 103.792 2.63546H106.59C107.007 2.63546 107.469 2.94351 107.617 3.32002L111.827 14.041L116.064 3.31949C116.213 2.94327 116.676 2.63546 117.092 2.63546H119.89C120.307 2.63546 120.694 2.96191 120.751 3.3609L122.993 19.1259C123.05 19.5249 122.755 19.8513 122.339 19.8513H119.318ZM23.9616 15.6531L18.8054 6.97021C18.5689 6.57115 17.9863 6.24465 17.5106 6.24465H14.2915C13.5433 6.24465 13.2365 5.73171 13.6097 5.1048L15.375 2.13986C15.5151 1.90455 15.5148 1.61487 15.3743 1.37981C15.2337 1.14476 14.9741 1 14.6933 1H10.2025C9.72681 1 9.14291 1.32577 8.90491 1.72395L0.17865 16.3217C-0.0594434 16.7199 -0.0595348 17.3715 0.178285 17.7698L2.42362 21.5297C2.79777 22.1561 3.41129 22.1569 3.78699 21.5313L5.54143 18.6103C5.91722 17.9847 6.53065 17.9854 6.9048 18.6119L8.49538 21.2754C8.7332 21.6737 9.317 21.9995 9.79273 21.9995H20.1698C20.6455 21.9995 21.2293 21.6737 21.4672 21.2754L23.959 17.1028C24.1968 16.7045 24.198 16.0521 23.9616 15.6531ZM16.9981 15.2352C17.3699 15.8629 17.0619 16.3765 16.3136 16.3765H8.24192C7.49372 16.3765 7.1876 15.864 7.56175 15.2375L11.6007 8.47417C11.9748 7.84772 12.5869 7.84774 12.9611 8.47421L16.9981 15.2352Z" fill="#121224"/>
1534
+ </svg>`;
1535
+ }
1536
+ });
1902
1537
 
1903
1538
  // src/cli/auth/global-auth.ts
1904
- init_cjs_shims();
1905
- var authContext = null;
1906
1539
  function getAuthContext() {
1907
1540
  return authContext;
1908
1541
  }
@@ -1918,9 +1551,31 @@ async function setupGlobalAuth() {
1918
1551
  }
1919
1552
  return authContext;
1920
1553
  }
1554
+ var authContext;
1555
+ var init_global_auth = __esm({
1556
+ "src/cli/auth/global-auth.ts"() {
1557
+ "use strict";
1558
+ init_cjs_shims();
1559
+ init_config();
1560
+ authContext = null;
1561
+ }
1562
+ });
1563
+
1564
+ // src/cli/auth/index.ts
1565
+ var init_auth = __esm({
1566
+ "src/cli/auth/index.ts"() {
1567
+ "use strict";
1568
+ init_cjs_shims();
1569
+ init_types();
1570
+ init_config();
1571
+ init_oauth();
1572
+ init_api();
1573
+ init_callback_server();
1574
+ init_global_auth();
1575
+ }
1576
+ });
1921
1577
 
1922
1578
  // src/config/index.ts
1923
- var DEFAULT_EVAL_INCLUDE = ["**/*.eval.{ts,js,mts,mjs,cts,cjs}"];
1924
1579
  function createPartialDefaults() {
1925
1580
  let token;
1926
1581
  let url;
@@ -1953,20 +1608,23 @@ function createPartialDefaults() {
1953
1608
  }
1954
1609
  function validateConfig(config) {
1955
1610
  const errors = [];
1956
- if (!config.eval?.token) {
1957
- errors.push(
1958
- "eval.token is required (set in axiom.config.ts or AXIOM_TOKEN environment variable)"
1959
- );
1960
- }
1961
- if (!config.eval?.dataset) {
1962
- errors.push(
1963
- "eval.dataset is required (set in axiom.config.ts or AXIOM_DATASET environment variable)"
1964
- );
1965
- }
1966
- if (!config.eval?.url) {
1967
- console.log(
1968
- "eval.url was not specified. Defaulting to `https://api.axiom.co`. Please set it in axiom.config.ts or AXIOM_URL environment variable if you want to use a different endpoint."
1969
- );
1611
+ const isDebug = process.env.AXIOM_DEBUG === "true";
1612
+ if (!isDebug) {
1613
+ if (!config.eval?.token) {
1614
+ errors.push(
1615
+ "eval.token is required (set in axiom.config.ts or AXIOM_TOKEN environment variable)"
1616
+ );
1617
+ }
1618
+ if (!config.eval?.dataset) {
1619
+ errors.push(
1620
+ "eval.dataset is required (set in axiom.config.ts or AXIOM_DATASET environment variable)"
1621
+ );
1622
+ }
1623
+ if (!config.eval?.url) {
1624
+ console.log(
1625
+ "eval.url was not specified. Defaulting to `https://api.axiom.co`. Please set it in axiom.config.ts or AXIOM_URL environment variable if you want to use a different endpoint."
1626
+ );
1627
+ }
1970
1628
  }
1971
1629
  const instrumentation = config.eval?.instrumentation;
1972
1630
  if (instrumentation !== null && instrumentation !== void 0 && typeof instrumentation !== "function") {
@@ -1980,6 +1638,17 @@ function validateConfig(config) {
1980
1638
  }
1981
1639
  return config;
1982
1640
  }
1641
+ var import_zod7, DEFAULT_EVAL_INCLUDE;
1642
+ var init_config2 = __esm({
1643
+ "src/config/index.ts"() {
1644
+ "use strict";
1645
+ init_cjs_shims();
1646
+ import_zod7 = require("zod");
1647
+ init_errors();
1648
+ init_auth();
1649
+ DEFAULT_EVAL_INCLUDE = ["**/*.eval.{ts,js,mts,mjs,cts,cjs}"];
1650
+ }
1651
+ });
1983
1652
 
1984
1653
  // src/config/loader.ts
1985
1654
  function customMerger(target, source) {
@@ -1987,478 +1656,999 @@ function customMerger(target, source) {
1987
1656
  if (source?.eval && "include" in source.eval) {
1988
1657
  merged.eval.include = source.eval.include;
1989
1658
  }
1990
- if (source?.eval && "flagSchema" in source.eval) {
1991
- merged.eval.flagSchema = source.eval.flagSchema;
1659
+ if (source?.eval && "flagSchema" in source.eval) {
1660
+ merged.eval.flagSchema = source.eval.flagSchema;
1661
+ }
1662
+ return merged;
1663
+ }
1664
+ async function loadConfig(cwd = process.cwd()) {
1665
+ try {
1666
+ const defaults = createPartialDefaults();
1667
+ const result = await (0, import_c12.loadConfig)({
1668
+ name: "axiom",
1669
+ cwd,
1670
+ // Support common config file extensions
1671
+ configFile: "axiom.config",
1672
+ // Don't use defaultConfig - we'll merge manually to control array behavior
1673
+ // Disable configs other than .ts/.js/.mts/.mjs/.cts/.cjs
1674
+ rcFile: false,
1675
+ globalRc: false,
1676
+ packageJson: false,
1677
+ giget: false
1678
+ });
1679
+ const mergedConfig = customMerger(defaults, result.config);
1680
+ const validatedConfig = validateConfig(mergedConfig);
1681
+ return {
1682
+ config: validatedConfig
1683
+ };
1684
+ } catch (error) {
1685
+ if (error instanceof AxiomCLIError) {
1686
+ throw error;
1687
+ }
1688
+ throw new AxiomCLIError(`Failed to load config file: ${errorToString(error)}`);
1689
+ }
1690
+ }
1691
+ var import_c12, import_defu;
1692
+ var init_loader = __esm({
1693
+ "src/config/loader.ts"() {
1694
+ "use strict";
1695
+ init_cjs_shims();
1696
+ import_c12 = require("c12");
1697
+ import_defu = require("defu");
1698
+ init_config2();
1699
+ init_errors();
1700
+ }
1701
+ });
1702
+
1703
+ // ../../node_modules/.pnpm/tinyrainbow@2.0.0/node_modules/tinyrainbow/dist/chunk-BVHSVHOK.js
1704
+ function a(n) {
1705
+ return String(n);
1706
+ }
1707
+ function C(n = false) {
1708
+ let e = typeof process != "undefined" ? process : void 0, i = (e == null ? void 0 : e.env) || {}, g = (e == null ? void 0 : e.argv) || [];
1709
+ return !("NO_COLOR" in i || g.includes("--no-color")) && ("FORCE_COLOR" in i || g.includes("--color") || (e == null ? void 0 : e.platform) === "win32" || n && i.TERM !== "dumb" || "CI" in i) || typeof window != "undefined" && !!window.chrome;
1710
+ }
1711
+ function p(n = false) {
1712
+ let e = C(n), i = (r2, t, c, o) => {
1713
+ let l = "", s2 = 0;
1714
+ do
1715
+ l += r2.substring(s2, o) + c, s2 = o + t.length, o = r2.indexOf(t, s2);
1716
+ while (~o);
1717
+ return l + r2.substring(s2);
1718
+ }, g = (r2, t, c = r2) => {
1719
+ let o = (l) => {
1720
+ let s2 = String(l), b = s2.indexOf(t, r2.length);
1721
+ return ~b ? r2 + i(s2, t, c, b) + t : r2 + s2 + t;
1722
+ };
1723
+ return o.open = r2, o.close = t, o;
1724
+ }, u2 = {
1725
+ isColorSupported: e
1726
+ }, d = (r2) => `\x1B[${r2}m`;
1727
+ for (let [r2, t] of h)
1728
+ u2[r2] = e ? g(
1729
+ d(t[0]),
1730
+ d(t[1]),
1731
+ t[2]
1732
+ ) : a;
1733
+ return u2;
1734
+ }
1735
+ var f, h;
1736
+ var init_chunk_BVHSVHOK = __esm({
1737
+ "../../node_modules/.pnpm/tinyrainbow@2.0.0/node_modules/tinyrainbow/dist/chunk-BVHSVHOK.js"() {
1738
+ "use strict";
1739
+ init_cjs_shims();
1740
+ f = {
1741
+ reset: [0, 0],
1742
+ bold: [1, 22, "\x1B[22m\x1B[1m"],
1743
+ dim: [2, 22, "\x1B[22m\x1B[2m"],
1744
+ italic: [3, 23],
1745
+ underline: [4, 24],
1746
+ inverse: [7, 27],
1747
+ hidden: [8, 28],
1748
+ strikethrough: [9, 29],
1749
+ black: [30, 39],
1750
+ red: [31, 39],
1751
+ green: [32, 39],
1752
+ yellow: [33, 39],
1753
+ blue: [34, 39],
1754
+ magenta: [35, 39],
1755
+ cyan: [36, 39],
1756
+ white: [37, 39],
1757
+ gray: [90, 39],
1758
+ bgBlack: [40, 49],
1759
+ bgRed: [41, 49],
1760
+ bgGreen: [42, 49],
1761
+ bgYellow: [43, 49],
1762
+ bgBlue: [44, 49],
1763
+ bgMagenta: [45, 49],
1764
+ bgCyan: [46, 49],
1765
+ bgWhite: [47, 49],
1766
+ blackBright: [90, 39],
1767
+ redBright: [91, 39],
1768
+ greenBright: [92, 39],
1769
+ yellowBright: [93, 39],
1770
+ blueBright: [94, 39],
1771
+ magentaBright: [95, 39],
1772
+ cyanBright: [96, 39],
1773
+ whiteBright: [97, 39],
1774
+ bgBlackBright: [100, 49],
1775
+ bgRedBright: [101, 49],
1776
+ bgGreenBright: [102, 49],
1777
+ bgYellowBright: [103, 49],
1778
+ bgBlueBright: [104, 49],
1779
+ bgMagentaBright: [105, 49],
1780
+ bgCyanBright: [106, 49],
1781
+ bgWhiteBright: [107, 49]
1782
+ };
1783
+ h = Object.entries(f);
1784
+ a.open = "";
1785
+ a.close = "";
1786
+ }
1787
+ });
1788
+
1789
+ // ../../node_modules/.pnpm/tinyrainbow@2.0.0/node_modules/tinyrainbow/dist/node.js
1790
+ var import_tty, r, u;
1791
+ var init_node = __esm({
1792
+ "../../node_modules/.pnpm/tinyrainbow@2.0.0/node_modules/tinyrainbow/dist/node.js"() {
1793
+ "use strict";
1794
+ init_cjs_shims();
1795
+ init_chunk_BVHSVHOK();
1796
+ import_tty = require("tty");
1797
+ r = process.env.FORCE_TTY !== void 0 || (0, import_tty.isatty)(1);
1798
+ u = p(r);
1799
+ }
1800
+ });
1801
+
1802
+ // src/evals/reporter.console-utils.ts
1803
+ function formatPercentage(value) {
1804
+ if (!Number.isFinite(value)) {
1805
+ return "N/A";
1806
+ }
1807
+ return Number(value * 100).toFixed(2) + "%";
1808
+ }
1809
+ function formatDiff(current, baseline) {
1810
+ if (!Number.isFinite(current) || !Number.isFinite(baseline)) {
1811
+ return { text: "N/A", color: u.dim };
1812
+ }
1813
+ const diff = current - baseline;
1814
+ const diffText = (diff >= 0 ? "+" : "") + formatPercentage(diff);
1815
+ const color = diff > 0 ? u.green : diff < 0 ? u.red : u.dim;
1816
+ return { text: diffText, color };
1817
+ }
1818
+ function truncate(str, max) {
1819
+ return str.length > max ? str.slice(0, max) + "\u2026" : str;
1820
+ }
1821
+ function stringify(value) {
1822
+ try {
1823
+ if (typeof value === "string") return value;
1824
+ return JSON.stringify(value);
1825
+ } catch {
1826
+ return String(value);
1827
+ }
1828
+ }
1829
+ function getCaseFingerprint(input, expected) {
1830
+ const inputStr = typeof input === "string" ? input : JSON.stringify(input);
1831
+ const expectedStr = typeof expected === "string" ? expected : JSON.stringify(expected);
1832
+ return JSON.stringify({ input: inputStr, expected: expectedStr });
1833
+ }
1834
+ function printEvalNameAndFileName(testSuite, meta, logger = console.log) {
1835
+ const cwd = process.cwd();
1836
+ logger(
1837
+ " ",
1838
+ u.bgCyan(u.black(` ${testSuite.project.name} `)),
1839
+ u.bgBlue(u.black(` ${meta.evaluation.name}-${meta.evaluation.version} `)),
1840
+ u.dim(`(${testSuite.children.size} cases)`)
1841
+ );
1842
+ logger(" ", u.dim(testSuite.module.moduleId.replace(cwd, "")));
1843
+ }
1844
+ function printBaselineNameAndVersion(testMeta, logger = console.log) {
1845
+ if (testMeta.evaluation.baseline) {
1846
+ logger(
1847
+ " ",
1848
+ " baseline ",
1849
+ u.bgMagenta(
1850
+ u.black(` ${testMeta.evaluation.baseline.name}-${testMeta.evaluation.baseline.version} `)
1851
+ )
1852
+ );
1853
+ } else {
1854
+ logger(" ", u.bgWhite(u.blackBright(" baseline: ")), "none");
1855
+ }
1856
+ logger("");
1857
+ }
1858
+ function printRuntimeFlags(testMeta, logger = console.log) {
1859
+ if (testMeta.case.runtimeFlags && Object.keys(testMeta.case.runtimeFlags).length > 0) {
1860
+ const entries = Object.entries(testMeta.case.runtimeFlags);
1861
+ logger(" ", u.dim("runtime flags"));
1862
+ for (const [k, v] of entries) {
1863
+ switch (v.kind) {
1864
+ case "replaced": {
1865
+ const valText = truncate(stringify(v.value), 80);
1866
+ const defText = truncate(stringify(v.default), 80);
1867
+ logger(" ", `${k}: ${valText} (default: ${defText})`);
1868
+ break;
1869
+ }
1870
+ case "introduced": {
1871
+ const valText = truncate(stringify(v.value), 80);
1872
+ logger(" ", `${k}: ${valText} (no default)`);
1873
+ break;
1874
+ }
1875
+ }
1876
+ }
1877
+ }
1878
+ }
1879
+ function printTestCaseCountStartDuration(testSuite, startTime, duration, logger = console.log) {
1880
+ logger(" ");
1881
+ logger(" ", u.dim("Cases"), testSuite.children.size);
1882
+ logger(" ", u.dim("Start at"), new Date(startTime).toTimeString());
1883
+ logger(" ", u.dim("Duration"), `${duration}s`);
1884
+ }
1885
+ function printTestCaseSuccessOrFailed(testMeta, ok, logger = console.log) {
1886
+ const index = testMeta.case.index;
1887
+ if (ok) {
1888
+ logger(" ", u.yellow(` \u2714 case ${index}:`));
1889
+ } else {
1890
+ logger(" ", u.red(` \u2716 case ${index}: failed`));
1891
+ for (const e of testMeta.case.errors ?? []) {
1892
+ logger("", e.message);
1893
+ }
1894
+ }
1895
+ }
1896
+ function printTestCaseScores(testMeta, baselineCase, logger = console.log) {
1897
+ const scores = testMeta.case.scores;
1898
+ const keys = Object.keys(scores);
1899
+ if (keys.length === 0) {
1900
+ return;
1992
1901
  }
1993
- return merged;
1902
+ const maxNameLength = Math.max(...keys.map((k) => k.length));
1903
+ keys.forEach((k) => {
1904
+ const scoreData = scores[k];
1905
+ const hasError = scoreData.metadata?.error;
1906
+ const v = scoreData.score ? scoreData.score : 0;
1907
+ const rawCurrent = hasError ? "N/A" : formatPercentage(v);
1908
+ const paddedCurrent = rawCurrent.padStart(7);
1909
+ const coloredCurrent = hasError ? u.dim(paddedCurrent) : u.magentaBright(paddedCurrent);
1910
+ const paddedName = k.padEnd(maxNameLength);
1911
+ if (baselineCase?.scores[k]) {
1912
+ const baselineScoreValue = baselineCase.scores[k].value;
1913
+ const rawBaseline = formatPercentage(baselineScoreValue);
1914
+ const paddedBaseline = rawBaseline.padStart(7);
1915
+ const coloredBaseline = u.blueBright(paddedBaseline);
1916
+ const { text: diffText, color: diffColor } = formatDiff(v, baselineScoreValue);
1917
+ const paddedDiff = diffText.padStart(8);
1918
+ logger(
1919
+ ` ${paddedName} ${coloredBaseline} \u2192 ${coloredCurrent} ${hasError ? u.dim("(scorer not run)") : u.dim("(") + diffColor(paddedDiff) + u.dim(")")}`
1920
+ );
1921
+ } else {
1922
+ logger(` ${paddedName} ${coloredCurrent} ${hasError ? u.dim("(scorer not run)") : ""}`);
1923
+ }
1924
+ });
1994
1925
  }
1995
- async function loadConfig(cwd = process.cwd()) {
1996
- try {
1997
- const defaults = createPartialDefaults();
1998
- const result = await (0, import_c12.loadConfig)({
1999
- name: "axiom",
2000
- cwd,
2001
- // Support common config file extensions
2002
- configFile: "axiom.config",
2003
- // Don't use defaultConfig - we'll merge manually to control array behavior
2004
- // Disable configs other than .ts/.js/.mts/.mjs/.cts/.cjs
2005
- rcFile: false,
2006
- globalRc: false,
2007
- packageJson: false,
2008
- giget: false
1926
+ function printOutOfScopeFlags(testMeta, logger = console.log) {
1927
+ if (testMeta.case.outOfScopeFlags && testMeta.case.outOfScopeFlags.length > 0) {
1928
+ const pickedFlagsText = testMeta.case.pickedFlags ? `(picked: ${testMeta.case.pickedFlags.map((f2) => `'${f2}'`).join(", ")})` : "(none)";
1929
+ logger(" ", u.yellow(`\u26A0 Out-of-scope flags: ${pickedFlagsText}`));
1930
+ testMeta.case.outOfScopeFlags.forEach((flag) => {
1931
+ const timeStr = new Date(flag.accessedAt).toLocaleTimeString();
1932
+ logger(" ", `${flag.flagPath} (at ${timeStr})`);
1933
+ if (flag.stackTrace && flag.stackTrace.length > 0) {
1934
+ flag.stackTrace.forEach((frame, i) => {
1935
+ const prefix = i === flag.stackTrace.length - 1 ? " \u2514\u2500" : " \u251C\u2500";
1936
+ logger(" ", u.dim(`${prefix} ${frame}`));
1937
+ });
1938
+ }
2009
1939
  });
2010
- const mergedConfig = customMerger(defaults, result.config);
2011
- const validatedConfig = validateConfig(mergedConfig);
2012
- return {
2013
- config: validatedConfig
2014
- };
2015
- } catch (error) {
2016
- if (error instanceof AxiomCLIError) {
2017
- throw error;
2018
- }
2019
- throw new AxiomCLIError(`Failed to load config file: ${errorToString(error)}`);
2020
1940
  }
2021
1941
  }
2022
-
2023
- // src/evals/instrument.ts
2024
- var axiomProvider;
2025
- var axiomTracer;
2026
- var userProvider;
2027
- var initializationPromise = null;
2028
- var initialized = false;
2029
- async function resolveInstrumentationHook(config) {
2030
- if (config.eval.instrumentation) {
2031
- return config.eval.instrumentation;
1942
+ function printCaseResult(test, baselineCasesByFingerprint, matchedIndices, logger = console.log) {
1943
+ const ok = test.ok();
1944
+ const testMeta = test.meta();
1945
+ if (!testMeta?.case) {
1946
+ return;
2032
1947
  }
2033
- try {
2034
- const { config: loadedConfig } = await loadConfig(process.cwd());
2035
- return loadedConfig.eval.instrumentation ?? null;
2036
- } catch (error) {
2037
- throw new AxiomCLIError(
2038
- `Failed to reload instrumentation from config: ${errorToString(error)}`
2039
- );
1948
+ printTestCaseSuccessOrFailed(testMeta, ok, logger);
1949
+ const fingerprint = getCaseFingerprint(testMeta.case.input, testMeta.case.expected);
1950
+ const baselineCases = baselineCasesByFingerprint.get(fingerprint);
1951
+ const baselineCase = baselineCases?.shift();
1952
+ if (baselineCase) {
1953
+ matchedIndices.add(baselineCase.index);
2040
1954
  }
1955
+ printTestCaseScores(testMeta, baselineCase, logger);
1956
+ printRuntimeFlags(testMeta, logger);
1957
+ printOutOfScopeFlags(testMeta, logger);
2041
1958
  }
2042
- async function runInstrumentationHook(hook, options) {
2043
- try {
2044
- return await hook(options);
2045
- } catch (error) {
2046
- throw new AxiomCLIError(`Failed to execute instrumentation hook: ${errorToString(error)}`);
1959
+ function printOrphanedBaselineCases(baseline, matchedIndices, logger = console.log) {
1960
+ const orphanedCases = baseline.cases.filter((c) => !matchedIndices.has(c.index));
1961
+ if (orphanedCases.length === 0) {
1962
+ return;
2047
1963
  }
2048
- }
2049
- function setupEvalProvider(connection) {
2050
- const headers = {
2051
- "X-Axiom-Dataset": connection.dataset,
2052
- ...connection.orgId ? { "X-AXIOM-ORG-ID": connection.orgId } : {}
2053
- };
2054
- if (connection.token) {
2055
- headers.Authorization = `Bearer ${connection.token}`;
1964
+ logger("");
1965
+ logger(" ", u.yellow("Orphaned baseline cases:"));
1966
+ for (const orphanedCase of orphanedCases) {
1967
+ logger(
1968
+ " ",
1969
+ u.dim(
1970
+ `case ${orphanedCase.index}: ${truncate(orphanedCase.input, 50)} (score: ${truncate(
1971
+ JSON.stringify(orphanedCase.scores),
1972
+ 50
1973
+ )})`
1974
+ )
1975
+ );
1976
+ const keys = Object.keys(orphanedCase.scores);
1977
+ if (keys.length > 0) {
1978
+ const maxNameLength = Math.max(...keys.map((k) => k.length));
1979
+ keys.forEach((k) => {
1980
+ const scoreData = orphanedCase.scores[k];
1981
+ const rawScore = formatPercentage(scoreData.value);
1982
+ const paddedName = k.padEnd(maxNameLength);
1983
+ const paddedScore = rawScore.padStart(7);
1984
+ logger(` ${paddedName} ${u.blueBright(paddedScore)}`);
1985
+ });
1986
+ }
2056
1987
  }
2057
- const collectorOptions = {
2058
- url: `${connection.url}/v1/traces`,
2059
- headers,
2060
- concurrencyLimit: 10
2061
- };
2062
- const exporter = new import_exporter_trace_otlp_http.OTLPTraceExporter(collectorOptions);
2063
- const processor = new import_sdk_trace_node.BatchSpanProcessor(exporter, {
2064
- maxQueueSize: 2048,
2065
- maxExportBatchSize: 512,
2066
- scheduledDelayMillis: 5e3,
2067
- exportTimeoutMillis: 3e4
2068
- });
2069
- axiomProvider = new import_sdk_trace_node.NodeTracerProvider({
2070
- resource: (0, import_resources.resourceFromAttributes)({
2071
- ["service.name"]: "axiom",
2072
- ["service.version"]: "0.34.0"
2073
- }),
2074
- spanProcessors: [processor]
2075
- });
2076
- axiomTracer = axiomProvider.getTracer("axiom", "0.34.0");
2077
1988
  }
2078
- async function initInstrumentation(config) {
2079
- if (initialized) {
1989
+ function printGlobalFlagOverrides(overrides2, defaults, logger = console.log) {
1990
+ if (Object.keys(overrides2).length === 0) {
1991
+ logger("");
1992
+ logger(u.dim("Flag overrides: (none)"));
1993
+ logger("");
2080
1994
  return;
2081
1995
  }
2082
- if (initializationPromise) {
2083
- await initializationPromise;
2084
- return;
1996
+ logger("");
1997
+ logger("Flag overrides:");
1998
+ for (const [key, value] of Object.entries(overrides2)) {
1999
+ const defaultValue = defaults[key];
2000
+ const valueStr = JSON.stringify(value);
2001
+ const defaultStr = defaultValue !== void 0 ? JSON.stringify(defaultValue) : "none";
2002
+ logger(` \u2022 ${key}: ${valueStr} ${u.dim(`(default: ${defaultStr})`)}`);
2085
2003
  }
2086
- initializationPromise = (async () => {
2087
- if (!config.enabled) {
2088
- axiomTracer = import_api10.trace.getTracer("axiom", "0.34.0");
2089
- initialized = true;
2090
- return;
2091
- }
2092
- const connection = resolveAxiomConnection(config.config);
2093
- const hook = await resolveInstrumentationHook(config.config);
2094
- let hookResult = void 0;
2095
- if (hook) {
2096
- config.config.eval.instrumentation = hook;
2097
- hookResult = await runInstrumentationHook(hook, {
2098
- dataset: connection.dataset,
2099
- token: connection.token,
2100
- url: connection.url,
2101
- orgId: connection.orgId
2102
- });
2103
- userProvider = hookResult?.provider ?? userProvider;
2004
+ logger("");
2005
+ }
2006
+ function printSuiteBox({
2007
+ suite,
2008
+ scorerAverages,
2009
+ calculateBaselineScorerAverage: calculateBaselineScorerAverage2,
2010
+ flagDiff,
2011
+ logger = console.log
2012
+ }) {
2013
+ const filename = suite.file.split("/").pop();
2014
+ logger("\u250C\u2500");
2015
+ logger(`\u2502 ${u.blue(suite.name)} ${u.gray(`(${filename})`)}`);
2016
+ logger("\u251C\u2500");
2017
+ const scorerNames = Object.keys(scorerAverages);
2018
+ const maxNameLength = Math.max(...scorerNames.map((name) => name.length));
2019
+ const allCasesErrored = (scorerName) => {
2020
+ return suite.cases.every((caseData) => caseData.scores[scorerName]?.metadata?.error);
2021
+ };
2022
+ for (const scorerName of scorerNames) {
2023
+ const avg = scorerAverages[scorerName];
2024
+ const paddedName = scorerName.padEnd(maxNameLength);
2025
+ const hasAllErrors = allCasesErrored(scorerName);
2026
+ const baselineAvg = suite.baseline ? calculateBaselineScorerAverage2(suite.baseline, scorerName) : null;
2027
+ if (baselineAvg !== null) {
2028
+ const currentPercent = hasAllErrors ? u.dim("N/A") : formatPercentage(avg);
2029
+ const baselinePercent = formatPercentage(baselineAvg);
2030
+ const { text: diffText, color: diffColor } = formatDiff(avg, baselineAvg);
2031
+ const paddedBaseline = baselinePercent.padStart(7);
2032
+ const paddedCurrent = hasAllErrors ? currentPercent : currentPercent.padStart(7);
2033
+ const diffDisplay = hasAllErrors ? u.dim("all cases failed") : diffColor(diffText.padStart(8));
2034
+ logger(
2035
+ `\u2502 ${paddedName} ${u.blueBright(paddedBaseline)} \u2192 ${hasAllErrors ? paddedCurrent : u.magentaBright(paddedCurrent)} (${diffDisplay})`
2036
+ );
2037
+ } else {
2038
+ const currentPercent = hasAllErrors ? u.red("N/A (all cases failed)") : formatPercentage(avg);
2039
+ logger(`\u2502 \u2022 ${paddedName} ${currentPercent}`);
2104
2040
  }
2105
- setupEvalProvider(connection);
2106
- if (!hook) {
2107
- axiomProvider?.register();
2108
- if (axiomTracer) {
2109
- initAxiomAI({ tracer: axiomTracer });
2041
+ }
2042
+ logger("\u251C\u2500");
2043
+ if (suite.baseline) {
2044
+ const baselineTimestamp = suite.baseline.runAt ? reporterDate(new Date(suite.baseline.runAt)) : "unknown time";
2045
+ logger(
2046
+ `\u2502 Baseline: ${suite.baseline.name}-${suite.baseline.version} ${u.gray(`(${baselineTimestamp})`)}`
2047
+ );
2048
+ } else {
2049
+ logger(`\u2502 Baseline: ${u.gray("(none)")}`);
2050
+ }
2051
+ const hasConfigChanges = flagDiff.length > 0;
2052
+ logger("\u2502 Config changes:", hasConfigChanges ? "" : u.gray("(none)"));
2053
+ if (hasConfigChanges) {
2054
+ for (const { flag, current, baseline, default: defaultVal } of flagDiff) {
2055
+ logger(`\u2502 \u2022 ${flag}: ${current ?? "<not set>"}`);
2056
+ if (defaultVal !== void 0) {
2057
+ logger(`\u2502 ${u.gray(`default: ${defaultVal}`)}`);
2058
+ }
2059
+ if (suite.baseline) {
2060
+ logger(`\u2502 ${u.gray(`baseline: ${baseline ?? "<not set>"}`)}`);
2110
2061
  }
2111
2062
  }
2112
- initialized = true;
2113
- })();
2114
- try {
2115
- await initializationPromise;
2116
- } finally {
2117
- initializationPromise = null;
2118
2063
  }
2064
+ if (suite.outOfScopeFlags && suite.outOfScopeFlags.length > 0) {
2065
+ const pickedFlagsText = suite.configFlags && suite.configFlags.length > 0 ? suite.configFlags.map((f2) => `'${f2}'`).join(", ") : "none";
2066
+ logger("\u2502");
2067
+ logger(`\u2502 ${u.yellow("\u26A0 Out-of-scope flags")} ${u.gray(`(picked: ${pickedFlagsText})`)}:`);
2068
+ for (const flag of suite.outOfScopeFlags) {
2069
+ const lastStackTraceFrame = flag.stackTrace[0];
2070
+ const lastStackTraceFnName = lastStackTraceFrame.split(" ").shift();
2071
+ const lastStackTraceFile = lastStackTraceFrame.split("/").pop()?.slice(0, -1);
2072
+ logger(
2073
+ `\u2502 \u2022 ${flag.flagPath} ${u.gray(`at ${lastStackTraceFnName} (${lastStackTraceFile})`)}`
2074
+ );
2075
+ }
2076
+ }
2077
+ logger("\u2514\u2500");
2119
2078
  }
2120
- var flush = async () => {
2121
- if (initializationPromise) {
2122
- await initializationPromise;
2079
+ function calculateScorerAverages(suite) {
2080
+ const scorerTotals = {};
2081
+ for (const caseData of suite.cases) {
2082
+ for (const [scorerName, score] of Object.entries(caseData.scores)) {
2083
+ if (!scorerTotals[scorerName]) {
2084
+ scorerTotals[scorerName] = { sum: 0, count: 0 };
2085
+ }
2086
+ if (!score.metadata?.error) {
2087
+ scorerTotals[scorerName].sum += score.score || 0;
2088
+ scorerTotals[scorerName].count += 1;
2089
+ }
2090
+ }
2123
2091
  }
2124
- const tasks = [];
2125
- if (axiomProvider) {
2126
- tasks.push(axiomProvider.forceFlush());
2092
+ const averages = {};
2093
+ for (const [scorerName, totals] of Object.entries(scorerTotals)) {
2094
+ averages[scorerName] = totals.count > 0 ? totals.sum / totals.count : 0;
2127
2095
  }
2128
- const candidateProviders = /* @__PURE__ */ new Set();
2129
- if (userProvider) {
2130
- candidateProviders.add(userProvider);
2096
+ return averages;
2097
+ }
2098
+ function calculateBaselineScorerAverage(baseline, scorerName) {
2099
+ const scores = [];
2100
+ for (const caseData of baseline.cases) {
2101
+ if (caseData.scores[scorerName]) {
2102
+ scores.push(caseData.scores[scorerName].value);
2103
+ }
2131
2104
  }
2132
- const globalProvider = import_api10.trace.getTracerProvider();
2133
- if (globalProvider) {
2134
- candidateProviders.add(globalProvider);
2105
+ if (scores.length === 0) return null;
2106
+ const sum = scores.reduce((acc, val) => acc + val, 0);
2107
+ return sum / scores.length;
2108
+ }
2109
+ function calculateFlagDiff(suite) {
2110
+ if (!suite.configFlags || suite.configFlags.length === 0) {
2111
+ return [];
2135
2112
  }
2136
- for (const provider of candidateProviders) {
2137
- const flushFn = provider.forceFlush;
2138
- if (typeof flushFn === "function") {
2139
- tasks.push(
2140
- flushFn.call(provider).catch((error) => {
2141
- console.warn("[AxiomAI] Failed to flush tracer provider:", errorToString(error));
2142
- })
2143
- );
2113
+ const diffs = [];
2114
+ const currentConfig = suite.flagConfig || {};
2115
+ const baselineConfig = suite.baseline?.flagConfig || {};
2116
+ const defaultConfig = suite.defaultFlagConfig || {};
2117
+ const currentFlat = flattenObject(currentConfig);
2118
+ const baselineFlat = flattenObject(baselineConfig);
2119
+ const defaultFlat = flattenObject(defaultConfig);
2120
+ const allKeys = /* @__PURE__ */ new Set([
2121
+ ...Object.keys(currentFlat),
2122
+ ...Object.keys(baselineFlat),
2123
+ ...Object.keys(defaultFlat)
2124
+ ]);
2125
+ for (const key of allKeys) {
2126
+ const isInScope = suite.configFlags.some((pattern) => key.startsWith(pattern));
2127
+ if (!isInScope) continue;
2128
+ const currentValue = currentFlat[key];
2129
+ const baselineValue = baselineFlat[key];
2130
+ const defaultValue = defaultFlat[key];
2131
+ const currentStr = currentValue !== void 0 ? JSON.stringify(currentValue) : void 0;
2132
+ const baselineStr = baselineValue !== void 0 ? JSON.stringify(baselineValue) : void 0;
2133
+ const defaultStr = defaultValue !== void 0 ? JSON.stringify(defaultValue) : void 0;
2134
+ const diffFromBaseline = suite.baseline && currentStr !== baselineStr;
2135
+ const diffFromDefault = currentStr !== defaultStr;
2136
+ if (diffFromBaseline || diffFromDefault) {
2137
+ diffs.push({
2138
+ flag: key,
2139
+ current: currentStr,
2140
+ baseline: suite.baseline ? baselineStr : void 0,
2141
+ default: defaultStr
2142
+ });
2144
2143
  }
2145
2144
  }
2146
- if (tasks.length > 0) {
2147
- await Promise.all(tasks);
2148
- }
2149
- };
2150
-
2151
- // src/evals/run-vitest.ts
2152
- var printCollectedEvals = (result, rootDir) => {
2153
- if (!result.testModules || result.testModules.length === 0) {
2154
- console.log(u.yellow("\nNo evaluations found\n"));
2155
- return;
2145
+ return diffs;
2146
+ }
2147
+ function printFinalReport({
2148
+ suiteData,
2149
+ config,
2150
+ registrationStatus,
2151
+ isDebug,
2152
+ logger = console.log
2153
+ }) {
2154
+ logger("");
2155
+ logger(u.bgBlue(u.white(" FINAL EVALUATION REPORT ")));
2156
+ logger("");
2157
+ for (const suite of suiteData) {
2158
+ const scorerAverages = calculateScorerAverages(suite);
2159
+ const flagDiff = calculateFlagDiff(suite);
2160
+ printSuiteBox({ suite, scorerAverages, calculateBaselineScorerAverage, flagDiff, logger });
2161
+ logger("");
2156
2162
  }
2157
- console.log(u.bold("\nFound evaluations:\n"));
2158
- let totalEvals = 0;
2159
- let totalCases = 0;
2160
- for (const module2 of result.testModules) {
2161
- const relativePath = import_node_path2.default.relative(rootDir, module2.moduleId);
2162
- for (const suite of module2.children.suites()) {
2163
- totalEvals++;
2164
- const caseCount = suite.children.size;
2165
- totalCases += caseCount;
2166
- console.log(u.green(`\u2713 ${suite.name} (${caseCount} cases)`));
2167
- console.log(u.dim(` ${relativePath}`));
2168
- console.log("");
2163
+ const runId = suiteData[0]?.runId;
2164
+ const orgId = suiteData[0]?.orgId;
2165
+ const anyRegistered = registrationStatus.some((s2) => s2.registered);
2166
+ const anyFailed = registrationStatus.some((s2) => !s2.registered);
2167
+ if (anyRegistered && orgId && config?.consoleEndpointUrl) {
2168
+ if (suiteData.length === 1) {
2169
+ const suite = suiteData[0];
2170
+ const baselineParam = suite.baseline?.traceId ? `?baselineId=${suite.baseline.traceId}` : "";
2171
+ logger("View eval result:");
2172
+ logger(
2173
+ `${config.consoleEndpointUrl}/${orgId}/ai-engineering/evaluations/${suite.name}/${suite.version}${baselineParam}`
2174
+ );
2175
+ } else {
2176
+ logger("View full report:");
2177
+ logger(`${config.consoleEndpointUrl}/${orgId}/ai-engineering/evaluations?runId=${runId}`);
2169
2178
  }
2179
+ } else if (isDebug) {
2180
+ logger(u.dim("Results not uploaded to Axiom (debug mode)"));
2181
+ } else {
2182
+ logger("Results not available in Axiom UI (registration failed)");
2170
2183
  }
2171
- console.log(u.bold(`Total: ${totalEvals} evaluations, ${totalCases} test cases
2172
- `));
2173
- };
2174
- var runVitest = async (dir, opts) => {
2175
- setAxiomConfig(opts.config);
2176
- await initInstrumentation({
2177
- enabled: !opts.debug && !opts.list,
2178
- config: opts.config
2179
- });
2180
- const providedConfig = {
2181
- ...opts.config,
2182
- eval: {
2183
- ...opts.config.eval,
2184
- // These can't be serialized, so we need to remove them
2185
- instrumentation: null,
2186
- flagSchema: null
2187
- }
2188
- };
2189
- if (opts.debug) {
2190
- console.log(u.bgWhite(u.blackBright(" Debug mode enabled ")));
2191
- }
2192
- const tmpDir = (0, import_node_path.join)((0, import_node_os.tmpdir)(), "axiom-eval", opts.runId);
2193
- (0, import_node_fs.mkdirSync)(tmpDir, { recursive: true });
2194
- const nameRegistryFile = (0, import_node_path.join)(tmpDir, "names.jsonl");
2195
- const abortFile = (0, import_node_path.join)(tmpDir, "abort.txt");
2196
- (0, import_node_fs.writeFileSync)(nameRegistryFile, "", "utf8");
2197
- if ((0, import_node_fs.existsSync)(abortFile)) {
2198
- (0, import_node_fs.unlinkSync)(abortFile);
2199
- }
2200
- process.env.AXIOM_NAME_REGISTRY_FILE = nameRegistryFile;
2201
- process.env.AXIOM_ABORT_FILE = abortFile;
2202
- if (opts.list) {
2203
- console.log(u.bgWhite(u.blackBright(" List mode ")));
2204
- }
2205
- const vi = await (0, import_node.createVitest)(
2206
- "test",
2207
- {
2208
- root: dir ? dir : process.cwd(),
2209
- mode: "test",
2210
- include: opts.include,
2211
- exclude: opts.exclude,
2212
- testNamePattern: opts.testNamePattern,
2213
- reporters: ["verbose", new AxiomReporter()],
2214
- environment: "node",
2215
- browser: void 0,
2216
- watch: opts.watch,
2217
- setupFiles: [],
2218
- // ignore user vitest.config.ts etc
2219
- name: "axiom:eval",
2220
- printConsoleTrace: true,
2221
- silent: false,
2222
- disableConsoleIntercept: true,
2223
- testTimeout: opts.config?.eval?.timeoutMs || 6e4,
2224
- globals: true,
2225
- runner: (0, import_node_path.resolve)(__dirname, "evals", "custom-runner.js"),
2226
- provide: {
2227
- baseline: opts.baseline,
2228
- debug: opts.debug,
2229
- list: opts.list,
2230
- overrides: opts.overrides,
2231
- axiomConfig: providedConfig,
2232
- runId: opts.runId,
2233
- consoleUrl: opts.consoleUrl
2184
+ if (anyFailed) {
2185
+ logger("");
2186
+ for (const status of registrationStatus) {
2187
+ if (!status.registered) {
2188
+ logger(u.yellow(`\u26A0\uFE0F Warning: Failed to register "${status.name}" with Axiom`));
2189
+ if (status.error) {
2190
+ logger(u.dim(` Error: ${status.error}`));
2191
+ }
2192
+ logger(u.dim(` Results for this evaluation will not be available in the Axiom UI.`));
2234
2193
  }
2235
- },
2236
- {
2237
- plugins: [(0, import_vite_tsconfig_paths.default)({ root: dir || process.cwd() })]
2238
2194
  }
2239
- );
2240
- if (opts.list) {
2241
- const result = await vi.collect();
2242
- printCollectedEvals(result, dir || process.cwd());
2243
- await vi.close();
2244
- process.exit(0);
2245
- }
2246
- await vi.start();
2247
- if ((0, import_node_fs.existsSync)(abortFile)) {
2248
- const message = (0, import_node_fs.readFileSync)(abortFile, "utf8");
2249
- console.error("\n" + message);
2250
- await vi.close();
2251
- process.exit(1);
2252
2195
  }
2253
- const dispose = (0, import_node.registerConsoleShortcuts)(vi, process.stdin, process.stdout);
2254
- if (!vi.shouldKeepServer()) {
2255
- dispose();
2256
- await flush();
2257
- await vi.close();
2258
- process.exit(0);
2196
+ }
2197
+ var reporterDate;
2198
+ var init_reporter_console_utils = __esm({
2199
+ "src/evals/reporter.console-utils.ts"() {
2200
+ "use strict";
2201
+ init_cjs_shims();
2202
+ init_node();
2203
+ init_dot_path();
2204
+ reporterDate = (d) => {
2205
+ const date = d.toISOString().slice(0, 10);
2206
+ const hours = d.getUTCHours().toString().padStart(2, "0");
2207
+ const minutes = d.getUTCMinutes().toString().padStart(2, "0");
2208
+ return `${date}, ${hours}:${minutes} UTC`;
2209
+ };
2259
2210
  }
2260
- await flush();
2261
- };
2262
-
2263
- // src/cli/commands/eval.command.ts
2264
- var import_node_fs3 = require("fs");
2265
-
2266
- // src/cli/utils/eval-context-runner.ts
2267
- init_cjs_shims();
2211
+ });
2268
2212
 
2269
- // src/context.ts
2270
- init_cjs_shims();
2271
- function overrideFlags(partial) {
2272
- const current = getEvalContext();
2273
- if (!current) {
2274
- if (process.env.NODE_ENV !== "test") {
2275
- console.warn("overrideFlags called outside of evaluation context");
2276
- }
2277
- return;
2278
- }
2279
- const overlayContext = {
2280
- ...current,
2281
- flags: { ...current.flags, ...partial },
2282
- // Merge for backwards compatibility
2283
- parent: current,
2284
- overrides: { ...partial }
2213
+ // src/config/resolver.ts
2214
+ function resolveAxiomConnection(config, consoleUrlOverride) {
2215
+ const consoleEndpointUrl = consoleUrlOverride ?? buildConsoleUrl(config.eval.url);
2216
+ return {
2217
+ url: config.eval.url,
2218
+ consoleEndpointUrl,
2219
+ token: config.eval.token,
2220
+ dataset: config.eval.dataset,
2221
+ orgId: config.eval.orgId
2285
2222
  };
2286
- const currentCtx = EVAL_CONTEXT.get();
2287
- if (currentCtx) {
2288
- Object.assign(currentCtx, overlayContext);
2289
- }
2290
- for (const [key, value] of Object.entries(partial)) {
2291
- putOnSpan("flag", key, value);
2292
- }
2293
- }
2294
-
2295
- // src/cli/utils/eval-context-runner.ts
2296
- async function runEvalWithContext(overrides2, runFn) {
2297
- setGlobalFlagOverrides(overrides2);
2298
- return withEvalContext({ initialFlags: overrides2 }, async () => {
2299
- if (Object.keys(overrides2).length > 0) {
2300
- overrideFlags(overrides2);
2301
- }
2302
- return runFn();
2303
- });
2304
- }
2305
-
2306
- // src/cli/utils/parse-flag-overrides.ts
2307
- init_cjs_shims();
2308
- var import_zod7 = require("zod");
2309
- var import_node_fs2 = require("fs");
2310
- var import_node_path3 = require("path");
2311
- var FLAG_RE = /^--flag\.([^=]+)(?:=(.*))?$/;
2312
- var CONFIG_RE = /^--flags-config(?:=(.*))?$/;
2313
- function ensureNoSpaceSeparatedSyntax(flagName, value, nextToken, flagType) {
2314
- if (value === void 0 && nextToken !== void 0) {
2315
- if (flagType === "flag" && !nextToken.startsWith("-") && nextToken !== "true" && nextToken !== "false") {
2316
- console.error(`\u274C Invalid syntax: --flag.${flagName} ${nextToken}`);
2317
- console.error(`\u{1F4A1} Use: --flag.${flagName}=${nextToken}`);
2318
- process.exit(1);
2319
- } else if (flagType === "config" && !nextToken.startsWith("-")) {
2320
- console.error(`\u274C Invalid syntax: --flags-config ${nextToken}`);
2321
- console.error(`\u{1F4A1} Use: --flags-config=${nextToken}`);
2322
- process.exit(1);
2323
- }
2324
- }
2325
2223
  }
2326
- function collectFlagValidationErrors(overrides2, flagSchema) {
2327
- if (!flagSchema || Object.keys(overrides2).length === 0) {
2328
- return { success: true, errors: [] };
2224
+ var buildConsoleUrl;
2225
+ var init_resolver = __esm({
2226
+ "src/config/resolver.ts"() {
2227
+ "use strict";
2228
+ init_cjs_shims();
2229
+ buildConsoleUrl = (urlString) => {
2230
+ const url = new URL(urlString);
2231
+ return `${url.protocol}//app.${url.host.split("api.").at(-1)}`;
2232
+ };
2329
2233
  }
2330
- assertZodV4(flagSchema, "flagSchema");
2331
- const schema = flagSchema;
2332
- const errors = [];
2333
- for (const dotPath of Object.keys(overrides2)) {
2334
- const segments = parsePath(dotPath);
2335
- if (!isValidPath(schema, segments)) {
2336
- errors.push({ type: "invalid_path", path: dotPath });
2337
- }
2234
+ });
2235
+
2236
+ // src/evals/reporter.ts
2237
+ var AxiomReporter;
2238
+ var init_reporter = __esm({
2239
+ "src/evals/reporter.ts"() {
2240
+ "use strict";
2241
+ init_cjs_shims();
2242
+ init_storage();
2243
+ init_reporter_console_utils();
2244
+ init_resolver();
2245
+ init_eval_command();
2246
+ init_dot_path();
2247
+ AxiomReporter = class {
2248
+ constructor() {
2249
+ __publicField(this, "startTime", 0);
2250
+ __publicField(this, "start", 0);
2251
+ __publicField(this, "_endOfRunConfigEnd");
2252
+ __publicField(this, "_suiteData", []);
2253
+ __publicField(this, "_printedFlagOverrides", false);
2254
+ __publicField(this, "_config");
2255
+ }
2256
+ onTestRunStart() {
2257
+ this.start = performance.now();
2258
+ this.startTime = (/* @__PURE__ */ new Date()).getTime();
2259
+ const config = getAxiomConfig();
2260
+ if (config) {
2261
+ this._config = resolveAxiomConnection(config, getConsoleUrl());
2262
+ }
2263
+ }
2264
+ async onTestSuiteReady(_testSuite) {
2265
+ const meta = _testSuite.meta();
2266
+ if (_testSuite.state() === "skipped" || !meta?.evaluation) {
2267
+ return;
2268
+ }
2269
+ if (!this._printedFlagOverrides) {
2270
+ const defaultsFromConfigEnd = meta.evaluation.configEnd?.flags ?? {};
2271
+ const overridesFromConfigEnd = meta.evaluation.configEnd?.overrides ?? {};
2272
+ if (Object.keys(overridesFromConfigEnd).length > 0) {
2273
+ printGlobalFlagOverrides(overridesFromConfigEnd, defaultsFromConfigEnd);
2274
+ }
2275
+ this._printedFlagOverrides = true;
2276
+ }
2277
+ if (meta.evaluation.configEnd && !this._endOfRunConfigEnd) {
2278
+ this._endOfRunConfigEnd = meta.evaluation.configEnd;
2279
+ }
2280
+ }
2281
+ onTestCaseReady(test) {
2282
+ const meta = test.meta();
2283
+ if (!meta.case) return;
2284
+ }
2285
+ async onTestSuiteResult(testSuite) {
2286
+ const meta = testSuite.meta();
2287
+ if (testSuite.state() === "skipped" || !meta?.evaluation) {
2288
+ return;
2289
+ }
2290
+ if (meta.evaluation.configEnd && !this._endOfRunConfigEnd) {
2291
+ this._endOfRunConfigEnd = meta.evaluation.configEnd;
2292
+ }
2293
+ const durationSeconds = Number((performance.now() - this.start) / 1e3).toFixed(2);
2294
+ const cases = [];
2295
+ for (const test of testSuite.children) {
2296
+ if (test.type !== "test") continue;
2297
+ const testMeta = test.meta();
2298
+ if (!testMeta?.case) continue;
2299
+ cases.push({
2300
+ index: testMeta.case.index,
2301
+ scores: testMeta.case.scores,
2302
+ outOfScopeFlags: testMeta.case.outOfScopeFlags,
2303
+ errors: testMeta.case.errors,
2304
+ runtimeFlags: testMeta.case.runtimeFlags
2305
+ });
2306
+ }
2307
+ const cwd = process.cwd();
2308
+ const relativePath = testSuite.module.moduleId.replace(cwd, "").replace(/^\//, "");
2309
+ let suiteBaseline = meta.evaluation.baseline;
2310
+ let flagConfig = meta.evaluation.flagConfig;
2311
+ if (meta.evaluation.configEnd) {
2312
+ const defaults = meta.evaluation.configEnd.flags ?? {};
2313
+ const overrides2 = meta.evaluation.configEnd.overrides ?? {};
2314
+ const defaultsFlat = flattenObject(defaults);
2315
+ const overridesFlat = flattenObject(overrides2);
2316
+ flagConfig = dotNotationToNested({ ...defaultsFlat, ...overridesFlat });
2317
+ }
2318
+ const defaultFlagConfig = meta.evaluation.configEnd?.flags;
2319
+ this._suiteData.push({
2320
+ version: meta.evaluation.version,
2321
+ name: meta.evaluation.name,
2322
+ file: relativePath,
2323
+ duration: durationSeconds + "s",
2324
+ baseline: suiteBaseline || null,
2325
+ configFlags: meta.evaluation.configFlags,
2326
+ flagConfig,
2327
+ defaultFlagConfig,
2328
+ runId: meta.evaluation.runId,
2329
+ orgId: meta.evaluation.orgId,
2330
+ cases,
2331
+ outOfScopeFlags: meta.evaluation.outOfScopeFlags,
2332
+ registrationStatus: meta.evaluation.registrationStatus
2333
+ });
2334
+ printEvalNameAndFileName(testSuite, meta);
2335
+ printBaselineNameAndVersion(meta);
2336
+ printTestCaseCountStartDuration(testSuite, this.startTime, durationSeconds);
2337
+ const matchedBaselineIndices = /* @__PURE__ */ new Set();
2338
+ const baselineCasesByFingerprint = /* @__PURE__ */ new Map();
2339
+ if (suiteBaseline) {
2340
+ for (const c of suiteBaseline.cases) {
2341
+ const fp = getCaseFingerprint(c.input, c.expected);
2342
+ const cases2 = baselineCasesByFingerprint.get(fp) || [];
2343
+ cases2.push(c);
2344
+ baselineCasesByFingerprint.set(fp, cases2);
2345
+ }
2346
+ }
2347
+ for (const test of testSuite.children) {
2348
+ if (test.type !== "test") continue;
2349
+ printCaseResult(test, baselineCasesByFingerprint, matchedBaselineIndices);
2350
+ }
2351
+ if (suiteBaseline) {
2352
+ printOrphanedBaselineCases(suiteBaseline, matchedBaselineIndices);
2353
+ }
2354
+ console.log("");
2355
+ }
2356
+ async onTestRunEnd(_testModules, _errors, _reason) {
2357
+ const shouldClear = !process.env.CI && process.stdout.isTTY !== false;
2358
+ if (shouldClear) {
2359
+ process.stdout.write("\x1B[2J\x1B[0f");
2360
+ }
2361
+ const registrationStatus = this._suiteData.map((suite) => ({
2362
+ name: suite.name,
2363
+ registered: suite.registrationStatus?.status === "success",
2364
+ error: suite.registrationStatus?.status === "failed" ? suite.registrationStatus.error : void 0
2365
+ }));
2366
+ const isDebug = process.env.AXIOM_DEBUG === "true";
2367
+ printFinalReport({
2368
+ suiteData: this._suiteData,
2369
+ config: this._config,
2370
+ registrationStatus,
2371
+ isDebug
2372
+ });
2373
+ }
2374
+ };
2338
2375
  }
2339
- if (errors.length > 0) {
2340
- return { success: false, errors };
2376
+ });
2377
+
2378
+ // src/evals/instrument.ts
2379
+ async function resolveInstrumentationHook(config) {
2380
+ if (config.eval.instrumentation) {
2381
+ return config.eval.instrumentation;
2341
2382
  }
2342
- const nestedObject = dotNotationToNested(overrides2);
2343
- const deepPartialSchema = makeDeepPartial(schema);
2344
- const result = deepPartialSchema.safeParse(nestedObject);
2345
- if (!result.success) {
2346
- errors.push({ type: "invalid_value", zodError: result.error });
2383
+ try {
2384
+ const { config: loadedConfig } = await loadConfig(process.cwd());
2385
+ return loadedConfig.eval.instrumentation ?? null;
2386
+ } catch (error) {
2387
+ throw new AxiomCLIError(
2388
+ `Failed to reload instrumentation from config: ${errorToString(error)}`
2389
+ );
2347
2390
  }
2348
- return { success: errors.length === 0, errors };
2349
2391
  }
2350
- function printFlagValidationErrorsAndExit(errors) {
2351
- console.error("\u274C Invalid CLI flags:");
2352
- for (const error of errors) {
2353
- if (error.type === "invalid_path") {
2354
- console.error(` \u2022 flag '${error.path}': Invalid flag path`);
2355
- } else {
2356
- console.error(formatZodErrors(error.zodError));
2357
- const examples = generateFlagExamples(error.zodError);
2358
- if (examples.length > 0) {
2359
- console.error("\n\u{1F4A1} Valid examples:");
2360
- examples.forEach((example) => console.error(` ${example}`));
2361
- }
2362
- }
2392
+ async function runInstrumentationHook(hook, options) {
2393
+ try {
2394
+ return await hook(options);
2395
+ } catch (error) {
2396
+ throw new AxiomCLIError(`Failed to execute instrumentation hook: ${errorToString(error)}`);
2363
2397
  }
2364
- process.exit(1);
2365
2398
  }
2366
- function validateFlagOverrides(overrides2, flagSchema) {
2367
- const result = collectFlagValidationErrors(overrides2, flagSchema);
2368
- if (!result.success) {
2369
- printFlagValidationErrorsAndExit(result.errors);
2399
+ function setupEvalProvider(connection) {
2400
+ const headers = {
2401
+ "X-Axiom-Dataset": connection.dataset,
2402
+ ...connection.orgId ? { "X-AXIOM-ORG-ID": connection.orgId } : {}
2403
+ };
2404
+ if (connection.token) {
2405
+ headers.Authorization = `Bearer ${connection.token}`;
2370
2406
  }
2407
+ const collectorOptions = {
2408
+ url: `${connection.url}/v1/traces`,
2409
+ headers,
2410
+ concurrencyLimit: 10
2411
+ };
2412
+ const exporter = new import_exporter_trace_otlp_http.OTLPTraceExporter(collectorOptions);
2413
+ const processor = new import_sdk_trace_node.BatchSpanProcessor(exporter, {
2414
+ maxQueueSize: 2048,
2415
+ maxExportBatchSize: 512,
2416
+ scheduledDelayMillis: 5e3,
2417
+ exportTimeoutMillis: 3e4
2418
+ });
2419
+ axiomProvider = new import_sdk_trace_node.NodeTracerProvider({
2420
+ resource: (0, import_resources.resourceFromAttributes)({
2421
+ ["service.name"]: "axiom",
2422
+ ["service.version"]: "0.34.2"
2423
+ }),
2424
+ spanProcessors: [processor]
2425
+ });
2426
+ axiomTracer = axiomProvider.getTracer("axiom", "0.34.2");
2371
2427
  }
2372
- function coerceValue(raw) {
2373
- if (raw === "true") return true;
2374
- if (raw === "false") return false;
2375
- const num = Number(raw);
2376
- if (!Number.isNaN(num) && raw.trim() === num.toString()) {
2377
- return num;
2428
+ async function initInstrumentation(config) {
2429
+ if (initialized) {
2430
+ return;
2378
2431
  }
2379
- try {
2380
- return JSON.parse(raw);
2381
- } catch {
2382
- return raw;
2432
+ if (initializationPromise) {
2433
+ await initializationPromise;
2434
+ return;
2383
2435
  }
2384
- }
2385
- function loadConfigFile(path3) {
2386
- const abs = (0, import_node_path3.resolve)(process.cwd(), path3);
2387
- try {
2388
- const contents = (0, import_node_fs2.readFileSync)(abs, "utf8");
2389
- const parsed = JSON.parse(contents);
2390
- if (typeof parsed !== "object" || Array.isArray(parsed) || parsed === null) {
2391
- console.error(
2392
- `\u274C Flags config must be a JSON object, got ${Array.isArray(parsed) ? "array" : typeof parsed}`
2393
- );
2394
- process.exit(1);
2436
+ initializationPromise = (async () => {
2437
+ if (!config.enabled) {
2438
+ axiomTracer = import_api10.trace.getTracer("axiom", "0.34.2");
2439
+ initialized = true;
2440
+ return;
2395
2441
  }
2396
- return parsed;
2397
- } catch (err) {
2398
- console.error(`\u274C Could not read or parse flags config "${path3}": ${err.message}`);
2399
- process.exit(1);
2442
+ const connection = resolveAxiomConnection(config.config);
2443
+ const hook = await resolveInstrumentationHook(config.config);
2444
+ let hookResult = void 0;
2445
+ if (hook) {
2446
+ config.config.eval.instrumentation = hook;
2447
+ hookResult = await runInstrumentationHook(hook, {
2448
+ dataset: connection.dataset,
2449
+ token: connection.token,
2450
+ url: connection.url,
2451
+ orgId: connection.orgId
2452
+ });
2453
+ userProvider = hookResult?.provider ?? userProvider;
2454
+ }
2455
+ setupEvalProvider(connection);
2456
+ if (!hook) {
2457
+ axiomProvider?.register();
2458
+ if (axiomTracer) {
2459
+ initAxiomAI({ tracer: axiomTracer });
2460
+ }
2461
+ }
2462
+ initialized = true;
2463
+ })();
2464
+ try {
2465
+ await initializationPromise;
2466
+ } finally {
2467
+ initializationPromise = null;
2400
2468
  }
2401
2469
  }
2402
- function extractOverrides(argv) {
2403
- const cleanedArgv2 = [];
2404
- const overrides2 = {};
2405
- let configPath = null;
2406
- let hasCliFlags = false;
2407
- let configPathCount = 0;
2408
- for (let i = 0; i < argv.length; i++) {
2409
- const token = argv[i];
2410
- const configMatch = token.match(CONFIG_RE);
2411
- const flagMatch = token.match(FLAG_RE);
2412
- if (configMatch) {
2413
- configPathCount++;
2414
- if (configPathCount > 1) {
2415
- console.error("\u274C Only one --flags-config can be supplied.");
2416
- process.exit(1);
2470
+ var import_sdk_trace_node, import_resources, import_exporter_trace_otlp_http, import_api10, axiomProvider, axiomTracer, userProvider, initializationPromise, initialized, flush;
2471
+ var init_instrument = __esm({
2472
+ "src/evals/instrument.ts"() {
2473
+ "use strict";
2474
+ init_cjs_shims();
2475
+ import_sdk_trace_node = require("@opentelemetry/sdk-trace-node");
2476
+ import_resources = require("@opentelemetry/resources");
2477
+ import_exporter_trace_otlp_http = require("@opentelemetry/exporter-trace-otlp-http");
2478
+ import_api10 = require("@opentelemetry/api");
2479
+ init_initAxiomAI();
2480
+ init_resolver();
2481
+ init_errors();
2482
+ init_loader();
2483
+ initializationPromise = null;
2484
+ initialized = false;
2485
+ flush = async () => {
2486
+ if (initializationPromise) {
2487
+ await initializationPromise;
2417
2488
  }
2418
- const value = configMatch[1];
2419
- const nextToken = argv.length > i + 1 ? argv[i + 1] : void 0;
2420
- ensureNoSpaceSeparatedSyntax("flags-config", value, nextToken, "config");
2421
- if (!value) {
2422
- console.error("\u274C --flags-config requires a file path");
2423
- console.error("\u{1F4A1} Use: --flags-config=path/to/config.json");
2424
- process.exit(1);
2489
+ const tasks = [];
2490
+ if (axiomProvider) {
2491
+ tasks.push(axiomProvider.forceFlush());
2425
2492
  }
2426
- configPath = value;
2427
- } else if (flagMatch) {
2428
- hasCliFlags = true;
2429
- const key = flagMatch[1];
2430
- const value = flagMatch[2];
2431
- const nextToken = argv.length > i + 1 ? argv[i + 1] : void 0;
2432
- ensureNoSpaceSeparatedSyntax(key, value, nextToken, "flag");
2433
- const finalValue = value === void 0 ? "true" : value;
2434
- overrides2[key] = coerceValue(finalValue);
2435
- } else {
2436
- cleanedArgv2.push(token);
2437
- }
2438
- }
2439
- if (configPath && hasCliFlags) {
2440
- console.error("\u274C Cannot use both --flags-config and --flag.* arguments together.");
2441
- console.error("Choose one approach:");
2442
- console.error(" \u2022 Config file: --flags-config=my-flags.json");
2443
- console.error(" \u2022 CLI flags: --flag.temperature=0.9 --flag.model=gpt-4o");
2444
- process.exit(1);
2445
- }
2446
- if (configPath) {
2447
- const configOverrides = loadConfigFile(configPath);
2448
- return { cleanedArgv: cleanedArgv2, overrides: configOverrides };
2493
+ const candidateProviders = /* @__PURE__ */ new Set();
2494
+ if (userProvider) {
2495
+ candidateProviders.add(userProvider);
2496
+ }
2497
+ const globalProvider = import_api10.trace.getTracerProvider();
2498
+ if (globalProvider) {
2499
+ candidateProviders.add(globalProvider);
2500
+ }
2501
+ for (const provider of candidateProviders) {
2502
+ const flushFn = provider.forceFlush;
2503
+ if (typeof flushFn === "function") {
2504
+ tasks.push(
2505
+ flushFn.call(provider).catch((error) => {
2506
+ console.warn("[AxiomAI] Failed to flush tracer provider:", errorToString(error));
2507
+ })
2508
+ );
2509
+ }
2510
+ }
2511
+ if (tasks.length > 0) {
2512
+ await Promise.all(tasks);
2513
+ }
2514
+ };
2449
2515
  }
2450
- return { cleanedArgv: cleanedArgv2, overrides: overrides2 };
2451
- }
2516
+ });
2452
2517
 
2453
- // src/cli/utils/glob-utils.ts
2454
- init_cjs_shims();
2455
- function isGlob(str) {
2456
- return /[*?[\]{}!]/.test(str);
2457
- }
2518
+ // src/evals/run-vitest.ts
2519
+ var run_vitest_exports = {};
2520
+ __export(run_vitest_exports, {
2521
+ runVitest: () => runVitest
2522
+ });
2523
+ var import_node_path2, import_node_fs2, import_node_os, import_node_path3, import_vite_tsconfig_paths, import_node, printCollectedEvals, runVitest;
2524
+ var init_run_vitest = __esm({
2525
+ "src/evals/run-vitest.ts"() {
2526
+ "use strict";
2527
+ init_cjs_shims();
2528
+ init_node();
2529
+ import_node_path2 = require("path");
2530
+ import_node_fs2 = require("fs");
2531
+ import_node_os = require("os");
2532
+ import_node_path3 = __toESM(require("path"), 1);
2533
+ import_vite_tsconfig_paths = __toESM(require("vite-tsconfig-paths"), 1);
2534
+ import_node = require("vitest/node");
2535
+ init_reporter();
2536
+ init_instrument();
2537
+ init_storage();
2538
+ printCollectedEvals = (result, rootDir) => {
2539
+ if (!result.testModules || result.testModules.length === 0) {
2540
+ console.log(u.yellow("\nNo evaluations found\n"));
2541
+ return;
2542
+ }
2543
+ console.log(u.bold("\nFound evaluations:\n"));
2544
+ let totalEvals = 0;
2545
+ let totalCases = 0;
2546
+ for (const module2 of result.testModules) {
2547
+ const relativePath = import_node_path3.default.relative(rootDir, module2.moduleId);
2548
+ for (const suite of module2.children.suites()) {
2549
+ totalEvals++;
2550
+ const caseCount = suite.children.size;
2551
+ totalCases += caseCount;
2552
+ console.log(u.green(`\u2713 ${suite.name} (${caseCount} cases)`));
2553
+ console.log(u.dim(` ${relativePath}`));
2554
+ console.log("");
2555
+ }
2556
+ }
2557
+ console.log(u.bold(`Total: ${totalEvals} evaluations, ${totalCases} test cases
2558
+ `));
2559
+ };
2560
+ runVitest = async (dir, opts) => {
2561
+ setAxiomConfig(opts.config);
2562
+ await initInstrumentation({
2563
+ enabled: !opts.debug && !opts.list,
2564
+ config: opts.config
2565
+ });
2566
+ const providedConfig = {
2567
+ ...opts.config,
2568
+ eval: {
2569
+ ...opts.config.eval,
2570
+ // These can't be serialized, so we need to remove them
2571
+ instrumentation: null,
2572
+ flagSchema: null
2573
+ }
2574
+ };
2575
+ if (opts.debug) {
2576
+ console.log(u.bgWhite(u.blackBright(" Debug mode enabled ")));
2577
+ }
2578
+ const tmpDir = (0, import_node_path2.join)((0, import_node_os.tmpdir)(), "axiom-eval", opts.runId);
2579
+ (0, import_node_fs2.mkdirSync)(tmpDir, { recursive: true });
2580
+ const nameRegistryFile = (0, import_node_path2.join)(tmpDir, "names.jsonl");
2581
+ const abortFile = (0, import_node_path2.join)(tmpDir, "abort.txt");
2582
+ (0, import_node_fs2.writeFileSync)(nameRegistryFile, "", "utf8");
2583
+ if ((0, import_node_fs2.existsSync)(abortFile)) {
2584
+ (0, import_node_fs2.unlinkSync)(abortFile);
2585
+ }
2586
+ process.env.AXIOM_NAME_REGISTRY_FILE = nameRegistryFile;
2587
+ process.env.AXIOM_ABORT_FILE = abortFile;
2588
+ if (opts.list) {
2589
+ console.log(u.bgWhite(u.blackBright(" List mode ")));
2590
+ }
2591
+ const vi = await (0, import_node.createVitest)(
2592
+ "test",
2593
+ {
2594
+ root: dir ? dir : process.cwd(),
2595
+ mode: "test",
2596
+ include: opts.include,
2597
+ exclude: opts.exclude,
2598
+ testNamePattern: opts.testNamePattern,
2599
+ reporters: ["verbose", new AxiomReporter()],
2600
+ environment: "node",
2601
+ browser: void 0,
2602
+ watch: opts.watch,
2603
+ setupFiles: [],
2604
+ // ignore user vitest.config.ts etc
2605
+ name: "axiom:eval",
2606
+ printConsoleTrace: true,
2607
+ silent: false,
2608
+ disableConsoleIntercept: true,
2609
+ testTimeout: opts.config?.eval?.timeoutMs || 6e4,
2610
+ globals: true,
2611
+ runner: (0, import_node_path2.resolve)(__dirname, "evals", "custom-runner.js"),
2612
+ provide: {
2613
+ baseline: opts.baseline,
2614
+ debug: opts.debug,
2615
+ list: opts.list,
2616
+ overrides: opts.overrides,
2617
+ axiomConfig: providedConfig,
2618
+ runId: opts.runId,
2619
+ consoleUrl: opts.consoleUrl
2620
+ }
2621
+ },
2622
+ {
2623
+ plugins: [(0, import_vite_tsconfig_paths.default)({ root: dir || process.cwd() })]
2624
+ }
2625
+ );
2626
+ if (opts.list) {
2627
+ const result = await vi.collect();
2628
+ printCollectedEvals(result, dir || process.cwd());
2629
+ await vi.close();
2630
+ process.exit(0);
2631
+ }
2632
+ await vi.start();
2633
+ if ((0, import_node_fs2.existsSync)(abortFile)) {
2634
+ const message = (0, import_node_fs2.readFileSync)(abortFile, "utf8");
2635
+ console.error("\n" + message);
2636
+ await vi.close();
2637
+ process.exit(1);
2638
+ }
2639
+ const dispose = (0, import_node.registerConsoleShortcuts)(vi, process.stdin, process.stdout);
2640
+ if (!vi.shouldKeepServer()) {
2641
+ dispose();
2642
+ await flush();
2643
+ await vi.close();
2644
+ process.exit(0);
2645
+ }
2646
+ await flush();
2647
+ };
2648
+ }
2649
+ });
2458
2650
 
2459
2651
  // src/cli/commands/eval.command.ts
2460
- var createRunId = (0, import_nanoid.customAlphabet)("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ", 10);
2461
- var consoleUrl;
2462
2652
  function getConsoleUrl() {
2463
2653
  return consoleUrl;
2464
2654
  }
@@ -2483,93 +2673,136 @@ function getDefaultOrgId(value) {
2483
2673
  const authContext2 = getAuthContext();
2484
2674
  return authContext2?.orgId ?? process.env.AXIOM_ORG_ID;
2485
2675
  }
2486
- var loadEvalCommand = (program2, flagOverrides = {}) => {
2487
- return program2.addCommand(
2488
- new import_commander.Command("eval").description("run evals locally").addArgument(
2489
- new import_commander.Argument("[target]", "file, directory, glob pattern, or eval name").default(
2490
- ".",
2491
- "any *.eval.ts file in current directory"
2492
- )
2493
- ).option("-w, --watch true", "keep server running and watch for changes", false).option("-t, --token <TOKEN>", "axiom token", getDefaultToken).option("-d, --dataset <DATASET>", "axiom dataset name", process.env.AXIOM_DATASET).option("-u, --url <AXIOM URL>", "axiom url", getDefaultUrl).option("-o, --org-id <ORG ID>", "axiom organization id", getDefaultOrgId).option("-b, --baseline <BASELINE ID>", "id of baseline evaluation to compare against").option("--debug", "run locally without sending to Axiom or loading baselines", false).option("--list", "list evaluations and test cases without running them", false).addOption(new import_commander.Option("-c, --console-url <URL>", "console url override").hideHelp()).action(async (target, options) => {
2494
- try {
2495
- if (options.debug) {
2496
- process.env.AXIOM_DEBUG = "true";
2497
- }
2498
- let include = [];
2499
- let exclude;
2500
- let testNamePattern;
2501
- const isGlobPattern = isGlob(target);
2502
- const { config: loadedConfig } = await loadConfig(".");
2503
- validateFlagOverrides(flagOverrides, loadedConfig.eval.flagSchema);
2504
- const config = {
2505
- ...loadedConfig,
2506
- eval: {
2507
- ...loadedConfig.eval,
2508
- ...options.token && { token: options.token },
2509
- ...options.url && { url: options.url },
2510
- ...options.dataset && { dataset: options.dataset },
2511
- ...options.orgId && { orgId: options.orgId }
2512
- }
2513
- };
2514
- if (isGlobPattern) {
2515
- include = [target];
2516
- } else {
2676
+ var import_commander, import_nanoid, import_node_fs3, createRunId, consoleUrl, loadEvalCommand;
2677
+ var init_eval_command = __esm({
2678
+ "src/cli/commands/eval.command.ts"() {
2679
+ "use strict";
2680
+ init_cjs_shims();
2681
+ import_commander = require("commander");
2682
+ import_nanoid = require("nanoid");
2683
+ import_node_fs3 = require("fs");
2684
+ init_eval_context_runner();
2685
+ init_parse_flag_overrides();
2686
+ init_glob_utils();
2687
+ init_loader();
2688
+ init_errors();
2689
+ init_global_auth();
2690
+ init_node();
2691
+ createRunId = (0, import_nanoid.customAlphabet)("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ", 10);
2692
+ loadEvalCommand = (program2, flagOverrides = {}) => {
2693
+ return program2.addCommand(
2694
+ new import_commander.Command("eval").description("run evals locally").addArgument(
2695
+ new import_commander.Argument("[target]", "file, directory, glob pattern, or eval name").default(
2696
+ ".",
2697
+ "any *.eval.ts file in current directory"
2698
+ )
2699
+ ).option("-w, --watch true", "keep server running and watch for changes", false).option("-t, --token <TOKEN>", "axiom token", getDefaultToken).option("-d, --dataset <DATASET>", "axiom dataset name", process.env.AXIOM_DATASET).option("-u, --url <AXIOM URL>", "axiom url", getDefaultUrl).option("-o, --org-id <ORG ID>", "axiom organization id", getDefaultOrgId).option("-b, --baseline <BASELINE ID>", "id of baseline evaluation to compare against").option("--debug", "run locally without any network operations", false).option("--list", "list evaluations and test cases without running them", false).addOption(new import_commander.Option("-c, --console-url <URL>", "console url override").hideHelp()).action(async (target, options) => {
2517
2700
  try {
2518
- const stat = (0, import_node_fs3.lstatSync)(target);
2519
- if (stat.isDirectory()) {
2520
- include = config?.eval?.include || [];
2521
- } else {
2701
+ if (options.debug) {
2702
+ process.env.AXIOM_DEBUG = "true";
2703
+ }
2704
+ let include = [];
2705
+ let exclude;
2706
+ let testNamePattern;
2707
+ const isGlobPattern = isGlob(target);
2708
+ const { config: loadedConfig } = await loadConfig(".");
2709
+ validateFlagOverrides(flagOverrides, loadedConfig.eval.flagSchema);
2710
+ const config = {
2711
+ ...loadedConfig,
2712
+ eval: {
2713
+ ...loadedConfig.eval,
2714
+ ...options.token && { token: options.token },
2715
+ ...options.url && { url: options.url },
2716
+ ...options.dataset && { dataset: options.dataset },
2717
+ ...options.orgId && { orgId: options.orgId }
2718
+ }
2719
+ };
2720
+ if (isGlobPattern) {
2522
2721
  include = [target];
2722
+ } else {
2723
+ try {
2724
+ const stat = (0, import_node_fs3.lstatSync)(target);
2725
+ if (stat.isDirectory()) {
2726
+ include = config?.eval?.include || [];
2727
+ } else {
2728
+ include = [target];
2729
+ }
2730
+ } catch {
2731
+ testNamePattern = new RegExp(target, "i");
2732
+ include = config?.eval?.include || [];
2733
+ }
2523
2734
  }
2524
- } catch {
2525
- testNamePattern = new RegExp(target, "i");
2526
- include = config?.eval?.include || [];
2527
- }
2528
- }
2529
- exclude = config?.eval?.exclude;
2530
- if (!config?.eval?.instrumentation) {
2531
- console.warn(
2532
- u.yellow(
2533
- "\u26A0 App instrumentation (`eval.instrumentation` in `axiom.config.ts`) not configured. Using default provider."
2534
- )
2535
- );
2536
- console.log("");
2537
- }
2538
- const runId = createRunId();
2539
- consoleUrl = options.consoleUrl;
2540
- await runEvalWithContext(flagOverrides, async () => {
2541
- return runVitest(".", {
2542
- watch: options.watch,
2543
- baseline: options.baseline,
2544
- include,
2545
- exclude,
2546
- testNamePattern,
2547
- debug: options.debug,
2548
- list: options.list,
2549
- overrides: flagOverrides,
2550
- config,
2551
- runId,
2552
- consoleUrl: options.consoleUrl
2553
- });
2554
- });
2555
- } catch (error) {
2556
- if (error instanceof AxiomCLIError) {
2557
- console.error(`
2735
+ exclude = config?.eval?.exclude;
2736
+ if (!config?.eval?.instrumentation) {
2737
+ console.warn(
2738
+ u.yellow(
2739
+ "\u26A0 App instrumentation (`eval.instrumentation` in `axiom.config.ts`) not configured. Using default provider."
2740
+ )
2741
+ );
2742
+ console.log("");
2743
+ }
2744
+ const runId = createRunId();
2745
+ consoleUrl = options.consoleUrl;
2746
+ let runVitestModule;
2747
+ try {
2748
+ runVitestModule = await Promise.resolve().then(() => (init_run_vitest(), run_vitest_exports));
2749
+ } catch (err) {
2750
+ if (err && typeof err === "object" && "code" in err && (err.code === "ERR_MODULE_NOT_FOUND" || err.code === "MODULE_NOT_FOUND")) {
2751
+ throw new AxiomCLIError("Failed to load vitest.");
2752
+ }
2753
+ throw err;
2754
+ }
2755
+ const { runVitest: runVitest2 } = runVitestModule;
2756
+ await runEvalWithContext(flagOverrides, async () => {
2757
+ return runVitest2(".", {
2758
+ watch: options.watch,
2759
+ baseline: options.baseline,
2760
+ include,
2761
+ exclude,
2762
+ testNamePattern,
2763
+ debug: options.debug,
2764
+ list: options.list,
2765
+ overrides: flagOverrides,
2766
+ config,
2767
+ runId,
2768
+ consoleUrl: options.consoleUrl
2769
+ });
2770
+ });
2771
+ } catch (error) {
2772
+ if (error instanceof AxiomCLIError) {
2773
+ console.error(`
2558
2774
  \u274C ${error.message}
2559
2775
  `);
2560
- process.exit(1);
2561
- }
2562
- throw error;
2563
- }
2564
- })
2565
- );
2566
- };
2776
+ process.exit(1);
2777
+ }
2778
+ throw error;
2779
+ }
2780
+ })
2781
+ );
2782
+ };
2783
+ }
2784
+ });
2785
+
2786
+ // src/bin.ts
2787
+ var bin_exports = {};
2788
+ __export(bin_exports, {
2789
+ program: () => program
2790
+ });
2791
+ module.exports = __toCommonJS(bin_exports);
2792
+ init_cjs_shims();
2793
+ var import_commander3 = require("commander");
2794
+ init_eval_command();
2567
2795
 
2568
2796
  // src/cli/commands/auth.command.ts
2569
2797
  init_cjs_shims();
2570
2798
 
2571
2799
  // src/cli/commands/auth-login.command.ts
2572
2800
  init_cjs_shims();
2801
+ init_oauth();
2802
+ init_callback_server();
2803
+ init_config();
2804
+ init_api();
2805
+ init_errors();
2573
2806
  var BASE_HOSTNAME = "axiom.co";
2574
2807
  var getApiUrl = (hostname) => {
2575
2808
  return `https://api.${hostname}`;
@@ -2722,6 +2955,8 @@ function loadAuthLoginCommand(auth, root) {
2722
2955
 
2723
2956
  // src/cli/commands/auth-logout.command.ts
2724
2957
  init_cjs_shims();
2958
+ init_config();
2959
+ init_errors();
2725
2960
  async function logoutCommand(alias) {
2726
2961
  const config = await loadGlobalConfig();
2727
2962
  const profileToRemove = alias || config.active_profile;
@@ -2767,6 +3002,9 @@ function loadAuthLogoutCommand(auth, root) {
2767
3002
 
2768
3003
  // src/cli/commands/auth-status.command.ts
2769
3004
  init_cjs_shims();
3005
+ init_config();
3006
+ init_api();
3007
+ init_errors();
2770
3008
  async function statusCommand() {
2771
3009
  const config = await loadGlobalConfig();
2772
3010
  if (Object.keys(config.profiles).length === 0) {
@@ -2832,6 +3070,8 @@ function loadAuthStatusCommand(auth, program2) {
2832
3070
 
2833
3071
  // src/cli/commands/auth-switch.command.ts
2834
3072
  init_cjs_shims();
3073
+ init_config();
3074
+ init_errors();
2835
3075
  async function promptSelect2(message, choices) {
2836
3076
  console.log(`
2837
3077
  ${message}`);
@@ -2927,6 +3167,8 @@ function loadAuthCommand(program2) {
2927
3167
  }
2928
3168
 
2929
3169
  // src/bin.ts
3170
+ init_parse_flag_overrides();
3171
+ init_global_auth();
2930
3172
  var import_env = __toESM(require("@next/env"), 1);
2931
3173
 
2932
3174
  // src/cli/commands/version.command.ts
@@ -2935,7 +3177,7 @@ var import_commander2 = require("commander");
2935
3177
  var loadVersionCommand = (program2) => {
2936
3178
  return program2.addCommand(
2937
3179
  new import_commander2.Command("version").description("cli version").action(() => {
2938
- console.log("0.34.0");
3180
+ console.log("0.34.2");
2939
3181
  })
2940
3182
  );
2941
3183
  };
@@ -2945,7 +3187,7 @@ var { loadEnvConfig } = import_env.default;
2945
3187
  loadEnvConfig(process.cwd());
2946
3188
  var { cleanedArgv, overrides } = extractOverrides(process.argv.slice(2));
2947
3189
  var program = new import_commander3.Command();
2948
- program.name("axiom").description("Axiom's CLI to manage your objects and run evals").version("0.34.0");
3190
+ program.name("axiom").description("Axiom's CLI to manage your objects and run evals").version("0.34.2");
2949
3191
  program.hook("preAction", async (_, actionCommand) => {
2950
3192
  const commandName = actionCommand.name();
2951
3193
  const parentCommand = actionCommand.parent;