axiom 0.34.1 → 0.35.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/bin.cjs CHANGED
@@ -43,137 +43,13 @@ var init_cjs_shims = __esm({
43
43
  }
44
44
  });
45
45
 
46
- // src/bin.ts
47
- var bin_exports = {};
48
- __export(bin_exports, {
49
- program: () => program
50
- });
51
- module.exports = __toCommonJS(bin_exports);
52
- init_cjs_shims();
53
- var import_commander3 = require("commander");
54
-
55
- // src/cli/commands/eval.command.ts
56
- init_cjs_shims();
57
- var import_commander = require("commander");
58
- var import_nanoid = require("nanoid");
59
-
60
- // src/evals/run-vitest.ts
61
- init_cjs_shims();
62
-
63
- // ../../node_modules/.pnpm/tinyrainbow@2.0.0/node_modules/tinyrainbow/dist/node.js
64
- init_cjs_shims();
65
-
66
- // ../../node_modules/.pnpm/tinyrainbow@2.0.0/node_modules/tinyrainbow/dist/chunk-BVHSVHOK.js
67
- init_cjs_shims();
68
- var f = {
69
- reset: [0, 0],
70
- bold: [1, 22, "\x1B[22m\x1B[1m"],
71
- dim: [2, 22, "\x1B[22m\x1B[2m"],
72
- italic: [3, 23],
73
- underline: [4, 24],
74
- inverse: [7, 27],
75
- hidden: [8, 28],
76
- strikethrough: [9, 29],
77
- black: [30, 39],
78
- red: [31, 39],
79
- green: [32, 39],
80
- yellow: [33, 39],
81
- blue: [34, 39],
82
- magenta: [35, 39],
83
- cyan: [36, 39],
84
- white: [37, 39],
85
- gray: [90, 39],
86
- bgBlack: [40, 49],
87
- bgRed: [41, 49],
88
- bgGreen: [42, 49],
89
- bgYellow: [43, 49],
90
- bgBlue: [44, 49],
91
- bgMagenta: [45, 49],
92
- bgCyan: [46, 49],
93
- bgWhite: [47, 49],
94
- blackBright: [90, 39],
95
- redBright: [91, 39],
96
- greenBright: [92, 39],
97
- yellowBright: [93, 39],
98
- blueBright: [94, 39],
99
- magentaBright: [95, 39],
100
- cyanBright: [96, 39],
101
- whiteBright: [97, 39],
102
- bgBlackBright: [100, 49],
103
- bgRedBright: [101, 49],
104
- bgGreenBright: [102, 49],
105
- bgYellowBright: [103, 49],
106
- bgBlueBright: [104, 49],
107
- bgMagentaBright: [105, 49],
108
- bgCyanBright: [106, 49],
109
- bgWhiteBright: [107, 49]
110
- };
111
- var h = Object.entries(f);
112
- function a(n) {
113
- return String(n);
114
- }
115
- a.open = "";
116
- a.close = "";
117
- function C(n = false) {
118
- let e = typeof process != "undefined" ? process : void 0, i = (e == null ? void 0 : e.env) || {}, g = (e == null ? void 0 : e.argv) || [];
119
- return !("NO_COLOR" in i || g.includes("--no-color")) && ("FORCE_COLOR" in i || g.includes("--color") || (e == null ? void 0 : e.platform) === "win32" || n && i.TERM !== "dumb" || "CI" in i) || typeof window != "undefined" && !!window.chrome;
120
- }
121
- function p(n = false) {
122
- let e = C(n), i = (r2, t, c, o) => {
123
- let l = "", s2 = 0;
124
- do
125
- l += r2.substring(s2, o) + c, s2 = o + t.length, o = r2.indexOf(t, s2);
126
- while (~o);
127
- return l + r2.substring(s2);
128
- }, g = (r2, t, c = r2) => {
129
- let o = (l) => {
130
- let s2 = String(l), b = s2.indexOf(t, r2.length);
131
- return ~b ? r2 + i(s2, t, c, b) + t : r2 + s2 + t;
132
- };
133
- return o.open = r2, o.close = t, o;
134
- }, u2 = {
135
- isColorSupported: e
136
- }, d = (r2) => `\x1B[${r2}m`;
137
- for (let [r2, t] of h)
138
- u2[r2] = e ? g(
139
- d(t[0]),
140
- d(t[1]),
141
- t[2]
142
- ) : a;
143
- return u2;
144
- }
145
-
146
- // ../../node_modules/.pnpm/tinyrainbow@2.0.0/node_modules/tinyrainbow/dist/node.js
147
- var import_tty = require("tty");
148
- var r = process.env.FORCE_TTY !== void 0 || (0, import_tty.isatty)(1);
149
- var u = p(r);
150
-
151
- // src/evals/run-vitest.ts
152
- var import_node_path = require("path");
153
- var import_node_fs = require("fs");
154
- var import_node_os = require("os");
155
- var import_node_path2 = __toESM(require("path"), 1);
156
- var import_vite_tsconfig_paths = __toESM(require("vite-tsconfig-paths"), 1);
157
- var import_node = require("vitest/node");
158
-
159
- // src/evals/reporter.ts
160
- init_cjs_shims();
161
-
162
- // src/evals/context/storage.ts
163
- init_cjs_shims();
164
- var import_api9 = require("@opentelemetry/api");
165
-
166
46
  // src/evals/context/manager.ts
167
- init_cjs_shims();
168
- var import_node_module = require("module");
169
- var CONTEXT_MANAGER_SYMBOL = Symbol.for("axiom.context_manager");
170
47
  function getGlobalContextManager() {
171
48
  return globalThis[CONTEXT_MANAGER_SYMBOL];
172
49
  }
173
50
  function setGlobalContextManager(manager) {
174
51
  globalThis[CONTEXT_MANAGER_SYMBOL] = manager;
175
52
  }
176
- var isNodeJS = typeof process !== "undefined" && !!process.versions?.node;
177
53
  function getContextManager() {
178
54
  const existing = getGlobalContextManager();
179
55
  if (existing) return existing;
@@ -229,27 +105,34 @@ function createAsyncHook(_name) {
229
105
  }
230
106
  };
231
107
  }
232
-
233
- // src/app-scope.ts
234
- init_cjs_shims();
108
+ var import_node_module, CONTEXT_MANAGER_SYMBOL, isNodeJS;
109
+ var init_manager = __esm({
110
+ "src/evals/context/manager.ts"() {
111
+ "use strict";
112
+ init_cjs_shims();
113
+ import_node_module = require("module");
114
+ CONTEXT_MANAGER_SYMBOL = Symbol.for("axiom.context_manager");
115
+ isNodeJS = typeof process !== "undefined" && !!process.versions?.node;
116
+ }
117
+ });
235
118
 
236
119
  // src/evals/context/global-flags.ts
237
- init_cjs_shims();
238
- var GLOBAL_OVERRIDES_SYMBOL = Symbol.for("axiom.global_flag_overrides");
239
120
  function setRoot(val) {
240
121
  globalThis[GLOBAL_OVERRIDES_SYMBOL] = val;
241
122
  }
242
123
  function setGlobalFlagOverrides(overrides2) {
243
124
  setRoot(overrides2);
244
125
  }
245
-
246
- // src/validate-flags.ts
247
- init_cjs_shims();
248
- var import_zod4 = require("zod");
126
+ var GLOBAL_OVERRIDES_SYMBOL;
127
+ var init_global_flags = __esm({
128
+ "src/evals/context/global-flags.ts"() {
129
+ "use strict";
130
+ init_cjs_shims();
131
+ GLOBAL_OVERRIDES_SYMBOL = Symbol.for("axiom.global_flag_overrides");
132
+ }
133
+ });
249
134
 
250
135
  // src/cli/utils/format-zod-errors.ts
251
- init_cjs_shims();
252
- var import_zod = require("zod");
253
136
  function formatZodErrors(error) {
254
137
  const issues = error.issues;
255
138
  const messages = [];
@@ -345,13 +228,16 @@ function generateExampleForIssue(issue, path3) {
345
228
  }
346
229
  return null;
347
230
  }
348
-
349
- // src/util/dot-path.ts
350
- init_cjs_shims();
351
- var import_zod2 = require("zod");
231
+ var import_zod;
232
+ var init_format_zod_errors = __esm({
233
+ "src/cli/utils/format-zod-errors.ts"() {
234
+ "use strict";
235
+ init_cjs_shims();
236
+ import_zod = require("zod");
237
+ }
238
+ });
352
239
 
353
240
  // src/util/zod-internals.ts
354
- init_cjs_shims();
355
241
  function isZodV4Schema(schema) {
356
242
  if (!schema || typeof schema !== "object") return false;
357
243
  const s2 = schema;
@@ -381,20 +267,6 @@ function getDefRawType(def) {
381
267
  if (raw == null) return void 0;
382
268
  return typeof raw === "string" ? raw : String(raw);
383
269
  }
384
- var KNOWN_KINDS = /* @__PURE__ */ new Set([
385
- "object",
386
- "optional",
387
- "default",
388
- "nullable",
389
- "readonly",
390
- "prefault",
391
- "nonoptional",
392
- "catch",
393
- "array",
394
- "record",
395
- "union",
396
- "discriminatedunion"
397
- ]);
398
270
  function getKind(schemaOrDef) {
399
271
  const def = schemaOrDef && typeof schemaOrDef === "object" && "type" in schemaOrDef ? schemaOrDef : getDef(schemaOrDef);
400
272
  const raw = getDefRawType(def);
@@ -429,16 +301,6 @@ function getDefaultValue(schema) {
429
301
  const def = getDef(schema);
430
302
  return def?.defaultValue;
431
303
  }
432
- var TRANSPARENT_WRAPPERS = [
433
- "optional",
434
- "nullable",
435
- "default",
436
- "readonly",
437
- "prefault",
438
- "nonoptional",
439
- "catch"
440
- // transparent for schema structure, but alters error behavior
441
- ];
442
304
  function unwrapTransparent(schema) {
443
305
  let current = schema;
444
306
  for (let i = 0; i < 10; i++) {
@@ -454,6 +316,37 @@ function unwrapTransparent(schema) {
454
316
  }
455
317
  return current;
456
318
  }
319
+ var KNOWN_KINDS, TRANSPARENT_WRAPPERS;
320
+ var init_zod_internals = __esm({
321
+ "src/util/zod-internals.ts"() {
322
+ "use strict";
323
+ init_cjs_shims();
324
+ KNOWN_KINDS = /* @__PURE__ */ new Set([
325
+ "object",
326
+ "optional",
327
+ "default",
328
+ "nullable",
329
+ "readonly",
330
+ "prefault",
331
+ "nonoptional",
332
+ "catch",
333
+ "array",
334
+ "record",
335
+ "union",
336
+ "discriminatedunion"
337
+ ]);
338
+ TRANSPARENT_WRAPPERS = [
339
+ "optional",
340
+ "nullable",
341
+ "default",
342
+ "readonly",
343
+ "prefault",
344
+ "nonoptional",
345
+ "catch"
346
+ // transparent for schema structure, but alters error behavior
347
+ ];
348
+ }
349
+ });
457
350
 
458
351
  // src/util/dot-path.ts
459
352
  function parsePath(path3) {
@@ -509,10 +402,17 @@ function isValidPath(schema, segments) {
509
402
  }
510
403
  return true;
511
404
  }
405
+ var import_zod2;
406
+ var init_dot_path = __esm({
407
+ "src/util/dot-path.ts"() {
408
+ "use strict";
409
+ init_cjs_shims();
410
+ import_zod2 = require("zod");
411
+ init_zod_internals();
412
+ }
413
+ });
512
414
 
513
415
  // src/util/deep-partial-schema.ts
514
- init_cjs_shims();
515
- var import_zod3 = require("zod");
516
416
  function makeDeepPartial(schema) {
517
417
  const shape = schema.shape;
518
418
  const newShape = {};
@@ -562,180 +462,246 @@ function makeDeepPartialField(fieldSchema) {
562
462
  }
563
463
  return fieldSchema.optional();
564
464
  }
465
+ var import_zod3;
466
+ var init_deep_partial_schema = __esm({
467
+ "src/util/deep-partial-schema.ts"() {
468
+ "use strict";
469
+ init_cjs_shims();
470
+ import_zod3 = require("zod");
471
+ init_zod_internals();
472
+ }
473
+ });
565
474
 
566
- // src/app-scope.ts
567
- var import_api8 = require("@opentelemetry/api");
568
- var import_zod5 = require("zod");
475
+ // src/validate-flags.ts
476
+ var import_zod4;
477
+ var init_validate_flags = __esm({
478
+ "src/validate-flags.ts"() {
479
+ "use strict";
480
+ init_cjs_shims();
481
+ import_zod4 = require("zod");
482
+ init_global_flags();
483
+ init_format_zod_errors();
484
+ init_dot_path();
485
+ init_deep_partial_schema();
486
+ init_zod_internals();
487
+ }
488
+ });
569
489
 
570
490
  // src/otel/utils/to-otel-attribute.ts
571
- init_cjs_shims();
572
- var import_api = require("@opentelemetry/api");
573
-
574
- // src/otel/index.ts
575
- init_cjs_shims();
576
-
577
- // src/otel/withSpan.ts
578
- init_cjs_shims();
579
- var import_api6 = require("@opentelemetry/api");
491
+ var import_api;
492
+ var init_to_otel_attribute = __esm({
493
+ "src/otel/utils/to-otel-attribute.ts"() {
494
+ "use strict";
495
+ init_cjs_shims();
496
+ import_api = require("@opentelemetry/api");
497
+ }
498
+ });
580
499
 
581
500
  // src/otel/withSpanBaggageKey.ts
582
- init_cjs_shims();
583
-
584
- // src/otel/utils/wrapperUtils.ts
585
- init_cjs_shims();
586
- var import_api5 = require("@opentelemetry/api");
587
-
588
- // src/otel/semconv/attributes.ts
589
- init_cjs_shims();
590
- var import_semantic_conventions = require("@opentelemetry/semantic-conventions");
501
+ var init_withSpanBaggageKey = __esm({
502
+ "src/otel/withSpanBaggageKey.ts"() {
503
+ "use strict";
504
+ init_cjs_shims();
505
+ }
506
+ });
591
507
 
592
508
  // src/otel/semconv/eval_proposal.ts
593
- init_cjs_shims();
509
+ var init_eval_proposal = __esm({
510
+ "src/otel/semconv/eval_proposal.ts"() {
511
+ "use strict";
512
+ init_cjs_shims();
513
+ }
514
+ });
594
515
 
595
516
  // src/otel/semconv/attributes.ts
596
- var import_incubating = require("@opentelemetry/semantic-conventions/incubating");
517
+ var import_semantic_conventions, import_incubating;
518
+ var init_attributes = __esm({
519
+ "src/otel/semconv/attributes.ts"() {
520
+ "use strict";
521
+ init_cjs_shims();
522
+ import_semantic_conventions = require("@opentelemetry/semantic-conventions");
523
+ init_eval_proposal();
524
+ import_incubating = require("@opentelemetry/semantic-conventions/incubating");
525
+ }
526
+ });
597
527
 
598
- // src/otel/startActiveSpan.ts
599
- init_cjs_shims();
600
- var import_api2 = require("@opentelemetry/api");
528
+ // src/schema.ts
529
+ var SCHEMA_VERSION, SCHEMA_BASE_URL, SCHEMA_URL;
530
+ var init_schema = __esm({
531
+ "src/schema.ts"() {
532
+ "use strict";
533
+ init_cjs_shims();
534
+ SCHEMA_VERSION = "0.0.2";
535
+ SCHEMA_BASE_URL = "https://axiom.co/ai/schemas/";
536
+ SCHEMA_URL = `${SCHEMA_BASE_URL}${SCHEMA_VERSION}`;
537
+ }
538
+ });
601
539
 
602
- // src/otel/initAxiomAI.ts
603
- init_cjs_shims();
604
- var import_api4 = require("@opentelemetry/api");
540
+ // src/otel/startActiveSpan.ts
541
+ var import_api2;
542
+ var init_startActiveSpan = __esm({
543
+ "src/otel/startActiveSpan.ts"() {
544
+ "use strict";
545
+ init_cjs_shims();
546
+ import_api2 = require("@opentelemetry/api");
547
+ }
548
+ });
605
549
 
606
550
  // package.json
607
- var package_default = {
608
- name: "axiom",
609
- version: "0.34.1",
610
- type: "module",
611
- author: "Axiom, Inc.",
612
- contributors: [
613
- "Islam Shehata <islam@axiom.co>",
614
- "Chris Ehrlich <chris@axiom.co>",
615
- "Gabriel de Andrade <gabriel@axiom.co>"
616
- ],
617
- scripts: {
618
- dev: "tsup --watch",
619
- build: "tsup && chmod +x dist/bin.js",
620
- format: "prettier --write .",
621
- "format:check": "prettier --check .",
622
- lint: "eslint './**/*.{js,ts}'",
623
- typecheck: "tsc --noEmit",
624
- test: "vitest run",
625
- "test:watch": "vitest --watch",
626
- publint: "npx publint"
627
- },
628
- types: "./dist/index.d.ts",
629
- main: "./dist/index.cjs",
630
- module: "./dist/index.js",
631
- bin: {
632
- axiom: "./dist/bin.js"
633
- },
634
- exports: {
635
- "./ai": {
636
- import: {
637
- types: "./dist/index.d.ts",
638
- default: "./dist/index.js"
551
+ var package_default;
552
+ var init_package = __esm({
553
+ "package.json"() {
554
+ package_default = {
555
+ name: "axiom",
556
+ version: "0.35.0",
557
+ type: "module",
558
+ author: "Axiom, Inc.",
559
+ contributors: [
560
+ "Islam Shehata <islam@axiom.co>",
561
+ "Chris Ehrlich <chris@axiom.co>",
562
+ "Gabriel de Andrade <gabriel@axiom.co>"
563
+ ],
564
+ scripts: {
565
+ dev: "tsup --watch",
566
+ build: "tsup && chmod +x dist/bin.js",
567
+ format: "prettier --write .",
568
+ "format:check": "prettier --check .",
569
+ lint: "eslint './**/*.{js,ts}'",
570
+ typecheck: "tsc --noEmit",
571
+ test: "vitest run",
572
+ "test:watch": "vitest --watch",
573
+ publint: "npx publint"
639
574
  },
640
- require: {
641
- types: "./dist/index.d.cts",
642
- default: "./dist/index.cjs"
643
- }
644
- },
645
- "./ai/evals": {
646
- import: {
647
- types: "./dist/evals.d.ts",
648
- default: "./dist/evals.js"
575
+ types: "./dist/index.d.ts",
576
+ main: "./dist/index.cjs",
577
+ module: "./dist/index.js",
578
+ bin: {
579
+ axiom: "./dist/bin.js"
649
580
  },
650
- require: {
651
- types: "./dist/evals.d.cts",
652
- default: "./dist/evals.cjs"
653
- }
654
- },
655
- "./ai/config": {
656
- import: {
657
- types: "./dist/config.d.ts",
658
- default: "./dist/config.js"
581
+ exports: {
582
+ "./ai": {
583
+ import: {
584
+ types: "./dist/index.d.ts",
585
+ default: "./dist/index.js"
586
+ },
587
+ require: {
588
+ types: "./dist/index.d.cts",
589
+ default: "./dist/index.cjs"
590
+ }
591
+ },
592
+ "./ai/evals": {
593
+ import: {
594
+ types: "./dist/evals.d.ts",
595
+ default: "./dist/evals.js"
596
+ },
597
+ require: {
598
+ types: "./dist/evals.d.cts",
599
+ default: "./dist/evals.cjs"
600
+ }
601
+ },
602
+ "./ai/config": {
603
+ import: {
604
+ types: "./dist/config.d.ts",
605
+ default: "./dist/config.js"
606
+ },
607
+ require: {
608
+ types: "./dist/config.d.cts",
609
+ default: "./dist/config.cjs"
610
+ }
611
+ },
612
+ "./ai/feedback": {
613
+ import: {
614
+ types: "./dist/feedback.d.ts",
615
+ default: "./dist/feedback.js"
616
+ },
617
+ require: {
618
+ types: "./dist/feedback.d.cts",
619
+ default: "./dist/feedback.cjs"
620
+ }
621
+ }
659
622
  },
660
- require: {
661
- types: "./dist/config.d.cts",
662
- default: "./dist/config.cjs"
663
- }
664
- }
665
- },
666
- keywords: [
667
- "axiom",
668
- "logging",
669
- "ai",
670
- "otel",
671
- "opentelemetry"
672
- ],
673
- repository: {
674
- type: "git",
675
- url: "git+https://github.com/axiomhq/ai.git",
676
- directory: "packages/ai"
677
- },
678
- license: "MIT",
679
- dependencies: {
680
- "@next/env": "^15.4.2",
681
- "@opentelemetry/auto-instrumentations-node": "^0.60.1",
682
- "@opentelemetry/context-async-hooks": "^2.0.1",
683
- "@opentelemetry/exporter-trace-otlp-http": "^0.202.0",
684
- "@opentelemetry/resources": "^2.0.1",
685
- "@opentelemetry/sdk-trace-node": "^2.0.1",
686
- "@opentelemetry/semantic-conventions": "^1.37.0",
687
- "@sinclair/typebox": "^0.34.37",
688
- c12: "^2.0.4",
689
- commander: "^14.0.0",
690
- defu: "^6.1.4",
691
- handlebars: "^4.7.8",
692
- nanoid: "^5.1.5",
693
- open: "^10.1.0",
694
- "vite-tsconfig-paths": "^5.1.4"
695
- },
696
- peerDependencies: {
697
- "@opentelemetry/api": "^1.9.0",
698
- zod: "^3.25.0 || ^4.0.0"
699
- },
700
- devDependencies: {
701
- "@ai-sdk/anthropicv1": "npm:@ai-sdk/anthropic@^1.2.12",
702
- "@ai-sdk/anthropicv2": "npm:@ai-sdk/anthropic@^2.0.44",
703
- "@ai-sdk/openaiv1": "npm:@ai-sdk/openai@^1.3.24",
704
- "@ai-sdk/openaiv2": "npm:@ai-sdk/openai@^2.0.67",
705
- "@ai-sdk/providerv1": "npm:@ai-sdk/provider@^1.1.3",
706
- "@ai-sdk/providerv2": "npm:@ai-sdk/provider@^2.0.0",
707
- "@opentelemetry/api": "^1.9.0",
708
- "@opentelemetry/core": "^2.0.1",
709
- "@opentelemetry/sdk-trace-base": "^2.0.1",
710
- "@opentelemetry/sdk-trace-node": "^2.0.1",
711
- "@repo/eslint-config": "workspace:*",
712
- "@types/node": "^22.15.29",
713
- "@vitest/coverage-v8": "^4.0.0",
714
- aiv4: "npm:ai@^4.3.19",
715
- aiv5: "npm:ai@^5.0.93",
716
- esbuild: "^0.25.8",
717
- eslint: "catalog:",
718
- msw: "^2.12.2",
719
- prettier: "catalog:",
720
- tinyrainbow: "^2.0.0",
721
- tsup: "catalog:",
722
- typescript: "catalog:",
723
- vitest: "catalog:",
724
- zod: "catalog:"
725
- },
726
- files: [
727
- "dist"
728
- ],
729
- packageManager: "pnpm@10.16.1"
730
- };
623
+ keywords: [
624
+ "axiom",
625
+ "logging",
626
+ "ai",
627
+ "otel",
628
+ "opentelemetry"
629
+ ],
630
+ repository: {
631
+ type: "git",
632
+ url: "git+https://github.com/axiomhq/ai.git",
633
+ directory: "packages/ai"
634
+ },
635
+ license: "MIT",
636
+ dependencies: {
637
+ "@next/env": "^15.4.2",
638
+ "@opentelemetry/auto-instrumentations-node": "^0.60.1",
639
+ "@opentelemetry/context-async-hooks": "^2.0.1",
640
+ "@opentelemetry/exporter-trace-otlp-http": "^0.202.0",
641
+ "@opentelemetry/resources": "^2.0.1",
642
+ "@opentelemetry/sdk-trace-node": "^2.0.1",
643
+ "@opentelemetry/semantic-conventions": "^1.37.0",
644
+ "@sinclair/typebox": "^0.34.37",
645
+ c12: "^2.0.4",
646
+ commander: "^14.0.0",
647
+ defu: "^6.1.4",
648
+ handlebars: "^4.7.8",
649
+ nanoid: "^5.1.5",
650
+ open: "^10.1.0",
651
+ "vite-tsconfig-paths": "^5.1.4",
652
+ vitest: "^4.0.0"
653
+ },
654
+ peerDependencies: {
655
+ "@opentelemetry/api": "^1.9.0",
656
+ zod: "^3.25.0 || ^4.0.0"
657
+ },
658
+ devDependencies: {
659
+ "@ai-sdk/anthropicv1": "npm:@ai-sdk/anthropic@^1.2.12",
660
+ "@ai-sdk/anthropicv2": "npm:@ai-sdk/anthropic@^2.0.44",
661
+ "@ai-sdk/openaiv1": "npm:@ai-sdk/openai@^1.3.24",
662
+ "@ai-sdk/openaiv2": "npm:@ai-sdk/openai@^2.0.67",
663
+ "@ai-sdk/providerv1": "npm:@ai-sdk/provider@^1.1.3",
664
+ "@ai-sdk/providerv2": "npm:@ai-sdk/provider@^2.0.0",
665
+ "@opentelemetry/api": "^1.9.0",
666
+ "@opentelemetry/core": "^2.0.1",
667
+ "@opentelemetry/sdk-trace-base": "^2.0.1",
668
+ "@opentelemetry/sdk-trace-node": "^2.0.1",
669
+ "@repo/eslint-config": "workspace:*",
670
+ "@types/node": "^22.15.29",
671
+ "@vitest/coverage-v8": "^4.0.0",
672
+ aiv4: "npm:ai@^4.3.19",
673
+ aiv5: "npm:ai@^5.0.93",
674
+ esbuild: "^0.25.8",
675
+ eslint: "catalog:",
676
+ msw: "^2.12.2",
677
+ prettier: "catalog:",
678
+ tinyrainbow: "^2.0.0",
679
+ tsup: "catalog:",
680
+ typescript: "catalog:",
681
+ vitest: "catalog:",
682
+ zod: "catalog:"
683
+ },
684
+ files: [
685
+ "dist"
686
+ ],
687
+ packageManager: "pnpm@10.16.1"
688
+ };
689
+ }
690
+ });
731
691
 
732
692
  // src/otel/utils/redaction.ts
733
- init_cjs_shims();
734
- var import_api3 = require("@opentelemetry/api");
735
- var AXIOM_AI_REDACTION_KEY = Symbol.for("__axiom_ai_redaction__");
693
+ var import_api3, AXIOM_AI_REDACTION_KEY;
694
+ var init_redaction = __esm({
695
+ "src/otel/utils/redaction.ts"() {
696
+ "use strict";
697
+ init_cjs_shims();
698
+ import_api3 = require("@opentelemetry/api");
699
+ init_withSpanBaggageKey();
700
+ AXIOM_AI_REDACTION_KEY = Symbol.for("__axiom_ai_redaction__");
701
+ }
702
+ });
736
703
 
737
704
  // src/otel/initAxiomAI.ts
738
- var AXIOM_AI_SCOPE_KEY = Symbol.for("__axiom_ai_scope__");
739
705
  function extractTracerScope(tracer) {
740
706
  const tracerAny = tracer;
741
707
  const name = tracerAny._instrumentationScope?.name || tracerAny.instrumentationLibrary?.name || package_default.name;
@@ -758,51 +724,186 @@ function initAxiomAI(config) {
758
724
  globalThis[AXIOM_AI_REDACTION_KEY] = config.redactionPolicy;
759
725
  }
760
726
  }
727
+ var import_api4, AXIOM_AI_SCOPE_KEY;
728
+ var init_initAxiomAI = __esm({
729
+ "src/otel/initAxiomAI.ts"() {
730
+ "use strict";
731
+ init_cjs_shims();
732
+ import_api4 = require("@opentelemetry/api");
733
+ init_package();
734
+ init_redaction();
735
+ AXIOM_AI_SCOPE_KEY = Symbol.for("__axiom_ai_scope__");
736
+ }
737
+ });
761
738
 
762
- // src/util/name-validation-runtime.ts
763
- init_cjs_shims();
739
+ // src/otel/utils/wrapperUtils.ts
740
+ var import_api5;
741
+ var init_wrapperUtils = __esm({
742
+ "src/otel/utils/wrapperUtils.ts"() {
743
+ "use strict";
744
+ init_cjs_shims();
745
+ import_api5 = require("@opentelemetry/api");
746
+ init_attributes();
747
+ init_schema();
748
+ init_withSpanBaggageKey();
749
+ init_startActiveSpan();
750
+ init_initAxiomAI();
751
+ }
752
+ });
764
753
 
765
- // src/otel/vercel.ts
766
- init_cjs_shims();
754
+ // src/util/name-validation-runtime.ts
755
+ var init_name_validation_runtime = __esm({
756
+ "src/util/name-validation-runtime.ts"() {
757
+ "use strict";
758
+ init_cjs_shims();
759
+ }
760
+ });
767
761
 
768
- // src/otel/AxiomWrappedLanguageModelV1.ts
769
- init_cjs_shims();
762
+ // src/otel/withSpan.ts
763
+ var import_api6;
764
+ var init_withSpan = __esm({
765
+ "src/otel/withSpan.ts"() {
766
+ "use strict";
767
+ init_cjs_shims();
768
+ import_api6 = require("@opentelemetry/api");
769
+ init_withSpanBaggageKey();
770
+ init_wrapperUtils();
771
+ init_name_validation_runtime();
772
+ }
773
+ });
770
774
 
771
- // src/otel/middleware.ts
772
- init_cjs_shims();
773
- var import_api7 = require("@opentelemetry/api");
775
+ // src/otel/utils/contentSanitizer.ts
776
+ var init_contentSanitizer = __esm({
777
+ "src/otel/utils/contentSanitizer.ts"() {
778
+ "use strict";
779
+ init_cjs_shims();
780
+ }
781
+ });
774
782
 
775
783
  // src/otel/completionUtils.ts
776
- init_cjs_shims();
777
-
778
- // src/otel/utils/contentSanitizer.ts
779
- init_cjs_shims();
784
+ var init_completionUtils = __esm({
785
+ "src/otel/completionUtils.ts"() {
786
+ "use strict";
787
+ init_cjs_shims();
788
+ init_contentSanitizer();
789
+ }
790
+ });
780
791
 
781
792
  // src/util/promptUtils.ts
782
- init_cjs_shims();
793
+ var init_promptUtils = __esm({
794
+ "src/util/promptUtils.ts"() {
795
+ "use strict";
796
+ init_cjs_shims();
797
+ }
798
+ });
783
799
 
784
800
  // src/otel/utils/normalized.ts
785
- init_cjs_shims();
801
+ var init_normalized = __esm({
802
+ "src/otel/utils/normalized.ts"() {
803
+ "use strict";
804
+ init_cjs_shims();
805
+ }
806
+ });
807
+
808
+ // src/util/currentUnixTime.ts
809
+ var init_currentUnixTime = __esm({
810
+ "src/util/currentUnixTime.ts"() {
811
+ "use strict";
812
+ init_cjs_shims();
813
+ }
814
+ });
786
815
 
787
816
  // src/otel/streaming/aggregators.ts
788
- init_cjs_shims();
817
+ var init_aggregators = __esm({
818
+ "src/otel/streaming/aggregators.ts"() {
819
+ "use strict";
820
+ init_cjs_shims();
821
+ init_currentUnixTime();
822
+ }
823
+ });
789
824
 
790
- // src/util/currentUnixTime.ts
791
- init_cjs_shims();
825
+ // src/otel/middleware.ts
826
+ var import_api7;
827
+ var init_middleware = __esm({
828
+ "src/otel/middleware.ts"() {
829
+ "use strict";
830
+ init_cjs_shims();
831
+ import_api7 = require("@opentelemetry/api");
832
+ init_attributes();
833
+ init_completionUtils();
834
+ init_promptUtils();
835
+ init_contentSanitizer();
836
+ init_wrapperUtils();
837
+ init_normalized();
838
+ init_aggregators();
839
+ init_redaction();
840
+ }
841
+ });
842
+
843
+ // src/otel/AxiomWrappedLanguageModelV1.ts
844
+ var init_AxiomWrappedLanguageModelV1 = __esm({
845
+ "src/otel/AxiomWrappedLanguageModelV1.ts"() {
846
+ "use strict";
847
+ init_cjs_shims();
848
+ init_middleware();
849
+ }
850
+ });
792
851
 
793
852
  // src/otel/AxiomWrappedLanguageModelV2.ts
794
- init_cjs_shims();
853
+ var init_AxiomWrappedLanguageModelV2 = __esm({
854
+ "src/otel/AxiomWrappedLanguageModelV2.ts"() {
855
+ "use strict";
856
+ init_cjs_shims();
857
+ init_middleware();
858
+ }
859
+ });
860
+
861
+ // src/otel/vercel.ts
862
+ var init_vercel = __esm({
863
+ "src/otel/vercel.ts"() {
864
+ "use strict";
865
+ init_cjs_shims();
866
+ init_AxiomWrappedLanguageModelV1();
867
+ init_AxiomWrappedLanguageModelV2();
868
+ }
869
+ });
870
+
871
+ // src/otel/index.ts
872
+ var init_otel = __esm({
873
+ "src/otel/index.ts"() {
874
+ "use strict";
875
+ init_cjs_shims();
876
+ init_withSpan();
877
+ init_vercel();
878
+ init_attributes();
879
+ }
880
+ });
881
+
882
+ // src/app-scope.ts
883
+ var import_api8, import_zod5;
884
+ var init_app_scope = __esm({
885
+ "src/app-scope.ts"() {
886
+ "use strict";
887
+ init_cjs_shims();
888
+ init_global_flags();
889
+ init_storage();
890
+ init_validate_flags();
891
+ init_dot_path();
892
+ init_zod_internals();
893
+ import_api8 = require("@opentelemetry/api");
894
+ import_zod5 = require("zod");
895
+ init_to_otel_attribute();
896
+ init_otel();
897
+ }
898
+ });
795
899
 
796
900
  // src/evals/context/storage.ts
797
- var CONFIG_SCOPE_SYMBOL = Symbol.for("axiom.eval.configScope");
798
- var AXIOM_CONFIG_SYMBOL = Symbol.for("axiom.eval.config");
799
901
  function getAxiomConfig() {
800
902
  return globalThis[AXIOM_CONFIG_SYMBOL];
801
903
  }
802
904
  function setAxiomConfig(config) {
803
905
  globalThis[AXIOM_CONFIG_SYMBOL] = config;
804
906
  }
805
- var EVAL_CONTEXT = createAsyncHook("eval-context");
806
907
  function getEvalContext() {
807
908
  const ctx = EVAL_CONTEXT.get();
808
909
  if (!ctx) {
@@ -842,628 +943,297 @@ function withEvalContext(options = {}, fn) {
842
943
  fn
843
944
  );
844
945
  }
845
-
846
- // src/evals/reporter.console-utils.ts
847
- init_cjs_shims();
848
- function formatPercentage(value) {
849
- if (!Number.isFinite(value)) {
850
- return "N/A";
851
- }
852
- return Number(value * 100).toFixed(2) + "%";
853
- }
854
- function formatDiff(current, baseline) {
855
- if (!Number.isFinite(current) || !Number.isFinite(baseline)) {
856
- return { text: "N/A", color: u.dim };
946
+ var import_api9, CONFIG_SCOPE_SYMBOL, AXIOM_CONFIG_SYMBOL, EVAL_CONTEXT;
947
+ var init_storage = __esm({
948
+ "src/evals/context/storage.ts"() {
949
+ "use strict";
950
+ init_cjs_shims();
951
+ import_api9 = require("@opentelemetry/api");
952
+ init_manager();
953
+ init_app_scope();
954
+ CONFIG_SCOPE_SYMBOL = Symbol.for("axiom.eval.configScope");
955
+ AXIOM_CONFIG_SYMBOL = Symbol.for("axiom.eval.config");
956
+ EVAL_CONTEXT = createAsyncHook("eval-context");
857
957
  }
858
- const diff = current - baseline;
859
- const diffText = (diff >= 0 ? "+" : "") + formatPercentage(diff);
860
- const color = diff > 0 ? u.green : diff < 0 ? u.red : u.dim;
861
- return { text: diffText, color };
862
- }
863
- function truncate(str, max) {
864
- return str.length > max ? str.slice(0, max) + "\u2026" : str;
865
- }
866
- function stringify(value) {
867
- try {
868
- if (typeof value === "string") return value;
869
- return JSON.stringify(value);
870
- } catch {
871
- return String(value);
958
+ });
959
+
960
+ // src/context.ts
961
+ function overrideFlags(partial) {
962
+ const current = getEvalContext();
963
+ if (!current) {
964
+ if (process.env.NODE_ENV !== "test") {
965
+ console.warn("overrideFlags called outside of evaluation context");
966
+ }
967
+ return;
872
968
  }
873
- }
874
- function getCaseFingerprint(input, expected) {
875
- const inputStr = typeof input === "string" ? input : JSON.stringify(input);
876
- const expectedStr = typeof expected === "string" ? expected : JSON.stringify(expected);
877
- return JSON.stringify({ input: inputStr, expected: expectedStr });
878
- }
879
- function printEvalNameAndFileName(testSuite, meta, logger = console.log) {
880
- const cwd = process.cwd();
881
- logger(
882
- " ",
883
- u.bgCyan(u.black(` ${testSuite.project.name} `)),
884
- u.bgBlue(u.black(` ${meta.evaluation.name}-${meta.evaluation.version} `)),
885
- u.dim(`(${testSuite.children.size} cases)`)
886
- );
887
- logger(" ", u.dim(testSuite.module.moduleId.replace(cwd, "")));
888
- }
889
- function printBaselineNameAndVersion(testMeta, logger = console.log) {
890
- if (testMeta.evaluation.baseline) {
891
- logger(
892
- " ",
893
- " baseline ",
894
- u.bgMagenta(
895
- u.black(` ${testMeta.evaluation.baseline.name}-${testMeta.evaluation.baseline.version} `)
896
- )
897
- );
898
- } else {
899
- logger(" ", u.bgWhite(u.blackBright(" baseline: ")), "none");
969
+ const overlayContext = {
970
+ ...current,
971
+ flags: { ...current.flags, ...partial },
972
+ // Merge for backwards compatibility
973
+ parent: current,
974
+ overrides: { ...partial }
975
+ };
976
+ const currentCtx = EVAL_CONTEXT.get();
977
+ if (currentCtx) {
978
+ Object.assign(currentCtx, overlayContext);
900
979
  }
901
- logger("");
902
- }
903
- function printRuntimeFlags(testMeta, logger = console.log) {
904
- if (testMeta.case.runtimeFlags && Object.keys(testMeta.case.runtimeFlags).length > 0) {
905
- const entries = Object.entries(testMeta.case.runtimeFlags);
906
- logger(" ", u.dim("runtime flags"));
907
- for (const [k, v] of entries) {
908
- switch (v.kind) {
909
- case "replaced": {
910
- const valText = truncate(stringify(v.value), 80);
911
- const defText = truncate(stringify(v.default), 80);
912
- logger(" ", `${k}: ${valText} (default: ${defText})`);
913
- break;
914
- }
915
- case "introduced": {
916
- const valText = truncate(stringify(v.value), 80);
917
- logger(" ", `${k}: ${valText} (no default)`);
918
- break;
919
- }
920
- }
921
- }
980
+ for (const [key, value] of Object.entries(partial)) {
981
+ putOnSpan("flag", key, value);
922
982
  }
923
983
  }
924
- function printTestCaseCountStartDuration(testSuite, startTime, duration, logger = console.log) {
925
- logger(" ");
926
- logger(" ", u.dim("Cases"), testSuite.children.size);
927
- logger(" ", u.dim("Start at"), new Date(startTime).toTimeString());
928
- logger(" ", u.dim("Duration"), `${duration}s`);
984
+ var init_context = __esm({
985
+ "src/context.ts"() {
986
+ "use strict";
987
+ init_cjs_shims();
988
+ init_storage();
989
+ }
990
+ });
991
+
992
+ // src/cli/utils/eval-context-runner.ts
993
+ async function runEvalWithContext(overrides2, runFn) {
994
+ setGlobalFlagOverrides(overrides2);
995
+ return withEvalContext({ initialFlags: overrides2 }, async () => {
996
+ if (Object.keys(overrides2).length > 0) {
997
+ overrideFlags(overrides2);
998
+ }
999
+ return runFn();
1000
+ });
929
1001
  }
930
- function printTestCaseSuccessOrFailed(testMeta, ok, logger = console.log) {
931
- const index = testMeta.case.index;
932
- if (ok) {
933
- logger(" ", u.yellow(` \u2714 case ${index}:`));
934
- } else {
935
- logger(" ", u.red(` \u2716 case ${index}: failed`));
936
- for (const e of testMeta.case.errors ?? []) {
937
- logger("", e.message);
1002
+ var init_eval_context_runner = __esm({
1003
+ "src/cli/utils/eval-context-runner.ts"() {
1004
+ "use strict";
1005
+ init_cjs_shims();
1006
+ init_context();
1007
+ init_storage();
1008
+ init_global_flags();
1009
+ }
1010
+ });
1011
+
1012
+ // src/cli/utils/parse-flag-overrides.ts
1013
+ function ensureNoSpaceSeparatedSyntax(flagName, value, nextToken, flagType) {
1014
+ if (value === void 0 && nextToken !== void 0) {
1015
+ if (flagType === "flag" && !nextToken.startsWith("-") && nextToken !== "true" && nextToken !== "false") {
1016
+ console.error(`\u274C Invalid syntax: --flag.${flagName} ${nextToken}`);
1017
+ console.error(`\u{1F4A1} Use: --flag.${flagName}=${nextToken}`);
1018
+ process.exit(1);
1019
+ } else if (flagType === "config" && !nextToken.startsWith("-")) {
1020
+ console.error(`\u274C Invalid syntax: --flags-config ${nextToken}`);
1021
+ console.error(`\u{1F4A1} Use: --flags-config=${nextToken}`);
1022
+ process.exit(1);
938
1023
  }
939
1024
  }
940
1025
  }
941
- function printTestCaseScores(testMeta, baselineCase, logger = console.log) {
942
- const scores = testMeta.case.scores;
943
- const keys = Object.keys(scores);
944
- if (keys.length === 0) {
945
- return;
1026
+ function collectFlagValidationErrors(overrides2, flagSchema) {
1027
+ if (!flagSchema || Object.keys(overrides2).length === 0) {
1028
+ return { success: true, errors: [] };
946
1029
  }
947
- const maxNameLength = Math.max(...keys.map((k) => k.length));
948
- keys.forEach((k) => {
949
- const scoreData = scores[k];
950
- const hasError = scoreData.metadata?.error;
951
- const v = scoreData.score ? scoreData.score : 0;
952
- const rawCurrent = hasError ? "N/A" : formatPercentage(v);
953
- const paddedCurrent = rawCurrent.padStart(7);
954
- const coloredCurrent = hasError ? u.dim(paddedCurrent) : u.magentaBright(paddedCurrent);
955
- const paddedName = k.padEnd(maxNameLength);
956
- if (baselineCase?.scores[k]) {
957
- const baselineScoreValue = baselineCase.scores[k].value;
958
- const rawBaseline = formatPercentage(baselineScoreValue);
959
- const paddedBaseline = rawBaseline.padStart(7);
960
- const coloredBaseline = u.blueBright(paddedBaseline);
961
- const { text: diffText, color: diffColor } = formatDiff(v, baselineScoreValue);
962
- const paddedDiff = diffText.padStart(8);
963
- logger(
964
- ` ${paddedName} ${coloredBaseline} \u2192 ${coloredCurrent} ${hasError ? u.dim("(scorer not run)") : u.dim("(") + diffColor(paddedDiff) + u.dim(")")}`
965
- );
966
- } else {
967
- logger(` ${paddedName} ${coloredCurrent} ${hasError ? u.dim("(scorer not run)") : ""}`);
1030
+ assertZodV4(flagSchema, "flagSchema");
1031
+ const schema = flagSchema;
1032
+ const errors = [];
1033
+ for (const dotPath of Object.keys(overrides2)) {
1034
+ const segments = parsePath(dotPath);
1035
+ if (!isValidPath(schema, segments)) {
1036
+ errors.push({ type: "invalid_path", path: dotPath });
968
1037
  }
969
- });
1038
+ }
1039
+ if (errors.length > 0) {
1040
+ return { success: false, errors };
1041
+ }
1042
+ const nestedObject = dotNotationToNested(overrides2);
1043
+ const deepPartialSchema = makeDeepPartial(schema);
1044
+ const result = deepPartialSchema.safeParse(nestedObject);
1045
+ if (!result.success) {
1046
+ errors.push({ type: "invalid_value", zodError: result.error });
1047
+ }
1048
+ return { success: errors.length === 0, errors };
970
1049
  }
971
- function printOutOfScopeFlags(testMeta, logger = console.log) {
972
- if (testMeta.case.outOfScopeFlags && testMeta.case.outOfScopeFlags.length > 0) {
973
- const pickedFlagsText = testMeta.case.pickedFlags ? `(picked: ${testMeta.case.pickedFlags.map((f2) => `'${f2}'`).join(", ")})` : "(none)";
974
- logger(" ", u.yellow(`\u26A0 Out-of-scope flags: ${pickedFlagsText}`));
975
- testMeta.case.outOfScopeFlags.forEach((flag) => {
976
- const timeStr = new Date(flag.accessedAt).toLocaleTimeString();
977
- logger(" ", `${flag.flagPath} (at ${timeStr})`);
978
- if (flag.stackTrace && flag.stackTrace.length > 0) {
979
- flag.stackTrace.forEach((frame, i) => {
980
- const prefix = i === flag.stackTrace.length - 1 ? " \u2514\u2500" : " \u251C\u2500";
981
- logger(" ", u.dim(`${prefix} ${frame}`));
982
- });
1050
+ function printFlagValidationErrorsAndExit(errors) {
1051
+ console.error("\u274C Invalid CLI flags:");
1052
+ for (const error of errors) {
1053
+ if (error.type === "invalid_path") {
1054
+ console.error(` \u2022 flag '${error.path}': Invalid flag path`);
1055
+ } else {
1056
+ console.error(formatZodErrors(error.zodError));
1057
+ const examples = generateFlagExamples(error.zodError);
1058
+ if (examples.length > 0) {
1059
+ console.error("\n\u{1F4A1} Valid examples:");
1060
+ examples.forEach((example) => console.error(` ${example}`));
983
1061
  }
984
- });
1062
+ }
985
1063
  }
1064
+ process.exit(1);
986
1065
  }
987
- function printCaseResult(test, baselineCasesByFingerprint, matchedIndices, logger = console.log) {
988
- const ok = test.ok();
989
- const testMeta = test.meta();
990
- if (!testMeta?.case) {
991
- return;
992
- }
993
- printTestCaseSuccessOrFailed(testMeta, ok, logger);
994
- const fingerprint = getCaseFingerprint(testMeta.case.input, testMeta.case.expected);
995
- const baselineCases = baselineCasesByFingerprint.get(fingerprint);
996
- const baselineCase = baselineCases?.shift();
997
- if (baselineCase) {
998
- matchedIndices.add(baselineCase.index);
1066
+ function validateFlagOverrides(overrides2, flagSchema) {
1067
+ const result = collectFlagValidationErrors(overrides2, flagSchema);
1068
+ if (!result.success) {
1069
+ printFlagValidationErrorsAndExit(result.errors);
999
1070
  }
1000
- printTestCaseScores(testMeta, baselineCase, logger);
1001
- printRuntimeFlags(testMeta, logger);
1002
- printOutOfScopeFlags(testMeta, logger);
1003
1071
  }
1004
- function printOrphanedBaselineCases(baseline, matchedIndices, logger = console.log) {
1005
- const orphanedCases = baseline.cases.filter((c) => !matchedIndices.has(c.index));
1006
- if (orphanedCases.length === 0) {
1007
- return;
1072
+ function coerceValue(raw) {
1073
+ if (raw === "true") return true;
1074
+ if (raw === "false") return false;
1075
+ const num = Number(raw);
1076
+ if (!Number.isNaN(num) && raw.trim() === num.toString()) {
1077
+ return num;
1008
1078
  }
1009
- logger("");
1010
- logger(" ", u.yellow("Orphaned baseline cases:"));
1011
- for (const orphanedCase of orphanedCases) {
1012
- logger(
1013
- " ",
1014
- u.dim(
1015
- `case ${orphanedCase.index}: ${truncate(orphanedCase.input, 50)} (score: ${truncate(
1016
- JSON.stringify(orphanedCase.scores),
1017
- 50
1018
- )})`
1019
- )
1020
- );
1021
- const keys = Object.keys(orphanedCase.scores);
1022
- if (keys.length > 0) {
1023
- const maxNameLength = Math.max(...keys.map((k) => k.length));
1024
- keys.forEach((k) => {
1025
- const scoreData = orphanedCase.scores[k];
1026
- const rawScore = formatPercentage(scoreData.value);
1027
- const paddedName = k.padEnd(maxNameLength);
1028
- const paddedScore = rawScore.padStart(7);
1029
- logger(` ${paddedName} ${u.blueBright(paddedScore)}`);
1030
- });
1031
- }
1079
+ try {
1080
+ return JSON.parse(raw);
1081
+ } catch {
1082
+ return raw;
1032
1083
  }
1033
1084
  }
1034
- var reporterDate = (d) => {
1035
- const date = d.toISOString().slice(0, 10);
1036
- const hours = d.getUTCHours().toString().padStart(2, "0");
1037
- const minutes = d.getUTCMinutes().toString().padStart(2, "0");
1038
- return `${date}, ${hours}:${minutes} UTC`;
1039
- };
1040
- function printGlobalFlagOverrides(overrides2, defaults, logger = console.log) {
1041
- if (Object.keys(overrides2).length === 0) {
1042
- logger("");
1043
- logger(u.dim("Flag overrides: (none)"));
1044
- logger("");
1045
- return;
1046
- }
1047
- logger("");
1048
- logger("Flag overrides:");
1049
- for (const [key, value] of Object.entries(overrides2)) {
1050
- const defaultValue = defaults[key];
1051
- const valueStr = JSON.stringify(value);
1052
- const defaultStr = defaultValue !== void 0 ? JSON.stringify(defaultValue) : "none";
1053
- logger(` \u2022 ${key}: ${valueStr} ${u.dim(`(default: ${defaultStr})`)}`);
1054
- }
1055
- logger("");
1056
- }
1057
- function printSuiteBox({
1058
- suite,
1059
- scorerAverages,
1060
- calculateBaselineScorerAverage: calculateBaselineScorerAverage2,
1061
- flagDiff,
1062
- logger = console.log
1063
- }) {
1064
- const filename = suite.file.split("/").pop();
1065
- logger("\u250C\u2500");
1066
- logger(`\u2502 ${u.blue(suite.name)} ${u.gray(`(${filename})`)}`);
1067
- logger("\u251C\u2500");
1068
- const scorerNames = Object.keys(scorerAverages);
1069
- const maxNameLength = Math.max(...scorerNames.map((name) => name.length));
1070
- const allCasesErrored = (scorerName) => {
1071
- return suite.cases.every((caseData) => caseData.scores[scorerName]?.metadata?.error);
1072
- };
1073
- for (const scorerName of scorerNames) {
1074
- const avg = scorerAverages[scorerName];
1075
- const paddedName = scorerName.padEnd(maxNameLength);
1076
- const hasAllErrors = allCasesErrored(scorerName);
1077
- const baselineAvg = suite.baseline ? calculateBaselineScorerAverage2(suite.baseline, scorerName) : null;
1078
- if (baselineAvg !== null) {
1079
- const currentPercent = hasAllErrors ? u.dim("N/A") : formatPercentage(avg);
1080
- const baselinePercent = formatPercentage(baselineAvg);
1081
- const { text: diffText, color: diffColor } = formatDiff(avg, baselineAvg);
1082
- const paddedBaseline = baselinePercent.padStart(7);
1083
- const paddedCurrent = hasAllErrors ? currentPercent : currentPercent.padStart(7);
1084
- const diffDisplay = hasAllErrors ? u.dim("all cases failed") : diffColor(diffText.padStart(8));
1085
- logger(
1086
- `\u2502 ${paddedName} ${u.blueBright(paddedBaseline)} \u2192 ${hasAllErrors ? paddedCurrent : u.magentaBright(paddedCurrent)} (${diffDisplay})`
1085
+ function loadConfigFile(path3) {
1086
+ const abs = (0, import_node_path.resolve)(process.cwd(), path3);
1087
+ try {
1088
+ const contents = (0, import_node_fs.readFileSync)(abs, "utf8");
1089
+ const parsed = JSON.parse(contents);
1090
+ if (typeof parsed !== "object" || Array.isArray(parsed) || parsed === null) {
1091
+ console.error(
1092
+ `\u274C Flags config must be a JSON object, got ${Array.isArray(parsed) ? "array" : typeof parsed}`
1087
1093
  );
1088
- } else {
1089
- const currentPercent = hasAllErrors ? u.red("N/A (all cases failed)") : formatPercentage(avg);
1090
- logger(`\u2502 \u2022 ${paddedName} ${currentPercent}`);
1094
+ process.exit(1);
1091
1095
  }
1096
+ return parsed;
1097
+ } catch (err) {
1098
+ console.error(`\u274C Could not read or parse flags config "${path3}": ${err.message}`);
1099
+ process.exit(1);
1092
1100
  }
1093
- logger("\u251C\u2500");
1094
- if (suite.baseline) {
1095
- const baselineTimestamp = suite.baseline.runAt ? reporterDate(new Date(suite.baseline.runAt)) : "unknown time";
1096
- logger(
1097
- `\u2502 Baseline: ${suite.baseline.name}-${suite.baseline.version} ${u.gray(`(${baselineTimestamp})`)}`
1098
- );
1099
- } else {
1100
- logger(`\u2502 Baseline: ${u.gray("(none)")}`);
1101
- }
1102
- const hasConfigChanges = flagDiff.length > 0;
1103
- logger("\u2502 Config changes:", hasConfigChanges ? "" : u.gray("(none)"));
1104
- if (hasConfigChanges) {
1105
- for (const { flag, current, baseline, default: defaultVal } of flagDiff) {
1106
- logger(`\u2502 \u2022 ${flag}: ${current ?? "<not set>"}`);
1107
- if (defaultVal !== void 0) {
1108
- logger(`\u2502 ${u.gray(`default: ${defaultVal}`)}`);
1101
+ }
1102
+ function extractOverrides(argv) {
1103
+ const cleanedArgv2 = [];
1104
+ const overrides2 = {};
1105
+ let configPath = null;
1106
+ let hasCliFlags = false;
1107
+ let configPathCount = 0;
1108
+ for (let i = 0; i < argv.length; i++) {
1109
+ const token = argv[i];
1110
+ const configMatch = token.match(CONFIG_RE);
1111
+ const flagMatch = token.match(FLAG_RE);
1112
+ if (configMatch) {
1113
+ configPathCount++;
1114
+ if (configPathCount > 1) {
1115
+ console.error("\u274C Only one --flags-config can be supplied.");
1116
+ process.exit(1);
1109
1117
  }
1110
- if (suite.baseline) {
1111
- logger(`\u2502 ${u.gray(`baseline: ${baseline ?? "<not set>"}`)}`);
1118
+ const value = configMatch[1];
1119
+ const nextToken = argv.length > i + 1 ? argv[i + 1] : void 0;
1120
+ ensureNoSpaceSeparatedSyntax("flags-config", value, nextToken, "config");
1121
+ if (!value) {
1122
+ console.error("\u274C --flags-config requires a file path");
1123
+ console.error("\u{1F4A1} Use: --flags-config=path/to/config.json");
1124
+ process.exit(1);
1112
1125
  }
1126
+ configPath = value;
1127
+ } else if (flagMatch) {
1128
+ hasCliFlags = true;
1129
+ const key = flagMatch[1];
1130
+ const value = flagMatch[2];
1131
+ const nextToken = argv.length > i + 1 ? argv[i + 1] : void 0;
1132
+ ensureNoSpaceSeparatedSyntax(key, value, nextToken, "flag");
1133
+ const finalValue = value === void 0 ? "true" : value;
1134
+ overrides2[key] = coerceValue(finalValue);
1135
+ } else {
1136
+ cleanedArgv2.push(token);
1113
1137
  }
1114
1138
  }
1115
- if (suite.outOfScopeFlags && suite.outOfScopeFlags.length > 0) {
1116
- const pickedFlagsText = suite.configFlags && suite.configFlags.length > 0 ? suite.configFlags.map((f2) => `'${f2}'`).join(", ") : "none";
1117
- logger("\u2502");
1118
- logger(`\u2502 ${u.yellow("\u26A0 Out-of-scope flags")} ${u.gray(`(picked: ${pickedFlagsText})`)}:`);
1119
- for (const flag of suite.outOfScopeFlags) {
1120
- const lastStackTraceFrame = flag.stackTrace[0];
1121
- const lastStackTraceFnName = lastStackTraceFrame.split(" ").shift();
1122
- const lastStackTraceFile = lastStackTraceFrame.split("/").pop()?.slice(0, -1);
1123
- logger(
1124
- `\u2502 \u2022 ${flag.flagPath} ${u.gray(`at ${lastStackTraceFnName} (${lastStackTraceFile})`)}`
1125
- );
1126
- }
1139
+ if (configPath && hasCliFlags) {
1140
+ console.error("\u274C Cannot use both --flags-config and --flag.* arguments together.");
1141
+ console.error("Choose one approach:");
1142
+ console.error(" \u2022 Config file: --flags-config=my-flags.json");
1143
+ console.error(" \u2022 CLI flags: --flag.temperature=0.9 --flag.model=gpt-4o");
1144
+ process.exit(1);
1127
1145
  }
1128
- logger("\u2514\u2500");
1129
- }
1130
- function calculateScorerAverages(suite) {
1131
- const scorerTotals = {};
1132
- for (const caseData of suite.cases) {
1133
- for (const [scorerName, score] of Object.entries(caseData.scores)) {
1134
- if (!scorerTotals[scorerName]) {
1135
- scorerTotals[scorerName] = { sum: 0, count: 0 };
1136
- }
1137
- if (!score.metadata?.error) {
1138
- scorerTotals[scorerName].sum += score.score || 0;
1139
- scorerTotals[scorerName].count += 1;
1140
- }
1141
- }
1146
+ if (configPath) {
1147
+ const configOverrides = loadConfigFile(configPath);
1148
+ return { cleanedArgv: cleanedArgv2, overrides: configOverrides };
1142
1149
  }
1143
- const averages = {};
1144
- for (const [scorerName, totals] of Object.entries(scorerTotals)) {
1145
- averages[scorerName] = totals.count > 0 ? totals.sum / totals.count : 0;
1150
+ return { cleanedArgv: cleanedArgv2, overrides: overrides2 };
1151
+ }
1152
+ var import_zod6, import_node_fs, import_node_path, FLAG_RE, CONFIG_RE;
1153
+ var init_parse_flag_overrides = __esm({
1154
+ "src/cli/utils/parse-flag-overrides.ts"() {
1155
+ "use strict";
1156
+ init_cjs_shims();
1157
+ import_zod6 = require("zod");
1158
+ init_format_zod_errors();
1159
+ import_node_fs = require("fs");
1160
+ import_node_path = require("path");
1161
+ init_dot_path();
1162
+ init_deep_partial_schema();
1163
+ init_zod_internals();
1164
+ FLAG_RE = /^--flag\.([^=]+)(?:=(.*))?$/;
1165
+ CONFIG_RE = /^--flags-config(?:=(.*))?$/;
1146
1166
  }
1147
- return averages;
1167
+ });
1168
+
1169
+ // src/cli/utils/glob-utils.ts
1170
+ function isGlob(str) {
1171
+ return /[*?[\]{}!]/.test(str);
1148
1172
  }
1149
- function calculateBaselineScorerAverage(baseline, scorerName) {
1150
- const scores = [];
1151
- for (const caseData of baseline.cases) {
1152
- if (caseData.scores[scorerName]) {
1153
- scores.push(caseData.scores[scorerName].value);
1154
- }
1173
+ var init_glob_utils = __esm({
1174
+ "src/cli/utils/glob-utils.ts"() {
1175
+ "use strict";
1176
+ init_cjs_shims();
1155
1177
  }
1156
- if (scores.length === 0) return null;
1157
- const sum = scores.reduce((acc, val) => acc + val, 0);
1158
- return sum / scores.length;
1178
+ });
1179
+
1180
+ // src/util/errors.ts
1181
+ function getCircularReplacer() {
1182
+ const seen = /* @__PURE__ */ new WeakSet();
1183
+ return (_k, v) => {
1184
+ if (typeof v === "object" && v !== null) {
1185
+ if (seen.has(v)) return "[Circular]";
1186
+ seen.add(v);
1187
+ }
1188
+ return v;
1189
+ };
1159
1190
  }
1160
- function calculateFlagDiff(suite) {
1161
- if (!suite.configFlags || suite.configFlags.length === 0) {
1162
- return [];
1191
+ function safeJson(x) {
1192
+ try {
1193
+ return JSON.stringify(x, getCircularReplacer());
1194
+ } catch {
1195
+ return String(x);
1163
1196
  }
1164
- const diffs = [];
1165
- const currentConfig = suite.flagConfig || {};
1166
- const baselineConfig = suite.baseline?.flagConfig || {};
1167
- const defaultConfig = suite.defaultFlagConfig || {};
1168
- const currentFlat = flattenObject(currentConfig);
1169
- const baselineFlat = flattenObject(baselineConfig);
1170
- const defaultFlat = flattenObject(defaultConfig);
1171
- const allKeys = /* @__PURE__ */ new Set([
1172
- ...Object.keys(currentFlat),
1173
- ...Object.keys(baselineFlat),
1174
- ...Object.keys(defaultFlat)
1175
- ]);
1176
- for (const key of allKeys) {
1177
- const isInScope = suite.configFlags.some((pattern) => key.startsWith(pattern));
1178
- if (!isInScope) continue;
1179
- const currentValue = currentFlat[key];
1180
- const baselineValue = baselineFlat[key];
1181
- const defaultValue = defaultFlat[key];
1182
- const currentStr = currentValue !== void 0 ? JSON.stringify(currentValue) : void 0;
1183
- const baselineStr = baselineValue !== void 0 ? JSON.stringify(baselineValue) : void 0;
1184
- const defaultStr = defaultValue !== void 0 ? JSON.stringify(defaultValue) : void 0;
1185
- const diffFromBaseline = suite.baseline && currentStr !== baselineStr;
1186
- const diffFromDefault = currentStr !== defaultStr;
1187
- if (diffFromBaseline || diffFromDefault) {
1188
- diffs.push({
1189
- flag: key,
1190
- current: currentStr,
1191
- baseline: suite.baseline ? baselineStr : void 0,
1192
- default: defaultStr
1193
- });
1197
+ }
1198
+ function errorToString(err) {
1199
+ try {
1200
+ if (typeof err === "string") return err;
1201
+ if (err instanceof Error) {
1202
+ return err.stack ?? err.message;
1203
+ }
1204
+ if (typeof err === "object" && err !== null) {
1205
+ const msg = err.message;
1206
+ const json = safeJson(err);
1207
+ return msg ? `${msg} (${json})` : json;
1194
1208
  }
1209
+ return String(err);
1210
+ } catch {
1211
+ return "[unserializable error]";
1195
1212
  }
1196
- return diffs;
1197
1213
  }
1198
- function printFinalReport({
1199
- suiteData,
1200
- config,
1201
- registrationStatus,
1202
- isDebug,
1203
- logger = console.log
1204
- }) {
1205
- logger("");
1206
- logger(u.bgBlue(u.white(" FINAL EVALUATION REPORT ")));
1207
- logger("");
1208
- for (const suite of suiteData) {
1209
- const scorerAverages = calculateScorerAverages(suite);
1210
- const flagDiff = calculateFlagDiff(suite);
1211
- printSuiteBox({ suite, scorerAverages, calculateBaselineScorerAverage, flagDiff, logger });
1212
- logger("");
1214
+ var AxiomCLIError;
1215
+ var init_errors = __esm({
1216
+ "src/util/errors.ts"() {
1217
+ "use strict";
1218
+ init_cjs_shims();
1219
+ AxiomCLIError = class extends Error {
1220
+ constructor(message) {
1221
+ super(message);
1222
+ this.name = "AxiomCLIError";
1223
+ }
1224
+ };
1213
1225
  }
1214
- const runId = suiteData[0]?.runId;
1215
- const orgId = suiteData[0]?.orgId;
1216
- const anyRegistered = registrationStatus.some((s2) => s2.registered);
1217
- const anyFailed = registrationStatus.some((s2) => !s2.registered);
1218
- if (anyRegistered && orgId && config?.consoleEndpointUrl) {
1219
- if (suiteData.length === 1) {
1220
- const suite = suiteData[0];
1221
- const baselineParam = suite.baseline?.traceId ? `?baselineId=${suite.baseline.traceId}` : "";
1222
- logger("View eval result:");
1223
- logger(
1224
- `${config.consoleEndpointUrl}/${orgId}/ai-engineering/evaluations/${suite.name}/${suite.version}${baselineParam}`
1225
- );
1226
- } else {
1227
- logger("View full report:");
1228
- logger(`${config.consoleEndpointUrl}/${orgId}/ai-engineering/evaluations?runId=${runId}`);
1229
- }
1230
- } else if (isDebug) {
1231
- logger(u.dim("Results not uploaded to Axiom (debug mode)"));
1232
- } else {
1233
- logger("Results not available in Axiom UI (registration failed)");
1234
- }
1235
- if (anyFailed) {
1236
- logger("");
1237
- for (const status of registrationStatus) {
1238
- if (!status.registered) {
1239
- logger(u.yellow(`\u26A0\uFE0F Warning: Failed to register "${status.name}" with Axiom`));
1240
- if (status.error) {
1241
- logger(u.dim(` Error: ${status.error}`));
1242
- }
1243
- logger(u.dim(` Results for this evaluation will not be available in the Axiom UI.`));
1244
- }
1245
- }
1246
- }
1247
- }
1248
-
1249
- // src/config/resolver.ts
1250
- init_cjs_shims();
1251
- var buildConsoleUrl = (urlString) => {
1252
- const url = new URL(urlString);
1253
- return `${url.protocol}//app.${url.host.split("api.").at(-1)}`;
1254
- };
1255
- function resolveAxiomConnection(config, consoleUrlOverride) {
1256
- const consoleEndpointUrl = consoleUrlOverride ?? buildConsoleUrl(config.eval.url);
1257
- return {
1258
- url: config.eval.url,
1259
- consoleEndpointUrl,
1260
- token: config.eval.token,
1261
- dataset: config.eval.dataset,
1262
- orgId: config.eval.orgId
1263
- };
1264
- }
1265
-
1266
- // src/evals/reporter.ts
1267
- var AxiomReporter = class {
1268
- constructor() {
1269
- __publicField(this, "startTime", 0);
1270
- __publicField(this, "start", 0);
1271
- __publicField(this, "_endOfRunConfigEnd");
1272
- __publicField(this, "_suiteData", []);
1273
- __publicField(this, "_printedFlagOverrides", false);
1274
- __publicField(this, "_config");
1275
- }
1276
- onTestRunStart() {
1277
- this.start = performance.now();
1278
- this.startTime = (/* @__PURE__ */ new Date()).getTime();
1279
- const config = getAxiomConfig();
1280
- if (config) {
1281
- this._config = resolveAxiomConnection(config, getConsoleUrl());
1282
- }
1283
- }
1284
- async onTestSuiteReady(_testSuite) {
1285
- const meta = _testSuite.meta();
1286
- if (_testSuite.state() === "skipped" || !meta?.evaluation) {
1287
- return;
1288
- }
1289
- if (!this._printedFlagOverrides) {
1290
- const defaultsFromConfigEnd = meta.evaluation.configEnd?.flags ?? {};
1291
- const overridesFromConfigEnd = meta.evaluation.configEnd?.overrides ?? {};
1292
- if (Object.keys(overridesFromConfigEnd).length > 0) {
1293
- printGlobalFlagOverrides(overridesFromConfigEnd, defaultsFromConfigEnd);
1294
- }
1295
- this._printedFlagOverrides = true;
1296
- }
1297
- if (meta.evaluation.configEnd && !this._endOfRunConfigEnd) {
1298
- this._endOfRunConfigEnd = meta.evaluation.configEnd;
1299
- }
1300
- }
1301
- onTestCaseReady(test) {
1302
- const meta = test.meta();
1303
- if (!meta.case) return;
1304
- }
1305
- async onTestSuiteResult(testSuite) {
1306
- const meta = testSuite.meta();
1307
- if (testSuite.state() === "skipped" || !meta?.evaluation) {
1308
- return;
1309
- }
1310
- if (meta.evaluation.configEnd && !this._endOfRunConfigEnd) {
1311
- this._endOfRunConfigEnd = meta.evaluation.configEnd;
1312
- }
1313
- const durationSeconds = Number((performance.now() - this.start) / 1e3).toFixed(2);
1314
- const cases = [];
1315
- for (const test of testSuite.children) {
1316
- if (test.type !== "test") continue;
1317
- const testMeta = test.meta();
1318
- if (!testMeta?.case) continue;
1319
- cases.push({
1320
- index: testMeta.case.index,
1321
- scores: testMeta.case.scores,
1322
- outOfScopeFlags: testMeta.case.outOfScopeFlags,
1323
- errors: testMeta.case.errors,
1324
- runtimeFlags: testMeta.case.runtimeFlags
1325
- });
1326
- }
1327
- const cwd = process.cwd();
1328
- const relativePath = testSuite.module.moduleId.replace(cwd, "").replace(/^\//, "");
1329
- let suiteBaseline = meta.evaluation.baseline;
1330
- let flagConfig = meta.evaluation.flagConfig;
1331
- if (meta.evaluation.configEnd) {
1332
- const defaults = meta.evaluation.configEnd.flags ?? {};
1333
- const overrides2 = meta.evaluation.configEnd.overrides ?? {};
1334
- const defaultsFlat = flattenObject(defaults);
1335
- const overridesFlat = flattenObject(overrides2);
1336
- flagConfig = dotNotationToNested({ ...defaultsFlat, ...overridesFlat });
1337
- }
1338
- const defaultFlagConfig = meta.evaluation.configEnd?.flags;
1339
- this._suiteData.push({
1340
- version: meta.evaluation.version,
1341
- name: meta.evaluation.name,
1342
- file: relativePath,
1343
- duration: durationSeconds + "s",
1344
- baseline: suiteBaseline || null,
1345
- configFlags: meta.evaluation.configFlags,
1346
- flagConfig,
1347
- defaultFlagConfig,
1348
- runId: meta.evaluation.runId,
1349
- orgId: meta.evaluation.orgId,
1350
- cases,
1351
- outOfScopeFlags: meta.evaluation.outOfScopeFlags,
1352
- registrationStatus: meta.evaluation.registrationStatus
1353
- });
1354
- printEvalNameAndFileName(testSuite, meta);
1355
- printBaselineNameAndVersion(meta);
1356
- printTestCaseCountStartDuration(testSuite, this.startTime, durationSeconds);
1357
- const matchedBaselineIndices = /* @__PURE__ */ new Set();
1358
- const baselineCasesByFingerprint = /* @__PURE__ */ new Map();
1359
- if (suiteBaseline) {
1360
- for (const c of suiteBaseline.cases) {
1361
- const fp = getCaseFingerprint(c.input, c.expected);
1362
- const cases2 = baselineCasesByFingerprint.get(fp) || [];
1363
- cases2.push(c);
1364
- baselineCasesByFingerprint.set(fp, cases2);
1365
- }
1366
- }
1367
- for (const test of testSuite.children) {
1368
- if (test.type !== "test") continue;
1369
- printCaseResult(test, baselineCasesByFingerprint, matchedBaselineIndices);
1370
- }
1371
- if (suiteBaseline) {
1372
- printOrphanedBaselineCases(suiteBaseline, matchedBaselineIndices);
1373
- }
1374
- console.log("");
1375
- }
1376
- async onTestRunEnd(_testModules, _errors, _reason) {
1377
- const shouldClear = !process.env.CI && process.stdout.isTTY !== false;
1378
- if (shouldClear) {
1379
- process.stdout.write("\x1B[2J\x1B[0f");
1380
- }
1381
- const registrationStatus = this._suiteData.map((suite) => ({
1382
- name: suite.name,
1383
- registered: suite.registrationStatus?.status === "success",
1384
- error: suite.registrationStatus?.status === "failed" ? suite.registrationStatus.error : void 0
1385
- }));
1386
- const isDebug = process.env.AXIOM_DEBUG === "true";
1387
- printFinalReport({
1388
- suiteData: this._suiteData,
1389
- config: this._config,
1390
- registrationStatus,
1391
- isDebug
1392
- });
1393
- }
1394
- };
1395
-
1396
- // src/evals/instrument.ts
1397
- init_cjs_shims();
1398
- var import_sdk_trace_node = require("@opentelemetry/sdk-trace-node");
1399
- var import_resources = require("@opentelemetry/resources");
1400
- var import_exporter_trace_otlp_http = require("@opentelemetry/exporter-trace-otlp-http");
1401
- var import_api10 = require("@opentelemetry/api");
1402
-
1403
- // src/util/errors.ts
1404
- init_cjs_shims();
1405
- var AxiomCLIError = class extends Error {
1406
- constructor(message) {
1407
- super(message);
1408
- this.name = "AxiomCLIError";
1409
- }
1410
- };
1411
- function getCircularReplacer() {
1412
- const seen = /* @__PURE__ */ new WeakSet();
1413
- return (_k, v) => {
1414
- if (typeof v === "object" && v !== null) {
1415
- if (seen.has(v)) return "[Circular]";
1416
- seen.add(v);
1417
- }
1418
- return v;
1419
- };
1420
- }
1421
- function safeJson(x) {
1422
- try {
1423
- return JSON.stringify(x, getCircularReplacer());
1424
- } catch {
1425
- return String(x);
1426
- }
1427
- }
1428
- function errorToString(err) {
1429
- try {
1430
- if (typeof err === "string") return err;
1431
- if (err instanceof Error) {
1432
- return err.stack ?? err.message;
1433
- }
1434
- if (typeof err === "object" && err !== null) {
1435
- const msg = err.message;
1436
- const json = safeJson(err);
1437
- return msg ? `${msg} (${json})` : json;
1438
- }
1439
- return String(err);
1440
- } catch {
1441
- return "[unserializable error]";
1442
- }
1443
- }
1444
-
1445
- // src/config/loader.ts
1446
- init_cjs_shims();
1447
- var import_c12 = require("c12");
1448
- var import_defu = require("defu");
1449
-
1450
- // src/config/index.ts
1451
- init_cjs_shims();
1452
- var import_zod6 = require("zod");
1453
-
1454
- // src/cli/auth/index.ts
1455
- init_cjs_shims();
1226
+ });
1456
1227
 
1457
1228
  // src/cli/auth/types.ts
1458
- init_cjs_shims();
1229
+ var init_types = __esm({
1230
+ "src/cli/auth/types.ts"() {
1231
+ "use strict";
1232
+ init_cjs_shims();
1233
+ }
1234
+ });
1459
1235
 
1460
1236
  // src/cli/auth/config.ts
1461
- init_cjs_shims();
1462
- var import_fs = require("fs");
1463
- var import_path = __toESM(require("path"), 1);
1464
- var import_os = __toESM(require("os"), 1);
1465
- var CONFIG_FILENAME = "config.json";
1466
- var CONFIG_DIR_NAME = "axiom";
1467
1237
  function getConfigDir() {
1468
1238
  const platform = process.platform;
1469
1239
  const homeDir = import_os.default.homedir();
@@ -1510,64 +1280,81 @@ function getActiveProfile(config) {
1510
1280
  if (!profile) return null;
1511
1281
  return profile;
1512
1282
  }
1283
+ var import_fs, import_path, import_os, CONFIG_FILENAME, CONFIG_DIR_NAME;
1284
+ var init_config = __esm({
1285
+ "src/cli/auth/config.ts"() {
1286
+ "use strict";
1287
+ init_cjs_shims();
1288
+ import_fs = require("fs");
1289
+ import_path = __toESM(require("path"), 1);
1290
+ import_os = __toESM(require("os"), 1);
1291
+ CONFIG_FILENAME = "config.json";
1292
+ CONFIG_DIR_NAME = "axiom";
1293
+ }
1294
+ });
1513
1295
 
1514
1296
  // src/cli/auth/oauth.ts
1515
- init_cjs_shims();
1516
- var import_crypto = require("crypto");
1517
- var OAUTH_CLIENT_ID = "264d906a404efc209b027f6595e6b616";
1518
- var OAUTH_AUTH_PATH = "/oauth/authorize";
1519
- var OAUTH_TOKEN_PATH = "/oauth/token";
1520
- var OAuth = class {
1521
- constructor(oauthBaseUrl) {
1522
- this.oauthBaseUrl = oauthBaseUrl;
1523
- }
1524
- static generateCodeVerifier() {
1525
- return (0, import_crypto.randomBytes)(32).toString("base64url");
1526
- }
1527
- static generateCodeChallenge(verifier) {
1528
- return (0, import_crypto.createHash)("sha256").update(verifier).digest("base64url");
1529
- }
1530
- static generateState() {
1531
- return (0, import_crypto.randomBytes)(16).toString("hex");
1532
- }
1533
- buildAuthUrl(params) {
1534
- const url = new URL(OAUTH_AUTH_PATH, this.oauthBaseUrl);
1535
- url.searchParams.set("client_id", OAUTH_CLIENT_ID);
1536
- url.searchParams.set("redirect_uri", params.redirectUri);
1537
- url.searchParams.set("response_type", "code");
1538
- url.searchParams.set("state", params.state);
1539
- url.searchParams.set("code_challenge", params.codeChallenge);
1540
- url.searchParams.set("code_challenge_method", "S256");
1541
- url.searchParams.set("scope", "*");
1542
- return url.toString();
1543
- }
1544
- async exchangeCodeForToken(params) {
1545
- const tokenUrl = new URL(OAUTH_TOKEN_PATH, this.oauthBaseUrl);
1546
- const body = new URLSearchParams({
1547
- grant_type: "authorization_code",
1548
- client_id: OAUTH_CLIENT_ID,
1549
- code: params.code,
1550
- redirect_uri: params.redirectUri,
1551
- code_verifier: params.codeVerifier
1552
- });
1553
- const response = await fetch(tokenUrl.toString(), {
1554
- method: "POST",
1555
- headers: {
1556
- "Content-Type": "application/x-www-form-urlencoded"
1557
- },
1558
- body: body.toString()
1559
- });
1560
- if (!response.ok) {
1561
- const errorText = await response.text();
1562
- throw new Error(`Token exchange failed: ${response.status} ${errorText}`);
1563
- }
1564
- const data = await response.json();
1565
- return data.access_token;
1297
+ var import_crypto, OAUTH_CLIENT_ID, OAUTH_AUTH_PATH, OAUTH_TOKEN_PATH, OAuth;
1298
+ var init_oauth = __esm({
1299
+ "src/cli/auth/oauth.ts"() {
1300
+ "use strict";
1301
+ init_cjs_shims();
1302
+ import_crypto = require("crypto");
1303
+ OAUTH_CLIENT_ID = "264d906a404efc209b027f6595e6b616";
1304
+ OAUTH_AUTH_PATH = "/oauth/authorize";
1305
+ OAUTH_TOKEN_PATH = "/oauth/token";
1306
+ OAuth = class {
1307
+ constructor(oauthBaseUrl) {
1308
+ this.oauthBaseUrl = oauthBaseUrl;
1309
+ }
1310
+ static generateCodeVerifier() {
1311
+ return (0, import_crypto.randomBytes)(32).toString("base64url");
1312
+ }
1313
+ static generateCodeChallenge(verifier) {
1314
+ return (0, import_crypto.createHash)("sha256").update(verifier).digest("base64url");
1315
+ }
1316
+ static generateState() {
1317
+ return (0, import_crypto.randomBytes)(16).toString("hex");
1318
+ }
1319
+ buildAuthUrl(params) {
1320
+ const url = new URL(OAUTH_AUTH_PATH, this.oauthBaseUrl);
1321
+ url.searchParams.set("client_id", OAUTH_CLIENT_ID);
1322
+ url.searchParams.set("redirect_uri", params.redirectUri);
1323
+ url.searchParams.set("response_type", "code");
1324
+ url.searchParams.set("state", params.state);
1325
+ url.searchParams.set("code_challenge", params.codeChallenge);
1326
+ url.searchParams.set("code_challenge_method", "S256");
1327
+ url.searchParams.set("scope", "*");
1328
+ return url.toString();
1329
+ }
1330
+ async exchangeCodeForToken(params) {
1331
+ const tokenUrl = new URL(OAUTH_TOKEN_PATH, this.oauthBaseUrl);
1332
+ const body = new URLSearchParams({
1333
+ grant_type: "authorization_code",
1334
+ client_id: OAUTH_CLIENT_ID,
1335
+ code: params.code,
1336
+ redirect_uri: params.redirectUri,
1337
+ code_verifier: params.codeVerifier
1338
+ });
1339
+ const response = await fetch(tokenUrl.toString(), {
1340
+ method: "POST",
1341
+ headers: {
1342
+ "Content-Type": "application/x-www-form-urlencoded"
1343
+ },
1344
+ body: body.toString()
1345
+ });
1346
+ if (!response.ok) {
1347
+ const errorText = await response.text();
1348
+ throw new Error(`Token exchange failed: ${response.status} ${errorText}`);
1349
+ }
1350
+ const data = await response.json();
1351
+ return data.access_token;
1352
+ }
1353
+ };
1566
1354
  }
1567
- };
1355
+ });
1568
1356
 
1569
1357
  // src/cli/auth/api.ts
1570
- init_cjs_shims();
1571
1358
  async function fetchOrganizations(token, apiBaseUrl) {
1572
1359
  const response = await fetch(`${apiBaseUrl}/v2/orgs`, {
1573
1360
  headers: {
@@ -1593,10 +1380,15 @@ async function verifyToken(token, orgId, apiBaseUrl) {
1593
1380
  });
1594
1381
  return response.ok;
1595
1382
  }
1383
+ var init_api = __esm({
1384
+ "src/cli/auth/api.ts"() {
1385
+ "use strict";
1386
+ init_cjs_shims();
1387
+ init_errors();
1388
+ }
1389
+ });
1596
1390
 
1597
1391
  // src/cli/auth/callback-server.ts
1598
- init_cjs_shims();
1599
- var import_http = __toESM(require("http"), 1);
1600
1392
  function escapeHtml(text) {
1601
1393
  const map = {
1602
1394
  "&": "&amp;",
@@ -1608,9 +1400,6 @@ function escapeHtml(text) {
1608
1400
  };
1609
1401
  return text.replace(/[&<>"']/g, (m2) => map[m2] || m2);
1610
1402
  }
1611
- var SVG_LOGO = `<svg width="124" height="24" viewBox="0 0 124 24" fill="none" xmlns="http://www.w3.org/2000/svg" class="logo">
1612
- <path d="M42.9919 16.8116H36.3696L35.5537 19.1572C35.4209 19.539 34.9714 19.8513 34.5548 19.8513H30.9779C30.5614 19.8513 30.3459 19.5449 30.499 19.1703L36.9816 3.31644C37.1346 2.9419 37.6009 2.63546 38.0174 2.63546H41.3718C41.7883 2.63546 42.2541 2.94207 42.4067 3.3168L48.8634 19.17C49.016 19.5447 48.8 19.8513 48.3835 19.8513H44.8067C44.3901 19.8513 43.9406 19.539 43.8078 19.1572L42.9919 16.8116ZM41.8232 13.4223L39.6807 7.18148L37.5383 13.4223H41.8232ZM64.1105 19.8513C63.694 19.8513 63.1767 19.5694 62.9611 19.2247L59.9029 14.3369L56.8447 19.2247C56.6291 19.5694 56.1119 19.8513 55.6953 19.8513H51.3392C50.9227 19.8513 50.7754 19.5801 51.0119 19.2486L56.8978 11.0013L51.5315 3.24414C51.2999 2.90937 51.4513 2.63546 51.8679 2.63546H55.89C56.3066 2.63546 56.8268 2.91563 57.0461 3.25807L59.9029 7.71959L62.7343 3.25955C62.9522 2.9163 63.4713 2.63546 63.8879 2.63546H67.938C68.3545 2.63546 68.5048 2.90868 68.272 3.24261L62.8801 10.9743L68.7935 19.2489C69.0303 19.5802 68.8832 19.8513 68.4666 19.8513H64.1105ZM76.525 19.119C76.525 19.5218 76.1841 19.8513 75.7675 19.8513H72.5522C72.1356 19.8513 71.7947 19.5218 71.7947 19.119V3.36771C71.7947 2.96498 72.1356 2.63546 72.5522 2.63546H75.7675C76.1841 2.63546 76.525 2.96498 76.525 3.36771V19.119ZM79.3736 11.1896C79.3736 6.18625 83.2688 2.15134 89.2511 2.15134C95.2334 2.15134 99.1289 6.18625 99.1289 11.1896C99.1289 16.2199 95.2334 20.2548 89.2511 20.2548C83.2688 20.2548 79.3736 16.2199 79.3736 11.1896ZM94.2873 11.1896C94.2873 8.58038 92.3953 6.21307 89.2511 6.21307C86.1349 6.21307 84.2149 8.58038 84.2149 11.1896C84.2149 13.7989 86.1349 16.2199 89.2511 16.2199C92.3675 16.2199 94.2873 13.7989 94.2873 11.1896ZM119.318 19.8513C118.902 19.8513 118.517 19.5245 118.463 19.1251L117.225 9.89847L113.65 19.1652C113.505 19.5426 113.045 19.8513 112.628 19.8513H111.055C110.638 19.8513 110.179 19.5423 110.034 19.1645L106.457 9.81766L105.218 19.125C105.165 19.5245 104.781 19.8513 104.364 19.8513H101.344C100.927 19.8513 100.633 19.5249 100.69 19.1259L102.932 3.3609C102.988 2.96191 103.376 2.63546 103.792 2.63546H106.59C107.007 2.63546 107.469 2.94351 107.617 3.32002L111.827 14.041L116.064 3.31949C116.213 2.94327 116.676 2.63546 117.092 2.63546H119.89C120.307 2.63546 120.694 2.96191 120.751 3.3609L122.993 19.1259C123.05 19.5249 122.755 19.8513 122.339 19.8513H119.318ZM23.9616 15.6531L18.8054 6.97021C18.5689 6.57115 17.9863 6.24465 17.5106 6.24465H14.2915C13.5433 6.24465 13.2365 5.73171 13.6097 5.1048L15.375 2.13986C15.5151 1.90455 15.5148 1.61487 15.3743 1.37981C15.2337 1.14476 14.9741 1 14.6933 1H10.2025C9.72681 1 9.14291 1.32577 8.90491 1.72395L0.17865 16.3217C-0.0594434 16.7199 -0.0595348 17.3715 0.178285 17.7698L2.42362 21.5297C2.79777 22.1561 3.41129 22.1569 3.78699 21.5313L5.54143 18.6103C5.91722 17.9847 6.53065 17.9854 6.9048 18.6119L8.49538 21.2754C8.7332 21.6737 9.317 21.9995 9.79273 21.9995H20.1698C20.6455 21.9995 21.2293 21.6737 21.4672 21.2754L23.959 17.1028C24.1968 16.7045 24.198 16.0521 23.9616 15.6531ZM16.9981 15.2352C17.3699 15.8629 17.0619 16.3765 16.3136 16.3765H8.24192C7.49372 16.3765 7.1876 15.864 7.56175 15.2375L11.6007 8.47417C11.9748 7.84772 12.5869 7.84774 12.9611 8.47421L16.9981 15.2352Z" fill="#121224"/>
1613
- </svg>`;
1614
1403
  function renderCallbackPage(error) {
1615
1404
  const errorClass = error ? ' class="error"' : "";
1616
1405
  const errorMessage = error ? escapeHtml(error) : "";
@@ -1757,10 +1546,19 @@ async function waitForCallback(server, expectedState) {
1757
1546
  });
1758
1547
  });
1759
1548
  }
1549
+ var import_http, SVG_LOGO;
1550
+ var init_callback_server = __esm({
1551
+ "src/cli/auth/callback-server.ts"() {
1552
+ "use strict";
1553
+ init_cjs_shims();
1554
+ import_http = __toESM(require("http"), 1);
1555
+ SVG_LOGO = `<svg width="124" height="24" viewBox="0 0 124 24" fill="none" xmlns="http://www.w3.org/2000/svg" class="logo">
1556
+ <path d="M42.9919 16.8116H36.3696L35.5537 19.1572C35.4209 19.539 34.9714 19.8513 34.5548 19.8513H30.9779C30.5614 19.8513 30.3459 19.5449 30.499 19.1703L36.9816 3.31644C37.1346 2.9419 37.6009 2.63546 38.0174 2.63546H41.3718C41.7883 2.63546 42.2541 2.94207 42.4067 3.3168L48.8634 19.17C49.016 19.5447 48.8 19.8513 48.3835 19.8513H44.8067C44.3901 19.8513 43.9406 19.539 43.8078 19.1572L42.9919 16.8116ZM41.8232 13.4223L39.6807 7.18148L37.5383 13.4223H41.8232ZM64.1105 19.8513C63.694 19.8513 63.1767 19.5694 62.9611 19.2247L59.9029 14.3369L56.8447 19.2247C56.6291 19.5694 56.1119 19.8513 55.6953 19.8513H51.3392C50.9227 19.8513 50.7754 19.5801 51.0119 19.2486L56.8978 11.0013L51.5315 3.24414C51.2999 2.90937 51.4513 2.63546 51.8679 2.63546H55.89C56.3066 2.63546 56.8268 2.91563 57.0461 3.25807L59.9029 7.71959L62.7343 3.25955C62.9522 2.9163 63.4713 2.63546 63.8879 2.63546H67.938C68.3545 2.63546 68.5048 2.90868 68.272 3.24261L62.8801 10.9743L68.7935 19.2489C69.0303 19.5802 68.8832 19.8513 68.4666 19.8513H64.1105ZM76.525 19.119C76.525 19.5218 76.1841 19.8513 75.7675 19.8513H72.5522C72.1356 19.8513 71.7947 19.5218 71.7947 19.119V3.36771C71.7947 2.96498 72.1356 2.63546 72.5522 2.63546H75.7675C76.1841 2.63546 76.525 2.96498 76.525 3.36771V19.119ZM79.3736 11.1896C79.3736 6.18625 83.2688 2.15134 89.2511 2.15134C95.2334 2.15134 99.1289 6.18625 99.1289 11.1896C99.1289 16.2199 95.2334 20.2548 89.2511 20.2548C83.2688 20.2548 79.3736 16.2199 79.3736 11.1896ZM94.2873 11.1896C94.2873 8.58038 92.3953 6.21307 89.2511 6.21307C86.1349 6.21307 84.2149 8.58038 84.2149 11.1896C84.2149 13.7989 86.1349 16.2199 89.2511 16.2199C92.3675 16.2199 94.2873 13.7989 94.2873 11.1896ZM119.318 19.8513C118.902 19.8513 118.517 19.5245 118.463 19.1251L117.225 9.89847L113.65 19.1652C113.505 19.5426 113.045 19.8513 112.628 19.8513H111.055C110.638 19.8513 110.179 19.5423 110.034 19.1645L106.457 9.81766L105.218 19.125C105.165 19.5245 104.781 19.8513 104.364 19.8513H101.344C100.927 19.8513 100.633 19.5249 100.69 19.1259L102.932 3.3609C102.988 2.96191 103.376 2.63546 103.792 2.63546H106.59C107.007 2.63546 107.469 2.94351 107.617 3.32002L111.827 14.041L116.064 3.31949C116.213 2.94327 116.676 2.63546 117.092 2.63546H119.89C120.307 2.63546 120.694 2.96191 120.751 3.3609L122.993 19.1259C123.05 19.5249 122.755 19.8513 122.339 19.8513H119.318ZM23.9616 15.6531L18.8054 6.97021C18.5689 6.57115 17.9863 6.24465 17.5106 6.24465H14.2915C13.5433 6.24465 13.2365 5.73171 13.6097 5.1048L15.375 2.13986C15.5151 1.90455 15.5148 1.61487 15.3743 1.37981C15.2337 1.14476 14.9741 1 14.6933 1H10.2025C9.72681 1 9.14291 1.32577 8.90491 1.72395L0.17865 16.3217C-0.0594434 16.7199 -0.0595348 17.3715 0.178285 17.7698L2.42362 21.5297C2.79777 22.1561 3.41129 22.1569 3.78699 21.5313L5.54143 18.6103C5.91722 17.9847 6.53065 17.9854 6.9048 18.6119L8.49538 21.2754C8.7332 21.6737 9.317 21.9995 9.79273 21.9995H20.1698C20.6455 21.9995 21.2293 21.6737 21.4672 21.2754L23.959 17.1028C24.1968 16.7045 24.198 16.0521 23.9616 15.6531ZM16.9981 15.2352C17.3699 15.8629 17.0619 16.3765 16.3136 16.3765H8.24192C7.49372 16.3765 7.1876 15.864 7.56175 15.2375L11.6007 8.47417C11.9748 7.84772 12.5869 7.84774 12.9611 8.47421L16.9981 15.2352Z" fill="#121224"/>
1557
+ </svg>`;
1558
+ }
1559
+ });
1760
1560
 
1761
1561
  // src/cli/auth/global-auth.ts
1762
- init_cjs_shims();
1763
- var authContext = null;
1764
1562
  function getAuthContext() {
1765
1563
  return authContext;
1766
1564
  }
@@ -1776,9 +1574,31 @@ async function setupGlobalAuth() {
1776
1574
  }
1777
1575
  return authContext;
1778
1576
  }
1577
+ var authContext;
1578
+ var init_global_auth = __esm({
1579
+ "src/cli/auth/global-auth.ts"() {
1580
+ "use strict";
1581
+ init_cjs_shims();
1582
+ init_config();
1583
+ authContext = null;
1584
+ }
1585
+ });
1586
+
1587
+ // src/cli/auth/index.ts
1588
+ var init_auth = __esm({
1589
+ "src/cli/auth/index.ts"() {
1590
+ "use strict";
1591
+ init_cjs_shims();
1592
+ init_types();
1593
+ init_config();
1594
+ init_oauth();
1595
+ init_api();
1596
+ init_callback_server();
1597
+ init_global_auth();
1598
+ }
1599
+ });
1779
1600
 
1780
1601
  // src/config/index.ts
1781
- var DEFAULT_EVAL_INCLUDE = ["**/*.eval.{ts,js,mts,mjs,cts,cjs}"];
1782
1602
  function createPartialDefaults() {
1783
1603
  let token;
1784
1604
  let url;
@@ -1841,6 +1661,17 @@ function validateConfig(config) {
1841
1661
  }
1842
1662
  return config;
1843
1663
  }
1664
+ var import_zod7, DEFAULT_EVAL_INCLUDE;
1665
+ var init_config2 = __esm({
1666
+ "src/config/index.ts"() {
1667
+ "use strict";
1668
+ init_cjs_shims();
1669
+ import_zod7 = require("zod");
1670
+ init_errors();
1671
+ init_auth();
1672
+ DEFAULT_EVAL_INCLUDE = ["**/*.eval.{ts,js,mts,mjs,cts,cjs}"];
1673
+ }
1674
+ });
1844
1675
 
1845
1676
  // src/config/loader.ts
1846
1677
  function customMerger(target, source) {
@@ -1880,446 +1711,967 @@ async function loadConfig(cwd = process.cwd()) {
1880
1711
  throw new AxiomCLIError(`Failed to load config file: ${errorToString(error)}`);
1881
1712
  }
1882
1713
  }
1714
+ var import_c12, import_defu;
1715
+ var init_loader = __esm({
1716
+ "src/config/loader.ts"() {
1717
+ "use strict";
1718
+ init_cjs_shims();
1719
+ import_c12 = require("c12");
1720
+ import_defu = require("defu");
1721
+ init_config2();
1722
+ init_errors();
1723
+ }
1724
+ });
1883
1725
 
1884
- // src/evals/instrument.ts
1885
- var axiomProvider;
1886
- var axiomTracer;
1887
- var userProvider;
1888
- var initializationPromise = null;
1889
- var initialized = false;
1890
- async function resolveInstrumentationHook(config) {
1891
- if (config.eval.instrumentation) {
1892
- return config.eval.instrumentation;
1726
+ // ../../node_modules/.pnpm/tinyrainbow@2.0.0/node_modules/tinyrainbow/dist/chunk-BVHSVHOK.js
1727
+ function a(n) {
1728
+ return String(n);
1729
+ }
1730
+ function C(n = false) {
1731
+ let e = typeof process != "undefined" ? process : void 0, i = (e == null ? void 0 : e.env) || {}, g = (e == null ? void 0 : e.argv) || [];
1732
+ return !("NO_COLOR" in i || g.includes("--no-color")) && ("FORCE_COLOR" in i || g.includes("--color") || (e == null ? void 0 : e.platform) === "win32" || n && i.TERM !== "dumb" || "CI" in i) || typeof window != "undefined" && !!window.chrome;
1733
+ }
1734
+ function p(n = false) {
1735
+ let e = C(n), i = (r2, t, c, o) => {
1736
+ let l = "", s2 = 0;
1737
+ do
1738
+ l += r2.substring(s2, o) + c, s2 = o + t.length, o = r2.indexOf(t, s2);
1739
+ while (~o);
1740
+ return l + r2.substring(s2);
1741
+ }, g = (r2, t, c = r2) => {
1742
+ let o = (l) => {
1743
+ let s2 = String(l), b = s2.indexOf(t, r2.length);
1744
+ return ~b ? r2 + i(s2, t, c, b) + t : r2 + s2 + t;
1745
+ };
1746
+ return o.open = r2, o.close = t, o;
1747
+ }, u2 = {
1748
+ isColorSupported: e
1749
+ }, d = (r2) => `\x1B[${r2}m`;
1750
+ for (let [r2, t] of h)
1751
+ u2[r2] = e ? g(
1752
+ d(t[0]),
1753
+ d(t[1]),
1754
+ t[2]
1755
+ ) : a;
1756
+ return u2;
1757
+ }
1758
+ var f, h;
1759
+ var init_chunk_BVHSVHOK = __esm({
1760
+ "../../node_modules/.pnpm/tinyrainbow@2.0.0/node_modules/tinyrainbow/dist/chunk-BVHSVHOK.js"() {
1761
+ "use strict";
1762
+ init_cjs_shims();
1763
+ f = {
1764
+ reset: [0, 0],
1765
+ bold: [1, 22, "\x1B[22m\x1B[1m"],
1766
+ dim: [2, 22, "\x1B[22m\x1B[2m"],
1767
+ italic: [3, 23],
1768
+ underline: [4, 24],
1769
+ inverse: [7, 27],
1770
+ hidden: [8, 28],
1771
+ strikethrough: [9, 29],
1772
+ black: [30, 39],
1773
+ red: [31, 39],
1774
+ green: [32, 39],
1775
+ yellow: [33, 39],
1776
+ blue: [34, 39],
1777
+ magenta: [35, 39],
1778
+ cyan: [36, 39],
1779
+ white: [37, 39],
1780
+ gray: [90, 39],
1781
+ bgBlack: [40, 49],
1782
+ bgRed: [41, 49],
1783
+ bgGreen: [42, 49],
1784
+ bgYellow: [43, 49],
1785
+ bgBlue: [44, 49],
1786
+ bgMagenta: [45, 49],
1787
+ bgCyan: [46, 49],
1788
+ bgWhite: [47, 49],
1789
+ blackBright: [90, 39],
1790
+ redBright: [91, 39],
1791
+ greenBright: [92, 39],
1792
+ yellowBright: [93, 39],
1793
+ blueBright: [94, 39],
1794
+ magentaBright: [95, 39],
1795
+ cyanBright: [96, 39],
1796
+ whiteBright: [97, 39],
1797
+ bgBlackBright: [100, 49],
1798
+ bgRedBright: [101, 49],
1799
+ bgGreenBright: [102, 49],
1800
+ bgYellowBright: [103, 49],
1801
+ bgBlueBright: [104, 49],
1802
+ bgMagentaBright: [105, 49],
1803
+ bgCyanBright: [106, 49],
1804
+ bgWhiteBright: [107, 49]
1805
+ };
1806
+ h = Object.entries(f);
1807
+ a.open = "";
1808
+ a.close = "";
1893
1809
  }
1894
- try {
1895
- const { config: loadedConfig } = await loadConfig(process.cwd());
1896
- return loadedConfig.eval.instrumentation ?? null;
1897
- } catch (error) {
1898
- throw new AxiomCLIError(
1899
- `Failed to reload instrumentation from config: ${errorToString(error)}`
1900
- );
1810
+ });
1811
+
1812
+ // ../../node_modules/.pnpm/tinyrainbow@2.0.0/node_modules/tinyrainbow/dist/node.js
1813
+ var import_tty, r, u;
1814
+ var init_node = __esm({
1815
+ "../../node_modules/.pnpm/tinyrainbow@2.0.0/node_modules/tinyrainbow/dist/node.js"() {
1816
+ "use strict";
1817
+ init_cjs_shims();
1818
+ init_chunk_BVHSVHOK();
1819
+ import_tty = require("tty");
1820
+ r = process.env.FORCE_TTY !== void 0 || (0, import_tty.isatty)(1);
1821
+ u = p(r);
1822
+ }
1823
+ });
1824
+
1825
+ // src/evals/reporter.console-utils.ts
1826
+ function formatPercentage(value) {
1827
+ if (!Number.isFinite(value)) {
1828
+ return "N/A";
1901
1829
  }
1830
+ return Number(value * 100).toFixed(2) + "%";
1902
1831
  }
1903
- async function runInstrumentationHook(hook, options) {
1832
+ function formatDiff(current, baseline) {
1833
+ if (!Number.isFinite(current) || !Number.isFinite(baseline)) {
1834
+ return { text: "N/A", color: u.dim };
1835
+ }
1836
+ const diff = current - baseline;
1837
+ const diffText = (diff >= 0 ? "+" : "") + formatPercentage(diff);
1838
+ const color = diff > 0 ? u.green : diff < 0 ? u.red : u.dim;
1839
+ return { text: diffText, color };
1840
+ }
1841
+ function truncate(str, max) {
1842
+ return str.length > max ? str.slice(0, max) + "\u2026" : str;
1843
+ }
1844
+ function stringify(value) {
1904
1845
  try {
1905
- return await hook(options);
1906
- } catch (error) {
1907
- throw new AxiomCLIError(`Failed to execute instrumentation hook: ${errorToString(error)}`);
1846
+ if (typeof value === "string") return value;
1847
+ return JSON.stringify(value);
1848
+ } catch {
1849
+ return String(value);
1908
1850
  }
1909
1851
  }
1910
- function setupEvalProvider(connection) {
1911
- const headers = {
1912
- "X-Axiom-Dataset": connection.dataset,
1913
- ...connection.orgId ? { "X-AXIOM-ORG-ID": connection.orgId } : {}
1914
- };
1915
- if (connection.token) {
1916
- headers.Authorization = `Bearer ${connection.token}`;
1852
+ function getCaseFingerprint(input, expected) {
1853
+ const inputStr = typeof input === "string" ? input : JSON.stringify(input);
1854
+ const expectedStr = typeof expected === "string" ? expected : JSON.stringify(expected);
1855
+ return JSON.stringify({ input: inputStr, expected: expectedStr });
1856
+ }
1857
+ function printEvalNameAndFileName(testSuite, meta, logger = console.log) {
1858
+ const cwd = process.cwd();
1859
+ logger(
1860
+ " ",
1861
+ u.bgCyan(u.black(` ${testSuite.project.name} `)),
1862
+ u.bgBlue(u.black(` ${meta.evaluation.name}-${meta.evaluation.version} `)),
1863
+ u.dim(`(${testSuite.children.size} cases)`)
1864
+ );
1865
+ logger(" ", u.dim(testSuite.module.moduleId.replace(cwd, "")));
1866
+ }
1867
+ function printBaselineNameAndVersion(testMeta, logger = console.log) {
1868
+ if (testMeta.evaluation.baseline) {
1869
+ logger(
1870
+ " ",
1871
+ " baseline ",
1872
+ u.bgMagenta(
1873
+ u.black(` ${testMeta.evaluation.baseline.name}-${testMeta.evaluation.baseline.version} `)
1874
+ )
1875
+ );
1876
+ } else {
1877
+ logger(" ", u.bgWhite(u.blackBright(" baseline: ")), "none");
1917
1878
  }
1918
- const collectorOptions = {
1919
- url: `${connection.url}/v1/traces`,
1920
- headers,
1921
- concurrencyLimit: 10
1922
- };
1923
- const exporter = new import_exporter_trace_otlp_http.OTLPTraceExporter(collectorOptions);
1924
- const processor = new import_sdk_trace_node.BatchSpanProcessor(exporter, {
1925
- maxQueueSize: 2048,
1926
- maxExportBatchSize: 512,
1927
- scheduledDelayMillis: 5e3,
1928
- exportTimeoutMillis: 3e4
1929
- });
1930
- axiomProvider = new import_sdk_trace_node.NodeTracerProvider({
1931
- resource: (0, import_resources.resourceFromAttributes)({
1932
- ["service.name"]: "axiom",
1933
- ["service.version"]: "0.34.1"
1934
- }),
1935
- spanProcessors: [processor]
1879
+ logger("");
1880
+ }
1881
+ function printRuntimeFlags(testMeta, logger = console.log) {
1882
+ if (testMeta.case.runtimeFlags && Object.keys(testMeta.case.runtimeFlags).length > 0) {
1883
+ const entries = Object.entries(testMeta.case.runtimeFlags);
1884
+ logger(" ", u.dim("runtime flags"));
1885
+ for (const [k, v] of entries) {
1886
+ switch (v.kind) {
1887
+ case "replaced": {
1888
+ const valText = truncate(stringify(v.value), 80);
1889
+ const defText = truncate(stringify(v.default), 80);
1890
+ logger(" ", `${k}: ${valText} (default: ${defText})`);
1891
+ break;
1892
+ }
1893
+ case "introduced": {
1894
+ const valText = truncate(stringify(v.value), 80);
1895
+ logger(" ", `${k}: ${valText} (no default)`);
1896
+ break;
1897
+ }
1898
+ }
1899
+ }
1900
+ }
1901
+ }
1902
+ function printTestCaseCountStartDuration(testSuite, startTime, duration, logger = console.log) {
1903
+ logger(" ");
1904
+ logger(" ", u.dim("Cases"), testSuite.children.size);
1905
+ logger(" ", u.dim("Start at"), new Date(startTime).toTimeString());
1906
+ logger(" ", u.dim("Duration"), `${duration}s`);
1907
+ }
1908
+ function printTestCaseSuccessOrFailed(testMeta, ok, logger = console.log) {
1909
+ const index = testMeta.case.index;
1910
+ if (ok) {
1911
+ logger(" ", u.yellow(` \u2714 case ${index}:`));
1912
+ } else {
1913
+ logger(" ", u.red(` \u2716 case ${index}: failed`));
1914
+ for (const e of testMeta.case.errors ?? []) {
1915
+ logger("", e.message);
1916
+ }
1917
+ }
1918
+ }
1919
+ function printTestCaseScores(testMeta, baselineCase, logger = console.log) {
1920
+ const scores = testMeta.case.scores;
1921
+ const keys = Object.keys(scores);
1922
+ if (keys.length === 0) {
1923
+ return;
1924
+ }
1925
+ const maxNameLength = Math.max(...keys.map((k) => k.length));
1926
+ keys.forEach((k) => {
1927
+ const scoreData = scores[k];
1928
+ const hasError = scoreData.metadata?.error;
1929
+ const v = scoreData.score ? scoreData.score : 0;
1930
+ const rawCurrent = hasError ? "N/A" : formatPercentage(v);
1931
+ const paddedCurrent = rawCurrent.padStart(7);
1932
+ const coloredCurrent = hasError ? u.dim(paddedCurrent) : u.magentaBright(paddedCurrent);
1933
+ const paddedName = k.padEnd(maxNameLength);
1934
+ if (baselineCase?.scores[k]) {
1935
+ const baselineScoreValue = baselineCase.scores[k].value;
1936
+ const rawBaseline = formatPercentage(baselineScoreValue);
1937
+ const paddedBaseline = rawBaseline.padStart(7);
1938
+ const coloredBaseline = u.blueBright(paddedBaseline);
1939
+ const { text: diffText, color: diffColor } = formatDiff(v, baselineScoreValue);
1940
+ const paddedDiff = diffText.padStart(8);
1941
+ logger(
1942
+ ` ${paddedName} ${coloredBaseline} \u2192 ${coloredCurrent} ${hasError ? u.dim("(scorer not run)") : u.dim("(") + diffColor(paddedDiff) + u.dim(")")}`
1943
+ );
1944
+ } else {
1945
+ logger(` ${paddedName} ${coloredCurrent} ${hasError ? u.dim("(scorer not run)") : ""}`);
1946
+ }
1936
1947
  });
1937
- axiomTracer = axiomProvider.getTracer("axiom", "0.34.1");
1938
1948
  }
1939
- async function initInstrumentation(config) {
1940
- if (initialized) {
1949
+ function printOutOfScopeFlags(testMeta, logger = console.log) {
1950
+ if (testMeta.case.outOfScopeFlags && testMeta.case.outOfScopeFlags.length > 0) {
1951
+ const pickedFlagsText = testMeta.case.pickedFlags ? `(picked: ${testMeta.case.pickedFlags.map((f2) => `'${f2}'`).join(", ")})` : "(none)";
1952
+ logger(" ", u.yellow(`\u26A0 Out-of-scope flags: ${pickedFlagsText}`));
1953
+ testMeta.case.outOfScopeFlags.forEach((flag) => {
1954
+ const timeStr = new Date(flag.accessedAt).toLocaleTimeString();
1955
+ logger(" ", `${flag.flagPath} (at ${timeStr})`);
1956
+ if (flag.stackTrace && flag.stackTrace.length > 0) {
1957
+ flag.stackTrace.forEach((frame, i) => {
1958
+ const prefix = i === flag.stackTrace.length - 1 ? " \u2514\u2500" : " \u251C\u2500";
1959
+ logger(" ", u.dim(`${prefix} ${frame}`));
1960
+ });
1961
+ }
1962
+ });
1963
+ }
1964
+ }
1965
+ function printCaseResult(test, baselineCasesByFingerprint, matchedIndices, logger = console.log) {
1966
+ const ok = test.ok();
1967
+ const testMeta = test.meta();
1968
+ if (!testMeta?.case) {
1941
1969
  return;
1942
1970
  }
1943
- if (initializationPromise) {
1944
- await initializationPromise;
1971
+ printTestCaseSuccessOrFailed(testMeta, ok, logger);
1972
+ const fingerprint = getCaseFingerprint(testMeta.case.input, testMeta.case.expected);
1973
+ const baselineCases = baselineCasesByFingerprint.get(fingerprint);
1974
+ const baselineCase = baselineCases?.shift();
1975
+ if (baselineCase) {
1976
+ matchedIndices.add(baselineCase.index);
1977
+ }
1978
+ printTestCaseScores(testMeta, baselineCase, logger);
1979
+ printRuntimeFlags(testMeta, logger);
1980
+ printOutOfScopeFlags(testMeta, logger);
1981
+ }
1982
+ function printOrphanedBaselineCases(baseline, matchedIndices, logger = console.log) {
1983
+ const orphanedCases = baseline.cases.filter((c) => !matchedIndices.has(c.index));
1984
+ if (orphanedCases.length === 0) {
1945
1985
  return;
1946
1986
  }
1947
- initializationPromise = (async () => {
1948
- if (!config.enabled) {
1949
- axiomTracer = import_api10.trace.getTracer("axiom", "0.34.1");
1950
- initialized = true;
1951
- return;
1952
- }
1953
- const connection = resolveAxiomConnection(config.config);
1954
- const hook = await resolveInstrumentationHook(config.config);
1955
- let hookResult = void 0;
1956
- if (hook) {
1957
- config.config.eval.instrumentation = hook;
1958
- hookResult = await runInstrumentationHook(hook, {
1959
- dataset: connection.dataset,
1960
- token: connection.token,
1961
- url: connection.url,
1962
- orgId: connection.orgId
1987
+ logger("");
1988
+ logger(" ", u.yellow("Orphaned baseline cases:"));
1989
+ for (const orphanedCase of orphanedCases) {
1990
+ logger(
1991
+ " ",
1992
+ u.dim(
1993
+ `case ${orphanedCase.index}: ${truncate(orphanedCase.input, 50)} (score: ${truncate(
1994
+ JSON.stringify(orphanedCase.scores),
1995
+ 50
1996
+ )})`
1997
+ )
1998
+ );
1999
+ const keys = Object.keys(orphanedCase.scores);
2000
+ if (keys.length > 0) {
2001
+ const maxNameLength = Math.max(...keys.map((k) => k.length));
2002
+ keys.forEach((k) => {
2003
+ const scoreData = orphanedCase.scores[k];
2004
+ const rawScore = formatPercentage(scoreData.value);
2005
+ const paddedName = k.padEnd(maxNameLength);
2006
+ const paddedScore = rawScore.padStart(7);
2007
+ logger(` ${paddedName} ${u.blueBright(paddedScore)}`);
1963
2008
  });
1964
- userProvider = hookResult?.provider ?? userProvider;
1965
2009
  }
1966
- setupEvalProvider(connection);
1967
- if (!hook) {
1968
- axiomProvider?.register();
1969
- if (axiomTracer) {
1970
- initAxiomAI({ tracer: axiomTracer });
2010
+ }
2011
+ }
2012
+ function printGlobalFlagOverrides(overrides2, defaults, logger = console.log) {
2013
+ if (Object.keys(overrides2).length === 0) {
2014
+ logger("");
2015
+ logger(u.dim("Flag overrides: (none)"));
2016
+ logger("");
2017
+ return;
2018
+ }
2019
+ logger("");
2020
+ logger("Flag overrides:");
2021
+ for (const [key, value] of Object.entries(overrides2)) {
2022
+ const defaultValue = defaults[key];
2023
+ const valueStr = JSON.stringify(value);
2024
+ const defaultStr = defaultValue !== void 0 ? JSON.stringify(defaultValue) : "none";
2025
+ logger(` \u2022 ${key}: ${valueStr} ${u.dim(`(default: ${defaultStr})`)}`);
2026
+ }
2027
+ logger("");
2028
+ }
2029
+ function printSuiteBox({
2030
+ suite,
2031
+ scorerAverages,
2032
+ calculateBaselineScorerAverage: calculateBaselineScorerAverage2,
2033
+ flagDiff,
2034
+ logger = console.log
2035
+ }) {
2036
+ const filename = suite.file.split("/").pop();
2037
+ logger("\u250C\u2500");
2038
+ logger(`\u2502 ${u.blue(suite.name)} ${u.gray(`(${filename})`)}`);
2039
+ logger("\u251C\u2500");
2040
+ const scorerNames = Object.keys(scorerAverages);
2041
+ const maxNameLength = Math.max(...scorerNames.map((name) => name.length));
2042
+ const allCasesErrored = (scorerName) => {
2043
+ return suite.cases.every((caseData) => caseData.scores[scorerName]?.metadata?.error);
2044
+ };
2045
+ for (const scorerName of scorerNames) {
2046
+ const avg = scorerAverages[scorerName];
2047
+ const paddedName = scorerName.padEnd(maxNameLength);
2048
+ const hasAllErrors = allCasesErrored(scorerName);
2049
+ const baselineAvg = suite.baseline ? calculateBaselineScorerAverage2(suite.baseline, scorerName) : null;
2050
+ if (baselineAvg !== null) {
2051
+ const currentPercent = hasAllErrors ? u.dim("N/A") : formatPercentage(avg);
2052
+ const baselinePercent = formatPercentage(baselineAvg);
2053
+ const { text: diffText, color: diffColor } = formatDiff(avg, baselineAvg);
2054
+ const paddedBaseline = baselinePercent.padStart(7);
2055
+ const paddedCurrent = hasAllErrors ? currentPercent : currentPercent.padStart(7);
2056
+ const diffDisplay = hasAllErrors ? u.dim("all cases failed") : diffColor(diffText.padStart(8));
2057
+ logger(
2058
+ `\u2502 ${paddedName} ${u.blueBright(paddedBaseline)} \u2192 ${hasAllErrors ? paddedCurrent : u.magentaBright(paddedCurrent)} (${diffDisplay})`
2059
+ );
2060
+ } else {
2061
+ const currentPercent = hasAllErrors ? u.red("N/A (all cases failed)") : formatPercentage(avg);
2062
+ logger(`\u2502 \u2022 ${paddedName} ${currentPercent}`);
2063
+ }
2064
+ }
2065
+ logger("\u251C\u2500");
2066
+ if (suite.baseline) {
2067
+ const baselineTimestamp = suite.baseline.runAt ? reporterDate(new Date(suite.baseline.runAt)) : "unknown time";
2068
+ logger(
2069
+ `\u2502 Baseline: ${suite.baseline.name}-${suite.baseline.version} ${u.gray(`(${baselineTimestamp})`)}`
2070
+ );
2071
+ } else {
2072
+ logger(`\u2502 Baseline: ${u.gray("(none)")}`);
2073
+ }
2074
+ const hasConfigChanges = flagDiff.length > 0;
2075
+ logger("\u2502 Config changes:", hasConfigChanges ? "" : u.gray("(none)"));
2076
+ if (hasConfigChanges) {
2077
+ for (const { flag, current, baseline, default: defaultVal } of flagDiff) {
2078
+ logger(`\u2502 \u2022 ${flag}: ${current ?? "<not set>"}`);
2079
+ if (defaultVal !== void 0) {
2080
+ logger(`\u2502 ${u.gray(`default: ${defaultVal}`)}`);
2081
+ }
2082
+ if (suite.baseline) {
2083
+ logger(`\u2502 ${u.gray(`baseline: ${baseline ?? "<not set>"}`)}`);
1971
2084
  }
1972
2085
  }
1973
- initialized = true;
1974
- })();
1975
- try {
1976
- await initializationPromise;
1977
- } finally {
1978
- initializationPromise = null;
1979
2086
  }
2087
+ if (suite.outOfScopeFlags && suite.outOfScopeFlags.length > 0) {
2088
+ const pickedFlagsText = suite.configFlags && suite.configFlags.length > 0 ? suite.configFlags.map((f2) => `'${f2}'`).join(", ") : "none";
2089
+ logger("\u2502");
2090
+ logger(`\u2502 ${u.yellow("\u26A0 Out-of-scope flags")} ${u.gray(`(picked: ${pickedFlagsText})`)}:`);
2091
+ for (const flag of suite.outOfScopeFlags) {
2092
+ const lastStackTraceFrame = flag.stackTrace[0];
2093
+ const lastStackTraceFnName = lastStackTraceFrame.split(" ").shift();
2094
+ const lastStackTraceFile = lastStackTraceFrame.split("/").pop()?.slice(0, -1);
2095
+ logger(
2096
+ `\u2502 \u2022 ${flag.flagPath} ${u.gray(`at ${lastStackTraceFnName} (${lastStackTraceFile})`)}`
2097
+ );
2098
+ }
2099
+ }
2100
+ logger("\u2514\u2500");
1980
2101
  }
1981
- var flush = async () => {
1982
- if (initializationPromise) {
1983
- await initializationPromise;
2102
+ function calculateScorerAverages(suite) {
2103
+ const scorerTotals = {};
2104
+ for (const caseData of suite.cases) {
2105
+ for (const [scorerName, score] of Object.entries(caseData.scores)) {
2106
+ if (!scorerTotals[scorerName]) {
2107
+ scorerTotals[scorerName] = { sum: 0, count: 0 };
2108
+ }
2109
+ if (!score.metadata?.error) {
2110
+ scorerTotals[scorerName].sum += score.score || 0;
2111
+ scorerTotals[scorerName].count += 1;
2112
+ }
2113
+ }
1984
2114
  }
1985
- const tasks = [];
1986
- if (axiomProvider) {
1987
- tasks.push(axiomProvider.forceFlush());
2115
+ const averages = {};
2116
+ for (const [scorerName, totals] of Object.entries(scorerTotals)) {
2117
+ averages[scorerName] = totals.count > 0 ? totals.sum / totals.count : 0;
1988
2118
  }
1989
- const candidateProviders = /* @__PURE__ */ new Set();
1990
- if (userProvider) {
1991
- candidateProviders.add(userProvider);
2119
+ return averages;
2120
+ }
2121
+ function calculateBaselineScorerAverage(baseline, scorerName) {
2122
+ const scores = [];
2123
+ for (const caseData of baseline.cases) {
2124
+ if (caseData.scores[scorerName]) {
2125
+ scores.push(caseData.scores[scorerName].value);
2126
+ }
1992
2127
  }
1993
- const globalProvider = import_api10.trace.getTracerProvider();
1994
- if (globalProvider) {
1995
- candidateProviders.add(globalProvider);
2128
+ if (scores.length === 0) return null;
2129
+ const sum = scores.reduce((acc, val) => acc + val, 0);
2130
+ return sum / scores.length;
2131
+ }
2132
+ function calculateFlagDiff(suite) {
2133
+ if (!suite.configFlags || suite.configFlags.length === 0) {
2134
+ return [];
1996
2135
  }
1997
- for (const provider of candidateProviders) {
1998
- const flushFn = provider.forceFlush;
1999
- if (typeof flushFn === "function") {
2000
- tasks.push(
2001
- flushFn.call(provider).catch((error) => {
2002
- console.warn("[AxiomAI] Failed to flush tracer provider:", errorToString(error));
2003
- })
2004
- );
2136
+ const diffs = [];
2137
+ const currentConfig = suite.flagConfig || {};
2138
+ const baselineConfig = suite.baseline?.flagConfig || {};
2139
+ const defaultConfig = suite.defaultFlagConfig || {};
2140
+ const currentFlat = flattenObject(currentConfig);
2141
+ const baselineFlat = flattenObject(baselineConfig);
2142
+ const defaultFlat = flattenObject(defaultConfig);
2143
+ const allKeys = /* @__PURE__ */ new Set([
2144
+ ...Object.keys(currentFlat),
2145
+ ...Object.keys(baselineFlat),
2146
+ ...Object.keys(defaultFlat)
2147
+ ]);
2148
+ for (const key of allKeys) {
2149
+ const isInScope = suite.configFlags.some((pattern) => key.startsWith(pattern));
2150
+ if (!isInScope) continue;
2151
+ const currentValue = currentFlat[key];
2152
+ const baselineValue = baselineFlat[key];
2153
+ const defaultValue = defaultFlat[key];
2154
+ const currentStr = currentValue !== void 0 ? JSON.stringify(currentValue) : void 0;
2155
+ const baselineStr = baselineValue !== void 0 ? JSON.stringify(baselineValue) : void 0;
2156
+ const defaultStr = defaultValue !== void 0 ? JSON.stringify(defaultValue) : void 0;
2157
+ const diffFromBaseline = suite.baseline && currentStr !== baselineStr;
2158
+ const diffFromDefault = currentStr !== defaultStr;
2159
+ if (diffFromBaseline || diffFromDefault) {
2160
+ diffs.push({
2161
+ flag: key,
2162
+ current: currentStr,
2163
+ baseline: suite.baseline ? baselineStr : void 0,
2164
+ default: defaultStr
2165
+ });
2005
2166
  }
2006
2167
  }
2007
- if (tasks.length > 0) {
2008
- await Promise.all(tasks);
2009
- }
2010
- };
2011
-
2012
- // src/evals/run-vitest.ts
2013
- var printCollectedEvals = (result, rootDir) => {
2014
- if (!result.testModules || result.testModules.length === 0) {
2015
- console.log(u.yellow("\nNo evaluations found\n"));
2016
- return;
2168
+ return diffs;
2169
+ }
2170
+ function printFinalReport({
2171
+ suiteData,
2172
+ config,
2173
+ registrationStatus,
2174
+ isDebug,
2175
+ logger = console.log
2176
+ }) {
2177
+ logger("");
2178
+ logger(u.bgBlue(u.white(" FINAL EVALUATION REPORT ")));
2179
+ logger("");
2180
+ for (const suite of suiteData) {
2181
+ const scorerAverages = calculateScorerAverages(suite);
2182
+ const flagDiff = calculateFlagDiff(suite);
2183
+ printSuiteBox({ suite, scorerAverages, calculateBaselineScorerAverage, flagDiff, logger });
2184
+ logger("");
2017
2185
  }
2018
- console.log(u.bold("\nFound evaluations:\n"));
2019
- let totalEvals = 0;
2020
- let totalCases = 0;
2021
- for (const module2 of result.testModules) {
2022
- const relativePath = import_node_path2.default.relative(rootDir, module2.moduleId);
2023
- for (const suite of module2.children.suites()) {
2024
- totalEvals++;
2025
- const caseCount = suite.children.size;
2026
- totalCases += caseCount;
2027
- console.log(u.green(`\u2713 ${suite.name} (${caseCount} cases)`));
2028
- console.log(u.dim(` ${relativePath}`));
2029
- console.log("");
2186
+ const runId = suiteData[0]?.runId;
2187
+ const orgId = suiteData[0]?.orgId;
2188
+ const anyRegistered = registrationStatus.some((s2) => s2.registered);
2189
+ const anyFailed = registrationStatus.some((s2) => !s2.registered);
2190
+ if (anyRegistered && orgId && config?.consoleEndpointUrl) {
2191
+ if (suiteData.length === 1) {
2192
+ const suite = suiteData[0];
2193
+ const baselineParam = suite.baseline?.traceId ? `?baselineId=${suite.baseline.traceId}` : "";
2194
+ logger("View eval result:");
2195
+ logger(
2196
+ `${config.consoleEndpointUrl}/${orgId}/ai-engineering/evaluations/${suite.name}/${suite.version}${baselineParam}`
2197
+ );
2198
+ } else {
2199
+ logger("View full report:");
2200
+ logger(`${config.consoleEndpointUrl}/${orgId}/ai-engineering/evaluations?runId=${runId}`);
2030
2201
  }
2202
+ } else if (isDebug) {
2203
+ logger(u.dim("Results not uploaded to Axiom (debug mode)"));
2204
+ } else {
2205
+ logger("Results not available in Axiom UI (registration failed)");
2031
2206
  }
2032
- console.log(u.bold(`Total: ${totalEvals} evaluations, ${totalCases} test cases
2033
- `));
2034
- };
2035
- var runVitest = async (dir, opts) => {
2036
- setAxiomConfig(opts.config);
2037
- await initInstrumentation({
2038
- enabled: !opts.debug && !opts.list,
2039
- config: opts.config
2040
- });
2041
- const providedConfig = {
2042
- ...opts.config,
2043
- eval: {
2044
- ...opts.config.eval,
2045
- // These can't be serialized, so we need to remove them
2046
- instrumentation: null,
2047
- flagSchema: null
2048
- }
2049
- };
2050
- if (opts.debug) {
2051
- console.log(u.bgWhite(u.blackBright(" Debug mode enabled ")));
2052
- }
2053
- const tmpDir = (0, import_node_path.join)((0, import_node_os.tmpdir)(), "axiom-eval", opts.runId);
2054
- (0, import_node_fs.mkdirSync)(tmpDir, { recursive: true });
2055
- const nameRegistryFile = (0, import_node_path.join)(tmpDir, "names.jsonl");
2056
- const abortFile = (0, import_node_path.join)(tmpDir, "abort.txt");
2057
- (0, import_node_fs.writeFileSync)(nameRegistryFile, "", "utf8");
2058
- if ((0, import_node_fs.existsSync)(abortFile)) {
2059
- (0, import_node_fs.unlinkSync)(abortFile);
2060
- }
2061
- process.env.AXIOM_NAME_REGISTRY_FILE = nameRegistryFile;
2062
- process.env.AXIOM_ABORT_FILE = abortFile;
2063
- if (opts.list) {
2064
- console.log(u.bgWhite(u.blackBright(" List mode ")));
2065
- }
2066
- const vi = await (0, import_node.createVitest)(
2067
- "test",
2068
- {
2069
- root: dir ? dir : process.cwd(),
2070
- mode: "test",
2071
- include: opts.include,
2072
- exclude: opts.exclude,
2073
- testNamePattern: opts.testNamePattern,
2074
- reporters: ["verbose", new AxiomReporter()],
2075
- environment: "node",
2076
- browser: void 0,
2077
- watch: opts.watch,
2078
- setupFiles: [],
2079
- // ignore user vitest.config.ts etc
2080
- name: "axiom:eval",
2081
- printConsoleTrace: true,
2082
- silent: false,
2083
- disableConsoleIntercept: true,
2084
- testTimeout: opts.config?.eval?.timeoutMs || 6e4,
2085
- globals: true,
2086
- runner: (0, import_node_path.resolve)(__dirname, "evals", "custom-runner.js"),
2087
- provide: {
2088
- baseline: opts.baseline,
2089
- debug: opts.debug,
2090
- list: opts.list,
2091
- overrides: opts.overrides,
2092
- axiomConfig: providedConfig,
2093
- runId: opts.runId,
2094
- consoleUrl: opts.consoleUrl
2207
+ if (anyFailed) {
2208
+ logger("");
2209
+ for (const status of registrationStatus) {
2210
+ if (!status.registered) {
2211
+ logger(u.yellow(`\u26A0\uFE0F Warning: Failed to register "${status.name}" with Axiom`));
2212
+ if (status.error) {
2213
+ logger(u.dim(` Error: ${status.error}`));
2214
+ }
2215
+ logger(u.dim(` Results for this evaluation will not be available in the Axiom UI.`));
2095
2216
  }
2096
- },
2097
- {
2098
- plugins: [(0, import_vite_tsconfig_paths.default)({ root: dir || process.cwd() })]
2099
2217
  }
2100
- );
2101
- if (opts.list) {
2102
- const result = await vi.collect();
2103
- printCollectedEvals(result, dir || process.cwd());
2104
- await vi.close();
2105
- process.exit(0);
2106
- }
2107
- await vi.start();
2108
- if ((0, import_node_fs.existsSync)(abortFile)) {
2109
- const message = (0, import_node_fs.readFileSync)(abortFile, "utf8");
2110
- console.error("\n" + message);
2111
- await vi.close();
2112
- process.exit(1);
2113
2218
  }
2114
- const dispose = (0, import_node.registerConsoleShortcuts)(vi, process.stdin, process.stdout);
2115
- if (!vi.shouldKeepServer()) {
2116
- dispose();
2117
- await flush();
2118
- await vi.close();
2119
- process.exit(0);
2219
+ }
2220
+ var reporterDate;
2221
+ var init_reporter_console_utils = __esm({
2222
+ "src/evals/reporter.console-utils.ts"() {
2223
+ "use strict";
2224
+ init_cjs_shims();
2225
+ init_node();
2226
+ init_dot_path();
2227
+ reporterDate = (d) => {
2228
+ const date = d.toISOString().slice(0, 10);
2229
+ const hours = d.getUTCHours().toString().padStart(2, "0");
2230
+ const minutes = d.getUTCMinutes().toString().padStart(2, "0");
2231
+ return `${date}, ${hours}:${minutes} UTC`;
2232
+ };
2120
2233
  }
2121
- await flush();
2122
- };
2123
-
2124
- // src/cli/commands/eval.command.ts
2125
- var import_node_fs3 = require("fs");
2126
-
2127
- // src/cli/utils/eval-context-runner.ts
2128
- init_cjs_shims();
2234
+ });
2129
2235
 
2130
- // src/context.ts
2131
- init_cjs_shims();
2132
- function overrideFlags(partial) {
2133
- const current = getEvalContext();
2134
- if (!current) {
2135
- if (process.env.NODE_ENV !== "test") {
2136
- console.warn("overrideFlags called outside of evaluation context");
2137
- }
2138
- return;
2139
- }
2140
- const overlayContext = {
2141
- ...current,
2142
- flags: { ...current.flags, ...partial },
2143
- // Merge for backwards compatibility
2144
- parent: current,
2145
- overrides: { ...partial }
2236
+ // src/config/resolver.ts
2237
+ function resolveAxiomConnection(config, consoleUrlOverride) {
2238
+ const consoleEndpointUrl = consoleUrlOverride ?? buildConsoleUrl(config.eval.url);
2239
+ return {
2240
+ url: config.eval.url,
2241
+ consoleEndpointUrl,
2242
+ token: config.eval.token,
2243
+ dataset: config.eval.dataset,
2244
+ orgId: config.eval.orgId
2146
2245
  };
2147
- const currentCtx = EVAL_CONTEXT.get();
2148
- if (currentCtx) {
2149
- Object.assign(currentCtx, overlayContext);
2150
- }
2151
- for (const [key, value] of Object.entries(partial)) {
2152
- putOnSpan("flag", key, value);
2153
- }
2154
- }
2155
-
2156
- // src/cli/utils/eval-context-runner.ts
2157
- async function runEvalWithContext(overrides2, runFn) {
2158
- setGlobalFlagOverrides(overrides2);
2159
- return withEvalContext({ initialFlags: overrides2 }, async () => {
2160
- if (Object.keys(overrides2).length > 0) {
2161
- overrideFlags(overrides2);
2162
- }
2163
- return runFn();
2164
- });
2165
- }
2166
-
2167
- // src/cli/utils/parse-flag-overrides.ts
2168
- init_cjs_shims();
2169
- var import_zod7 = require("zod");
2170
- var import_node_fs2 = require("fs");
2171
- var import_node_path3 = require("path");
2172
- var FLAG_RE = /^--flag\.([^=]+)(?:=(.*))?$/;
2173
- var CONFIG_RE = /^--flags-config(?:=(.*))?$/;
2174
- function ensureNoSpaceSeparatedSyntax(flagName, value, nextToken, flagType) {
2175
- if (value === void 0 && nextToken !== void 0) {
2176
- if (flagType === "flag" && !nextToken.startsWith("-") && nextToken !== "true" && nextToken !== "false") {
2177
- console.error(`\u274C Invalid syntax: --flag.${flagName} ${nextToken}`);
2178
- console.error(`\u{1F4A1} Use: --flag.${flagName}=${nextToken}`);
2179
- process.exit(1);
2180
- } else if (flagType === "config" && !nextToken.startsWith("-")) {
2181
- console.error(`\u274C Invalid syntax: --flags-config ${nextToken}`);
2182
- console.error(`\u{1F4A1} Use: --flags-config=${nextToken}`);
2183
- process.exit(1);
2184
- }
2185
- }
2186
2246
  }
2187
- function collectFlagValidationErrors(overrides2, flagSchema) {
2188
- if (!flagSchema || Object.keys(overrides2).length === 0) {
2189
- return { success: true, errors: [] };
2247
+ var buildConsoleUrl;
2248
+ var init_resolver = __esm({
2249
+ "src/config/resolver.ts"() {
2250
+ "use strict";
2251
+ init_cjs_shims();
2252
+ buildConsoleUrl = (urlString) => {
2253
+ const url = new URL(urlString);
2254
+ return `${url.protocol}//app.${url.host.split("api.").at(-1)}`;
2255
+ };
2190
2256
  }
2191
- assertZodV4(flagSchema, "flagSchema");
2192
- const schema = flagSchema;
2193
- const errors = [];
2194
- for (const dotPath of Object.keys(overrides2)) {
2195
- const segments = parsePath(dotPath);
2196
- if (!isValidPath(schema, segments)) {
2197
- errors.push({ type: "invalid_path", path: dotPath });
2198
- }
2257
+ });
2258
+
2259
+ // src/evals/reporter.ts
2260
+ var AxiomReporter;
2261
+ var init_reporter = __esm({
2262
+ "src/evals/reporter.ts"() {
2263
+ "use strict";
2264
+ init_cjs_shims();
2265
+ init_storage();
2266
+ init_reporter_console_utils();
2267
+ init_resolver();
2268
+ init_eval_command();
2269
+ init_dot_path();
2270
+ AxiomReporter = class {
2271
+ constructor() {
2272
+ __publicField(this, "startTime", 0);
2273
+ __publicField(this, "start", 0);
2274
+ __publicField(this, "_endOfRunConfigEnd");
2275
+ __publicField(this, "_suiteData", []);
2276
+ __publicField(this, "_printedFlagOverrides", false);
2277
+ __publicField(this, "_config");
2278
+ }
2279
+ onTestRunStart() {
2280
+ this.start = performance.now();
2281
+ this.startTime = (/* @__PURE__ */ new Date()).getTime();
2282
+ const config = getAxiomConfig();
2283
+ if (config) {
2284
+ this._config = resolveAxiomConnection(config, getConsoleUrl());
2285
+ }
2286
+ }
2287
+ async onTestSuiteReady(_testSuite) {
2288
+ const meta = _testSuite.meta();
2289
+ if (_testSuite.state() === "skipped" || !meta?.evaluation) {
2290
+ return;
2291
+ }
2292
+ if (!this._printedFlagOverrides) {
2293
+ const defaultsFromConfigEnd = meta.evaluation.configEnd?.flags ?? {};
2294
+ const overridesFromConfigEnd = meta.evaluation.configEnd?.overrides ?? {};
2295
+ if (Object.keys(overridesFromConfigEnd).length > 0) {
2296
+ printGlobalFlagOverrides(overridesFromConfigEnd, defaultsFromConfigEnd);
2297
+ }
2298
+ this._printedFlagOverrides = true;
2299
+ }
2300
+ if (meta.evaluation.configEnd && !this._endOfRunConfigEnd) {
2301
+ this._endOfRunConfigEnd = meta.evaluation.configEnd;
2302
+ }
2303
+ }
2304
+ onTestCaseReady(test) {
2305
+ const meta = test.meta();
2306
+ if (!meta.case) return;
2307
+ }
2308
+ async onTestSuiteResult(testSuite) {
2309
+ const meta = testSuite.meta();
2310
+ if (testSuite.state() === "skipped" || !meta?.evaluation) {
2311
+ return;
2312
+ }
2313
+ if (meta.evaluation.configEnd && !this._endOfRunConfigEnd) {
2314
+ this._endOfRunConfigEnd = meta.evaluation.configEnd;
2315
+ }
2316
+ const durationSeconds = Number((performance.now() - this.start) / 1e3).toFixed(2);
2317
+ const cases = [];
2318
+ for (const test of testSuite.children) {
2319
+ if (test.type !== "test") continue;
2320
+ const testMeta = test.meta();
2321
+ if (!testMeta?.case) continue;
2322
+ cases.push({
2323
+ index: testMeta.case.index,
2324
+ scores: testMeta.case.scores,
2325
+ outOfScopeFlags: testMeta.case.outOfScopeFlags,
2326
+ errors: testMeta.case.errors,
2327
+ runtimeFlags: testMeta.case.runtimeFlags
2328
+ });
2329
+ }
2330
+ const cwd = process.cwd();
2331
+ const relativePath = testSuite.module.moduleId.replace(cwd, "").replace(/^\//, "");
2332
+ let suiteBaseline = meta.evaluation.baseline;
2333
+ let flagConfig = meta.evaluation.flagConfig;
2334
+ if (meta.evaluation.configEnd) {
2335
+ const defaults = meta.evaluation.configEnd.flags ?? {};
2336
+ const overrides2 = meta.evaluation.configEnd.overrides ?? {};
2337
+ const defaultsFlat = flattenObject(defaults);
2338
+ const overridesFlat = flattenObject(overrides2);
2339
+ flagConfig = dotNotationToNested({ ...defaultsFlat, ...overridesFlat });
2340
+ }
2341
+ const defaultFlagConfig = meta.evaluation.configEnd?.flags;
2342
+ this._suiteData.push({
2343
+ version: meta.evaluation.version,
2344
+ name: meta.evaluation.name,
2345
+ file: relativePath,
2346
+ duration: durationSeconds + "s",
2347
+ baseline: suiteBaseline || null,
2348
+ configFlags: meta.evaluation.configFlags,
2349
+ flagConfig,
2350
+ defaultFlagConfig,
2351
+ runId: meta.evaluation.runId,
2352
+ orgId: meta.evaluation.orgId,
2353
+ cases,
2354
+ outOfScopeFlags: meta.evaluation.outOfScopeFlags,
2355
+ registrationStatus: meta.evaluation.registrationStatus
2356
+ });
2357
+ printEvalNameAndFileName(testSuite, meta);
2358
+ printBaselineNameAndVersion(meta);
2359
+ printTestCaseCountStartDuration(testSuite, this.startTime, durationSeconds);
2360
+ const matchedBaselineIndices = /* @__PURE__ */ new Set();
2361
+ const baselineCasesByFingerprint = /* @__PURE__ */ new Map();
2362
+ if (suiteBaseline) {
2363
+ for (const c of suiteBaseline.cases) {
2364
+ const fp = getCaseFingerprint(c.input, c.expected);
2365
+ const cases2 = baselineCasesByFingerprint.get(fp) || [];
2366
+ cases2.push(c);
2367
+ baselineCasesByFingerprint.set(fp, cases2);
2368
+ }
2369
+ }
2370
+ for (const test of testSuite.children) {
2371
+ if (test.type !== "test") continue;
2372
+ printCaseResult(test, baselineCasesByFingerprint, matchedBaselineIndices);
2373
+ }
2374
+ if (suiteBaseline) {
2375
+ printOrphanedBaselineCases(suiteBaseline, matchedBaselineIndices);
2376
+ }
2377
+ console.log("");
2378
+ }
2379
+ async onTestRunEnd(_testModules, _errors, _reason) {
2380
+ const shouldClear = !process.env.CI && process.stdout.isTTY !== false;
2381
+ if (shouldClear) {
2382
+ process.stdout.write("\x1B[2J\x1B[0f");
2383
+ }
2384
+ const registrationStatus = this._suiteData.map((suite) => ({
2385
+ name: suite.name,
2386
+ registered: suite.registrationStatus?.status === "success",
2387
+ error: suite.registrationStatus?.status === "failed" ? suite.registrationStatus.error : void 0
2388
+ }));
2389
+ const isDebug = process.env.AXIOM_DEBUG === "true";
2390
+ printFinalReport({
2391
+ suiteData: this._suiteData,
2392
+ config: this._config,
2393
+ registrationStatus,
2394
+ isDebug
2395
+ });
2396
+ }
2397
+ };
2199
2398
  }
2200
- if (errors.length > 0) {
2201
- return { success: false, errors };
2399
+ });
2400
+
2401
+ // src/evals/instrument.ts
2402
+ async function resolveInstrumentationHook(config) {
2403
+ if (config.eval.instrumentation) {
2404
+ return config.eval.instrumentation;
2202
2405
  }
2203
- const nestedObject = dotNotationToNested(overrides2);
2204
- const deepPartialSchema = makeDeepPartial(schema);
2205
- const result = deepPartialSchema.safeParse(nestedObject);
2206
- if (!result.success) {
2207
- errors.push({ type: "invalid_value", zodError: result.error });
2406
+ try {
2407
+ const { config: loadedConfig } = await loadConfig(process.cwd());
2408
+ return loadedConfig.eval.instrumentation ?? null;
2409
+ } catch (error) {
2410
+ throw new AxiomCLIError(
2411
+ `Failed to reload instrumentation from config: ${errorToString(error)}`
2412
+ );
2208
2413
  }
2209
- return { success: errors.length === 0, errors };
2210
2414
  }
2211
- function printFlagValidationErrorsAndExit(errors) {
2212
- console.error("\u274C Invalid CLI flags:");
2213
- for (const error of errors) {
2214
- if (error.type === "invalid_path") {
2215
- console.error(` \u2022 flag '${error.path}': Invalid flag path`);
2216
- } else {
2217
- console.error(formatZodErrors(error.zodError));
2218
- const examples = generateFlagExamples(error.zodError);
2219
- if (examples.length > 0) {
2220
- console.error("\n\u{1F4A1} Valid examples:");
2221
- examples.forEach((example) => console.error(` ${example}`));
2222
- }
2223
- }
2415
+ async function runInstrumentationHook(hook, options) {
2416
+ try {
2417
+ return await hook(options);
2418
+ } catch (error) {
2419
+ throw new AxiomCLIError(`Failed to execute instrumentation hook: ${errorToString(error)}`);
2224
2420
  }
2225
- process.exit(1);
2226
2421
  }
2227
- function validateFlagOverrides(overrides2, flagSchema) {
2228
- const result = collectFlagValidationErrors(overrides2, flagSchema);
2229
- if (!result.success) {
2230
- printFlagValidationErrorsAndExit(result.errors);
2422
+ function setupEvalProvider(connection) {
2423
+ const headers = {
2424
+ "X-Axiom-Dataset": connection.dataset,
2425
+ ...connection.orgId ? { "X-AXIOM-ORG-ID": connection.orgId } : {}
2426
+ };
2427
+ if (connection.token) {
2428
+ headers.Authorization = `Bearer ${connection.token}`;
2231
2429
  }
2430
+ const collectorOptions = {
2431
+ url: `${connection.url}/v1/traces`,
2432
+ headers,
2433
+ concurrencyLimit: 10
2434
+ };
2435
+ const exporter = new import_exporter_trace_otlp_http.OTLPTraceExporter(collectorOptions);
2436
+ const processor = new import_sdk_trace_node.BatchSpanProcessor(exporter, {
2437
+ maxQueueSize: 2048,
2438
+ maxExportBatchSize: 512,
2439
+ scheduledDelayMillis: 5e3,
2440
+ exportTimeoutMillis: 3e4
2441
+ });
2442
+ axiomProvider = new import_sdk_trace_node.NodeTracerProvider({
2443
+ resource: (0, import_resources.resourceFromAttributes)({
2444
+ ["service.name"]: "axiom",
2445
+ ["service.version"]: "0.35.0"
2446
+ }),
2447
+ spanProcessors: [processor]
2448
+ });
2449
+ axiomTracer = axiomProvider.getTracer("axiom", "0.35.0");
2232
2450
  }
2233
- function coerceValue(raw) {
2234
- if (raw === "true") return true;
2235
- if (raw === "false") return false;
2236
- const num = Number(raw);
2237
- if (!Number.isNaN(num) && raw.trim() === num.toString()) {
2238
- return num;
2451
+ async function initInstrumentation(config) {
2452
+ if (initialized) {
2453
+ return;
2239
2454
  }
2240
- try {
2241
- return JSON.parse(raw);
2242
- } catch {
2243
- return raw;
2455
+ if (initializationPromise) {
2456
+ await initializationPromise;
2457
+ return;
2244
2458
  }
2245
- }
2246
- function loadConfigFile(path3) {
2247
- const abs = (0, import_node_path3.resolve)(process.cwd(), path3);
2248
- try {
2249
- const contents = (0, import_node_fs2.readFileSync)(abs, "utf8");
2250
- const parsed = JSON.parse(contents);
2251
- if (typeof parsed !== "object" || Array.isArray(parsed) || parsed === null) {
2252
- console.error(
2253
- `\u274C Flags config must be a JSON object, got ${Array.isArray(parsed) ? "array" : typeof parsed}`
2254
- );
2255
- process.exit(1);
2459
+ initializationPromise = (async () => {
2460
+ if (!config.enabled) {
2461
+ axiomTracer = import_api10.trace.getTracer("axiom", "0.35.0");
2462
+ initialized = true;
2463
+ return;
2256
2464
  }
2257
- return parsed;
2258
- } catch (err) {
2259
- console.error(`\u274C Could not read or parse flags config "${path3}": ${err.message}`);
2260
- process.exit(1);
2465
+ const connection = resolveAxiomConnection(config.config);
2466
+ const hook = await resolveInstrumentationHook(config.config);
2467
+ let hookResult = void 0;
2468
+ if (hook) {
2469
+ config.config.eval.instrumentation = hook;
2470
+ hookResult = await runInstrumentationHook(hook, {
2471
+ dataset: connection.dataset,
2472
+ token: connection.token,
2473
+ url: connection.url,
2474
+ orgId: connection.orgId
2475
+ });
2476
+ userProvider = hookResult?.provider ?? userProvider;
2477
+ }
2478
+ setupEvalProvider(connection);
2479
+ if (!hook) {
2480
+ axiomProvider?.register();
2481
+ if (axiomTracer) {
2482
+ initAxiomAI({ tracer: axiomTracer });
2483
+ }
2484
+ }
2485
+ initialized = true;
2486
+ })();
2487
+ try {
2488
+ await initializationPromise;
2489
+ } finally {
2490
+ initializationPromise = null;
2261
2491
  }
2262
2492
  }
2263
- function extractOverrides(argv) {
2264
- const cleanedArgv2 = [];
2265
- const overrides2 = {};
2266
- let configPath = null;
2267
- let hasCliFlags = false;
2268
- let configPathCount = 0;
2269
- for (let i = 0; i < argv.length; i++) {
2270
- const token = argv[i];
2271
- const configMatch = token.match(CONFIG_RE);
2272
- const flagMatch = token.match(FLAG_RE);
2273
- if (configMatch) {
2274
- configPathCount++;
2275
- if (configPathCount > 1) {
2276
- console.error("\u274C Only one --flags-config can be supplied.");
2277
- process.exit(1);
2493
+ var import_sdk_trace_node, import_resources, import_exporter_trace_otlp_http, import_api10, axiomProvider, axiomTracer, userProvider, initializationPromise, initialized, flush;
2494
+ var init_instrument = __esm({
2495
+ "src/evals/instrument.ts"() {
2496
+ "use strict";
2497
+ init_cjs_shims();
2498
+ import_sdk_trace_node = require("@opentelemetry/sdk-trace-node");
2499
+ import_resources = require("@opentelemetry/resources");
2500
+ import_exporter_trace_otlp_http = require("@opentelemetry/exporter-trace-otlp-http");
2501
+ import_api10 = require("@opentelemetry/api");
2502
+ init_initAxiomAI();
2503
+ init_resolver();
2504
+ init_errors();
2505
+ init_loader();
2506
+ initializationPromise = null;
2507
+ initialized = false;
2508
+ flush = async () => {
2509
+ if (initializationPromise) {
2510
+ await initializationPromise;
2278
2511
  }
2279
- const value = configMatch[1];
2280
- const nextToken = argv.length > i + 1 ? argv[i + 1] : void 0;
2281
- ensureNoSpaceSeparatedSyntax("flags-config", value, nextToken, "config");
2282
- if (!value) {
2283
- console.error("\u274C --flags-config requires a file path");
2284
- console.error("\u{1F4A1} Use: --flags-config=path/to/config.json");
2285
- process.exit(1);
2512
+ const tasks = [];
2513
+ if (axiomProvider) {
2514
+ tasks.push(axiomProvider.forceFlush());
2286
2515
  }
2287
- configPath = value;
2288
- } else if (flagMatch) {
2289
- hasCliFlags = true;
2290
- const key = flagMatch[1];
2291
- const value = flagMatch[2];
2292
- const nextToken = argv.length > i + 1 ? argv[i + 1] : void 0;
2293
- ensureNoSpaceSeparatedSyntax(key, value, nextToken, "flag");
2294
- const finalValue = value === void 0 ? "true" : value;
2295
- overrides2[key] = coerceValue(finalValue);
2296
- } else {
2297
- cleanedArgv2.push(token);
2298
- }
2299
- }
2300
- if (configPath && hasCliFlags) {
2301
- console.error("\u274C Cannot use both --flags-config and --flag.* arguments together.");
2302
- console.error("Choose one approach:");
2303
- console.error(" \u2022 Config file: --flags-config=my-flags.json");
2304
- console.error(" \u2022 CLI flags: --flag.temperature=0.9 --flag.model=gpt-4o");
2305
- process.exit(1);
2306
- }
2307
- if (configPath) {
2308
- const configOverrides = loadConfigFile(configPath);
2309
- return { cleanedArgv: cleanedArgv2, overrides: configOverrides };
2516
+ const candidateProviders = /* @__PURE__ */ new Set();
2517
+ if (userProvider) {
2518
+ candidateProviders.add(userProvider);
2519
+ }
2520
+ const globalProvider = import_api10.trace.getTracerProvider();
2521
+ if (globalProvider) {
2522
+ candidateProviders.add(globalProvider);
2523
+ }
2524
+ for (const provider of candidateProviders) {
2525
+ const flushFn = provider.forceFlush;
2526
+ if (typeof flushFn === "function") {
2527
+ tasks.push(
2528
+ flushFn.call(provider).catch((error) => {
2529
+ console.warn("[AxiomAI] Failed to flush tracer provider:", errorToString(error));
2530
+ })
2531
+ );
2532
+ }
2533
+ }
2534
+ if (tasks.length > 0) {
2535
+ await Promise.all(tasks);
2536
+ }
2537
+ };
2310
2538
  }
2311
- return { cleanedArgv: cleanedArgv2, overrides: overrides2 };
2312
- }
2539
+ });
2313
2540
 
2314
- // src/cli/utils/glob-utils.ts
2315
- init_cjs_shims();
2316
- function isGlob(str) {
2317
- return /[*?[\]{}!]/.test(str);
2318
- }
2541
+ // src/evals/run-vitest.ts
2542
+ var run_vitest_exports = {};
2543
+ __export(run_vitest_exports, {
2544
+ runVitest: () => runVitest
2545
+ });
2546
+ var import_node_path2, import_node_fs2, import_node_os, import_node_path3, import_vite_tsconfig_paths, import_node, printCollectedEvals, runVitest;
2547
+ var init_run_vitest = __esm({
2548
+ "src/evals/run-vitest.ts"() {
2549
+ "use strict";
2550
+ init_cjs_shims();
2551
+ init_node();
2552
+ import_node_path2 = require("path");
2553
+ import_node_fs2 = require("fs");
2554
+ import_node_os = require("os");
2555
+ import_node_path3 = __toESM(require("path"), 1);
2556
+ import_vite_tsconfig_paths = __toESM(require("vite-tsconfig-paths"), 1);
2557
+ import_node = require("vitest/node");
2558
+ init_reporter();
2559
+ init_instrument();
2560
+ init_storage();
2561
+ printCollectedEvals = (result, rootDir) => {
2562
+ if (!result.testModules || result.testModules.length === 0) {
2563
+ console.log(u.yellow("\nNo evaluations found\n"));
2564
+ return;
2565
+ }
2566
+ console.log(u.bold("\nFound evaluations:\n"));
2567
+ let totalEvals = 0;
2568
+ let totalCases = 0;
2569
+ for (const module2 of result.testModules) {
2570
+ const relativePath = import_node_path3.default.relative(rootDir, module2.moduleId);
2571
+ for (const suite of module2.children.suites()) {
2572
+ totalEvals++;
2573
+ const caseCount = suite.children.size;
2574
+ totalCases += caseCount;
2575
+ console.log(u.green(`\u2713 ${suite.name} (${caseCount} cases)`));
2576
+ console.log(u.dim(` ${relativePath}`));
2577
+ console.log("");
2578
+ }
2579
+ }
2580
+ console.log(u.bold(`Total: ${totalEvals} evaluations, ${totalCases} test cases
2581
+ `));
2582
+ };
2583
+ runVitest = async (dir, opts) => {
2584
+ setAxiomConfig(opts.config);
2585
+ await initInstrumentation({
2586
+ enabled: !opts.debug && !opts.list,
2587
+ config: opts.config
2588
+ });
2589
+ const providedConfig = {
2590
+ ...opts.config,
2591
+ eval: {
2592
+ ...opts.config.eval,
2593
+ // These can't be serialized, so we need to remove them
2594
+ instrumentation: null,
2595
+ flagSchema: null
2596
+ }
2597
+ };
2598
+ if (opts.debug) {
2599
+ console.log(u.bgWhite(u.blackBright(" Debug mode enabled ")));
2600
+ }
2601
+ const tmpDir = (0, import_node_path2.join)((0, import_node_os.tmpdir)(), "axiom-eval", opts.runId);
2602
+ (0, import_node_fs2.mkdirSync)(tmpDir, { recursive: true });
2603
+ const nameRegistryFile = (0, import_node_path2.join)(tmpDir, "names.jsonl");
2604
+ const abortFile = (0, import_node_path2.join)(tmpDir, "abort.txt");
2605
+ (0, import_node_fs2.writeFileSync)(nameRegistryFile, "", "utf8");
2606
+ if ((0, import_node_fs2.existsSync)(abortFile)) {
2607
+ (0, import_node_fs2.unlinkSync)(abortFile);
2608
+ }
2609
+ process.env.AXIOM_NAME_REGISTRY_FILE = nameRegistryFile;
2610
+ process.env.AXIOM_ABORT_FILE = abortFile;
2611
+ if (opts.list) {
2612
+ console.log(u.bgWhite(u.blackBright(" List mode ")));
2613
+ }
2614
+ const vi = await (0, import_node.createVitest)(
2615
+ "test",
2616
+ {
2617
+ root: dir ? dir : process.cwd(),
2618
+ mode: "test",
2619
+ include: opts.include,
2620
+ exclude: opts.exclude,
2621
+ testNamePattern: opts.testNamePattern,
2622
+ reporters: ["verbose", new AxiomReporter()],
2623
+ environment: "node",
2624
+ browser: void 0,
2625
+ watch: opts.watch,
2626
+ setupFiles: [],
2627
+ // ignore user vitest.config.ts etc
2628
+ name: "axiom:eval",
2629
+ printConsoleTrace: true,
2630
+ silent: false,
2631
+ disableConsoleIntercept: true,
2632
+ testTimeout: opts.config?.eval?.timeoutMs || 6e4,
2633
+ globals: true,
2634
+ runner: (0, import_node_path2.resolve)(__dirname, "evals", "custom-runner.js"),
2635
+ provide: {
2636
+ baseline: opts.baseline,
2637
+ debug: opts.debug,
2638
+ list: opts.list,
2639
+ overrides: opts.overrides,
2640
+ axiomConfig: providedConfig,
2641
+ runId: opts.runId,
2642
+ consoleUrl: opts.consoleUrl
2643
+ }
2644
+ },
2645
+ {
2646
+ plugins: [(0, import_vite_tsconfig_paths.default)({ root: dir || process.cwd() })]
2647
+ }
2648
+ );
2649
+ if (opts.list) {
2650
+ const result = await vi.collect();
2651
+ printCollectedEvals(result, dir || process.cwd());
2652
+ await vi.close();
2653
+ process.exit(0);
2654
+ }
2655
+ await vi.start();
2656
+ if ((0, import_node_fs2.existsSync)(abortFile)) {
2657
+ const message = (0, import_node_fs2.readFileSync)(abortFile, "utf8");
2658
+ console.error("\n" + message);
2659
+ await vi.close();
2660
+ process.exit(1);
2661
+ }
2662
+ const dispose = (0, import_node.registerConsoleShortcuts)(vi, process.stdin, process.stdout);
2663
+ if (!vi.shouldKeepServer()) {
2664
+ dispose();
2665
+ await flush();
2666
+ await vi.close();
2667
+ process.exit(0);
2668
+ }
2669
+ await flush();
2670
+ };
2671
+ }
2672
+ });
2319
2673
 
2320
2674
  // src/cli/commands/eval.command.ts
2321
- var createRunId = (0, import_nanoid.customAlphabet)("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ", 10);
2322
- var consoleUrl;
2323
2675
  function getConsoleUrl() {
2324
2676
  return consoleUrl;
2325
2677
  }
@@ -2344,93 +2696,136 @@ function getDefaultOrgId(value) {
2344
2696
  const authContext2 = getAuthContext();
2345
2697
  return authContext2?.orgId ?? process.env.AXIOM_ORG_ID;
2346
2698
  }
2347
- var loadEvalCommand = (program2, flagOverrides = {}) => {
2348
- return program2.addCommand(
2349
- new import_commander.Command("eval").description("run evals locally").addArgument(
2350
- new import_commander.Argument("[target]", "file, directory, glob pattern, or eval name").default(
2351
- ".",
2352
- "any *.eval.ts file in current directory"
2353
- )
2354
- ).option("-w, --watch true", "keep server running and watch for changes", false).option("-t, --token <TOKEN>", "axiom token", getDefaultToken).option("-d, --dataset <DATASET>", "axiom dataset name", process.env.AXIOM_DATASET).option("-u, --url <AXIOM URL>", "axiom url", getDefaultUrl).option("-o, --org-id <ORG ID>", "axiom organization id", getDefaultOrgId).option("-b, --baseline <BASELINE ID>", "id of baseline evaluation to compare against").option("--debug", "run locally without any network operations", false).option("--list", "list evaluations and test cases without running them", false).addOption(new import_commander.Option("-c, --console-url <URL>", "console url override").hideHelp()).action(async (target, options) => {
2355
- try {
2356
- if (options.debug) {
2357
- process.env.AXIOM_DEBUG = "true";
2358
- }
2359
- let include = [];
2360
- let exclude;
2361
- let testNamePattern;
2362
- const isGlobPattern = isGlob(target);
2363
- const { config: loadedConfig } = await loadConfig(".");
2364
- validateFlagOverrides(flagOverrides, loadedConfig.eval.flagSchema);
2365
- const config = {
2366
- ...loadedConfig,
2367
- eval: {
2368
- ...loadedConfig.eval,
2369
- ...options.token && { token: options.token },
2370
- ...options.url && { url: options.url },
2371
- ...options.dataset && { dataset: options.dataset },
2372
- ...options.orgId && { orgId: options.orgId }
2373
- }
2374
- };
2375
- if (isGlobPattern) {
2376
- include = [target];
2377
- } else {
2699
+ var import_commander, import_nanoid, import_node_fs3, createRunId, consoleUrl, loadEvalCommand;
2700
+ var init_eval_command = __esm({
2701
+ "src/cli/commands/eval.command.ts"() {
2702
+ "use strict";
2703
+ init_cjs_shims();
2704
+ import_commander = require("commander");
2705
+ import_nanoid = require("nanoid");
2706
+ import_node_fs3 = require("fs");
2707
+ init_eval_context_runner();
2708
+ init_parse_flag_overrides();
2709
+ init_glob_utils();
2710
+ init_loader();
2711
+ init_errors();
2712
+ init_global_auth();
2713
+ init_node();
2714
+ createRunId = (0, import_nanoid.customAlphabet)("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ", 10);
2715
+ loadEvalCommand = (program2, flagOverrides = {}) => {
2716
+ return program2.addCommand(
2717
+ new import_commander.Command("eval").description("run evals locally").addArgument(
2718
+ new import_commander.Argument("[target]", "file, directory, glob pattern, or eval name").default(
2719
+ ".",
2720
+ "any *.eval.ts file in current directory"
2721
+ )
2722
+ ).option("-w, --watch true", "keep server running and watch for changes", false).option("-t, --token <TOKEN>", "axiom token", getDefaultToken).option("-d, --dataset <DATASET>", "axiom dataset name", process.env.AXIOM_DATASET).option("-u, --url <AXIOM URL>", "axiom url", getDefaultUrl).option("-o, --org-id <ORG ID>", "axiom organization id", getDefaultOrgId).option("-b, --baseline <BASELINE ID>", "id of baseline evaluation to compare against").option("--debug", "run locally without any network operations", false).option("--list", "list evaluations and test cases without running them", false).addOption(new import_commander.Option("-c, --console-url <URL>", "console url override").hideHelp()).action(async (target, options) => {
2378
2723
  try {
2379
- const stat = (0, import_node_fs3.lstatSync)(target);
2380
- if (stat.isDirectory()) {
2381
- include = config?.eval?.include || [];
2382
- } else {
2724
+ if (options.debug) {
2725
+ process.env.AXIOM_DEBUG = "true";
2726
+ }
2727
+ let include = [];
2728
+ let exclude;
2729
+ let testNamePattern;
2730
+ const isGlobPattern = isGlob(target);
2731
+ const { config: loadedConfig } = await loadConfig(".");
2732
+ validateFlagOverrides(flagOverrides, loadedConfig.eval.flagSchema);
2733
+ const config = {
2734
+ ...loadedConfig,
2735
+ eval: {
2736
+ ...loadedConfig.eval,
2737
+ ...options.token && { token: options.token },
2738
+ ...options.url && { url: options.url },
2739
+ ...options.dataset && { dataset: options.dataset },
2740
+ ...options.orgId && { orgId: options.orgId }
2741
+ }
2742
+ };
2743
+ if (isGlobPattern) {
2383
2744
  include = [target];
2745
+ } else {
2746
+ try {
2747
+ const stat = (0, import_node_fs3.lstatSync)(target);
2748
+ if (stat.isDirectory()) {
2749
+ include = config?.eval?.include || [];
2750
+ } else {
2751
+ include = [target];
2752
+ }
2753
+ } catch {
2754
+ testNamePattern = new RegExp(target, "i");
2755
+ include = config?.eval?.include || [];
2756
+ }
2384
2757
  }
2385
- } catch {
2386
- testNamePattern = new RegExp(target, "i");
2387
- include = config?.eval?.include || [];
2388
- }
2389
- }
2390
- exclude = config?.eval?.exclude;
2391
- if (!config?.eval?.instrumentation) {
2392
- console.warn(
2393
- u.yellow(
2394
- "\u26A0 App instrumentation (`eval.instrumentation` in `axiom.config.ts`) not configured. Using default provider."
2395
- )
2396
- );
2397
- console.log("");
2398
- }
2399
- const runId = createRunId();
2400
- consoleUrl = options.consoleUrl;
2401
- await runEvalWithContext(flagOverrides, async () => {
2402
- return runVitest(".", {
2403
- watch: options.watch,
2404
- baseline: options.baseline,
2405
- include,
2406
- exclude,
2407
- testNamePattern,
2408
- debug: options.debug,
2409
- list: options.list,
2410
- overrides: flagOverrides,
2411
- config,
2412
- runId,
2413
- consoleUrl: options.consoleUrl
2414
- });
2415
- });
2416
- } catch (error) {
2417
- if (error instanceof AxiomCLIError) {
2418
- console.error(`
2758
+ exclude = config?.eval?.exclude;
2759
+ if (!config?.eval?.instrumentation) {
2760
+ console.warn(
2761
+ u.yellow(
2762
+ "\u26A0 App instrumentation (`eval.instrumentation` in `axiom.config.ts`) not configured. Using default provider."
2763
+ )
2764
+ );
2765
+ console.log("");
2766
+ }
2767
+ const runId = createRunId();
2768
+ consoleUrl = options.consoleUrl;
2769
+ let runVitestModule;
2770
+ try {
2771
+ runVitestModule = await Promise.resolve().then(() => (init_run_vitest(), run_vitest_exports));
2772
+ } catch (err) {
2773
+ if (err && typeof err === "object" && "code" in err && (err.code === "ERR_MODULE_NOT_FOUND" || err.code === "MODULE_NOT_FOUND")) {
2774
+ throw new AxiomCLIError("Failed to load vitest.");
2775
+ }
2776
+ throw err;
2777
+ }
2778
+ const { runVitest: runVitest2 } = runVitestModule;
2779
+ await runEvalWithContext(flagOverrides, async () => {
2780
+ return runVitest2(".", {
2781
+ watch: options.watch,
2782
+ baseline: options.baseline,
2783
+ include,
2784
+ exclude,
2785
+ testNamePattern,
2786
+ debug: options.debug,
2787
+ list: options.list,
2788
+ overrides: flagOverrides,
2789
+ config,
2790
+ runId,
2791
+ consoleUrl: options.consoleUrl
2792
+ });
2793
+ });
2794
+ } catch (error) {
2795
+ if (error instanceof AxiomCLIError) {
2796
+ console.error(`
2419
2797
  \u274C ${error.message}
2420
2798
  `);
2421
- process.exit(1);
2422
- }
2423
- throw error;
2424
- }
2425
- })
2426
- );
2427
- };
2799
+ process.exit(1);
2800
+ }
2801
+ throw error;
2802
+ }
2803
+ })
2804
+ );
2805
+ };
2806
+ }
2807
+ });
2808
+
2809
+ // src/bin.ts
2810
+ var bin_exports = {};
2811
+ __export(bin_exports, {
2812
+ program: () => program
2813
+ });
2814
+ module.exports = __toCommonJS(bin_exports);
2815
+ init_cjs_shims();
2816
+ var import_commander3 = require("commander");
2817
+ init_eval_command();
2428
2818
 
2429
2819
  // src/cli/commands/auth.command.ts
2430
2820
  init_cjs_shims();
2431
2821
 
2432
2822
  // src/cli/commands/auth-login.command.ts
2433
2823
  init_cjs_shims();
2824
+ init_oauth();
2825
+ init_callback_server();
2826
+ init_config();
2827
+ init_api();
2828
+ init_errors();
2434
2829
  var BASE_HOSTNAME = "axiom.co";
2435
2830
  var getApiUrl = (hostname) => {
2436
2831
  return `https://api.${hostname}`;
@@ -2583,6 +2978,8 @@ function loadAuthLoginCommand(auth, root) {
2583
2978
 
2584
2979
  // src/cli/commands/auth-logout.command.ts
2585
2980
  init_cjs_shims();
2981
+ init_config();
2982
+ init_errors();
2586
2983
  async function logoutCommand(alias) {
2587
2984
  const config = await loadGlobalConfig();
2588
2985
  const profileToRemove = alias || config.active_profile;
@@ -2628,6 +3025,9 @@ function loadAuthLogoutCommand(auth, root) {
2628
3025
 
2629
3026
  // src/cli/commands/auth-status.command.ts
2630
3027
  init_cjs_shims();
3028
+ init_config();
3029
+ init_api();
3030
+ init_errors();
2631
3031
  async function statusCommand() {
2632
3032
  const config = await loadGlobalConfig();
2633
3033
  if (Object.keys(config.profiles).length === 0) {
@@ -2693,6 +3093,8 @@ function loadAuthStatusCommand(auth, program2) {
2693
3093
 
2694
3094
  // src/cli/commands/auth-switch.command.ts
2695
3095
  init_cjs_shims();
3096
+ init_config();
3097
+ init_errors();
2696
3098
  async function promptSelect2(message, choices) {
2697
3099
  console.log(`
2698
3100
  ${message}`);
@@ -2788,6 +3190,8 @@ function loadAuthCommand(program2) {
2788
3190
  }
2789
3191
 
2790
3192
  // src/bin.ts
3193
+ init_parse_flag_overrides();
3194
+ init_global_auth();
2791
3195
  var import_env = __toESM(require("@next/env"), 1);
2792
3196
 
2793
3197
  // src/cli/commands/version.command.ts
@@ -2796,7 +3200,7 @@ var import_commander2 = require("commander");
2796
3200
  var loadVersionCommand = (program2) => {
2797
3201
  return program2.addCommand(
2798
3202
  new import_commander2.Command("version").description("cli version").action(() => {
2799
- console.log("0.34.1");
3203
+ console.log("0.35.0");
2800
3204
  })
2801
3205
  );
2802
3206
  };
@@ -2806,7 +3210,7 @@ var { loadEnvConfig } = import_env.default;
2806
3210
  loadEnvConfig(process.cwd());
2807
3211
  var { cleanedArgv, overrides } = extractOverrides(process.argv.slice(2));
2808
3212
  var program = new import_commander3.Command();
2809
- program.name("axiom").description("Axiom's CLI to manage your objects and run evals").version("0.34.1");
3213
+ program.name("axiom").description("Axiom's CLI to manage your objects and run evals").version("0.35.0");
2810
3214
  program.hook("preAction", async (_, actionCommand) => {
2811
3215
  const commandName = actionCommand.name();
2812
3216
  const parentCommand = actionCommand.parent;