axiom 0.41.0 → 0.43.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/README.md +25 -0
  2. package/dist/{app-scope-BpysVzZT.d.ts → app-scope-BgNUnFZY.d.cts} +1 -161
  3. package/dist/{app-scope-BVlVbgs0.d.cts → app-scope-BgNUnFZY.d.ts} +1 -161
  4. package/dist/bin.cjs +28 -8
  5. package/dist/bin.cjs.map +1 -1
  6. package/dist/bin.js +5 -4
  7. package/dist/bin.js.map +1 -1
  8. package/dist/chunk-3THTOTTP.js +370 -0
  9. package/dist/chunk-3THTOTTP.js.map +1 -0
  10. package/dist/chunk-HCJKRSW4.js +28 -0
  11. package/dist/chunk-HCJKRSW4.js.map +1 -0
  12. package/dist/chunk-KPQJE7AU.js +219 -0
  13. package/dist/chunk-KPQJE7AU.js.map +1 -0
  14. package/dist/{chunk-CE5HHCSX.js → chunk-N4LWNPI5.js} +2 -2
  15. package/dist/chunk-N4LWNPI5.js.map +1 -0
  16. package/dist/{chunk-EOOUH7G4.js → chunk-OGWPMUHQ.js} +8 -8
  17. package/dist/chunk-OGWPMUHQ.js.map +1 -0
  18. package/dist/{chunk-EIFWUGXW.js → chunk-QSI2ES43.js} +3 -3
  19. package/dist/{chunk-BNDTR25U.js → chunk-T7DGZCOP.js} +25 -365
  20. package/dist/chunk-T7DGZCOP.js.map +1 -0
  21. package/dist/evals/online.cjs +539 -0
  22. package/dist/evals/online.cjs.map +1 -0
  23. package/dist/evals/online.d.cts +132 -0
  24. package/dist/evals/online.d.ts +132 -0
  25. package/dist/evals/online.js +14 -0
  26. package/dist/evals/online.js.map +1 -0
  27. package/dist/evals/scorers.cjs +378 -0
  28. package/dist/evals/scorers.cjs.map +1 -0
  29. package/dist/evals/scorers.d.cts +3 -0
  30. package/dist/evals/scorers.d.ts +3 -0
  31. package/dist/evals/scorers.js +14 -0
  32. package/dist/evals/scorers.js.map +1 -0
  33. package/dist/evals.cjs +45 -8
  34. package/dist/evals.cjs.map +1 -1
  35. package/dist/evals.d.cts +8 -3
  36. package/dist/evals.d.ts +8 -3
  37. package/dist/evals.js +26 -15
  38. package/dist/evals.js.map +1 -1
  39. package/dist/index.cjs +196 -50
  40. package/dist/index.cjs.map +1 -1
  41. package/dist/index.d.cts +16 -106
  42. package/dist/index.d.ts +16 -106
  43. package/dist/index.js +27 -118
  44. package/dist/index.js.map +1 -1
  45. package/dist/{run-vitest-LJOVEDIQ.js → run-vitest-ZLJB4UJX.js} +5 -4
  46. package/dist/{run-vitest-LJOVEDIQ.js.map → run-vitest-ZLJB4UJX.js.map} +1 -1
  47. package/dist/scorer.types-BY_ig9od.d.cts +38 -0
  48. package/dist/scorer.types-DMiCs7kl.d.ts +38 -0
  49. package/dist/scorers-FD50_6M6.d.cts +131 -0
  50. package/dist/scorers-ZbAJP2FN.d.ts +131 -0
  51. package/package.json +21 -1
  52. package/dist/chunk-BNDTR25U.js.map +0 -1
  53. package/dist/chunk-CE5HHCSX.js.map +0 -1
  54. package/dist/chunk-EOOUH7G4.js.map +0 -1
  55. package/dist/{chunk-EIFWUGXW.js.map → chunk-QSI2ES43.js.map} +0 -0
package/README.md CHANGED
@@ -77,6 +77,31 @@ const result = await withSpan(
77
77
  )
78
78
  ```
79
79
 
80
+ ## Online Evals
81
+
82
+ For running scorers in production (without vitest dependency):
83
+
84
+ ```ts
85
+ import { withSpan } from 'axiom/ai';
86
+ import { Scorer } from 'axiom/ai/evals/scorers';
87
+ import { onlineEval } from 'axiom/ai/evals/online';
88
+
89
+ const formatScorer = Scorer('format-check', ({ output }: { output: string }) => {
90
+ return output.length > 0;
91
+ });
92
+
93
+ await withSpan({ capability: 'qa', step: 'answer' }, async () => {
94
+ const response = await generateText({ model, messages });
95
+ void onlineEval(
96
+ { capability: 'qa', step: 'answer' },
97
+ { output: response.text, scorers: [formatScorer] }
98
+ );
99
+ return response.text;
100
+ });
101
+ ```
102
+
103
+ > For offline evals that use `Eval()`, continue importing from `axiom/ai/evals`.
104
+
80
105
  ## Documentation
81
106
 
82
107
  For more information about how to set up and use the Axiom JavaScript SDK, read documentation on [axiom.co/docs/ai-engineering/quickstart](https://axiom.co/docs/ai-engineering/quickstart).
@@ -1,165 +1,5 @@
1
- import { Aggregation } from './evals/aggregations.js';
2
1
  import { ZodObject, ZodDefault, z } from 'zod';
3
2
 
4
- type ValidChars =
5
- | 'a'
6
- | 'b'
7
- | 'c'
8
- | 'd'
9
- | 'e'
10
- | 'f'
11
- | 'g'
12
- | 'h'
13
- | 'i'
14
- | 'j'
15
- | 'k'
16
- | 'l'
17
- | 'm'
18
- | 'n'
19
- | 'o'
20
- | 'p'
21
- | 'q'
22
- | 'r'
23
- | 's'
24
- | 't'
25
- | 'u'
26
- | 'v'
27
- | 'w'
28
- | 'x'
29
- | 'y'
30
- | 'z'
31
- | 'A'
32
- | 'B'
33
- | 'C'
34
- | 'D'
35
- | 'E'
36
- | 'F'
37
- | 'G'
38
- | 'H'
39
- | 'I'
40
- | 'J'
41
- | 'K'
42
- | 'L'
43
- | 'M'
44
- | 'N'
45
- | 'O'
46
- | 'P'
47
- | 'Q'
48
- | 'R'
49
- | 'S'
50
- | 'T'
51
- | 'U'
52
- | 'V'
53
- | 'W'
54
- | 'X'
55
- | 'Y'
56
- | 'Z'
57
- | '0'
58
- | '1'
59
- | '2'
60
- | '3'
61
- | '4'
62
- | '5'
63
- | '6'
64
- | '7'
65
- | '8'
66
- | '9'
67
- | '-'
68
- | '_';
69
-
70
- type ValidateName<T extends string, Original extends string = T> =
71
- // For widened strings, don't attempt validation – let them flow through unchanged
72
- string extends T // string is not wider than T, ie T is string
73
- ? T
74
- : T extends ''
75
- ? Original extends ''
76
- ? '❌ Name cannot be empty'
77
- : Original
78
- : T extends `${infer First}${infer Rest}`
79
- ? First extends ValidChars
80
- ? ValidateName<Rest, Original>
81
- : `❌ Invalid character in "${Original}". Only A-Z, a-z, 0-9, -, _ allowed`
82
- : never;
83
-
84
- type Simplify<T> = {
85
- [K in keyof T]: T[K];
86
- } & {};
87
- /**
88
- * Creates a scorer to be used in evals.
89
- *
90
- * Scorers need to return a number or a boolean. If returning a number, it is
91
- * suggested that this number is between 0 and 1.
92
- *
93
- * @example
94
- * const scorer = createScorer('exact-match',
95
- * (args: { output: string; expected: string; }) => {
96
- * return args.output === args.expected ? true : false;
97
- * }
98
- * );
99
- *
100
- * @example
101
- * // With aggregation for trials
102
- * import { PassAtK } from '@axiomhq/ai/evals/aggregations';
103
- * const scorer = createScorer('tool-called',
104
- * (args: { output: string }) => args.output.includes('tool') ? 1 : 0,
105
- * { aggregation: PassAtK({ threshold: 0.8 }) }
106
- * );
107
- */
108
- declare function createScorer<TArgs extends Record<string, any> = {}, TInput = [TArgs] extends [{
109
- input: infer I;
110
- }] ? I : unknown, TExpected = [TArgs] extends [{
111
- expected: infer E;
112
- }] ? Exclude<E, undefined> : unknown, TOutput = [TArgs] extends [{
113
- output: infer O;
114
- }] ? Exclude<O, undefined> : never, TExtra extends Record<string, any> = Simplify<Omit<TArgs, 'input' | 'expected' | 'output' | 'trialIndex'>>, TName extends string = string>(
115
- /**
116
- * The name of the scorer
117
- */
118
- name: ValidateName<TName>,
119
- /**
120
- * The scorer function. Can be sync or async.
121
- */
122
- fn: (args: TArgs) => number | boolean | Score | Promise<number | boolean | Score>,
123
- /**
124
- * Optional configuration for the scorer, including aggregation for trials.
125
- */
126
- options?: ScorerOptions): [TOutput] extends [never] ? never : Scorer<TInput, TExpected, TOutput, TExtra>;
127
-
128
- type Score = {
129
- score: number | boolean | null;
130
- metadata?: Record<string, any>;
131
- };
132
- type ScoreWithName = Score & {
133
- name: string;
134
- /** Per-trial scores when running multiple trials */
135
- trials?: number[];
136
- /** Aggregation type used (e.g., 'mean', 'pass@k') */
137
- aggregation?: string;
138
- /** Threshold for pass-based aggregations */
139
- threshold?: number;
140
- };
141
- /**
142
- * Configuration options for a scorer.
143
- */
144
- type ScorerOptions = {
145
- /**
146
- * Aggregation function for combining scores across multiple trials.
147
- * Defaults to Mean() if not specified.
148
- */
149
- aggregation?: Aggregation;
150
- };
151
- type ScorerLike<TInput = any, TExpected = any, TOutput = any, TExtra extends Record<string, any> = {}> = (args: {
152
- input?: TInput;
153
- expected?: TExpected;
154
- output: TOutput;
155
- /** Current trial index (0-based) when running multiple trials */
156
- trialIndex?: number;
157
- } & TExtra) => Score | Promise<Score>;
158
- type Scorer<TInput = any, TExpected = any, TOutput = any, TExtra extends Record<string, any> = {}> = ScorerLike<TInput, TExpected, TOutput, TExtra> & {
159
- readonly name: string;
160
- readonly aggregation?: Aggregation;
161
- };
162
-
163
3
  type DefaultMaxDepth = 8;
164
4
  type HasDefaults<S> = S extends {
165
5
  _zod: {
@@ -284,4 +124,4 @@ declare function createAppScope<FlagSchema extends ZodObject<any>, FactSchema ex
284
124
  __error__: 'createAppScope: flagSchema must have .default() for all leaf fields';
285
125
  }): AppScope<FlagSchema, FactSchema>;
286
126
 
287
- export { type ScoreWithName as S, type ValidateName as V, type ScorerLike as a, type Score as b, createAppScope as c, type ScorerOptions as d, createScorer as e, type Scorer as f };
127
+ export { createAppScope as c };
@@ -1,165 +1,5 @@
1
- import { Aggregation } from './evals/aggregations.cjs';
2
1
  import { ZodObject, ZodDefault, z } from 'zod';
3
2
 
4
- type ValidChars =
5
- | 'a'
6
- | 'b'
7
- | 'c'
8
- | 'd'
9
- | 'e'
10
- | 'f'
11
- | 'g'
12
- | 'h'
13
- | 'i'
14
- | 'j'
15
- | 'k'
16
- | 'l'
17
- | 'm'
18
- | 'n'
19
- | 'o'
20
- | 'p'
21
- | 'q'
22
- | 'r'
23
- | 's'
24
- | 't'
25
- | 'u'
26
- | 'v'
27
- | 'w'
28
- | 'x'
29
- | 'y'
30
- | 'z'
31
- | 'A'
32
- | 'B'
33
- | 'C'
34
- | 'D'
35
- | 'E'
36
- | 'F'
37
- | 'G'
38
- | 'H'
39
- | 'I'
40
- | 'J'
41
- | 'K'
42
- | 'L'
43
- | 'M'
44
- | 'N'
45
- | 'O'
46
- | 'P'
47
- | 'Q'
48
- | 'R'
49
- | 'S'
50
- | 'T'
51
- | 'U'
52
- | 'V'
53
- | 'W'
54
- | 'X'
55
- | 'Y'
56
- | 'Z'
57
- | '0'
58
- | '1'
59
- | '2'
60
- | '3'
61
- | '4'
62
- | '5'
63
- | '6'
64
- | '7'
65
- | '8'
66
- | '9'
67
- | '-'
68
- | '_';
69
-
70
- type ValidateName<T extends string, Original extends string = T> =
71
- // For widened strings, don't attempt validation – let them flow through unchanged
72
- string extends T // string is not wider than T, ie T is string
73
- ? T
74
- : T extends ''
75
- ? Original extends ''
76
- ? '❌ Name cannot be empty'
77
- : Original
78
- : T extends `${infer First}${infer Rest}`
79
- ? First extends ValidChars
80
- ? ValidateName<Rest, Original>
81
- : `❌ Invalid character in "${Original}". Only A-Z, a-z, 0-9, -, _ allowed`
82
- : never;
83
-
84
- type Simplify<T> = {
85
- [K in keyof T]: T[K];
86
- } & {};
87
- /**
88
- * Creates a scorer to be used in evals.
89
- *
90
- * Scorers need to return a number or a boolean. If returning a number, it is
91
- * suggested that this number is between 0 and 1.
92
- *
93
- * @example
94
- * const scorer = createScorer('exact-match',
95
- * (args: { output: string; expected: string; }) => {
96
- * return args.output === args.expected ? true : false;
97
- * }
98
- * );
99
- *
100
- * @example
101
- * // With aggregation for trials
102
- * import { PassAtK } from '@axiomhq/ai/evals/aggregations';
103
- * const scorer = createScorer('tool-called',
104
- * (args: { output: string }) => args.output.includes('tool') ? 1 : 0,
105
- * { aggregation: PassAtK({ threshold: 0.8 }) }
106
- * );
107
- */
108
- declare function createScorer<TArgs extends Record<string, any> = {}, TInput = [TArgs] extends [{
109
- input: infer I;
110
- }] ? I : unknown, TExpected = [TArgs] extends [{
111
- expected: infer E;
112
- }] ? Exclude<E, undefined> : unknown, TOutput = [TArgs] extends [{
113
- output: infer O;
114
- }] ? Exclude<O, undefined> : never, TExtra extends Record<string, any> = Simplify<Omit<TArgs, 'input' | 'expected' | 'output' | 'trialIndex'>>, TName extends string = string>(
115
- /**
116
- * The name of the scorer
117
- */
118
- name: ValidateName<TName>,
119
- /**
120
- * The scorer function. Can be sync or async.
121
- */
122
- fn: (args: TArgs) => number | boolean | Score | Promise<number | boolean | Score>,
123
- /**
124
- * Optional configuration for the scorer, including aggregation for trials.
125
- */
126
- options?: ScorerOptions): [TOutput] extends [never] ? never : Scorer<TInput, TExpected, TOutput, TExtra>;
127
-
128
- type Score = {
129
- score: number | boolean | null;
130
- metadata?: Record<string, any>;
131
- };
132
- type ScoreWithName = Score & {
133
- name: string;
134
- /** Per-trial scores when running multiple trials */
135
- trials?: number[];
136
- /** Aggregation type used (e.g., 'mean', 'pass@k') */
137
- aggregation?: string;
138
- /** Threshold for pass-based aggregations */
139
- threshold?: number;
140
- };
141
- /**
142
- * Configuration options for a scorer.
143
- */
144
- type ScorerOptions = {
145
- /**
146
- * Aggregation function for combining scores across multiple trials.
147
- * Defaults to Mean() if not specified.
148
- */
149
- aggregation?: Aggregation;
150
- };
151
- type ScorerLike<TInput = any, TExpected = any, TOutput = any, TExtra extends Record<string, any> = {}> = (args: {
152
- input?: TInput;
153
- expected?: TExpected;
154
- output: TOutput;
155
- /** Current trial index (0-based) when running multiple trials */
156
- trialIndex?: number;
157
- } & TExtra) => Score | Promise<Score>;
158
- type Scorer<TInput = any, TExpected = any, TOutput = any, TExtra extends Record<string, any> = {}> = ScorerLike<TInput, TExpected, TOutput, TExtra> & {
159
- readonly name: string;
160
- readonly aggregation?: Aggregation;
161
- };
162
-
163
3
  type DefaultMaxDepth = 8;
164
4
  type HasDefaults<S> = S extends {
165
5
  _zod: {
@@ -284,4 +124,4 @@ declare function createAppScope<FlagSchema extends ZodObject<any>, FactSchema ex
284
124
  __error__: 'createAppScope: flagSchema must have .default() for all leaf fields';
285
125
  }): AppScope<FlagSchema, FactSchema>;
286
126
 
287
- export { type ScoreWithName as S, type ValidateName as V, type ScorerLike as a, type Score as b, createAppScope as c, type ScorerOptions as d, createScorer as e, type Scorer as f };
127
+ export { createAppScope as c };
package/dist/bin.cjs CHANGED
@@ -553,7 +553,7 @@ var init_package = __esm({
553
553
  "package.json"() {
554
554
  package_default = {
555
555
  name: "axiom",
556
- version: "0.41.0",
556
+ version: "0.43.0",
557
557
  type: "module",
558
558
  author: "Axiom, Inc.",
559
559
  contributors: [
@@ -609,6 +609,26 @@ var init_package = __esm({
609
609
  default: "./dist/evals/aggregations.cjs"
610
610
  }
611
611
  },
612
+ "./ai/evals/scorers": {
613
+ import: {
614
+ types: "./dist/evals/scorers.d.ts",
615
+ default: "./dist/evals/scorers.js"
616
+ },
617
+ require: {
618
+ types: "./dist/evals/scorers.d.cts",
619
+ default: "./dist/evals/scorers.cjs"
620
+ }
621
+ },
622
+ "./ai/evals/online": {
623
+ import: {
624
+ types: "./dist/evals/online.d.ts",
625
+ default: "./dist/evals/online.js"
626
+ },
627
+ require: {
628
+ types: "./dist/evals/online.d.cts",
629
+ default: "./dist/evals/online.cjs"
630
+ }
631
+ },
612
632
  "./ai/config": {
613
633
  import: {
614
634
  types: "./dist/config.d.ts",
@@ -2224,11 +2244,11 @@ function printFinalReport({
2224
2244
  const baselineParam = suite.baseline?.traceId ? `?baselineId=${suite.baseline.traceId}` : "";
2225
2245
  logger("View eval result:");
2226
2246
  logger(
2227
- `${config.consoleEndpointUrl}/${orgId}/ai-engineering/evaluations/${suite.name}/${suite.version}${baselineParam}`
2247
+ `${config.consoleEndpointUrl}/${orgId}/ai/evaluations/${suite.name}/${suite.version}${baselineParam}`
2228
2248
  );
2229
2249
  } else {
2230
2250
  logger("View full report:");
2231
- logger(`${config.consoleEndpointUrl}/${orgId}/ai-engineering/evaluations?runId=${runId}`);
2251
+ logger(`${config.consoleEndpointUrl}/${orgId}/ai/evaluations?runId=${runId}`);
2232
2252
  }
2233
2253
  } else if (isDebug) {
2234
2254
  logger(u.dim("Results not uploaded to Axiom (debug mode)"));
@@ -2480,11 +2500,11 @@ function setupEvalProvider(connection) {
2480
2500
  axiomProvider = new import_sdk_trace_node.NodeTracerProvider({
2481
2501
  resource: (0, import_resources.resourceFromAttributes)({
2482
2502
  ["service.name"]: "axiom",
2483
- ["service.version"]: "0.41.0"
2503
+ ["service.version"]: "0.43.0"
2484
2504
  }),
2485
2505
  spanProcessors: [processor]
2486
2506
  });
2487
- axiomTracer = axiomProvider.getTracer("axiom", "0.41.0");
2507
+ axiomTracer = axiomProvider.getTracer("axiom", "0.43.0");
2488
2508
  }
2489
2509
  async function initInstrumentation(config) {
2490
2510
  if (initialized) {
@@ -2496,7 +2516,7 @@ async function initInstrumentation(config) {
2496
2516
  }
2497
2517
  initializationPromise = (async () => {
2498
2518
  if (!config.enabled) {
2499
- axiomTracer = import_api10.trace.getTracer("axiom", "0.41.0");
2519
+ axiomTracer = import_api10.trace.getTracer("axiom", "0.43.0");
2500
2520
  initialized = true;
2501
2521
  return;
2502
2522
  }
@@ -3239,7 +3259,7 @@ var import_commander2 = require("commander");
3239
3259
  var loadVersionCommand = (program2) => {
3240
3260
  return program2.addCommand(
3241
3261
  new import_commander2.Command("version").description("cli version").action(() => {
3242
- console.log("0.41.0");
3262
+ console.log("0.43.0");
3243
3263
  })
3244
3264
  );
3245
3265
  };
@@ -3249,7 +3269,7 @@ var { loadEnvConfig } = import_env.default;
3249
3269
  loadEnvConfig(process.cwd());
3250
3270
  var { cleanedArgv, overrides } = extractOverrides(process.argv.slice(2));
3251
3271
  var program = new import_commander3.Command();
3252
- program.name("axiom").description("Axiom's CLI to manage your objects and run evals").version("0.41.0");
3272
+ program.name("axiom").description("Axiom's CLI to manage your objects and run evals").version("0.43.0");
3253
3273
  program.hook("preAction", async (_, actionCommand) => {
3254
3274
  const commandName = actionCommand.name();
3255
3275
  const parentCommand = actionCommand.parent;