axiom 0.41.0 → 0.42.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. package/README.md +24 -0
  2. package/dist/{app-scope-BpysVzZT.d.ts → app-scope-BgNUnFZY.d.cts} +1 -161
  3. package/dist/{app-scope-BVlVbgs0.d.cts → app-scope-BgNUnFZY.d.ts} +1 -161
  4. package/dist/bin.cjs +16 -6
  5. package/dist/bin.cjs.map +1 -1
  6. package/dist/bin.js +5 -4
  7. package/dist/bin.js.map +1 -1
  8. package/dist/chunk-3THTOTTP.js +370 -0
  9. package/dist/chunk-3THTOTTP.js.map +1 -0
  10. package/dist/{chunk-EIFWUGXW.js → chunk-7AIWUQUO.js} +3 -3
  11. package/dist/{chunk-EOOUH7G4.js → chunk-AF26RXVP.js} +6 -6
  12. package/dist/chunk-AF26RXVP.js.map +1 -0
  13. package/dist/{chunk-BNDTR25U.js → chunk-DL77W2XP.js} +15 -365
  14. package/dist/chunk-DL77W2XP.js.map +1 -0
  15. package/dist/{chunk-CE5HHCSX.js → chunk-N6WWQZ4E.js} +2 -2
  16. package/dist/chunk-N6WWQZ4E.js.map +1 -0
  17. package/dist/evals/scorers.cjs +380 -0
  18. package/dist/evals/scorers.cjs.map +1 -0
  19. package/dist/evals/scorers.d.cts +2 -0
  20. package/dist/evals/scorers.d.ts +2 -0
  21. package/dist/evals/scorers.js +15 -0
  22. package/dist/evals/scorers.js.map +1 -0
  23. package/dist/evals.cjs +15 -5
  24. package/dist/evals.cjs.map +1 -1
  25. package/dist/evals.d.cts +3 -2
  26. package/dist/evals.d.ts +3 -2
  27. package/dist/evals.js +10 -8
  28. package/dist/evals.js.map +1 -1
  29. package/dist/index.cjs +11 -1
  30. package/dist/index.cjs.map +1 -1
  31. package/dist/index.d.cts +3 -2
  32. package/dist/index.d.ts +3 -2
  33. package/dist/index.js +5 -3
  34. package/dist/index.js.map +1 -1
  35. package/dist/{run-vitest-LJOVEDIQ.js → run-vitest-TX7FOGF2.js} +5 -4
  36. package/dist/{run-vitest-LJOVEDIQ.js.map → run-vitest-TX7FOGF2.js.map} +1 -1
  37. package/dist/scorers-BQJ3Xrf7.d.ts +162 -0
  38. package/dist/scorers-CiX7MIog.d.cts +162 -0
  39. package/package.json +11 -1
  40. package/dist/chunk-BNDTR25U.js.map +0 -1
  41. package/dist/chunk-CE5HHCSX.js.map +0 -1
  42. package/dist/chunk-EOOUH7G4.js.map +0 -1
  43. package/dist/{chunk-EIFWUGXW.js.map → chunk-7AIWUQUO.js.map} +0 -0
package/README.md CHANGED
@@ -77,6 +77,30 @@ const result = await withSpan(
77
77
  )
78
78
  ```
79
79
 
80
+ ## Online Evals
81
+
82
+ For running scorers in production (without vitest dependency):
83
+
84
+ ```ts
85
+ import { withSpan, onlineEval } from 'axiom/ai';
86
+ import { Scorer } from 'axiom/ai/evals/scorers';
87
+
88
+ const formatScorer = Scorer('format-check', ({ output }: { output: string }) => {
89
+ return output.length > 0;
90
+ });
91
+
92
+ await withSpan({ capability: 'qa', step: 'answer' }, async () => {
93
+ const response = await generateText({ model, messages });
94
+ void onlineEval(
95
+ { capability: 'qa', step: 'answer' },
96
+ { output: response.text, scorers: [formatScorer] }
97
+ );
98
+ return response.text;
99
+ });
100
+ ```
101
+
102
+ > For offline evals that use `Eval()`, continue importing from `axiom/ai/evals`.
103
+
80
104
  ## Documentation
81
105
 
82
106
  For more information about how to set up and use the Axiom JavaScript SDK, read documentation on [axiom.co/docs/ai-engineering/quickstart](https://axiom.co/docs/ai-engineering/quickstart).
@@ -1,165 +1,5 @@
1
- import { Aggregation } from './evals/aggregations.js';
2
1
  import { ZodObject, ZodDefault, z } from 'zod';
3
2
 
4
- type ValidChars =
5
- | 'a'
6
- | 'b'
7
- | 'c'
8
- | 'd'
9
- | 'e'
10
- | 'f'
11
- | 'g'
12
- | 'h'
13
- | 'i'
14
- | 'j'
15
- | 'k'
16
- | 'l'
17
- | 'm'
18
- | 'n'
19
- | 'o'
20
- | 'p'
21
- | 'q'
22
- | 'r'
23
- | 's'
24
- | 't'
25
- | 'u'
26
- | 'v'
27
- | 'w'
28
- | 'x'
29
- | 'y'
30
- | 'z'
31
- | 'A'
32
- | 'B'
33
- | 'C'
34
- | 'D'
35
- | 'E'
36
- | 'F'
37
- | 'G'
38
- | 'H'
39
- | 'I'
40
- | 'J'
41
- | 'K'
42
- | 'L'
43
- | 'M'
44
- | 'N'
45
- | 'O'
46
- | 'P'
47
- | 'Q'
48
- | 'R'
49
- | 'S'
50
- | 'T'
51
- | 'U'
52
- | 'V'
53
- | 'W'
54
- | 'X'
55
- | 'Y'
56
- | 'Z'
57
- | '0'
58
- | '1'
59
- | '2'
60
- | '3'
61
- | '4'
62
- | '5'
63
- | '6'
64
- | '7'
65
- | '8'
66
- | '9'
67
- | '-'
68
- | '_';
69
-
70
- type ValidateName<T extends string, Original extends string = T> =
71
- // For widened strings, don't attempt validation – let them flow through unchanged
72
- string extends T // string is not wider than T, ie T is string
73
- ? T
74
- : T extends ''
75
- ? Original extends ''
76
- ? '❌ Name cannot be empty'
77
- : Original
78
- : T extends `${infer First}${infer Rest}`
79
- ? First extends ValidChars
80
- ? ValidateName<Rest, Original>
81
- : `❌ Invalid character in "${Original}". Only A-Z, a-z, 0-9, -, _ allowed`
82
- : never;
83
-
84
- type Simplify<T> = {
85
- [K in keyof T]: T[K];
86
- } & {};
87
- /**
88
- * Creates a scorer to be used in evals.
89
- *
90
- * Scorers need to return a number or a boolean. If returning a number, it is
91
- * suggested that this number is between 0 and 1.
92
- *
93
- * @example
94
- * const scorer = createScorer('exact-match',
95
- * (args: { output: string; expected: string; }) => {
96
- * return args.output === args.expected ? true : false;
97
- * }
98
- * );
99
- *
100
- * @example
101
- * // With aggregation for trials
102
- * import { PassAtK } from '@axiomhq/ai/evals/aggregations';
103
- * const scorer = createScorer('tool-called',
104
- * (args: { output: string }) => args.output.includes('tool') ? 1 : 0,
105
- * { aggregation: PassAtK({ threshold: 0.8 }) }
106
- * );
107
- */
108
- declare function createScorer<TArgs extends Record<string, any> = {}, TInput = [TArgs] extends [{
109
- input: infer I;
110
- }] ? I : unknown, TExpected = [TArgs] extends [{
111
- expected: infer E;
112
- }] ? Exclude<E, undefined> : unknown, TOutput = [TArgs] extends [{
113
- output: infer O;
114
- }] ? Exclude<O, undefined> : never, TExtra extends Record<string, any> = Simplify<Omit<TArgs, 'input' | 'expected' | 'output' | 'trialIndex'>>, TName extends string = string>(
115
- /**
116
- * The name of the scorer
117
- */
118
- name: ValidateName<TName>,
119
- /**
120
- * The scorer function. Can be sync or async.
121
- */
122
- fn: (args: TArgs) => number | boolean | Score | Promise<number | boolean | Score>,
123
- /**
124
- * Optional configuration for the scorer, including aggregation for trials.
125
- */
126
- options?: ScorerOptions): [TOutput] extends [never] ? never : Scorer<TInput, TExpected, TOutput, TExtra>;
127
-
128
- type Score = {
129
- score: number | boolean | null;
130
- metadata?: Record<string, any>;
131
- };
132
- type ScoreWithName = Score & {
133
- name: string;
134
- /** Per-trial scores when running multiple trials */
135
- trials?: number[];
136
- /** Aggregation type used (e.g., 'mean', 'pass@k') */
137
- aggregation?: string;
138
- /** Threshold for pass-based aggregations */
139
- threshold?: number;
140
- };
141
- /**
142
- * Configuration options for a scorer.
143
- */
144
- type ScorerOptions = {
145
- /**
146
- * Aggregation function for combining scores across multiple trials.
147
- * Defaults to Mean() if not specified.
148
- */
149
- aggregation?: Aggregation;
150
- };
151
- type ScorerLike<TInput = any, TExpected = any, TOutput = any, TExtra extends Record<string, any> = {}> = (args: {
152
- input?: TInput;
153
- expected?: TExpected;
154
- output: TOutput;
155
- /** Current trial index (0-based) when running multiple trials */
156
- trialIndex?: number;
157
- } & TExtra) => Score | Promise<Score>;
158
- type Scorer<TInput = any, TExpected = any, TOutput = any, TExtra extends Record<string, any> = {}> = ScorerLike<TInput, TExpected, TOutput, TExtra> & {
159
- readonly name: string;
160
- readonly aggregation?: Aggregation;
161
- };
162
-
163
3
  type DefaultMaxDepth = 8;
164
4
  type HasDefaults<S> = S extends {
165
5
  _zod: {
@@ -284,4 +124,4 @@ declare function createAppScope<FlagSchema extends ZodObject<any>, FactSchema ex
284
124
  __error__: 'createAppScope: flagSchema must have .default() for all leaf fields';
285
125
  }): AppScope<FlagSchema, FactSchema>;
286
126
 
287
- export { type ScoreWithName as S, type ValidateName as V, type ScorerLike as a, type Score as b, createAppScope as c, type ScorerOptions as d, createScorer as e, type Scorer as f };
127
+ export { createAppScope as c };
@@ -1,165 +1,5 @@
1
- import { Aggregation } from './evals/aggregations.cjs';
2
1
  import { ZodObject, ZodDefault, z } from 'zod';
3
2
 
4
- type ValidChars =
5
- | 'a'
6
- | 'b'
7
- | 'c'
8
- | 'd'
9
- | 'e'
10
- | 'f'
11
- | 'g'
12
- | 'h'
13
- | 'i'
14
- | 'j'
15
- | 'k'
16
- | 'l'
17
- | 'm'
18
- | 'n'
19
- | 'o'
20
- | 'p'
21
- | 'q'
22
- | 'r'
23
- | 's'
24
- | 't'
25
- | 'u'
26
- | 'v'
27
- | 'w'
28
- | 'x'
29
- | 'y'
30
- | 'z'
31
- | 'A'
32
- | 'B'
33
- | 'C'
34
- | 'D'
35
- | 'E'
36
- | 'F'
37
- | 'G'
38
- | 'H'
39
- | 'I'
40
- | 'J'
41
- | 'K'
42
- | 'L'
43
- | 'M'
44
- | 'N'
45
- | 'O'
46
- | 'P'
47
- | 'Q'
48
- | 'R'
49
- | 'S'
50
- | 'T'
51
- | 'U'
52
- | 'V'
53
- | 'W'
54
- | 'X'
55
- | 'Y'
56
- | 'Z'
57
- | '0'
58
- | '1'
59
- | '2'
60
- | '3'
61
- | '4'
62
- | '5'
63
- | '6'
64
- | '7'
65
- | '8'
66
- | '9'
67
- | '-'
68
- | '_';
69
-
70
- type ValidateName<T extends string, Original extends string = T> =
71
- // For widened strings, don't attempt validation – let them flow through unchanged
72
- string extends T // string is not wider than T, ie T is string
73
- ? T
74
- : T extends ''
75
- ? Original extends ''
76
- ? '❌ Name cannot be empty'
77
- : Original
78
- : T extends `${infer First}${infer Rest}`
79
- ? First extends ValidChars
80
- ? ValidateName<Rest, Original>
81
- : `❌ Invalid character in "${Original}". Only A-Z, a-z, 0-9, -, _ allowed`
82
- : never;
83
-
84
- type Simplify<T> = {
85
- [K in keyof T]: T[K];
86
- } & {};
87
- /**
88
- * Creates a scorer to be used in evals.
89
- *
90
- * Scorers need to return a number or a boolean. If returning a number, it is
91
- * suggested that this number is between 0 and 1.
92
- *
93
- * @example
94
- * const scorer = createScorer('exact-match',
95
- * (args: { output: string; expected: string; }) => {
96
- * return args.output === args.expected ? true : false;
97
- * }
98
- * );
99
- *
100
- * @example
101
- * // With aggregation for trials
102
- * import { PassAtK } from '@axiomhq/ai/evals/aggregations';
103
- * const scorer = createScorer('tool-called',
104
- * (args: { output: string }) => args.output.includes('tool') ? 1 : 0,
105
- * { aggregation: PassAtK({ threshold: 0.8 }) }
106
- * );
107
- */
108
- declare function createScorer<TArgs extends Record<string, any> = {}, TInput = [TArgs] extends [{
109
- input: infer I;
110
- }] ? I : unknown, TExpected = [TArgs] extends [{
111
- expected: infer E;
112
- }] ? Exclude<E, undefined> : unknown, TOutput = [TArgs] extends [{
113
- output: infer O;
114
- }] ? Exclude<O, undefined> : never, TExtra extends Record<string, any> = Simplify<Omit<TArgs, 'input' | 'expected' | 'output' | 'trialIndex'>>, TName extends string = string>(
115
- /**
116
- * The name of the scorer
117
- */
118
- name: ValidateName<TName>,
119
- /**
120
- * The scorer function. Can be sync or async.
121
- */
122
- fn: (args: TArgs) => number | boolean | Score | Promise<number | boolean | Score>,
123
- /**
124
- * Optional configuration for the scorer, including aggregation for trials.
125
- */
126
- options?: ScorerOptions): [TOutput] extends [never] ? never : Scorer<TInput, TExpected, TOutput, TExtra>;
127
-
128
- type Score = {
129
- score: number | boolean | null;
130
- metadata?: Record<string, any>;
131
- };
132
- type ScoreWithName = Score & {
133
- name: string;
134
- /** Per-trial scores when running multiple trials */
135
- trials?: number[];
136
- /** Aggregation type used (e.g., 'mean', 'pass@k') */
137
- aggregation?: string;
138
- /** Threshold for pass-based aggregations */
139
- threshold?: number;
140
- };
141
- /**
142
- * Configuration options for a scorer.
143
- */
144
- type ScorerOptions = {
145
- /**
146
- * Aggregation function for combining scores across multiple trials.
147
- * Defaults to Mean() if not specified.
148
- */
149
- aggregation?: Aggregation;
150
- };
151
- type ScorerLike<TInput = any, TExpected = any, TOutput = any, TExtra extends Record<string, any> = {}> = (args: {
152
- input?: TInput;
153
- expected?: TExpected;
154
- output: TOutput;
155
- /** Current trial index (0-based) when running multiple trials */
156
- trialIndex?: number;
157
- } & TExtra) => Score | Promise<Score>;
158
- type Scorer<TInput = any, TExpected = any, TOutput = any, TExtra extends Record<string, any> = {}> = ScorerLike<TInput, TExpected, TOutput, TExtra> & {
159
- readonly name: string;
160
- readonly aggregation?: Aggregation;
161
- };
162
-
163
3
  type DefaultMaxDepth = 8;
164
4
  type HasDefaults<S> = S extends {
165
5
  _zod: {
@@ -284,4 +124,4 @@ declare function createAppScope<FlagSchema extends ZodObject<any>, FactSchema ex
284
124
  __error__: 'createAppScope: flagSchema must have .default() for all leaf fields';
285
125
  }): AppScope<FlagSchema, FactSchema>;
286
126
 
287
- export { type ScoreWithName as S, type ValidateName as V, type ScorerLike as a, type Score as b, createAppScope as c, type ScorerOptions as d, createScorer as e, type Scorer as f };
127
+ export { createAppScope as c };
package/dist/bin.cjs CHANGED
@@ -553,7 +553,7 @@ var init_package = __esm({
553
553
  "package.json"() {
554
554
  package_default = {
555
555
  name: "axiom",
556
- version: "0.41.0",
556
+ version: "0.42.0",
557
557
  type: "module",
558
558
  author: "Axiom, Inc.",
559
559
  contributors: [
@@ -609,6 +609,16 @@ var init_package = __esm({
609
609
  default: "./dist/evals/aggregations.cjs"
610
610
  }
611
611
  },
612
+ "./ai/evals/scorers": {
613
+ import: {
614
+ types: "./dist/evals/scorers.d.ts",
615
+ default: "./dist/evals/scorers.js"
616
+ },
617
+ require: {
618
+ types: "./dist/evals/scorers.d.cts",
619
+ default: "./dist/evals/scorers.cjs"
620
+ }
621
+ },
612
622
  "./ai/config": {
613
623
  import: {
614
624
  types: "./dist/config.d.ts",
@@ -2480,11 +2490,11 @@ function setupEvalProvider(connection) {
2480
2490
  axiomProvider = new import_sdk_trace_node.NodeTracerProvider({
2481
2491
  resource: (0, import_resources.resourceFromAttributes)({
2482
2492
  ["service.name"]: "axiom",
2483
- ["service.version"]: "0.41.0"
2493
+ ["service.version"]: "0.42.0"
2484
2494
  }),
2485
2495
  spanProcessors: [processor]
2486
2496
  });
2487
- axiomTracer = axiomProvider.getTracer("axiom", "0.41.0");
2497
+ axiomTracer = axiomProvider.getTracer("axiom", "0.42.0");
2488
2498
  }
2489
2499
  async function initInstrumentation(config) {
2490
2500
  if (initialized) {
@@ -2496,7 +2506,7 @@ async function initInstrumentation(config) {
2496
2506
  }
2497
2507
  initializationPromise = (async () => {
2498
2508
  if (!config.enabled) {
2499
- axiomTracer = import_api10.trace.getTracer("axiom", "0.41.0");
2509
+ axiomTracer = import_api10.trace.getTracer("axiom", "0.42.0");
2500
2510
  initialized = true;
2501
2511
  return;
2502
2512
  }
@@ -3239,7 +3249,7 @@ var import_commander2 = require("commander");
3239
3249
  var loadVersionCommand = (program2) => {
3240
3250
  return program2.addCommand(
3241
3251
  new import_commander2.Command("version").description("cli version").action(() => {
3242
- console.log("0.41.0");
3252
+ console.log("0.42.0");
3243
3253
  })
3244
3254
  );
3245
3255
  };
@@ -3249,7 +3259,7 @@ var { loadEnvConfig } = import_env.default;
3249
3259
  loadEnvConfig(process.cwd());
3250
3260
  var { cleanedArgv, overrides } = extractOverrides(process.argv.slice(2));
3251
3261
  var program = new import_commander3.Command();
3252
- program.name("axiom").description("Axiom's CLI to manage your objects and run evals").version("0.41.0");
3262
+ program.name("axiom").description("Axiom's CLI to manage your objects and run evals").version("0.42.0");
3253
3263
  program.hook("preAction", async (_, actionCommand) => {
3254
3264
  const commandName = actionCommand.name();
3255
3265
  const parentCommand = actionCommand.parent;