axiom 0.41.0 → 0.43.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +25 -0
- package/dist/{app-scope-BpysVzZT.d.ts → app-scope-BgNUnFZY.d.cts} +1 -161
- package/dist/{app-scope-BVlVbgs0.d.cts → app-scope-BgNUnFZY.d.ts} +1 -161
- package/dist/bin.cjs +28 -8
- package/dist/bin.cjs.map +1 -1
- package/dist/bin.js +5 -4
- package/dist/bin.js.map +1 -1
- package/dist/chunk-3THTOTTP.js +370 -0
- package/dist/chunk-3THTOTTP.js.map +1 -0
- package/dist/chunk-HCJKRSW4.js +28 -0
- package/dist/chunk-HCJKRSW4.js.map +1 -0
- package/dist/chunk-KPQJE7AU.js +219 -0
- package/dist/chunk-KPQJE7AU.js.map +1 -0
- package/dist/{chunk-CE5HHCSX.js → chunk-N4LWNPI5.js} +2 -2
- package/dist/chunk-N4LWNPI5.js.map +1 -0
- package/dist/{chunk-EOOUH7G4.js → chunk-OGWPMUHQ.js} +8 -8
- package/dist/chunk-OGWPMUHQ.js.map +1 -0
- package/dist/{chunk-EIFWUGXW.js → chunk-QSI2ES43.js} +3 -3
- package/dist/{chunk-BNDTR25U.js → chunk-T7DGZCOP.js} +25 -365
- package/dist/chunk-T7DGZCOP.js.map +1 -0
- package/dist/evals/online.cjs +539 -0
- package/dist/evals/online.cjs.map +1 -0
- package/dist/evals/online.d.cts +132 -0
- package/dist/evals/online.d.ts +132 -0
- package/dist/evals/online.js +14 -0
- package/dist/evals/online.js.map +1 -0
- package/dist/evals/scorers.cjs +378 -0
- package/dist/evals/scorers.cjs.map +1 -0
- package/dist/evals/scorers.d.cts +3 -0
- package/dist/evals/scorers.d.ts +3 -0
- package/dist/evals/scorers.js +14 -0
- package/dist/evals/scorers.js.map +1 -0
- package/dist/evals.cjs +45 -8
- package/dist/evals.cjs.map +1 -1
- package/dist/evals.d.cts +8 -3
- package/dist/evals.d.ts +8 -3
- package/dist/evals.js +26 -15
- package/dist/evals.js.map +1 -1
- package/dist/index.cjs +196 -50
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +16 -106
- package/dist/index.d.ts +16 -106
- package/dist/index.js +27 -118
- package/dist/index.js.map +1 -1
- package/dist/{run-vitest-LJOVEDIQ.js → run-vitest-ZLJB4UJX.js} +5 -4
- package/dist/{run-vitest-LJOVEDIQ.js.map → run-vitest-ZLJB4UJX.js.map} +1 -1
- package/dist/scorer.types-BY_ig9od.d.cts +38 -0
- package/dist/scorer.types-DMiCs7kl.d.ts +38 -0
- package/dist/scorers-FD50_6M6.d.cts +131 -0
- package/dist/scorers-ZbAJP2FN.d.ts +131 -0
- package/package.json +21 -1
- package/dist/chunk-BNDTR25U.js.map +0 -1
- package/dist/chunk-CE5HHCSX.js.map +0 -1
- package/dist/chunk-EOOUH7G4.js.map +0 -1
- package/dist/{chunk-EIFWUGXW.js.map → chunk-QSI2ES43.js.map} +0 -0
package/README.md
CHANGED
|
@@ -77,6 +77,31 @@ const result = await withSpan(
|
|
|
77
77
|
)
|
|
78
78
|
```
|
|
79
79
|
|
|
80
|
+
## Online Evals
|
|
81
|
+
|
|
82
|
+
For running scorers in production (without vitest dependency):
|
|
83
|
+
|
|
84
|
+
```ts
|
|
85
|
+
import { withSpan } from 'axiom/ai';
|
|
86
|
+
import { Scorer } from 'axiom/ai/evals/scorers';
|
|
87
|
+
import { onlineEval } from 'axiom/ai/evals/online';
|
|
88
|
+
|
|
89
|
+
const formatScorer = Scorer('format-check', ({ output }: { output: string }) => {
|
|
90
|
+
return output.length > 0;
|
|
91
|
+
});
|
|
92
|
+
|
|
93
|
+
await withSpan({ capability: 'qa', step: 'answer' }, async () => {
|
|
94
|
+
const response = await generateText({ model, messages });
|
|
95
|
+
void onlineEval(
|
|
96
|
+
{ capability: 'qa', step: 'answer' },
|
|
97
|
+
{ output: response.text, scorers: [formatScorer] }
|
|
98
|
+
);
|
|
99
|
+
return response.text;
|
|
100
|
+
});
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
> For offline evals that use `Eval()`, continue importing from `axiom/ai/evals`.
|
|
104
|
+
|
|
80
105
|
## Documentation
|
|
81
106
|
|
|
82
107
|
For more information about how to set up and use the Axiom JavaScript SDK, read documentation on [axiom.co/docs/ai-engineering/quickstart](https://axiom.co/docs/ai-engineering/quickstart).
|
|
@@ -1,165 +1,5 @@
|
|
|
1
|
-
import { Aggregation } from './evals/aggregations.js';
|
|
2
1
|
import { ZodObject, ZodDefault, z } from 'zod';
|
|
3
2
|
|
|
4
|
-
type ValidChars =
|
|
5
|
-
| 'a'
|
|
6
|
-
| 'b'
|
|
7
|
-
| 'c'
|
|
8
|
-
| 'd'
|
|
9
|
-
| 'e'
|
|
10
|
-
| 'f'
|
|
11
|
-
| 'g'
|
|
12
|
-
| 'h'
|
|
13
|
-
| 'i'
|
|
14
|
-
| 'j'
|
|
15
|
-
| 'k'
|
|
16
|
-
| 'l'
|
|
17
|
-
| 'm'
|
|
18
|
-
| 'n'
|
|
19
|
-
| 'o'
|
|
20
|
-
| 'p'
|
|
21
|
-
| 'q'
|
|
22
|
-
| 'r'
|
|
23
|
-
| 's'
|
|
24
|
-
| 't'
|
|
25
|
-
| 'u'
|
|
26
|
-
| 'v'
|
|
27
|
-
| 'w'
|
|
28
|
-
| 'x'
|
|
29
|
-
| 'y'
|
|
30
|
-
| 'z'
|
|
31
|
-
| 'A'
|
|
32
|
-
| 'B'
|
|
33
|
-
| 'C'
|
|
34
|
-
| 'D'
|
|
35
|
-
| 'E'
|
|
36
|
-
| 'F'
|
|
37
|
-
| 'G'
|
|
38
|
-
| 'H'
|
|
39
|
-
| 'I'
|
|
40
|
-
| 'J'
|
|
41
|
-
| 'K'
|
|
42
|
-
| 'L'
|
|
43
|
-
| 'M'
|
|
44
|
-
| 'N'
|
|
45
|
-
| 'O'
|
|
46
|
-
| 'P'
|
|
47
|
-
| 'Q'
|
|
48
|
-
| 'R'
|
|
49
|
-
| 'S'
|
|
50
|
-
| 'T'
|
|
51
|
-
| 'U'
|
|
52
|
-
| 'V'
|
|
53
|
-
| 'W'
|
|
54
|
-
| 'X'
|
|
55
|
-
| 'Y'
|
|
56
|
-
| 'Z'
|
|
57
|
-
| '0'
|
|
58
|
-
| '1'
|
|
59
|
-
| '2'
|
|
60
|
-
| '3'
|
|
61
|
-
| '4'
|
|
62
|
-
| '5'
|
|
63
|
-
| '6'
|
|
64
|
-
| '7'
|
|
65
|
-
| '8'
|
|
66
|
-
| '9'
|
|
67
|
-
| '-'
|
|
68
|
-
| '_';
|
|
69
|
-
|
|
70
|
-
type ValidateName<T extends string, Original extends string = T> =
|
|
71
|
-
// For widened strings, don't attempt validation – let them flow through unchanged
|
|
72
|
-
string extends T // string is not wider than T, ie T is string
|
|
73
|
-
? T
|
|
74
|
-
: T extends ''
|
|
75
|
-
? Original extends ''
|
|
76
|
-
? '❌ Name cannot be empty'
|
|
77
|
-
: Original
|
|
78
|
-
: T extends `${infer First}${infer Rest}`
|
|
79
|
-
? First extends ValidChars
|
|
80
|
-
? ValidateName<Rest, Original>
|
|
81
|
-
: `❌ Invalid character in "${Original}". Only A-Z, a-z, 0-9, -, _ allowed`
|
|
82
|
-
: never;
|
|
83
|
-
|
|
84
|
-
type Simplify<T> = {
|
|
85
|
-
[K in keyof T]: T[K];
|
|
86
|
-
} & {};
|
|
87
|
-
/**
|
|
88
|
-
* Creates a scorer to be used in evals.
|
|
89
|
-
*
|
|
90
|
-
* Scorers need to return a number or a boolean. If returning a number, it is
|
|
91
|
-
* suggested that this number is between 0 and 1.
|
|
92
|
-
*
|
|
93
|
-
* @example
|
|
94
|
-
* const scorer = createScorer('exact-match',
|
|
95
|
-
* (args: { output: string; expected: string; }) => {
|
|
96
|
-
* return args.output === args.expected ? true : false;
|
|
97
|
-
* }
|
|
98
|
-
* );
|
|
99
|
-
*
|
|
100
|
-
* @example
|
|
101
|
-
* // With aggregation for trials
|
|
102
|
-
* import { PassAtK } from '@axiomhq/ai/evals/aggregations';
|
|
103
|
-
* const scorer = createScorer('tool-called',
|
|
104
|
-
* (args: { output: string }) => args.output.includes('tool') ? 1 : 0,
|
|
105
|
-
* { aggregation: PassAtK({ threshold: 0.8 }) }
|
|
106
|
-
* );
|
|
107
|
-
*/
|
|
108
|
-
declare function createScorer<TArgs extends Record<string, any> = {}, TInput = [TArgs] extends [{
|
|
109
|
-
input: infer I;
|
|
110
|
-
}] ? I : unknown, TExpected = [TArgs] extends [{
|
|
111
|
-
expected: infer E;
|
|
112
|
-
}] ? Exclude<E, undefined> : unknown, TOutput = [TArgs] extends [{
|
|
113
|
-
output: infer O;
|
|
114
|
-
}] ? Exclude<O, undefined> : never, TExtra extends Record<string, any> = Simplify<Omit<TArgs, 'input' | 'expected' | 'output' | 'trialIndex'>>, TName extends string = string>(
|
|
115
|
-
/**
|
|
116
|
-
* The name of the scorer
|
|
117
|
-
*/
|
|
118
|
-
name: ValidateName<TName>,
|
|
119
|
-
/**
|
|
120
|
-
* The scorer function. Can be sync or async.
|
|
121
|
-
*/
|
|
122
|
-
fn: (args: TArgs) => number | boolean | Score | Promise<number | boolean | Score>,
|
|
123
|
-
/**
|
|
124
|
-
* Optional configuration for the scorer, including aggregation for trials.
|
|
125
|
-
*/
|
|
126
|
-
options?: ScorerOptions): [TOutput] extends [never] ? never : Scorer<TInput, TExpected, TOutput, TExtra>;
|
|
127
|
-
|
|
128
|
-
type Score = {
|
|
129
|
-
score: number | boolean | null;
|
|
130
|
-
metadata?: Record<string, any>;
|
|
131
|
-
};
|
|
132
|
-
type ScoreWithName = Score & {
|
|
133
|
-
name: string;
|
|
134
|
-
/** Per-trial scores when running multiple trials */
|
|
135
|
-
trials?: number[];
|
|
136
|
-
/** Aggregation type used (e.g., 'mean', 'pass@k') */
|
|
137
|
-
aggregation?: string;
|
|
138
|
-
/** Threshold for pass-based aggregations */
|
|
139
|
-
threshold?: number;
|
|
140
|
-
};
|
|
141
|
-
/**
|
|
142
|
-
* Configuration options for a scorer.
|
|
143
|
-
*/
|
|
144
|
-
type ScorerOptions = {
|
|
145
|
-
/**
|
|
146
|
-
* Aggregation function for combining scores across multiple trials.
|
|
147
|
-
* Defaults to Mean() if not specified.
|
|
148
|
-
*/
|
|
149
|
-
aggregation?: Aggregation;
|
|
150
|
-
};
|
|
151
|
-
type ScorerLike<TInput = any, TExpected = any, TOutput = any, TExtra extends Record<string, any> = {}> = (args: {
|
|
152
|
-
input?: TInput;
|
|
153
|
-
expected?: TExpected;
|
|
154
|
-
output: TOutput;
|
|
155
|
-
/** Current trial index (0-based) when running multiple trials */
|
|
156
|
-
trialIndex?: number;
|
|
157
|
-
} & TExtra) => Score | Promise<Score>;
|
|
158
|
-
type Scorer<TInput = any, TExpected = any, TOutput = any, TExtra extends Record<string, any> = {}> = ScorerLike<TInput, TExpected, TOutput, TExtra> & {
|
|
159
|
-
readonly name: string;
|
|
160
|
-
readonly aggregation?: Aggregation;
|
|
161
|
-
};
|
|
162
|
-
|
|
163
3
|
type DefaultMaxDepth = 8;
|
|
164
4
|
type HasDefaults<S> = S extends {
|
|
165
5
|
_zod: {
|
|
@@ -284,4 +124,4 @@ declare function createAppScope<FlagSchema extends ZodObject<any>, FactSchema ex
|
|
|
284
124
|
__error__: 'createAppScope: flagSchema must have .default() for all leaf fields';
|
|
285
125
|
}): AppScope<FlagSchema, FactSchema>;
|
|
286
126
|
|
|
287
|
-
export {
|
|
127
|
+
export { createAppScope as c };
|
|
@@ -1,165 +1,5 @@
|
|
|
1
|
-
import { Aggregation } from './evals/aggregations.cjs';
|
|
2
1
|
import { ZodObject, ZodDefault, z } from 'zod';
|
|
3
2
|
|
|
4
|
-
type ValidChars =
|
|
5
|
-
| 'a'
|
|
6
|
-
| 'b'
|
|
7
|
-
| 'c'
|
|
8
|
-
| 'd'
|
|
9
|
-
| 'e'
|
|
10
|
-
| 'f'
|
|
11
|
-
| 'g'
|
|
12
|
-
| 'h'
|
|
13
|
-
| 'i'
|
|
14
|
-
| 'j'
|
|
15
|
-
| 'k'
|
|
16
|
-
| 'l'
|
|
17
|
-
| 'm'
|
|
18
|
-
| 'n'
|
|
19
|
-
| 'o'
|
|
20
|
-
| 'p'
|
|
21
|
-
| 'q'
|
|
22
|
-
| 'r'
|
|
23
|
-
| 's'
|
|
24
|
-
| 't'
|
|
25
|
-
| 'u'
|
|
26
|
-
| 'v'
|
|
27
|
-
| 'w'
|
|
28
|
-
| 'x'
|
|
29
|
-
| 'y'
|
|
30
|
-
| 'z'
|
|
31
|
-
| 'A'
|
|
32
|
-
| 'B'
|
|
33
|
-
| 'C'
|
|
34
|
-
| 'D'
|
|
35
|
-
| 'E'
|
|
36
|
-
| 'F'
|
|
37
|
-
| 'G'
|
|
38
|
-
| 'H'
|
|
39
|
-
| 'I'
|
|
40
|
-
| 'J'
|
|
41
|
-
| 'K'
|
|
42
|
-
| 'L'
|
|
43
|
-
| 'M'
|
|
44
|
-
| 'N'
|
|
45
|
-
| 'O'
|
|
46
|
-
| 'P'
|
|
47
|
-
| 'Q'
|
|
48
|
-
| 'R'
|
|
49
|
-
| 'S'
|
|
50
|
-
| 'T'
|
|
51
|
-
| 'U'
|
|
52
|
-
| 'V'
|
|
53
|
-
| 'W'
|
|
54
|
-
| 'X'
|
|
55
|
-
| 'Y'
|
|
56
|
-
| 'Z'
|
|
57
|
-
| '0'
|
|
58
|
-
| '1'
|
|
59
|
-
| '2'
|
|
60
|
-
| '3'
|
|
61
|
-
| '4'
|
|
62
|
-
| '5'
|
|
63
|
-
| '6'
|
|
64
|
-
| '7'
|
|
65
|
-
| '8'
|
|
66
|
-
| '9'
|
|
67
|
-
| '-'
|
|
68
|
-
| '_';
|
|
69
|
-
|
|
70
|
-
type ValidateName<T extends string, Original extends string = T> =
|
|
71
|
-
// For widened strings, don't attempt validation – let them flow through unchanged
|
|
72
|
-
string extends T // string is not wider than T, ie T is string
|
|
73
|
-
? T
|
|
74
|
-
: T extends ''
|
|
75
|
-
? Original extends ''
|
|
76
|
-
? '❌ Name cannot be empty'
|
|
77
|
-
: Original
|
|
78
|
-
: T extends `${infer First}${infer Rest}`
|
|
79
|
-
? First extends ValidChars
|
|
80
|
-
? ValidateName<Rest, Original>
|
|
81
|
-
: `❌ Invalid character in "${Original}". Only A-Z, a-z, 0-9, -, _ allowed`
|
|
82
|
-
: never;
|
|
83
|
-
|
|
84
|
-
type Simplify<T> = {
|
|
85
|
-
[K in keyof T]: T[K];
|
|
86
|
-
} & {};
|
|
87
|
-
/**
|
|
88
|
-
* Creates a scorer to be used in evals.
|
|
89
|
-
*
|
|
90
|
-
* Scorers need to return a number or a boolean. If returning a number, it is
|
|
91
|
-
* suggested that this number is between 0 and 1.
|
|
92
|
-
*
|
|
93
|
-
* @example
|
|
94
|
-
* const scorer = createScorer('exact-match',
|
|
95
|
-
* (args: { output: string; expected: string; }) => {
|
|
96
|
-
* return args.output === args.expected ? true : false;
|
|
97
|
-
* }
|
|
98
|
-
* );
|
|
99
|
-
*
|
|
100
|
-
* @example
|
|
101
|
-
* // With aggregation for trials
|
|
102
|
-
* import { PassAtK } from '@axiomhq/ai/evals/aggregations';
|
|
103
|
-
* const scorer = createScorer('tool-called',
|
|
104
|
-
* (args: { output: string }) => args.output.includes('tool') ? 1 : 0,
|
|
105
|
-
* { aggregation: PassAtK({ threshold: 0.8 }) }
|
|
106
|
-
* );
|
|
107
|
-
*/
|
|
108
|
-
declare function createScorer<TArgs extends Record<string, any> = {}, TInput = [TArgs] extends [{
|
|
109
|
-
input: infer I;
|
|
110
|
-
}] ? I : unknown, TExpected = [TArgs] extends [{
|
|
111
|
-
expected: infer E;
|
|
112
|
-
}] ? Exclude<E, undefined> : unknown, TOutput = [TArgs] extends [{
|
|
113
|
-
output: infer O;
|
|
114
|
-
}] ? Exclude<O, undefined> : never, TExtra extends Record<string, any> = Simplify<Omit<TArgs, 'input' | 'expected' | 'output' | 'trialIndex'>>, TName extends string = string>(
|
|
115
|
-
/**
|
|
116
|
-
* The name of the scorer
|
|
117
|
-
*/
|
|
118
|
-
name: ValidateName<TName>,
|
|
119
|
-
/**
|
|
120
|
-
* The scorer function. Can be sync or async.
|
|
121
|
-
*/
|
|
122
|
-
fn: (args: TArgs) => number | boolean | Score | Promise<number | boolean | Score>,
|
|
123
|
-
/**
|
|
124
|
-
* Optional configuration for the scorer, including aggregation for trials.
|
|
125
|
-
*/
|
|
126
|
-
options?: ScorerOptions): [TOutput] extends [never] ? never : Scorer<TInput, TExpected, TOutput, TExtra>;
|
|
127
|
-
|
|
128
|
-
type Score = {
|
|
129
|
-
score: number | boolean | null;
|
|
130
|
-
metadata?: Record<string, any>;
|
|
131
|
-
};
|
|
132
|
-
type ScoreWithName = Score & {
|
|
133
|
-
name: string;
|
|
134
|
-
/** Per-trial scores when running multiple trials */
|
|
135
|
-
trials?: number[];
|
|
136
|
-
/** Aggregation type used (e.g., 'mean', 'pass@k') */
|
|
137
|
-
aggregation?: string;
|
|
138
|
-
/** Threshold for pass-based aggregations */
|
|
139
|
-
threshold?: number;
|
|
140
|
-
};
|
|
141
|
-
/**
|
|
142
|
-
* Configuration options for a scorer.
|
|
143
|
-
*/
|
|
144
|
-
type ScorerOptions = {
|
|
145
|
-
/**
|
|
146
|
-
* Aggregation function for combining scores across multiple trials.
|
|
147
|
-
* Defaults to Mean() if not specified.
|
|
148
|
-
*/
|
|
149
|
-
aggregation?: Aggregation;
|
|
150
|
-
};
|
|
151
|
-
type ScorerLike<TInput = any, TExpected = any, TOutput = any, TExtra extends Record<string, any> = {}> = (args: {
|
|
152
|
-
input?: TInput;
|
|
153
|
-
expected?: TExpected;
|
|
154
|
-
output: TOutput;
|
|
155
|
-
/** Current trial index (0-based) when running multiple trials */
|
|
156
|
-
trialIndex?: number;
|
|
157
|
-
} & TExtra) => Score | Promise<Score>;
|
|
158
|
-
type Scorer<TInput = any, TExpected = any, TOutput = any, TExtra extends Record<string, any> = {}> = ScorerLike<TInput, TExpected, TOutput, TExtra> & {
|
|
159
|
-
readonly name: string;
|
|
160
|
-
readonly aggregation?: Aggregation;
|
|
161
|
-
};
|
|
162
|
-
|
|
163
3
|
type DefaultMaxDepth = 8;
|
|
164
4
|
type HasDefaults<S> = S extends {
|
|
165
5
|
_zod: {
|
|
@@ -284,4 +124,4 @@ declare function createAppScope<FlagSchema extends ZodObject<any>, FactSchema ex
|
|
|
284
124
|
__error__: 'createAppScope: flagSchema must have .default() for all leaf fields';
|
|
285
125
|
}): AppScope<FlagSchema, FactSchema>;
|
|
286
126
|
|
|
287
|
-
export {
|
|
127
|
+
export { createAppScope as c };
|
package/dist/bin.cjs
CHANGED
|
@@ -553,7 +553,7 @@ var init_package = __esm({
|
|
|
553
553
|
"package.json"() {
|
|
554
554
|
package_default = {
|
|
555
555
|
name: "axiom",
|
|
556
|
-
version: "0.41.0",
|
|
556
|
+
version: "0.43.0",
|
|
557
557
|
type: "module",
|
|
558
558
|
author: "Axiom, Inc.",
|
|
559
559
|
contributors: [
|
|
@@ -609,6 +609,26 @@ var init_package = __esm({
|
|
|
609
609
|
default: "./dist/evals/aggregations.cjs"
|
|
610
610
|
}
|
|
611
611
|
},
|
|
612
|
+
"./ai/evals/scorers": {
|
|
613
|
+
import: {
|
|
614
|
+
types: "./dist/evals/scorers.d.ts",
|
|
615
|
+
default: "./dist/evals/scorers.js"
|
|
616
|
+
},
|
|
617
|
+
require: {
|
|
618
|
+
types: "./dist/evals/scorers.d.cts",
|
|
619
|
+
default: "./dist/evals/scorers.cjs"
|
|
620
|
+
}
|
|
621
|
+
},
|
|
622
|
+
"./ai/evals/online": {
|
|
623
|
+
import: {
|
|
624
|
+
types: "./dist/evals/online.d.ts",
|
|
625
|
+
default: "./dist/evals/online.js"
|
|
626
|
+
},
|
|
627
|
+
require: {
|
|
628
|
+
types: "./dist/evals/online.d.cts",
|
|
629
|
+
default: "./dist/evals/online.cjs"
|
|
630
|
+
}
|
|
631
|
+
},
|
|
612
632
|
"./ai/config": {
|
|
613
633
|
import: {
|
|
614
634
|
types: "./dist/config.d.ts",
|
|
@@ -2224,11 +2244,11 @@ function printFinalReport({
|
|
|
2224
2244
|
const baselineParam = suite.baseline?.traceId ? `?baselineId=${suite.baseline.traceId}` : "";
|
|
2225
2245
|
logger("View eval result:");
|
|
2226
2246
|
logger(
|
|
2227
|
-
`${config.consoleEndpointUrl}/${orgId}/ai
|
|
2247
|
+
`${config.consoleEndpointUrl}/${orgId}/ai/evaluations/${suite.name}/${suite.version}${baselineParam}`
|
|
2228
2248
|
);
|
|
2229
2249
|
} else {
|
|
2230
2250
|
logger("View full report:");
|
|
2231
|
-
logger(`${config.consoleEndpointUrl}/${orgId}/ai
|
|
2251
|
+
logger(`${config.consoleEndpointUrl}/${orgId}/ai/evaluations?runId=${runId}`);
|
|
2232
2252
|
}
|
|
2233
2253
|
} else if (isDebug) {
|
|
2234
2254
|
logger(u.dim("Results not uploaded to Axiom (debug mode)"));
|
|
@@ -2480,11 +2500,11 @@ function setupEvalProvider(connection) {
|
|
|
2480
2500
|
axiomProvider = new import_sdk_trace_node.NodeTracerProvider({
|
|
2481
2501
|
resource: (0, import_resources.resourceFromAttributes)({
|
|
2482
2502
|
["service.name"]: "axiom",
|
|
2483
|
-
["service.version"]: "0.41.0"
|
|
2503
|
+
["service.version"]: "0.43.0"
|
|
2484
2504
|
}),
|
|
2485
2505
|
spanProcessors: [processor]
|
|
2486
2506
|
});
|
|
2487
|
-
axiomTracer = axiomProvider.getTracer("axiom", "0.41.0");
|
|
2507
|
+
axiomTracer = axiomProvider.getTracer("axiom", "0.43.0");
|
|
2488
2508
|
}
|
|
2489
2509
|
async function initInstrumentation(config) {
|
|
2490
2510
|
if (initialized) {
|
|
@@ -2496,7 +2516,7 @@ async function initInstrumentation(config) {
|
|
|
2496
2516
|
}
|
|
2497
2517
|
initializationPromise = (async () => {
|
|
2498
2518
|
if (!config.enabled) {
|
|
2499
|
-
axiomTracer = import_api10.trace.getTracer("axiom", "0.41.0");
|
|
2519
|
+
axiomTracer = import_api10.trace.getTracer("axiom", "0.43.0");
|
|
2500
2520
|
initialized = true;
|
|
2501
2521
|
return;
|
|
2502
2522
|
}
|
|
@@ -3239,7 +3259,7 @@ var import_commander2 = require("commander");
|
|
|
3239
3259
|
var loadVersionCommand = (program2) => {
|
|
3240
3260
|
return program2.addCommand(
|
|
3241
3261
|
new import_commander2.Command("version").description("cli version").action(() => {
|
|
3242
|
-
console.log("0.41.0");
|
|
3262
|
+
console.log("0.43.0");
|
|
3243
3263
|
})
|
|
3244
3264
|
);
|
|
3245
3265
|
};
|
|
@@ -3249,7 +3269,7 @@ var { loadEnvConfig } = import_env.default;
|
|
|
3249
3269
|
loadEnvConfig(process.cwd());
|
|
3250
3270
|
var { cleanedArgv, overrides } = extractOverrides(process.argv.slice(2));
|
|
3251
3271
|
var program = new import_commander3.Command();
|
|
3252
|
-
program.name("axiom").description("Axiom's CLI to manage your objects and run evals").version("0.41.0");
|
|
3272
|
+
program.name("axiom").description("Axiom's CLI to manage your objects and run evals").version("0.43.0");
|
|
3253
3273
|
program.hook("preAction", async (_, actionCommand) => {
|
|
3254
3274
|
const commandName = actionCommand.name();
|
|
3255
3275
|
const parentCommand = actionCommand.parent;
|