npm - axiom - Versions diffs - 0.41.0 → 0.43.0 - Mend

axiom 0.41.0 → 0.43.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (55)

package/README.md +25 -0
package/dist/{app-scope-BpysVzZT.d.ts → app-scope-BgNUnFZY.d.cts} +1 -161
package/dist/{app-scope-BVlVbgs0.d.cts → app-scope-BgNUnFZY.d.ts} +1 -161
package/dist/bin.cjs +28 -8
package/dist/bin.cjs.map +1 -1
package/dist/bin.js +5 -4
package/dist/bin.js.map +1 -1
package/dist/chunk-3THTOTTP.js +370 -0
package/dist/chunk-3THTOTTP.js.map +1 -0
package/dist/chunk-HCJKRSW4.js +28 -0
package/dist/chunk-HCJKRSW4.js.map +1 -0
package/dist/chunk-KPQJE7AU.js +219 -0
package/dist/chunk-KPQJE7AU.js.map +1 -0
package/dist/{chunk-CE5HHCSX.js → chunk-N4LWNPI5.js} +2 -2
package/dist/chunk-N4LWNPI5.js.map +1 -0
package/dist/{chunk-EOOUH7G4.js → chunk-OGWPMUHQ.js} +8 -8
package/dist/chunk-OGWPMUHQ.js.map +1 -0
package/dist/{chunk-EIFWUGXW.js → chunk-QSI2ES43.js} +3 -3
package/dist/{chunk-BNDTR25U.js → chunk-T7DGZCOP.js} +25 -365
package/dist/chunk-T7DGZCOP.js.map +1 -0
package/dist/evals/online.cjs +539 -0
package/dist/evals/online.cjs.map +1 -0
package/dist/evals/online.d.cts +132 -0
package/dist/evals/online.d.ts +132 -0
package/dist/evals/online.js +14 -0
package/dist/evals/online.js.map +1 -0
package/dist/evals/scorers.cjs +378 -0
package/dist/evals/scorers.cjs.map +1 -0
package/dist/evals/scorers.d.cts +3 -0
package/dist/evals/scorers.d.ts +3 -0
package/dist/evals/scorers.js +14 -0
package/dist/evals/scorers.js.map +1 -0
package/dist/evals.cjs +45 -8
package/dist/evals.cjs.map +1 -1
package/dist/evals.d.cts +8 -3
package/dist/evals.d.ts +8 -3
package/dist/evals.js +26 -15
package/dist/evals.js.map +1 -1
package/dist/index.cjs +196 -50
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +16 -106
package/dist/index.d.ts +16 -106
package/dist/index.js +27 -118
package/dist/index.js.map +1 -1
package/dist/{run-vitest-LJOVEDIQ.js → run-vitest-ZLJB4UJX.js} +5 -4
package/dist/{run-vitest-LJOVEDIQ.js.map → run-vitest-ZLJB4UJX.js.map} +1 -1
package/dist/scorer.types-BY_ig9od.d.cts +38 -0
package/dist/scorer.types-DMiCs7kl.d.ts +38 -0
package/dist/scorers-FD50_6M6.d.cts +131 -0
package/dist/scorers-ZbAJP2FN.d.ts +131 -0
package/package.json +21 -1
package/dist/chunk-BNDTR25U.js.map +0 -1
package/dist/chunk-CE5HHCSX.js.map +0 -1
package/dist/chunk-EOOUH7G4.js.map +0 -1
package/dist/{chunk-EIFWUGXW.js.map → chunk-QSI2ES43.js.map} +0 -0

package/README.md CHANGED Viewed

@@ -77,6 +77,31 @@ const result = await withSpan(
 )
 ```
+## Online Evals
+For running scorers in production (without vitest dependency):
+```ts
+import { withSpan } from 'axiom/ai';
+import { Scorer } from 'axiom/ai/evals/scorers';
+import { onlineEval } from 'axiom/ai/evals/online';
+const formatScorer = Scorer('format-check', ({ output }: { output: string }) => {
+  return output.length > 0;
+});
+await withSpan({ capability: 'qa', step: 'answer' }, async () => {
+  const response = await generateText({ model, messages });
+  void onlineEval(
+    { capability: 'qa', step: 'answer' },
+    { output: response.text, scorers: [formatScorer] }
+  );
+  return response.text;
+});
+```
+> For offline evals that use `Eval()`, continue importing from `axiom/ai/evals`.
 ## Documentation
 For more information about how to set up and use the Axiom JavaScript SDK, read documentation on [axiom.co/docs/ai-engineering/quickstart](https://axiom.co/docs/ai-engineering/quickstart).

package/dist/{app-scope-BpysVzZT.d.ts → app-scope-BgNUnFZY.d.cts} RENAMED Viewed

@@ -1,165 +1,5 @@
-import { Aggregation } from './evals/aggregations.js';
 import { ZodObject, ZodDefault, z } from 'zod';
-type ValidChars =
-  | 'a'
-  | 'b'
-  | 'c'
-  | 'd'
-  | 'e'
-  | 'f'
-  | 'g'
-  | 'h'
-  | 'i'
-  | 'j'
-  | 'k'
-  | 'l'
-  | 'm'
-  | 'n'
-  | 'o'
-  | 'p'
-  | 'q'
-  | 'r'
-  | 's'
-  | 't'
-  | 'u'
-  | 'v'
-  | 'w'
-  | 'x'
-  | 'y'
-  | 'z'
-  | 'A'
-  | 'B'
-  | 'C'
-  | 'D'
-  | 'E'
-  | 'F'
-  | 'G'
-  | 'H'
-  | 'I'
-  | 'J'
-  | 'K'
-  | 'L'
-  | 'M'
-  | 'N'
-  | 'O'
-  | 'P'
-  | 'Q'
-  | 'R'
-  | 'S'
-  | 'T'
-  | 'U'
-  | 'V'
-  | 'W'
-  | 'X'
-  | 'Y'
-  | 'Z'
-  | '0'
-  | '1'
-  | '2'
-  | '3'
-  | '4'
-  | '5'
-  | '6'
-  | '7'
-  | '8'
-  | '9'
-  | '-'
-  | '_';
-type ValidateName<T extends string, Original extends string = T> =
-  // For widened strings, don't attempt validation – let them flow through unchanged
-  string extends T // string is not wider than T, ie T is string
-    ? T
-    : T extends ''
-      ? Original extends ''
-        ? '❌ Name cannot be empty'
-        : Original
-      : T extends `${infer First}${infer Rest}`
-        ? First extends ValidChars
-          ? ValidateName<Rest, Original>
-          : `❌ Invalid character in "${Original}". Only A-Z, a-z, 0-9, -, _ allowed`
-        : never;
-type Simplify<T> = {
-    [K in keyof T]: T[K];
-} & {};
-/**
- * Creates a scorer to be used in evals.
- *
- * Scorers need to return a number or a boolean. If returning a number, it is
- * suggested that this number is between 0 and 1.
- *
- * @example
- * const scorer = createScorer('exact-match',
- *   (args: { output: string; expected: string; }) => {
- *     return args.output === args.expected ? true : false;
- *   }
- * );
- *
- * @example
- * // With aggregation for trials
- * import { PassAtK } from '@axiomhq/ai/evals/aggregations';
- * const scorer = createScorer('tool-called',
- *   (args: { output: string }) => args.output.includes('tool') ? 1 : 0,
- *   { aggregation: PassAtK({ threshold: 0.8 }) }
- * );
- */
-declare function createScorer<TArgs extends Record<string, any> = {}, TInput = [TArgs] extends [{
-    input: infer I;
-}] ? I : unknown, TExpected = [TArgs] extends [{
-    expected: infer E;
-}] ? Exclude<E, undefined> : unknown, TOutput = [TArgs] extends [{
-    output: infer O;
-}] ? Exclude<O, undefined> : never, TExtra extends Record<string, any> = Simplify<Omit<TArgs, 'input' | 'expected' | 'output' | 'trialIndex'>>, TName extends string = string>(
-/**
- * The name of the scorer
- */
-name: ValidateName<TName>,
-/**
- * The scorer function. Can be sync or async.
- */
-fn: (args: TArgs) => number | boolean | Score | Promise<number | boolean | Score>,
-/**
- * Optional configuration for the scorer, including aggregation for trials.
- */
-options?: ScorerOptions): [TOutput] extends [never] ? never : Scorer<TInput, TExpected, TOutput, TExtra>;
-type Score = {
-    score: number | boolean | null;
-    metadata?: Record<string, any>;
-};
-type ScoreWithName = Score & {
-    name: string;
-    /** Per-trial scores when running multiple trials */
-    trials?: number[];
-    /** Aggregation type used (e.g., 'mean', 'pass@k') */
-    aggregation?: string;
-    /** Threshold for pass-based aggregations */
-    threshold?: number;
-};
-/**
- * Configuration options for a scorer.
- */
-type ScorerOptions = {
-    /**
-     * Aggregation function for combining scores across multiple trials.
-     * Defaults to Mean() if not specified.
-     */
-    aggregation?: Aggregation;
-};
-type ScorerLike<TInput = any, TExpected = any, TOutput = any, TExtra extends Record<string, any> = {}> = (args: {
-    input?: TInput;
-    expected?: TExpected;
-    output: TOutput;
-    /** Current trial index (0-based) when running multiple trials */
-    trialIndex?: number;
-} & TExtra) => Score | Promise<Score>;
-type Scorer<TInput = any, TExpected = any, TOutput = any, TExtra extends Record<string, any> = {}> = ScorerLike<TInput, TExpected, TOutput, TExtra> & {
-    readonly name: string;
-    readonly aggregation?: Aggregation;
-};
 type DefaultMaxDepth = 8;
 type HasDefaults<S> = S extends {
     _zod: {
@@ -284,4 +124,4 @@ declare function createAppScope<FlagSchema extends ZodObject<any>, FactSchema ex
     __error__: 'createAppScope: flagSchema must have .default() for all leaf fields';
 }): AppScope<FlagSchema, FactSchema>;
-export { type ScoreWithName as S, type ValidateName as V, type ScorerLike as a, type Score as b, createAppScope as c, type ScorerOptions as d, createScorer as e, type Scorer as f };
+export { createAppScope as c };

package/dist/{app-scope-BVlVbgs0.d.cts → app-scope-BgNUnFZY.d.ts} RENAMED Viewed

@@ -1,165 +1,5 @@
-import { Aggregation } from './evals/aggregations.cjs';
 import { ZodObject, ZodDefault, z } from 'zod';
-type ValidChars =
-  | 'a'
-  | 'b'
-  | 'c'
-  | 'd'
-  | 'e'
-  | 'f'
-  | 'g'
-  | 'h'
-  | 'i'
-  | 'j'
-  | 'k'
-  | 'l'
-  | 'm'
-  | 'n'
-  | 'o'
-  | 'p'
-  | 'q'
-  | 'r'
-  | 's'
-  | 't'
-  | 'u'
-  | 'v'
-  | 'w'
-  | 'x'
-  | 'y'
-  | 'z'
-  | 'A'
-  | 'B'
-  | 'C'
-  | 'D'
-  | 'E'
-  | 'F'
-  | 'G'
-  | 'H'
-  | 'I'
-  | 'J'
-  | 'K'
-  | 'L'
-  | 'M'
-  | 'N'
-  | 'O'
-  | 'P'
-  | 'Q'
-  | 'R'
-  | 'S'
-  | 'T'
-  | 'U'
-  | 'V'
-  | 'W'
-  | 'X'
-  | 'Y'
-  | 'Z'
-  | '0'
-  | '1'
-  | '2'
-  | '3'
-  | '4'
-  | '5'
-  | '6'
-  | '7'
-  | '8'
-  | '9'
-  | '-'
-  | '_';
-type ValidateName<T extends string, Original extends string = T> =
-  // For widened strings, don't attempt validation – let them flow through unchanged
-  string extends T // string is not wider than T, ie T is string
-    ? T
-    : T extends ''
-      ? Original extends ''
-        ? '❌ Name cannot be empty'
-        : Original
-      : T extends `${infer First}${infer Rest}`
-        ? First extends ValidChars
-          ? ValidateName<Rest, Original>
-          : `❌ Invalid character in "${Original}". Only A-Z, a-z, 0-9, -, _ allowed`
-        : never;
-type Simplify<T> = {
-    [K in keyof T]: T[K];
-} & {};
-/**
- * Creates a scorer to be used in evals.
- *
- * Scorers need to return a number or a boolean. If returning a number, it is
- * suggested that this number is between 0 and 1.
- *
- * @example
- * const scorer = createScorer('exact-match',
- *   (args: { output: string; expected: string; }) => {
- *     return args.output === args.expected ? true : false;
- *   }
- * );
- *
- * @example
- * // With aggregation for trials
- * import { PassAtK } from '@axiomhq/ai/evals/aggregations';
- * const scorer = createScorer('tool-called',
- *   (args: { output: string }) => args.output.includes('tool') ? 1 : 0,
- *   { aggregation: PassAtK({ threshold: 0.8 }) }
- * );
- */
-declare function createScorer<TArgs extends Record<string, any> = {}, TInput = [TArgs] extends [{
-    input: infer I;
-}] ? I : unknown, TExpected = [TArgs] extends [{
-    expected: infer E;
-}] ? Exclude<E, undefined> : unknown, TOutput = [TArgs] extends [{
-    output: infer O;
-}] ? Exclude<O, undefined> : never, TExtra extends Record<string, any> = Simplify<Omit<TArgs, 'input' | 'expected' | 'output' | 'trialIndex'>>, TName extends string = string>(
-/**
- * The name of the scorer
- */
-name: ValidateName<TName>,
-/**
- * The scorer function. Can be sync or async.
- */
-fn: (args: TArgs) => number | boolean | Score | Promise<number | boolean | Score>,
-/**
- * Optional configuration for the scorer, including aggregation for trials.
- */
-options?: ScorerOptions): [TOutput] extends [never] ? never : Scorer<TInput, TExpected, TOutput, TExtra>;
-type Score = {
-    score: number | boolean | null;
-    metadata?: Record<string, any>;
-};
-type ScoreWithName = Score & {
-    name: string;
-    /** Per-trial scores when running multiple trials */
-    trials?: number[];
-    /** Aggregation type used (e.g., 'mean', 'pass@k') */
-    aggregation?: string;
-    /** Threshold for pass-based aggregations */
-    threshold?: number;
-};
-/**
- * Configuration options for a scorer.
- */
-type ScorerOptions = {
-    /**
-     * Aggregation function for combining scores across multiple trials.
-     * Defaults to Mean() if not specified.
-     */
-    aggregation?: Aggregation;
-};
-type ScorerLike<TInput = any, TExpected = any, TOutput = any, TExtra extends Record<string, any> = {}> = (args: {
-    input?: TInput;
-    expected?: TExpected;
-    output: TOutput;
-    /** Current trial index (0-based) when running multiple trials */
-    trialIndex?: number;
-} & TExtra) => Score | Promise<Score>;
-type Scorer<TInput = any, TExpected = any, TOutput = any, TExtra extends Record<string, any> = {}> = ScorerLike<TInput, TExpected, TOutput, TExtra> & {
-    readonly name: string;
-    readonly aggregation?: Aggregation;
-};
 type DefaultMaxDepth = 8;
 type HasDefaults<S> = S extends {
     _zod: {
@@ -284,4 +124,4 @@ declare function createAppScope<FlagSchema extends ZodObject<any>, FactSchema ex
     __error__: 'createAppScope: flagSchema must have .default() for all leaf fields';
 }): AppScope<FlagSchema, FactSchema>;
-export { type ScoreWithName as S, type ValidateName as V, type ScorerLike as a, type Score as b, createAppScope as c, type ScorerOptions as d, createScorer as e, type Scorer as f };
+export { createAppScope as c };

package/dist/bin.cjs CHANGED Viewed

@@ -553,7 +553,7 @@ var init_package = __esm({
   "package.json"() {
     package_default = {
       name: "axiom",
-      version: "0.41.0",
+      version: "0.43.0",
       type: "module",
       author: "Axiom, Inc.",
       contributors: [
@@ -609,6 +609,26 @@ var init_package = __esm({
             default: "./dist/evals/aggregations.cjs"
           }
         },
+        "./ai/evals/scorers": {
+          import: {
+            types: "./dist/evals/scorers.d.ts",
+            default: "./dist/evals/scorers.js"
+          },
+          require: {
+            types: "./dist/evals/scorers.d.cts",
+            default: "./dist/evals/scorers.cjs"
+          }
+        },
+        "./ai/evals/online": {
+          import: {
+            types: "./dist/evals/online.d.ts",
+            default: "./dist/evals/online.js"
+          },
+          require: {
+            types: "./dist/evals/online.d.cts",
+            default: "./dist/evals/online.cjs"
+          }
+        },
         "./ai/config": {
           import: {
             types: "./dist/config.d.ts",
@@ -2224,11 +2244,11 @@ function printFinalReport({
       const baselineParam = suite.baseline?.traceId ? `?baselineId=${suite.baseline.traceId}` : "";
       logger("View eval result:");
       logger(
-        `${config.consoleEndpointUrl}/${orgId}/ai-engineering/evaluations/${suite.name}/${suite.version}${baselineParam}`
+        `${config.consoleEndpointUrl}/${orgId}/ai/evaluations/${suite.name}/${suite.version}${baselineParam}`
       );
     } else {
       logger("View full report:");
-      logger(`${config.consoleEndpointUrl}/${orgId}/ai-engineering/evaluations?runId=${runId}`);
+      logger(`${config.consoleEndpointUrl}/${orgId}/ai/evaluations?runId=${runId}`);
     }
   } else if (isDebug) {
     logger(u.dim("Results not uploaded to Axiom (debug mode)"));
@@ -2480,11 +2500,11 @@ function setupEvalProvider(connection) {
   axiomProvider = new import_sdk_trace_node.NodeTracerProvider({
     resource: (0, import_resources.resourceFromAttributes)({
       ["service.name"]: "axiom",
-      ["service.version"]: "0.41.0"
+      ["service.version"]: "0.43.0"
     }),
     spanProcessors: [processor]
   });
-  axiomTracer = axiomProvider.getTracer("axiom", "0.41.0");
+  axiomTracer = axiomProvider.getTracer("axiom", "0.43.0");
 }
 async function initInstrumentation(config) {
   if (initialized) {
@@ -2496,7 +2516,7 @@ async function initInstrumentation(config) {
   }
   initializationPromise = (async () => {
     if (!config.enabled) {
-      axiomTracer = import_api10.trace.getTracer("axiom", "0.41.0");
+      axiomTracer = import_api10.trace.getTracer("axiom", "0.43.0");
       initialized = true;
       return;
     }
@@ -3239,7 +3259,7 @@ var import_commander2 = require("commander");
 var loadVersionCommand = (program2) => {
   return program2.addCommand(
     new import_commander2.Command("version").description("cli version").action(() => {
-      console.log("0.41.0");
+      console.log("0.43.0");
     })
   );
 };
@@ -3249,7 +3269,7 @@ var { loadEnvConfig } = import_env.default;
 loadEnvConfig(process.cwd());
 var { cleanedArgv, overrides } = extractOverrides(process.argv.slice(2));
 var program = new import_commander3.Command();
-program.name("axiom").description("Axiom's CLI to manage your objects and run evals").version("0.41.0");
+program.name("axiom").description("Axiom's CLI to manage your objects and run evals").version("0.43.0");
 program.hook("preAction", async (_, actionCommand) => {
   const commandName = actionCommand.name();
   const parentCommand = actionCommand.parent;