axiom 0.42.0 → 0.44.0

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. package/README.md +2 -1
  2. package/dist/bin.cjs +40 -16
  3. package/dist/bin.cjs.map +1 -1
  4. package/dist/bin.js +6 -5
  5. package/dist/bin.js.map +1 -1
  6. package/dist/{chunk-7AIWUQUO.js → chunk-56V2A6IW.js} +4 -4
  7. package/dist/chunk-56V2A6IW.js.map +1 -0
  8. package/dist/{chunk-DL77W2XP.js → chunk-7NTVK3F4.js} +72 -345
  9. package/dist/chunk-7NTVK3F4.js.map +1 -0
  10. package/dist/chunk-AAYRMTT6.js +240 -0
  11. package/dist/chunk-AAYRMTT6.js.map +1 -0
  12. package/dist/chunk-HCJKRSW4.js +28 -0
  13. package/dist/chunk-HCJKRSW4.js.map +1 -0
  14. package/dist/chunk-HW6E4M7S.js +302 -0
  15. package/dist/chunk-HW6E4M7S.js.map +1 -0
  16. package/dist/{chunk-N6WWQZ4E.js → chunk-MCKVQ2IB.js} +2 -2
  17. package/dist/chunk-MCKVQ2IB.js.map +1 -0
  18. package/dist/{chunk-3THTOTTP.js → chunk-TWE3LIRZ.js} +4 -2
  19. package/dist/chunk-TWE3LIRZ.js.map +1 -0
  20. package/dist/{chunk-AF26RXVP.js → chunk-U3JDCQ3Y.js} +31 -16
  21. package/dist/chunk-U3JDCQ3Y.js.map +1 -0
  22. package/dist/evals/online.cjs +762 -0
  23. package/dist/evals/online.cjs.map +1 -0
  24. package/dist/evals/online.d.cts +132 -0
  25. package/dist/evals/online.d.ts +132 -0
  26. package/dist/evals/online.js +15 -0
  27. package/dist/evals/online.js.map +1 -0
  28. package/dist/evals/scorers.cjs +5 -5
  29. package/dist/evals/scorers.cjs.map +1 -1
  30. package/dist/evals/scorers.d.cts +2 -1
  31. package/dist/evals/scorers.d.ts +2 -1
  32. package/dist/evals/scorers.js +3 -4
  33. package/dist/evals/scorers.js.map +1 -1
  34. package/dist/evals.cjs +378 -170
  35. package/dist/evals.cjs.map +1 -1
  36. package/dist/evals.d.cts +17 -5
  37. package/dist/evals.d.ts +17 -5
  38. package/dist/evals.js +342 -168
  39. package/dist/evals.js.map +1 -1
  40. package/dist/index.cjs +213 -56
  41. package/dist/index.cjs.map +1 -1
  42. package/dist/index.d.cts +15 -106
  43. package/dist/index.d.ts +15 -106
  44. package/dist/index.js +33 -124
  45. package/dist/index.js.map +1 -1
  46. package/dist/{run-vitest-TX7FOGF2.js → run-vitest-4OEEDEHV.js} +6 -5
  47. package/dist/{run-vitest-TX7FOGF2.js.map → run-vitest-4OEEDEHV.js.map} +1 -1
  48. package/dist/scorer.types-BY_ig9od.d.cts +38 -0
  49. package/dist/scorer.types-DMiCs7kl.d.ts +38 -0
  50. package/dist/{scorers-BQJ3Xrf7.d.ts → scorers-FD50_6M6.d.cts} +9 -40
  51. package/dist/{scorers-CiX7MIog.d.cts → scorers-ZbAJP2FN.d.ts} +9 -40
  52. package/package.json +13 -2
  53. package/dist/chunk-3THTOTTP.js.map +0 -1
  54. package/dist/chunk-7AIWUQUO.js.map +0 -1
  55. package/dist/chunk-AF26RXVP.js.map +0 -1
  56. package/dist/chunk-DL77W2XP.js.map +0 -1
  57. package/dist/chunk-N6WWQZ4E.js.map +0 -1
package/README.md CHANGED
@@ -82,8 +82,9 @@ const result = await withSpan(
82
82
  For running scorers in production (without vitest dependency):
83
83
 
84
84
  ```ts
85
- import { withSpan, onlineEval } from 'axiom/ai';
85
+ import { withSpan } from 'axiom/ai';
86
86
  import { Scorer } from 'axiom/ai/evals/scorers';
87
+ import { onlineEval } from 'axiom/ai/evals/online';
87
88
 
88
89
  const formatScorer = Scorer('format-check', ({ output }: { output: string }) => {
89
90
  return output.length > 0;
package/dist/bin.cjs CHANGED
@@ -553,7 +553,7 @@ var init_package = __esm({
553
553
  "package.json"() {
554
554
  package_default = {
555
555
  name: "axiom",
556
- version: "0.42.0",
556
+ version: "0.44.0",
557
557
  type: "module",
558
558
  author: "Axiom, Inc.",
559
559
  contributors: [
@@ -563,7 +563,8 @@ var init_package = __esm({
563
563
  ],
564
564
  scripts: {
565
565
  dev: "tsup --watch",
566
- build: "tsup && chmod +x dist/bin.js",
566
+ build: "tsup && chmod +x dist/bin.js && pnpm check:vitest-entrypoints",
567
+ "check:vitest-entrypoints": "node ./scripts/check-vitest-entrypoints.mjs",
567
568
  format: "prettier --write .",
568
569
  "format:check": "prettier --check .",
569
570
  lint: "eslint './**/*.{js,ts}'",
@@ -619,6 +620,16 @@ var init_package = __esm({
619
620
  default: "./dist/evals/scorers.cjs"
620
621
  }
621
622
  },
623
+ "./ai/evals/online": {
624
+ import: {
625
+ types: "./dist/evals/online.d.ts",
626
+ default: "./dist/evals/online.js"
627
+ },
628
+ require: {
629
+ types: "./dist/evals/online.d.cts",
630
+ default: "./dist/evals/online.cjs"
631
+ }
632
+ },
622
633
  "./ai/config": {
623
634
  import: {
624
635
  types: "./dist/config.d.ts",
@@ -2218,27 +2229,40 @@ function printFinalReport({
2218
2229
  logger("");
2219
2230
  logger(u.bgBlue(u.white(" FINAL EVALUATION REPORT ")));
2220
2231
  logger("");
2221
- for (const suite of suiteData) {
2222
- const scorerAverages = calculateScorerAverages(suite);
2223
- const flagDiff = calculateFlagDiff(suite);
2224
- printSuiteBox({ suite, scorerAverages, calculateBaselineScorerAverage, flagDiff, logger });
2225
- logger("");
2226
- }
2227
2232
  const runId = suiteData[0]?.runId;
2228
2233
  const orgId = suiteData[0]?.orgId;
2229
2234
  const anyRegistered = registrationStatus.some((s2) => s2.registered);
2230
2235
  const anyFailed = registrationStatus.some((s2) => !s2.registered);
2236
+ const allFailed = registrationStatus.length > 0 && registrationStatus.every((s2) => !s2.registered);
2237
+ const hasAnyScores = suiteData.some(
2238
+ (suite) => suite.cases.some((caseData) => Object.keys(caseData.scores ?? {}).length > 0)
2239
+ );
2240
+ const shouldPrintSuiteBoxes = isDebug || !allFailed || hasAnyScores;
2241
+ if (shouldPrintSuiteBoxes) {
2242
+ for (const suite of suiteData) {
2243
+ const scorerAverages = calculateScorerAverages(suite);
2244
+ const flagDiff = calculateFlagDiff(suite);
2245
+ printSuiteBox({
2246
+ suite,
2247
+ scorerAverages,
2248
+ calculateBaselineScorerAverage,
2249
+ flagDiff,
2250
+ logger
2251
+ });
2252
+ logger("");
2253
+ }
2254
+ }
2231
2255
  if (anyRegistered && orgId && config?.consoleEndpointUrl) {
2232
2256
  if (suiteData.length === 1) {
2233
2257
  const suite = suiteData[0];
2234
2258
  const baselineParam = suite.baseline?.traceId ? `?baselineId=${suite.baseline.traceId}` : "";
2235
2259
  logger("View eval result:");
2236
2260
  logger(
2237
- `${config.consoleEndpointUrl}/${orgId}/ai-engineering/evaluations/${suite.name}/${suite.version}${baselineParam}`
2261
+ `${config.consoleEndpointUrl}/${orgId}/ai/evaluations/${suite.name}/${suite.version}${baselineParam}`
2238
2262
  );
2239
2263
  } else {
2240
2264
  logger("View full report:");
2241
- logger(`${config.consoleEndpointUrl}/${orgId}/ai-engineering/evaluations?runId=${runId}`);
2265
+ logger(`${config.consoleEndpointUrl}/${orgId}/ai/evaluations?runId=${runId}`);
2242
2266
  }
2243
2267
  } else if (isDebug) {
2244
2268
  logger(u.dim("Results not uploaded to Axiom (debug mode)"));
@@ -2490,11 +2514,11 @@ function setupEvalProvider(connection) {
2490
2514
  axiomProvider = new import_sdk_trace_node.NodeTracerProvider({
2491
2515
  resource: (0, import_resources.resourceFromAttributes)({
2492
2516
  ["service.name"]: "axiom",
2493
- ["service.version"]: "0.42.0"
2517
+ ["service.version"]: "0.44.0"
2494
2518
  }),
2495
2519
  spanProcessors: [processor]
2496
2520
  });
2497
- axiomTracer = axiomProvider.getTracer("axiom", "0.42.0");
2521
+ axiomTracer = axiomProvider.getTracer("axiom", "0.44.0");
2498
2522
  }
2499
2523
  async function initInstrumentation(config) {
2500
2524
  if (initialized) {
@@ -2506,7 +2530,7 @@ async function initInstrumentation(config) {
2506
2530
  }
2507
2531
  initializationPromise = (async () => {
2508
2532
  if (!config.enabled) {
2509
- axiomTracer = import_api10.trace.getTracer("axiom", "0.42.0");
2533
+ axiomTracer = import_api10.trace.getTracer("axiom", "0.44.0");
2510
2534
  initialized = true;
2511
2535
  return;
2512
2536
  }
@@ -2768,7 +2792,7 @@ var init_eval_command = __esm({
2768
2792
  ".",
2769
2793
  "any *.eval.ts file in current directory"
2770
2794
  )
2771
- ).option("-w, --watch true", "keep server running and watch for changes", false).option("-t, --token <TOKEN>", "axiom token", getDefaultToken).option("-d, --dataset <DATASET>", "axiom dataset name", process.env.AXIOM_DATASET).option("-u, --url <AXIOM URL>", "axiom url", getDefaultUrl).option("-o, --org-id <ORG ID>", "axiom organization id", getDefaultOrgId).option("-b, --baseline <BASELINE ID>", "id of baseline evaluation to compare against").option("--debug", "run locally without any network operations", false).option("--list", "list evaluations and test cases without running them", false).addOption(new import_commander.Option("-c, --console-url <URL>", "console url override").hideHelp()).action(async (target, options) => {
2795
+ ).option("-w, --watch true", "keep server running and watch for changes", false).option("-t, --token <TOKEN>", "axiom token", getDefaultToken).option("-d, --dataset <DATASET>", "axiom dataset name").option("-u, --url <AXIOM URL>", "axiom url", getDefaultUrl).option("-o, --org-id <ORG ID>", "axiom organization id", getDefaultOrgId).option("-b, --baseline <BASELINE ID>", "id of baseline evaluation to compare against").option("--debug", "run locally without any network operations", false).option("--list", "list evaluations and test cases without running them", false).addOption(new import_commander.Option("-c, --console-url <URL>", "console url override").hideHelp()).action(async (target, options) => {
2772
2796
  try {
2773
2797
  if (options.debug) {
2774
2798
  process.env.AXIOM_DEBUG = "true";
@@ -3249,7 +3273,7 @@ var import_commander2 = require("commander");
3249
3273
  var loadVersionCommand = (program2) => {
3250
3274
  return program2.addCommand(
3251
3275
  new import_commander2.Command("version").description("cli version").action(() => {
3252
- console.log("0.42.0");
3276
+ console.log("0.44.0");
3253
3277
  })
3254
3278
  );
3255
3279
  };
@@ -3259,7 +3283,7 @@ var { loadEnvConfig } = import_env.default;
3259
3283
  loadEnvConfig(process.cwd());
3260
3284
  var { cleanedArgv, overrides } = extractOverrides(process.argv.slice(2));
3261
3285
  var program = new import_commander3.Command();
3262
- program.name("axiom").description("Axiom's CLI to manage your objects and run evals").version("0.42.0");
3286
+ program.name("axiom").description("Axiom's CLI to manage your objects and run evals").version("0.44.0");
3263
3287
  program.hook("preAction", async (_, actionCommand) => {
3264
3288
  const commandName = actionCommand.name();
3265
3289
  const parentCommand = actionCommand.parent;