npm - axiom - Versions diffs - 0.31.0 → 0.32.0 - Mend

axiom 0.31.0 → 0.32.0

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (30)

package/dist/bin.cjs +352 -173
package/dist/bin.cjs.map +1 -1
package/dist/bin.js +12 -112
package/dist/bin.js.map +1 -1
package/dist/{chunk-DI3RSTOC.js → chunk-BKMC455O.js} +235 -69
package/dist/chunk-BKMC455O.js.map +1 -0
package/dist/{chunk-H4WXWPZO.js → chunk-C5XDEFQ5.js} +3 -1
package/dist/chunk-C5XDEFQ5.js.map +1 -0
package/dist/{chunk-JP7CME2X.js → chunk-RVLENV5V.js} +10 -3
package/dist/chunk-RVLENV5V.js.map +1 -0
package/dist/{config-DPgFsZ62.d.cts → config-Amf-IRuD.d.cts} +23 -0
package/dist/{config-DPgFsZ62.d.ts → config-Amf-IRuD.d.ts} +23 -0
package/dist/config.cjs +1 -0
package/dist/config.cjs.map +1 -1
package/dist/config.d.cts +2 -1
package/dist/config.d.ts +2 -1
package/dist/config.js +1 -1
package/dist/evals.cjs +95 -48
package/dist/evals.cjs.map +1 -1
package/dist/evals.d.cts +29 -25
package/dist/evals.d.ts +29 -25
package/dist/evals.js +22 -12
package/dist/evals.js.map +1 -1
package/dist/index.cjs +5 -2
package/dist/index.cjs.map +1 -1
package/dist/index.js +1 -1
package/package.json +4 -3
package/dist/chunk-DI3RSTOC.js.map +0 -1
package/dist/chunk-H4WXWPZO.js.map +0 -1
package/dist/chunk-JP7CME2X.js.map +0 -1

package/dist/bin.cjs CHANGED

@@ -153,6 +153,7 @@ var import_node_path = require("path");
 var import_node_fs = require("fs");
 var import_node_os = require("os");
 var import_node_path2 = __toESM(require("path"), 1);
+var import_vite_tsconfig_paths = __toESM(require("vite-tsconfig-paths"), 1);
 var import_node = require("vitest/node");
 // src/evals/reporter.ts
@@ -249,6 +250,101 @@ var import_zod3 = require("zod");
 // src/cli/utils/format-zod-errors.ts
 init_cjs_shims();
 var import_zod = require("zod");
+function formatZodErrors(error) {
+  const issues = error.issues;
+  const messages = [];
+  for (const issue of issues) {
+    const path3 = issue.path.join(".");
+    const message = formatIssueMessage(issue, path3);
+    messages.push(`  \u2022 ${message}`);
+  }
+  return messages.join("\n");
+}
+function formatIssueMessage(issue, path3) {
+  switch (issue.code) {
+    case "invalid_type":
+      return `flag '${path3}' expected ${issue.expected}, got ${JSON.stringify(issue.received)} (${typeof issue.received})`;
+    case "too_small":
+      if (issue.type === "number" || issue.origin === "number") {
+        return `flag '${path3}' must be >= ${issue.minimum}, got ${issue.received}`;
+      }
+      return `flag '${path3}' is too small: ${issue.message}`;
+    case "too_big":
+      if (issue.type === "number") {
+        return `flag '${path3}' must be <= ${issue.maximum}, got ${issue.received}`;
+      }
+      return `flag '${path3}' is too big: ${issue.message}`;
+    case "invalid_enum_value":
+      const options = issue.options.map((opt) => `"${opt}"`).join(", ");
+      return `flag '${path3}' must be one of: ${options}, got "${issue.received}"`;
+    case "invalid_value":
+      if (issue.values && Array.isArray(issue.values)) {
+        const values = issue.values.map((val) => `"${val}"`).join(", ");
+        return `flag '${path3}' must be one of: ${values}`;
+      }
+      return `flag '${path3}': ${issue.message}`;
+    case "unrecognized_keys":
+      const keys = issue.keys || [];
+      if (keys.length === 1) {
+        return `unrecognized flag '${keys[0]}'`;
+      } else if (keys.length > 1) {
+        const keysList = keys.map((key) => `'${key}'`).join(", ");
+        return `unrecognized flags ${keysList}`;
+      }
+      return `unrecognized keys in flags`;
+    case "custom":
+      return `flag '${path3}': ${issue.message}`;
+    default:
+      return `flag '${path3}': ${issue.message}`;
+  }
+}
+function generateFlagExamples(error) {
+  const examples = [];
+  for (const issue of error.issues) {
+    const path3 = issue.path.join(".");
+    const example = generateExampleForIssue(issue, path3);
+    if (example && !examples.includes(example)) {
+      examples.push(example);
+    }
+  }
+  return examples.slice(0, 3);
+}
+function generateExampleForIssue(issue, path3) {
+  switch (issue.code) {
+    case "invalid_type":
+      if (issue.expected === "number") {
+        return `--flag.${path3}=0.7`;
+      }
+      if (issue.expected === "boolean") {
+        return `--flag.${path3}=true`;
+      }
+      if (issue.expected === "string") {
+        return `--flag.${path3}="value"`;
+      }
+      break;
+    case "too_small":
+      if (typeof issue.minimum === "number" || typeof issue.minimum === "bigint") {
+        return `--flag.${path3}=${issue.minimum}`;
+      }
+      break;
+    case "too_big":
+      if (typeof issue.maximum === "number" || typeof issue.maximum === "bigint") {
+        return `--flag.${path3}=${issue.maximum}`;
+      }
+      break;
+    case "invalid_enum_value":
+      if (issue.options.length > 0) {
+        return `--flag.${path3}=${issue.options[0]}`;
+      }
+      break;
+    case "invalid_value":
+      if (issue.values && Array.isArray(issue.values) && issue.values.length > 0) {
+        return `--flag.${path3}=${String(issue.values[0])}`;
+      }
+      break;
+  }
+  return null;
+}
 // src/util/dot-path.ts
 init_cjs_shims();
@@ -287,6 +383,27 @@ function flattenObject(obj, prefix = "") {
   }
   return result;
 }
+function isValidPath(schema, segments) {
+  let currentSchema = schema;
+  for (let i = 0; i < segments.length; i++) {
+    const segment = segments[i];
+    if (!currentSchema.shape || !(segment in currentSchema.shape)) {
+      return false;
+    }
+    if (i < segments.length - 1) {
+      const nextSchema = currentSchema.shape[segment];
+      let unwrappedSchema = nextSchema;
+      while (unwrappedSchema?._def?.innerType || unwrappedSchema?._def?.schema) {
+        unwrappedSchema = unwrappedSchema._def.innerType || unwrappedSchema._def.schema;
+      }
+      if (!unwrappedSchema || unwrappedSchema._def?.type !== "object") {
+        return false;
+      }
+      currentSchema = unwrappedSchema;
+    }
+  }
+  return true;
+}
 // src/app-scope.ts
 var import_api8 = require("@opentelemetry/api");
@@ -331,7 +448,7 @@ var import_api4 = require("@opentelemetry/api");
 // package.json
 var package_default = {
   name: "axiom",
-  version: "0.31.0",
+  version: "0.32.0",
   type: "module",
   author: "Axiom, Inc.",
   contributors: [
@@ -415,7 +532,8 @@ var package_default = {
     defu: "^6.1.4",
     handlebars: "^4.7.8",
     nanoid: "^5.1.5",
-    open: "^10.1.0"
+    open: "^10.1.0",
+    "vite-tsconfig-paths": "^5.1.4"
   },
   peerDependencies: {
     "@opentelemetry/api": "^1.9.0",
@@ -937,22 +1055,17 @@ function printSuiteBox({
     const avg = scorerAverages[scorerName];
     const paddedName = scorerName.padEnd(maxNameLength);
     const hasAllErrors = allCasesErrored(scorerName);
-    if (suite.baseline) {
-      const baselineAvg = calculateBaselineScorerAverage2(suite.baseline, scorerName);
-      if (baselineAvg !== null) {
-        const currentPercent = hasAllErrors ? u.dim("N/A") : formatPercentage(avg);
-        const baselinePercent = formatPercentage(baselineAvg);
-        const { text: diffText, color: diffColor } = formatDiff(avg, baselineAvg);
-        const paddedBaseline = baselinePercent.padStart(7);
-        const paddedCurrent = hasAllErrors ? currentPercent : currentPercent.padStart(7);
-        const paddedDiff = hasAllErrors ? u.dim("(all cases failed)") : diffText.padStart(8);
-        logger(
-          `\u2502  ${paddedName}  ${u.blueBright(paddedBaseline)} \u2192 ${hasAllErrors ? paddedCurrent : u.magentaBright(paddedCurrent)}  (${hasAllErrors ? paddedDiff : diffColor(paddedDiff)})`
-        );
-      } else {
-        const currentPercent = hasAllErrors ? u.red("N/A (all cases failed)") : formatPercentage(avg);
-        logger(`\u2502   \u2022 ${paddedName}  ${currentPercent}`);
-      }
+    const baselineAvg = suite.baseline ? calculateBaselineScorerAverage2(suite.baseline, scorerName) : null;
+    if (baselineAvg !== null) {
+      const currentPercent = hasAllErrors ? u.dim("N/A") : formatPercentage(avg);
+      const baselinePercent = formatPercentage(baselineAvg);
+      const { text: diffText, color: diffColor } = formatDiff(avg, baselineAvg);
+      const paddedBaseline = baselinePercent.padStart(7);
+      const paddedCurrent = hasAllErrors ? currentPercent : currentPercent.padStart(7);
+      const diffDisplay = hasAllErrors ? u.dim("all cases failed") : diffColor(diffText.padStart(8));
+      logger(
+        `\u2502  ${paddedName}  ${u.blueBright(paddedBaseline)} \u2192 ${hasAllErrors ? paddedCurrent : u.magentaBright(paddedCurrent)}  (${diffDisplay})`
+      );
     } else {
       const currentPercent = hasAllErrors ? u.red("N/A (all cases failed)") : formatPercentage(avg);
       logger(`\u2502   \u2022 ${paddedName}  ${currentPercent}`);
@@ -967,14 +1080,16 @@ function printSuiteBox({
   } else {
     logger(`\u2502  Baseline: ${u.gray("(none)")}`);
   }
-  if (suite.baseline) {
-    const hasConfigChanges = flagDiff.length > 0;
-    logger("\u2502  Config changes:", hasConfigChanges ? "" : u.gray("(none)"));
-    if (hasConfigChanges) {
-      for (const { flag, current, baseline } of flagDiff) {
-        logger(
-          `\u2502   \u2022 ${flag}: ${current ?? "<not set>"} ${u.gray(`(baseline: ${baseline ?? "<not set>"})`)}`
-        );
+  const hasConfigChanges = flagDiff.length > 0;
+  logger("\u2502  Config changes:", hasConfigChanges ? "" : u.gray("(none)"));
+  if (hasConfigChanges) {
+    for (const { flag, current, baseline, default: defaultVal } of flagDiff) {
+      logger(`\u2502   \u2022 ${flag}: ${current ?? "<not set>"}`);
+      if (defaultVal !== void 0) {
+        logger(`\u2502       ${u.gray(`default: ${defaultVal}`)}`);
+      }
+      if (suite.baseline) {
+        logger(`\u2502       ${u.gray(`baseline: ${baseline ?? "<not set>"}`)}`);
       }
     }
   }
@@ -1024,25 +1139,38 @@ function calculateBaselineScorerAverage(baseline, scorerName) {
   return sum / scores.length;
 }
 function calculateFlagDiff(suite) {
-  if (!suite.baseline || !suite.configFlags || suite.configFlags.length === 0) {
+  if (!suite.configFlags || suite.configFlags.length === 0) {
     return [];
   }
   const diffs = [];
   const currentConfig = suite.flagConfig || {};
-  const baselineConfig = suite.baseline.flagConfig || {};
+  const baselineConfig = suite.baseline?.flagConfig || {};
+  const defaultConfig = suite.defaultFlagConfig || {};
   const currentFlat = flattenObject(currentConfig);
   const baselineFlat = flattenObject(baselineConfig);
-  const allKeys = /* @__PURE__ */ new Set([...Object.keys(currentFlat), ...Object.keys(baselineFlat)]);
+  const defaultFlat = flattenObject(defaultConfig);
+  const allKeys = /* @__PURE__ */ new Set([
+    ...Object.keys(currentFlat),
+    ...Object.keys(baselineFlat),
+    ...Object.keys(defaultFlat)
+  ]);
   for (const key of allKeys) {
     const isInScope = suite.configFlags.some((pattern) => key.startsWith(pattern));
     if (!isInScope) continue;
     const currentValue = currentFlat[key];
     const baselineValue = baselineFlat[key];
-    if (JSON.stringify(currentValue) !== JSON.stringify(baselineValue)) {
+    const defaultValue = defaultFlat[key];
+    const currentStr = currentValue !== void 0 ? JSON.stringify(currentValue) : void 0;
+    const baselineStr = baselineValue !== void 0 ? JSON.stringify(baselineValue) : void 0;
+    const defaultStr = defaultValue !== void 0 ? JSON.stringify(defaultValue) : void 0;
+    const diffFromBaseline = suite.baseline && currentStr !== baselineStr;
+    const diffFromDefault = currentStr !== defaultStr;
+    if (diffFromBaseline || diffFromDefault) {
       diffs.push({
         flag: key,
-        current: currentValue !== void 0 ? JSON.stringify(currentValue) : void 0,
-        baseline: baselineValue !== void 0 ? JSON.stringify(baselineValue) : void 0
+        current: currentStr,
+        baseline: suite.baseline ? baselineStr : void 0,
+        default: defaultStr
       });
     }
   }
@@ -1060,7 +1188,7 @@ function printFinalReport({
   logger("");
   for (const suite of suiteData) {
     const scorerAverages = calculateScorerAverages(suite);
-    const flagDiff = suite.baseline ? calculateFlagDiff(suite) : [];
+    const flagDiff = calculateFlagDiff(suite);
     printSuiteBox({ suite, scorerAverages, calculateBaselineScorerAverage, flagDiff, logger });
     logger("");
   }
@@ -1069,8 +1197,17 @@ function printFinalReport({
   const anyRegistered = registrationStatus.some((s2) => s2.registered);
   const anyFailed = registrationStatus.some((s2) => !s2.registered);
   if (anyRegistered && orgId && config?.consoleEndpointUrl) {
-    logger("View full report:");
-    logger(`${config.consoleEndpointUrl}/${orgId}/ai-engineering/evaluations?runId=${runId}`);
+    if (suiteData.length === 1) {
+      const suite = suiteData[0];
+      const baselineParam = suite.baseline?.traceId ? `?baselineId=${suite.baseline.traceId}` : "";
+      logger("View eval result:");
+      logger(
+        `${config.consoleEndpointUrl}/${orgId}/ai-engineering/evaluations/${suite.name}/${suite.version}${baselineParam}`
+      );
+    } else {
+      logger("View full report:");
+      logger(`${config.consoleEndpointUrl}/${orgId}/ai-engineering/evaluations?runId=${runId}`);
+    }
   } else if (isDebug) {
     logger(u.dim("Results not uploaded to Axiom (debug mode)"));
   } else {
@@ -1179,13 +1316,16 @@ var AxiomReporter = class {
       const overridesFlat = flattenObject(overrides2);
       flagConfig = dotNotationToNested({ ...defaultsFlat, ...overridesFlat });
     }
+    const defaultFlagConfig = meta.evaluation.configEnd?.flags;
     this._suiteData.push({
+      version: meta.evaluation.version,
       name: meta.evaluation.name,
       file: relativePath,
       duration: durationSeconds + "s",
       baseline: suiteBaseline || null,
       configFlags: meta.evaluation.configFlags,
       flagConfig,
+      defaultFlagConfig,
       runId: meta.evaluation.runId,
       orgId: meta.evaluation.orgId,
       cases,
@@ -1293,6 +1433,7 @@ var import_defu = require("defu");
 // src/config/index.ts
 init_cjs_shims();
+var import_zod5 = require("zod");
 // src/cli/auth/index.ts
 init_cjs_shims();
@@ -1644,6 +1785,7 @@ function createPartialDefaults() {
       orgId,
       token,
       dataset: process.env.AXIOM_DATASET,
+      flagSchema: void 0,
       instrumentation: null,
       include: [...DEFAULT_EVAL_INCLUDE],
       exclude: [],
@@ -1687,6 +1829,9 @@ function customMerger(target, source) {
   if (source?.eval && "include" in source.eval) {
     merged.eval.include = source.eval.include;
   }
+  if (source?.eval && "flagSchema" in source.eval) {
+    merged.eval.flagSchema = source.eval.flagSchema;
+  }
   return merged;
 }
 async function loadConfig(cwd = process.cwd()) {
@@ -1766,11 +1911,11 @@ function setupEvalProvider(connection) {
   axiomProvider = new import_sdk_trace_node.NodeTracerProvider({
     resource: (0, import_resources.resourceFromAttributes)({
       ["service.name"]: "axiom",
-      ["service.version"]: "0.31.0"
+      ["service.version"]: "0.32.0"
     }),
     spanProcessors: [processor]
   });
-  axiomTracer = axiomProvider.getTracer("axiom", "0.31.0");
+  axiomTracer = axiomProvider.getTracer("axiom", "0.32.0");
 }
 async function initInstrumentation(config) {
   if (initialized) {
@@ -1782,7 +1927,7 @@ async function initInstrumentation(config) {
   }
   initializationPromise = (async () => {
     if (!config.enabled) {
-      axiomTracer = import_api10.trace.getTracer("axiom", "0.31.0");
+      axiomTracer = import_api10.trace.getTracer("axiom", "0.32.0");
       initialized = true;
       return;
     }
@@ -1878,8 +2023,9 @@ var runVitest = async (dir, opts) => {
     ...opts.config,
     eval: {
       ...opts.config.eval,
-      // function can't be serialized, so we need to remove it
-      instrumentation: null
+      // These can't be serialized, so we need to remove them
+      instrumentation: null,
+      flagSchema: null
     }
   };
   if (opts.debug) {
@@ -1898,35 +2044,41 @@ var runVitest = async (dir, opts) => {
   if (opts.list) {
     console.log(u.bgWhite(u.blackBright(" List mode ")));
   }
-  const vi = await (0, import_node.createVitest)("test", {
-    root: dir ? dir : process.cwd(),
-    mode: "test",
-    include: opts.include,
-    exclude: opts.exclude,
-    testNamePattern: opts.testNamePattern,
-    reporters: ["verbose", new AxiomReporter()],
-    environment: "node",
-    browser: void 0,
-    watch: opts.watch,
-    setupFiles: [],
-    // ignore user vitest.config.ts etc
-    name: "axiom:eval",
-    printConsoleTrace: true,
-    silent: false,
-    disableConsoleIntercept: true,
-    testTimeout: opts.config?.eval?.timeoutMs || 6e4,
-    globals: true,
-    runner: (0, import_node_path.resolve)(__dirname, "evals", "custom-runner.js"),
-    provide: {
-      baseline: opts.baseline,
-      debug: opts.debug,
-      list: opts.list,
-      overrides: opts.overrides,
-      axiomConfig: providedConfig,
-      runId: opts.runId,
-      consoleUrl: opts.consoleUrl
+  const vi = await (0, import_node.createVitest)(
+    "test",
+    {
+      root: dir ? dir : process.cwd(),
+      mode: "test",
+      include: opts.include,
+      exclude: opts.exclude,
+      testNamePattern: opts.testNamePattern,
+      reporters: ["verbose", new AxiomReporter()],
+      environment: "node",
+      browser: void 0,
+      watch: opts.watch,
+      setupFiles: [],
+      // ignore user vitest.config.ts etc
+      name: "axiom:eval",
+      printConsoleTrace: true,
+      silent: false,
+      disableConsoleIntercept: true,
+      testTimeout: opts.config?.eval?.timeoutMs || 6e4,
+      globals: true,
+      runner: (0, import_node_path.resolve)(__dirname, "evals", "custom-runner.js"),
+      provide: {
+        baseline: opts.baseline,
+        debug: opts.debug,
+        list: opts.list,
+        overrides: opts.overrides,
+        axiomConfig: providedConfig,
+        runId: opts.runId,
+        consoleUrl: opts.consoleUrl
+      }
+    },
+    {
+      plugins: [(0, import_vite_tsconfig_paths.default)({ root: dir || process.cwd() })]
     }
-  });
+  );
   if (opts.list) {
     const result = await vi.collect();
     printCollectedEvals(result, dir || process.cwd());
@@ -1951,7 +2103,7 @@ var runVitest = async (dir, opts) => {
 };
 // src/cli/commands/eval.command.ts
-var import_node_fs2 = require("fs");
+var import_node_fs3 = require("fs");
 // src/cli/utils/eval-context-runner.ts
 init_cjs_shims();
@@ -1993,6 +2145,133 @@ async function runEvalWithContext(overrides2, runFn) {
   });
 }
+// src/cli/utils/parse-flag-overrides.ts
+init_cjs_shims();
+var import_zod6 = require("zod");
+var import_node_fs2 = require("fs");
+var import_node_path3 = require("path");
+var FLAG_RE = /^--flag\.([^=]+)(?:=(.*))?$/;
+var CONFIG_RE = /^--flags-config(?:=(.*))?$/;
+function ensureNoSpaceSeparatedSyntax(flagName, value, nextToken, flagType) {
+  if (value === void 0 && nextToken !== void 0) {
+    if (flagType === "flag" && !nextToken.startsWith("-") && nextToken !== "true" && nextToken !== "false") {
+      console.error(`\u274C Invalid syntax: --flag.${flagName} ${nextToken}`);
+      console.error(`\u{1F4A1} Use: --flag.${flagName}=${nextToken}`);
+      process.exit(1);
+    } else if (flagType === "config" && !nextToken.startsWith("-")) {
+      console.error(`\u274C Invalid syntax: --flags-config ${nextToken}`);
+      console.error(`\u{1F4A1} Use: --flags-config=${nextToken}`);
+      process.exit(1);
+    }
+  }
+}
+function validateFlagOverrides(overrides2, flagSchema) {
+  if (!flagSchema || Object.keys(overrides2).length === 0) {
+    return;
+  }
+  const schema = flagSchema;
+  for (const dotPath of Object.keys(overrides2)) {
+    const segments = parsePath(dotPath);
+    if (!isValidPath(schema, segments)) {
+      console.error("\u274C Invalid CLI flags:");
+      console.error(`  \u2022 flag '${dotPath}': Invalid flag path`);
+      process.exit(1);
+    }
+  }
+  const nestedObject = dotNotationToNested(overrides2);
+  const result = schema.strict().partial().safeParse(nestedObject);
+  if (!result.success) {
+    console.error("\u274C Invalid CLI flags:");
+    console.error(formatZodErrors(result.error));
+    const examples = generateFlagExamples(result.error);
+    if (examples.length > 0) {
+      console.error("\n\u{1F4A1} Valid examples:");
+      examples.forEach((example) => console.error(`  ${example}`));
+    }
+    process.exit(1);
+  }
+}
+function coerceValue(raw) {
+  if (raw === "true") return true;
+  if (raw === "false") return false;
+  const num = Number(raw);
+  if (!Number.isNaN(num) && raw.trim() === num.toString()) {
+    return num;
+  }
+  try {
+    return JSON.parse(raw);
+  } catch {
+    return raw;
+  }
+}
+function loadConfigFile(path3) {
+  const abs = (0, import_node_path3.resolve)(process.cwd(), path3);
+  try {
+    const contents = (0, import_node_fs2.readFileSync)(abs, "utf8");
+    const parsed = JSON.parse(contents);
+    if (typeof parsed !== "object" || Array.isArray(parsed) || parsed === null) {
+      console.error(
+        `\u274C Flags config must be a JSON object, got ${Array.isArray(parsed) ? "array" : typeof parsed}`
+      );
+      process.exit(1);
+    }
+    return parsed;
+  } catch (err) {
+    console.error(`\u274C Could not read or parse flags config "${path3}": ${err.message}`);
+    process.exit(1);
+  }
+}
+function extractOverrides(argv) {
+  const cleanedArgv2 = [];
+  const overrides2 = {};
+  let configPath = null;
+  let hasCliFlags = false;
+  let configPathCount = 0;
+  for (let i = 0; i < argv.length; i++) {
+    const token = argv[i];
+    const configMatch = token.match(CONFIG_RE);
+    const flagMatch = token.match(FLAG_RE);
+    if (configMatch) {
+      configPathCount++;
+      if (configPathCount > 1) {
+        console.error("\u274C Only one --flags-config can be supplied.");
+        process.exit(1);
+      }
+      const value = configMatch[1];
+      const nextToken = argv.length > i + 1 ? argv[i + 1] : void 0;
+      ensureNoSpaceSeparatedSyntax("flags-config", value, nextToken, "config");
+      if (!value) {
+        console.error("\u274C --flags-config requires a file path");
+        console.error("\u{1F4A1} Use: --flags-config=path/to/config.json");
+        process.exit(1);
+      }
+      configPath = value;
+    } else if (flagMatch) {
+      hasCliFlags = true;
+      const key = flagMatch[1];
+      const value = flagMatch[2];
+      const nextToken = argv.length > i + 1 ? argv[i + 1] : void 0;
+      ensureNoSpaceSeparatedSyntax(key, value, nextToken, "flag");
+      const finalValue = value === void 0 ? "true" : value;
+      overrides2[key] = coerceValue(finalValue);
+    } else {
+      cleanedArgv2.push(token);
+    }
+  }
+  if (configPath && hasCliFlags) {
+    console.error("\u274C Cannot use both --flags-config and --flag.* arguments together.");
+    console.error("Choose one approach:");
+    console.error("  \u2022 Config file: --flags-config=my-flags.json");
+    console.error("  \u2022 CLI flags: --flag.temperature=0.9 --flag.model=gpt-4o");
+    process.exit(1);
+  }
+  if (configPath) {
+    const configOverrides = loadConfigFile(configPath);
+    return { cleanedArgv: cleanedArgv2, overrides: configOverrides };
+  }
+  return { cleanedArgv: cleanedArgv2, overrides: overrides2 };
+}
 // src/cli/utils/glob-utils.ts
 init_cjs_shims();
 function isGlob(str) {
@@ -2043,6 +2322,7 @@ var loadEvalCommand = (program2, flagOverrides = {}) => {
         let testNamePattern;
         const isGlobPattern = isGlob(target);
         const { config: loadedConfig } = await loadConfig(".");
+        validateFlagOverrides(flagOverrides, loadedConfig.eval.flagSchema);
         const config = {
           ...loadedConfig,
           eval: {
@@ -2057,7 +2337,7 @@ var loadEvalCommand = (program2, flagOverrides = {}) => {
           include = [target];
         } else {
           try {
-            const stat = (0, import_node_fs2.lstatSync)(target);
+            const stat = (0, import_node_fs3.lstatSync)(target);
             if (stat.isDirectory()) {
               include = config?.eval?.include || [];
             } else {
@@ -2468,107 +2748,6 @@ function loadAuthCommand(program2) {
   loadAuthSwitchCommand(auth, program2);
 }
-// src/cli/utils/parse-flag-overrides.ts
-init_cjs_shims();
-var import_zod5 = require("zod");
-var import_node_fs3 = require("fs");
-var import_node_path3 = require("path");
-var FLAG_RE = /^--flag\.([^=]+)(?:=(.*))?$/;
-var CONFIG_RE = /^--flags-config(?:=(.*))?$/;
-function ensureNoSpaceSeparatedSyntax(flagName, value, nextToken, flagType) {
-  if (value === void 0 && nextToken !== void 0) {
-    if (flagType === "flag" && !nextToken.startsWith("-") && nextToken !== "true" && nextToken !== "false") {
-      console.error(`\u274C Invalid syntax: --flag.${flagName} ${nextToken}`);
-      console.error(`\u{1F4A1} Use: --flag.${flagName}=${nextToken}`);
-      process.exit(1);
-    } else if (flagType === "config" && !nextToken.startsWith("-")) {
-      console.error(`\u274C Invalid syntax: --flags-config ${nextToken}`);
-      console.error(`\u{1F4A1} Use: --flags-config=${nextToken}`);
-      process.exit(1);
-    }
-  }
-}
-function coerceValue(raw) {
-  if (raw === "true") return true;
-  if (raw === "false") return false;
-  const num = Number(raw);
-  if (!Number.isNaN(num) && raw.trim() === num.toString()) {
-    return num;
-  }
-  try {
-    return JSON.parse(raw);
-  } catch {
-    return raw;
-  }
-}
-function loadConfigFile(path3) {
-  const abs = (0, import_node_path3.resolve)(process.cwd(), path3);
-  try {
-    const contents = (0, import_node_fs3.readFileSync)(abs, "utf8");
-    const parsed = JSON.parse(contents);
-    if (typeof parsed !== "object" || Array.isArray(parsed) || parsed === null) {
-      console.error(
-        `\u274C Flags config must be a JSON object, got ${Array.isArray(parsed) ? "array" : typeof parsed}`
-      );
-      process.exit(1);
-    }
-    return parsed;
-  } catch (err) {
-    console.error(`\u274C Could not read or parse flags config "${path3}": ${err.message}`);
-    process.exit(1);
-  }
-}
-function extractOverrides(argv) {
-  const cleanedArgv2 = [];
-  const overrides2 = {};
-  let configPath = null;
-  let hasCliFlags = false;
-  let configPathCount = 0;
-  for (let i = 0; i < argv.length; i++) {
-    const token = argv[i];
-    const configMatch = token.match(CONFIG_RE);
-    const flagMatch = token.match(FLAG_RE);
-    if (configMatch) {
-      configPathCount++;
-      if (configPathCount > 1) {
-        console.error("\u274C Only one --flags-config can be supplied.");
-        process.exit(1);
-      }
-      const value = configMatch[1];
-      const nextToken = argv.length > i + 1 ? argv[i + 1] : void 0;
-      ensureNoSpaceSeparatedSyntax("flags-config", value, nextToken, "config");
-      if (!value) {
-        console.error("\u274C --flags-config requires a file path");
-        console.error("\u{1F4A1} Use: --flags-config=path/to/config.json");
-        process.exit(1);
-      }
-      configPath = value;
-    } else if (flagMatch) {
-      hasCliFlags = true;
-      const key = flagMatch[1];
-      const value = flagMatch[2];
-      const nextToken = argv.length > i + 1 ? argv[i + 1] : void 0;
-      ensureNoSpaceSeparatedSyntax(key, value, nextToken, "flag");
-      const finalValue = value === void 0 ? "true" : value;
-      overrides2[key] = coerceValue(finalValue);
-    } else {
-      cleanedArgv2.push(token);
-    }
-  }
-  if (configPath && hasCliFlags) {
-    console.error("\u274C Cannot use both --flags-config and --flag.* arguments together.");
-    console.error("Choose one approach:");
-    console.error("  \u2022 Config file: --flags-config=my-flags.json");
-    console.error("  \u2022 CLI flags: --flag.temperature=0.9 --flag.model=gpt-4o");
-    process.exit(1);
-  }
-  if (configPath) {
-    const configOverrides = loadConfigFile(configPath);
-    return { cleanedArgv: cleanedArgv2, overrides: configOverrides };
-  }
-  return { cleanedArgv: cleanedArgv2, overrides: overrides2 };
-}
 // src/bin.ts
 var import_env = __toESM(require("@next/env"), 1);
@@ -2578,7 +2757,7 @@ var import_commander2 = require("commander");
 var loadVersionCommand = (program2) => {
   return program2.addCommand(
     new import_commander2.Command("version").description("cli version").action(() => {
-      console.log("0.31.0");
+      console.log("0.32.0");
     })
   );
 };
@@ -2588,7 +2767,7 @@ var { loadEnvConfig } = import_env.default;
 loadEnvConfig(process.cwd());
 var { cleanedArgv, overrides } = extractOverrides(process.argv.slice(2));
 var program = new import_commander3.Command();
-program.name("axiom").description("Axiom's CLI to manage your objects and run evals").version("0.31.0");
+program.name("axiom").description("Axiom's CLI to manage your objects and run evals").version("0.32.0");
 program.hook("preAction", async (_, actionCommand) => {
   const commandName = actionCommand.name();
   const parentCommand = actionCommand.parent;