axiom 0.31.0 → 0.32.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/bin.cjs CHANGED
@@ -153,6 +153,7 @@ var import_node_path = require("path");
153
153
  var import_node_fs = require("fs");
154
154
  var import_node_os = require("os");
155
155
  var import_node_path2 = __toESM(require("path"), 1);
156
+ var import_vite_tsconfig_paths = __toESM(require("vite-tsconfig-paths"), 1);
156
157
  var import_node = require("vitest/node");
157
158
 
158
159
  // src/evals/reporter.ts
@@ -249,6 +250,101 @@ var import_zod3 = require("zod");
249
250
  // src/cli/utils/format-zod-errors.ts
250
251
  init_cjs_shims();
251
252
  var import_zod = require("zod");
253
/**
 * Render every issue of a Zod error as one bulleted line and join the lines
 * with newlines.
 *
 * @param {{ issues: Array<{ path: Array<string|number> }> }} error - Error object exposing an `issues` array.
 * @returns {string} Newline-separated bullet list; empty string when there are no issues.
 */
function formatZodErrors(error) {
  // Each issue path is flattened to dot notation before being handed to the
  // per-issue formatter.
  const bulletLines = error.issues.map((issue) => {
    const dottedPath = issue.path.join(".");
    return ` \u2022 ${formatIssueMessage(issue, dottedPath)}`;
  });
  return bulletLines.join("\n");
}
263
/**
 * Build the one-line, human-readable message for a single validation issue
 * raised against CLI flag overrides.
 *
 * The numeric-bound branches accept issues that identify the value kind through
 * either `issue.type` or `issue.origin` (presumably the Zod v3 and v4 issue
 * shapes respectively — confirm against the installed Zod version).
 *
 * @param {object} issue - Issue object; `code` selects the message template,
 *   the remaining fields read depend on the code.
 * @param {string} path3 - Dot-joined path of the offending flag.
 * @returns {string} Description of the problem.
 */
function formatIssueMessage(issue, path3) {
  // Shared by too_small and too_big so both bounds treat numeric issues the
  // same way; previously too_big ignored `origin` and fell through to the
  // generic message.
  const isNumberIssue = issue.type === "number" || issue.origin === "number";
  switch (issue.code) {
    case "invalid_type":
      return `flag '${path3}' expected ${issue.expected}, got ${JSON.stringify(issue.received)} (${typeof issue.received})`;
    case "too_small":
      if (isNumberIssue) {
        return `flag '${path3}' must be >= ${issue.minimum}, got ${issue.received}`;
      }
      return `flag '${path3}' is too small: ${issue.message}`;
    case "too_big":
      if (isNumberIssue) {
        return `flag '${path3}' must be <= ${issue.maximum}, got ${issue.received}`;
      }
      return `flag '${path3}' is too big: ${issue.message}`;
    case "invalid_enum_value": {
      const options = issue.options.map((opt) => `"${opt}"`).join(", ");
      return `flag '${path3}' must be one of: ${options}, got "${issue.received}"`;
    }
    case "invalid_value": {
      if (issue.values && Array.isArray(issue.values)) {
        const values = issue.values.map((val) => `"${val}"`).join(", ");
        return `flag '${path3}' must be one of: ${values}`;
      }
      return `flag '${path3}': ${issue.message}`;
    }
    case "unrecognized_keys": {
      const keys = issue.keys || [];
      if (keys.length === 1) {
        return `unrecognized flag '${keys[0]}'`;
      }
      if (keys.length > 1) {
        const keysList = keys.map((key) => `'${key}'`).join(", ");
        return `unrecognized flags ${keysList}`;
      }
      return `unrecognized keys in flags`;
    }
    case "custom":
    default:
      // Custom and unknown issue codes both surface the issue's own message.
      return `flag '${path3}': ${issue.message}`;
  }
}
301
/**
 * Collect up to three distinct example CLI invocations for the issues of a
 * validation error, in the order the issues appear.
 *
 * @param {{ issues: Array<{ path: Array<string|number> }> }} error - Error object exposing an `issues` array.
 * @returns {string[]} At most three unique example strings.
 */
function generateFlagExamples(error) {
  // A Set keeps first-seen order while dropping repeated examples.
  const distinct = new Set();
  for (const issue of error.issues) {
    const suggestion = generateExampleForIssue(issue, issue.path.join("."));
    if (suggestion) {
      distinct.add(suggestion);
    }
  }
  return Array.from(distinct).slice(0, 3);
}
312
/**
 * Suggest a corrected `--flag.<path>=<value>` invocation for one validation
 * issue, or `null` when no sensible example can be derived.
 *
 * For `too_small` / `too_big` the bound is only echoed back as an example
 * value when the issue concerns a numeric value. For strings, arrays and other
 * sized kinds the bound is a length, so suggesting it as the flag value would
 * be misleading. Issues that carry neither `origin` nor `type` keep the
 * previous behaviour and use the bound.
 *
 * @param {object} issue - Issue object; `code` selects the strategy.
 * @param {string} path3 - Dot-joined path of the offending flag.
 * @returns {string|null} Example invocation or `null`.
 */
function generateExampleForIssue(issue, path3) {
  const isNumericBound = (bound) => typeof bound === "number" || typeof bound === "bigint";
  // `origin` and `type` are the two fields that identify the kind of value an
  // issue is about; either may be absent.
  const kind = issue.origin ?? issue.type;
  const boundIsAValue = kind === void 0 || kind === "number" || kind === "int" || kind === "bigint";
  switch (issue.code) {
    case "invalid_type": {
      const samples = new Map([
        ["number", "0.7"],
        ["boolean", "true"],
        ["string", '"value"']
      ]);
      if (samples.has(issue.expected)) {
        return `--flag.${path3}=${samples.get(issue.expected)}`;
      }
      break;
    }
    case "too_small":
      if (boundIsAValue && isNumericBound(issue.minimum)) {
        return `--flag.${path3}=${issue.minimum}`;
      }
      break;
    case "too_big":
      if (boundIsAValue && isNumericBound(issue.maximum)) {
        return `--flag.${path3}=${issue.maximum}`;
      }
      break;
    case "invalid_enum_value":
      if (issue.options.length > 0) {
        return `--flag.${path3}=${issue.options[0]}`;
      }
      break;
    case "invalid_value":
      if (issue.values && Array.isArray(issue.values) && issue.values.length > 0) {
        return `--flag.${path3}=${String(issue.values[0])}`;
      }
      break;
  }
  return null;
}
252
348
 
253
349
  // src/util/dot-path.ts
254
350
  init_cjs_shims();
@@ -287,6 +383,27 @@ function flattenObject(obj, prefix = "") {
287
383
  }
288
384
  return result;
289
385
  }
386
/**
 * Check that a dot-path (already split into segments) addresses a key that is
 * declared in an object schema.
 *
 * Every segment must be an own key of the current schema's `shape`. For every
 * segment except the last, the addressed schema is unwrapped through
 * `_def.innerType` / `_def.schema` and must end up with `_def.type === "object"`
 * so the walk can continue into it.
 *
 * @param {object} schema - Root schema; expected to expose a `shape` object.
 * @param {string[]} segments - Path segments, outermost first.
 * @returns {boolean} `true` when every segment resolves; an empty path is valid.
 */
function isValidPath(schema, segments) {
  // Own-property check: the `in` operator would also accept inherited members
  // such as `constructor` or `toString`, letting them pass as flag names.
  const hasOwn = Object.prototype.hasOwnProperty;
  let currentSchema = schema;
  for (let i = 0; i < segments.length; i++) {
    const segment = segments[i];
    const shape = currentSchema.shape;
    if (!shape || !hasOwn.call(shape, segment)) {
      return false;
    }
    const isLastSegment = i === segments.length - 1;
    if (isLastSegment) {
      continue;
    }
    // Peel wrapper schemas until the underlying schema is reached.
    let unwrappedSchema = shape[segment];
    while (unwrappedSchema?._def?.innerType || unwrappedSchema?._def?.schema) {
      unwrappedSchema = unwrappedSchema._def.innerType || unwrappedSchema._def.schema;
    }
    if (!unwrappedSchema || unwrappedSchema._def?.type !== "object") {
      return false;
    }
    currentSchema = unwrappedSchema;
  }
  return true;
}
290
407
 
291
408
  // src/app-scope.ts
292
409
  var import_api8 = require("@opentelemetry/api");
@@ -331,7 +448,7 @@ var import_api4 = require("@opentelemetry/api");
331
448
  // package.json
332
449
  var package_default = {
333
450
  name: "axiom",
334
- version: "0.31.0",
451
+ version: "0.32.0",
335
452
  type: "module",
336
453
  author: "Axiom, Inc.",
337
454
  contributors: [
@@ -415,7 +532,8 @@ var package_default = {
415
532
  defu: "^6.1.4",
416
533
  handlebars: "^4.7.8",
417
534
  nanoid: "^5.1.5",
418
- open: "^10.1.0"
535
+ open: "^10.1.0",
536
+ "vite-tsconfig-paths": "^5.1.4"
419
537
  },
420
538
  peerDependencies: {
421
539
  "@opentelemetry/api": "^1.9.0",
@@ -937,22 +1055,17 @@ function printSuiteBox({
937
1055
  const avg = scorerAverages[scorerName];
938
1056
  const paddedName = scorerName.padEnd(maxNameLength);
939
1057
  const hasAllErrors = allCasesErrored(scorerName);
940
- if (suite.baseline) {
941
- const baselineAvg = calculateBaselineScorerAverage2(suite.baseline, scorerName);
942
- if (baselineAvg !== null) {
943
- const currentPercent = hasAllErrors ? u.dim("N/A") : formatPercentage(avg);
944
- const baselinePercent = formatPercentage(baselineAvg);
945
- const { text: diffText, color: diffColor } = formatDiff(avg, baselineAvg);
946
- const paddedBaseline = baselinePercent.padStart(7);
947
- const paddedCurrent = hasAllErrors ? currentPercent : currentPercent.padStart(7);
948
- const paddedDiff = hasAllErrors ? u.dim("(all cases failed)") : diffText.padStart(8);
949
- logger(
950
- `\u2502 ${paddedName} ${u.blueBright(paddedBaseline)} \u2192 ${hasAllErrors ? paddedCurrent : u.magentaBright(paddedCurrent)} (${hasAllErrors ? paddedDiff : diffColor(paddedDiff)})`
951
- );
952
- } else {
953
- const currentPercent = hasAllErrors ? u.red("N/A (all cases failed)") : formatPercentage(avg);
954
- logger(`\u2502 \u2022 ${paddedName} ${currentPercent}`);
955
- }
1058
+ const baselineAvg = suite.baseline ? calculateBaselineScorerAverage2(suite.baseline, scorerName) : null;
1059
+ if (baselineAvg !== null) {
1060
+ const currentPercent = hasAllErrors ? u.dim("N/A") : formatPercentage(avg);
1061
+ const baselinePercent = formatPercentage(baselineAvg);
1062
+ const { text: diffText, color: diffColor } = formatDiff(avg, baselineAvg);
1063
+ const paddedBaseline = baselinePercent.padStart(7);
1064
+ const paddedCurrent = hasAllErrors ? currentPercent : currentPercent.padStart(7);
1065
+ const diffDisplay = hasAllErrors ? u.dim("all cases failed") : diffColor(diffText.padStart(8));
1066
+ logger(
1067
+ `\u2502 ${paddedName} ${u.blueBright(paddedBaseline)} \u2192 ${hasAllErrors ? paddedCurrent : u.magentaBright(paddedCurrent)} (${diffDisplay})`
1068
+ );
956
1069
  } else {
957
1070
  const currentPercent = hasAllErrors ? u.red("N/A (all cases failed)") : formatPercentage(avg);
958
1071
  logger(`\u2502 \u2022 ${paddedName} ${currentPercent}`);
@@ -967,14 +1080,16 @@ function printSuiteBox({
967
1080
  } else {
968
1081
  logger(`\u2502 Baseline: ${u.gray("(none)")}`);
969
1082
  }
970
- if (suite.baseline) {
971
- const hasConfigChanges = flagDiff.length > 0;
972
- logger("\u2502 Config changes:", hasConfigChanges ? "" : u.gray("(none)"));
973
- if (hasConfigChanges) {
974
- for (const { flag, current, baseline } of flagDiff) {
975
- logger(
976
- `\u2502 \u2022 ${flag}: ${current ?? "<not set>"} ${u.gray(`(baseline: ${baseline ?? "<not set>"})`)}`
977
- );
1083
+ const hasConfigChanges = flagDiff.length > 0;
1084
+ logger("\u2502 Config changes:", hasConfigChanges ? "" : u.gray("(none)"));
1085
+ if (hasConfigChanges) {
1086
+ for (const { flag, current, baseline, default: defaultVal } of flagDiff) {
1087
+ logger(`\u2502 \u2022 ${flag}: ${current ?? "<not set>"}`);
1088
+ if (defaultVal !== void 0) {
1089
+ logger(`\u2502 ${u.gray(`default: ${defaultVal}`)}`);
1090
+ }
1091
+ if (suite.baseline) {
1092
+ logger(`\u2502 ${u.gray(`baseline: ${baseline ?? "<not set>"}`)}`);
978
1093
  }
979
1094
  }
980
1095
  }
@@ -1024,25 +1139,38 @@ function calculateBaselineScorerAverage(baseline, scorerName) {
1024
1139
  return sum / scores.length;
1025
1140
  }
1026
1141
  function calculateFlagDiff(suite) {
1027
- if (!suite.baseline || !suite.configFlags || suite.configFlags.length === 0) {
1142
+ if (!suite.configFlags || suite.configFlags.length === 0) {
1028
1143
  return [];
1029
1144
  }
1030
1145
  const diffs = [];
1031
1146
  const currentConfig = suite.flagConfig || {};
1032
- const baselineConfig = suite.baseline.flagConfig || {};
1147
+ const baselineConfig = suite.baseline?.flagConfig || {};
1148
+ const defaultConfig = suite.defaultFlagConfig || {};
1033
1149
  const currentFlat = flattenObject(currentConfig);
1034
1150
  const baselineFlat = flattenObject(baselineConfig);
1035
- const allKeys = /* @__PURE__ */ new Set([...Object.keys(currentFlat), ...Object.keys(baselineFlat)]);
1151
+ const defaultFlat = flattenObject(defaultConfig);
1152
+ const allKeys = /* @__PURE__ */ new Set([
1153
+ ...Object.keys(currentFlat),
1154
+ ...Object.keys(baselineFlat),
1155
+ ...Object.keys(defaultFlat)
1156
+ ]);
1036
1157
  for (const key of allKeys) {
1037
1158
  const isInScope = suite.configFlags.some((pattern) => key.startsWith(pattern));
1038
1159
  if (!isInScope) continue;
1039
1160
  const currentValue = currentFlat[key];
1040
1161
  const baselineValue = baselineFlat[key];
1041
- if (JSON.stringify(currentValue) !== JSON.stringify(baselineValue)) {
1162
+ const defaultValue = defaultFlat[key];
1163
+ const currentStr = currentValue !== void 0 ? JSON.stringify(currentValue) : void 0;
1164
+ const baselineStr = baselineValue !== void 0 ? JSON.stringify(baselineValue) : void 0;
1165
+ const defaultStr = defaultValue !== void 0 ? JSON.stringify(defaultValue) : void 0;
1166
+ const diffFromBaseline = suite.baseline && currentStr !== baselineStr;
1167
+ const diffFromDefault = currentStr !== defaultStr;
1168
+ if (diffFromBaseline || diffFromDefault) {
1042
1169
  diffs.push({
1043
1170
  flag: key,
1044
- current: currentValue !== void 0 ? JSON.stringify(currentValue) : void 0,
1045
- baseline: baselineValue !== void 0 ? JSON.stringify(baselineValue) : void 0
1171
+ current: currentStr,
1172
+ baseline: suite.baseline ? baselineStr : void 0,
1173
+ default: defaultStr
1046
1174
  });
1047
1175
  }
1048
1176
  }
@@ -1060,7 +1188,7 @@ function printFinalReport({
1060
1188
  logger("");
1061
1189
  for (const suite of suiteData) {
1062
1190
  const scorerAverages = calculateScorerAverages(suite);
1063
- const flagDiff = suite.baseline ? calculateFlagDiff(suite) : [];
1191
+ const flagDiff = calculateFlagDiff(suite);
1064
1192
  printSuiteBox({ suite, scorerAverages, calculateBaselineScorerAverage, flagDiff, logger });
1065
1193
  logger("");
1066
1194
  }
@@ -1069,8 +1197,17 @@ function printFinalReport({
1069
1197
  const anyRegistered = registrationStatus.some((s2) => s2.registered);
1070
1198
  const anyFailed = registrationStatus.some((s2) => !s2.registered);
1071
1199
  if (anyRegistered && orgId && config?.consoleEndpointUrl) {
1072
- logger("View full report:");
1073
- logger(`${config.consoleEndpointUrl}/${orgId}/ai-engineering/evaluations?runId=${runId}`);
1200
+ if (suiteData.length === 1) {
1201
+ const suite = suiteData[0];
1202
+ const baselineParam = suite.baseline?.traceId ? `?baselineId=${suite.baseline.traceId}` : "";
1203
+ logger("View eval result:");
1204
+ logger(
1205
+ `${config.consoleEndpointUrl}/${orgId}/ai-engineering/evaluations/${suite.name}/${suite.version}${baselineParam}`
1206
+ );
1207
+ } else {
1208
+ logger("View full report:");
1209
+ logger(`${config.consoleEndpointUrl}/${orgId}/ai-engineering/evaluations?runId=${runId}`);
1210
+ }
1074
1211
  } else if (isDebug) {
1075
1212
  logger(u.dim("Results not uploaded to Axiom (debug mode)"));
1076
1213
  } else {
@@ -1179,13 +1316,16 @@ var AxiomReporter = class {
1179
1316
  const overridesFlat = flattenObject(overrides2);
1180
1317
  flagConfig = dotNotationToNested({ ...defaultsFlat, ...overridesFlat });
1181
1318
  }
1319
+ const defaultFlagConfig = meta.evaluation.configEnd?.flags;
1182
1320
  this._suiteData.push({
1321
+ version: meta.evaluation.version,
1183
1322
  name: meta.evaluation.name,
1184
1323
  file: relativePath,
1185
1324
  duration: durationSeconds + "s",
1186
1325
  baseline: suiteBaseline || null,
1187
1326
  configFlags: meta.evaluation.configFlags,
1188
1327
  flagConfig,
1328
+ defaultFlagConfig,
1189
1329
  runId: meta.evaluation.runId,
1190
1330
  orgId: meta.evaluation.orgId,
1191
1331
  cases,
@@ -1293,6 +1433,7 @@ var import_defu = require("defu");
1293
1433
 
1294
1434
  // src/config/index.ts
1295
1435
  init_cjs_shims();
1436
+ var import_zod5 = require("zod");
1296
1437
 
1297
1438
  // src/cli/auth/index.ts
1298
1439
  init_cjs_shims();
@@ -1644,6 +1785,7 @@ function createPartialDefaults() {
1644
1785
  orgId,
1645
1786
  token,
1646
1787
  dataset: process.env.AXIOM_DATASET,
1788
+ flagSchema: void 0,
1647
1789
  instrumentation: null,
1648
1790
  include: [...DEFAULT_EVAL_INCLUDE],
1649
1791
  exclude: [],
@@ -1687,6 +1829,9 @@ function customMerger(target, source) {
1687
1829
  if (source?.eval && "include" in source.eval) {
1688
1830
  merged.eval.include = source.eval.include;
1689
1831
  }
1832
+ if (source?.eval && "flagSchema" in source.eval) {
1833
+ merged.eval.flagSchema = source.eval.flagSchema;
1834
+ }
1690
1835
  return merged;
1691
1836
  }
1692
1837
  async function loadConfig(cwd = process.cwd()) {
@@ -1766,11 +1911,11 @@ function setupEvalProvider(connection) {
1766
1911
  axiomProvider = new import_sdk_trace_node.NodeTracerProvider({
1767
1912
  resource: (0, import_resources.resourceFromAttributes)({
1768
1913
  ["service.name"]: "axiom",
1769
- ["service.version"]: "0.31.0"
1914
+ ["service.version"]: "0.32.0"
1770
1915
  }),
1771
1916
  spanProcessors: [processor]
1772
1917
  });
1773
- axiomTracer = axiomProvider.getTracer("axiom", "0.31.0");
1918
+ axiomTracer = axiomProvider.getTracer("axiom", "0.32.0");
1774
1919
  }
1775
1920
  async function initInstrumentation(config) {
1776
1921
  if (initialized) {
@@ -1782,7 +1927,7 @@ async function initInstrumentation(config) {
1782
1927
  }
1783
1928
  initializationPromise = (async () => {
1784
1929
  if (!config.enabled) {
1785
- axiomTracer = import_api10.trace.getTracer("axiom", "0.31.0");
1930
+ axiomTracer = import_api10.trace.getTracer("axiom", "0.32.0");
1786
1931
  initialized = true;
1787
1932
  return;
1788
1933
  }
@@ -1878,8 +2023,9 @@ var runVitest = async (dir, opts) => {
1878
2023
  ...opts.config,
1879
2024
  eval: {
1880
2025
  ...opts.config.eval,
1881
- // function can't be serialized, so we need to remove it
1882
- instrumentation: null
2026
+ // These can't be serialized, so we need to remove them
2027
+ instrumentation: null,
2028
+ flagSchema: null
1883
2029
  }
1884
2030
  };
1885
2031
  if (opts.debug) {
@@ -1898,35 +2044,41 @@ var runVitest = async (dir, opts) => {
1898
2044
  if (opts.list) {
1899
2045
  console.log(u.bgWhite(u.blackBright(" List mode ")));
1900
2046
  }
1901
- const vi = await (0, import_node.createVitest)("test", {
1902
- root: dir ? dir : process.cwd(),
1903
- mode: "test",
1904
- include: opts.include,
1905
- exclude: opts.exclude,
1906
- testNamePattern: opts.testNamePattern,
1907
- reporters: ["verbose", new AxiomReporter()],
1908
- environment: "node",
1909
- browser: void 0,
1910
- watch: opts.watch,
1911
- setupFiles: [],
1912
- // ignore user vitest.config.ts etc
1913
- name: "axiom:eval",
1914
- printConsoleTrace: true,
1915
- silent: false,
1916
- disableConsoleIntercept: true,
1917
- testTimeout: opts.config?.eval?.timeoutMs || 6e4,
1918
- globals: true,
1919
- runner: (0, import_node_path.resolve)(__dirname, "evals", "custom-runner.js"),
1920
- provide: {
1921
- baseline: opts.baseline,
1922
- debug: opts.debug,
1923
- list: opts.list,
1924
- overrides: opts.overrides,
1925
- axiomConfig: providedConfig,
1926
- runId: opts.runId,
1927
- consoleUrl: opts.consoleUrl
2047
+ const vi = await (0, import_node.createVitest)(
2048
+ "test",
2049
+ {
2050
+ root: dir ? dir : process.cwd(),
2051
+ mode: "test",
2052
+ include: opts.include,
2053
+ exclude: opts.exclude,
2054
+ testNamePattern: opts.testNamePattern,
2055
+ reporters: ["verbose", new AxiomReporter()],
2056
+ environment: "node",
2057
+ browser: void 0,
2058
+ watch: opts.watch,
2059
+ setupFiles: [],
2060
+ // ignore user vitest.config.ts etc
2061
+ name: "axiom:eval",
2062
+ printConsoleTrace: true,
2063
+ silent: false,
2064
+ disableConsoleIntercept: true,
2065
+ testTimeout: opts.config?.eval?.timeoutMs || 6e4,
2066
+ globals: true,
2067
+ runner: (0, import_node_path.resolve)(__dirname, "evals", "custom-runner.js"),
2068
+ provide: {
2069
+ baseline: opts.baseline,
2070
+ debug: opts.debug,
2071
+ list: opts.list,
2072
+ overrides: opts.overrides,
2073
+ axiomConfig: providedConfig,
2074
+ runId: opts.runId,
2075
+ consoleUrl: opts.consoleUrl
2076
+ }
2077
+ },
2078
+ {
2079
+ plugins: [(0, import_vite_tsconfig_paths.default)({ root: dir || process.cwd() })]
1928
2080
  }
1929
- });
2081
+ );
1930
2082
  if (opts.list) {
1931
2083
  const result = await vi.collect();
1932
2084
  printCollectedEvals(result, dir || process.cwd());
@@ -1951,7 +2103,7 @@ var runVitest = async (dir, opts) => {
1951
2103
  };
1952
2104
 
1953
2105
  // src/cli/commands/eval.command.ts
1954
- var import_node_fs2 = require("fs");
2106
+ var import_node_fs3 = require("fs");
1955
2107
 
1956
2108
  // src/cli/utils/eval-context-runner.ts
1957
2109
  init_cjs_shims();
@@ -1993,6 +2145,133 @@ async function runEvalWithContext(overrides2, runFn) {
1993
2145
  });
1994
2146
  }
1995
2147
 
2148
+ // src/cli/utils/parse-flag-overrides.ts
2149
+ init_cjs_shims();
2150
+ var import_zod6 = require("zod");
2151
+ var import_node_fs2 = require("fs");
2152
+ var import_node_path3 = require("path");
2153
+ var FLAG_RE = /^--flag\.([^=]+)(?:=(.*))?$/;
2154
+ var CONFIG_RE = /^--flags-config(?:=(.*))?$/;
2155
/**
 * Reject the space-separated form (`--flag.x value` / `--flags-config file`)
 * by printing a hint and exiting with status 1.
 *
 * Nothing happens when the option already carries an inline `=value`, when
 * there is no following token, or when the following token starts with `-`.
 * For `--flag.*` a following literal `true` or `false` is also tolerated.
 *
 * @param {string} flagName - Name after `--flag.`; only used in the "flag" messages.
 * @param {string|undefined} value - Inline value captured after `=`, if any.
 * @param {string|undefined} nextToken - The argv token following the option, if any.
 * @param {string} flagType - "flag" or "config"; other values are ignored.
 * @returns {void}
 */
function ensureNoSpaceSeparatedSyntax(flagName, value, nextToken, flagType) {
  if (value !== void 0 || nextToken === void 0) {
    return;
  }
  switch (flagType) {
    case "flag": {
      const isBooleanLiteral = nextToken === "true" || nextToken === "false";
      if (nextToken.startsWith("-") || isBooleanLiteral) {
        return;
      }
      console.error(`\u274C Invalid syntax: --flag.${flagName} ${nextToken}`);
      console.error(`\u{1F4A1} Use: --flag.${flagName}=${nextToken}`);
      process.exit(1);
      return;
    }
    case "config": {
      if (nextToken.startsWith("-")) {
        return;
      }
      console.error(`\u274C Invalid syntax: --flags-config ${nextToken}`);
      console.error(`\u{1F4A1} Use: --flags-config=${nextToken}`);
      process.exit(1);
      return;
    }
    default:
      return;
  }
}
2168
/**
 * Validate CLI flag overrides against the configured flag schema, printing
 * the problems and exiting with status 1 when they do not conform.
 *
 * Two passes are made: first every dot-path key must address a declared key
 * of the schema, then the nested form of the overrides is parsed with the
 * schema made strict and partial.
 *
 * @param {Record<string, unknown>} overrides2 - Overrides keyed by dot-path.
 * @param {object|undefined|null} flagSchema - Schema to validate against; validation is skipped when falsy.
 * @returns {void}
 */
function validateFlagOverrides(overrides2, flagSchema) {
  if (!flagSchema) {
    return;
  }
  const dotPaths = Object.keys(overrides2);
  if (dotPaths.length === 0) {
    return;
  }

  // Pass 1: reject paths that do not exist in the schema at all.
  for (const dotPath of dotPaths) {
    if (isValidPath(flagSchema, parsePath(dotPath))) {
      continue;
    }
    console.error("\u274C Invalid CLI flags:");
    console.error(` \u2022 flag '${dotPath}': Invalid flag path`);
    process.exit(1);
  }

  // Pass 2: type-check the values.
  const nestedOverrides = dotNotationToNested(overrides2);
  const outcome = flagSchema.strict().partial().safeParse(nestedOverrides);
  if (outcome.success) {
    return;
  }
  console.error("\u274C Invalid CLI flags:");
  console.error(formatZodErrors(outcome.error));
  const suggestions = generateFlagExamples(outcome.error);
  if (suggestions.length > 0) {
    console.error("\n\u{1F4A1} Valid examples:");
    for (const suggestion of suggestions) {
      console.error(` ${suggestion}`);
    }
  }
  process.exit(1);
}
2194
/**
 * Convert a raw CLI string into the most specific JavaScript value it
 * represents.
 *
 * Resolution order: the literals `true` / `false`, numbers whose canonical
 * string form equals the trimmed input, any other valid JSON, and finally the
 * raw string itself.
 *
 * A digit-only literal that falls outside the safe integer range is returned
 * unchanged as a string. Otherwise the JSON fallback would parse it as a
 * number and silently lose precision, defeating the round-trip check above.
 *
 * @param {string} raw - Text following `=` on the command line.
 * @returns {unknown} Boolean, number, parsed JSON value, or the original string.
 */
function coerceValue(raw) {
  if (raw === "true") return true;
  if (raw === "false") return false;
  const trimmed = raw.trim();
  const num = Number(raw);
  // Only accept the numeric form when it round-trips exactly.
  if (!Number.isNaN(num) && trimmed === num.toString()) {
    return num;
  }
  if (/^-?\d+$/.test(trimmed) && !Number.isSafeInteger(num)) {
    return raw;
  }
  try {
    return JSON.parse(raw);
  } catch {
    // Not JSON: treat the input as a plain string.
    return raw;
  }
}
2207
/**
 * Read a flags config file and return its parsed JSON object.
 *
 * The path is resolved against the current working directory. When the file
 * cannot be read, is not valid JSON, or does not contain a JSON object, an
 * error is printed and the process exits with status 1.
 *
 * @param {string} path3 - Path to the JSON file, as supplied on the command line.
 * @returns {object} The parsed top-level object.
 */
function loadConfigFile(path3) {
  const abs = (0, import_node_path3.resolve)(process.cwd(), path3);
  try {
    const contents = (0, import_node_fs2.readFileSync)(abs, "utf8");
    const parsed = JSON.parse(contents);
    const isJsonObject = typeof parsed === "object" && parsed !== null && !Array.isArray(parsed);
    if (!isJsonObject) {
      // `typeof null` is "object", so name null and arrays explicitly to keep
      // the message truthful.
      let actualKind = typeof parsed;
      if (parsed === null) {
        actualKind = "null";
      } else if (Array.isArray(parsed)) {
        actualKind = "array";
      }
      console.error(`\u274C Flags config must be a JSON object, got ${actualKind}`);
      process.exit(1);
    }
    return parsed;
  } catch (err) {
    console.error(`\u274C Could not read or parse flags config "${path3}": ${err.message}`);
    process.exit(1);
  }
}
2224
/**
 * Split the raw argument list into flag overrides and the remaining arguments.
 *
 * `--flag.<key>[=<value>]` tokens become entries of the overrides object (a
 * missing value means `true`); a single `--flags-config=<file>` token makes
 * the overrides come from that file instead. The two forms are mutually
 * exclusive. Every other token is passed through untouched. Usage errors are
 * printed and terminate the process with status 1.
 *
 * @param {string[]} argv - Command-line arguments without the node/script prefix.
 * @returns {{ cleanedArgv: string[], overrides: object }} Remaining arguments and the collected overrides.
 */
function extractOverrides(argv) {
  const cleanedArgv2 = [];
  const overrides2 = {};
  let configPath = null;
  let hasCliFlags = false;
  let configPathCount = 0;

  for (const [index, token] of argv.entries()) {
    const configMatch = token.match(CONFIG_RE);
    const flagMatch = token.match(FLAG_RE);
    const nextToken = index + 1 < argv.length ? argv[index + 1] : void 0;

    if (configMatch) {
      configPathCount += 1;
      if (configPathCount > 1) {
        console.error("\u274C Only one --flags-config can be supplied.");
        process.exit(1);
      }
      const inlinePath = configMatch[1];
      ensureNoSpaceSeparatedSyntax("flags-config", inlinePath, nextToken, "config");
      if (!inlinePath) {
        console.error("\u274C --flags-config requires a file path");
        console.error("\u{1F4A1} Use: --flags-config=path/to/config.json");
        process.exit(1);
      }
      configPath = inlinePath;
      continue;
    }

    if (flagMatch) {
      hasCliFlags = true;
      const [, key, inlineValue] = flagMatch;
      ensureNoSpaceSeparatedSyntax(key, inlineValue, nextToken, "flag");
      // A bare `--flag.x` is shorthand for `--flag.x=true`.
      overrides2[key] = coerceValue(inlineValue === void 0 ? "true" : inlineValue);
      continue;
    }

    cleanedArgv2.push(token);
  }

  if (configPath && hasCliFlags) {
    console.error("\u274C Cannot use both --flags-config and --flag.* arguments together.");
    console.error("Choose one approach:");
    console.error(" \u2022 Config file: --flags-config=my-flags.json");
    console.error(" \u2022 CLI flags: --flag.temperature=0.9 --flag.model=gpt-4o");
    process.exit(1);
  }

  return {
    cleanedArgv: cleanedArgv2,
    overrides: configPath ? loadConfigFile(configPath) : overrides2
  };
}
2274
+
1996
2275
  // src/cli/utils/glob-utils.ts
1997
2276
  init_cjs_shims();
1998
2277
  function isGlob(str) {
@@ -2043,6 +2322,7 @@ var loadEvalCommand = (program2, flagOverrides = {}) => {
2043
2322
  let testNamePattern;
2044
2323
  const isGlobPattern = isGlob(target);
2045
2324
  const { config: loadedConfig } = await loadConfig(".");
2325
+ validateFlagOverrides(flagOverrides, loadedConfig.eval.flagSchema);
2046
2326
  const config = {
2047
2327
  ...loadedConfig,
2048
2328
  eval: {
@@ -2057,7 +2337,7 @@ var loadEvalCommand = (program2, flagOverrides = {}) => {
2057
2337
  include = [target];
2058
2338
  } else {
2059
2339
  try {
2060
- const stat = (0, import_node_fs2.lstatSync)(target);
2340
+ const stat = (0, import_node_fs3.lstatSync)(target);
2061
2341
  if (stat.isDirectory()) {
2062
2342
  include = config?.eval?.include || [];
2063
2343
  } else {
@@ -2468,107 +2748,6 @@ function loadAuthCommand(program2) {
2468
2748
  loadAuthSwitchCommand(auth, program2);
2469
2749
  }
2470
2750
 
2471
- // src/cli/utils/parse-flag-overrides.ts
2472
- init_cjs_shims();
2473
- var import_zod5 = require("zod");
2474
- var import_node_fs3 = require("fs");
2475
- var import_node_path3 = require("path");
2476
- var FLAG_RE = /^--flag\.([^=]+)(?:=(.*))?$/;
2477
- var CONFIG_RE = /^--flags-config(?:=(.*))?$/;
2478
- function ensureNoSpaceSeparatedSyntax(flagName, value, nextToken, flagType) {
2479
- if (value === void 0 && nextToken !== void 0) {
2480
- if (flagType === "flag" && !nextToken.startsWith("-") && nextToken !== "true" && nextToken !== "false") {
2481
- console.error(`\u274C Invalid syntax: --flag.${flagName} ${nextToken}`);
2482
- console.error(`\u{1F4A1} Use: --flag.${flagName}=${nextToken}`);
2483
- process.exit(1);
2484
- } else if (flagType === "config" && !nextToken.startsWith("-")) {
2485
- console.error(`\u274C Invalid syntax: --flags-config ${nextToken}`);
2486
- console.error(`\u{1F4A1} Use: --flags-config=${nextToken}`);
2487
- process.exit(1);
2488
- }
2489
- }
2490
- }
2491
- function coerceValue(raw) {
2492
- if (raw === "true") return true;
2493
- if (raw === "false") return false;
2494
- const num = Number(raw);
2495
- if (!Number.isNaN(num) && raw.trim() === num.toString()) {
2496
- return num;
2497
- }
2498
- try {
2499
- return JSON.parse(raw);
2500
- } catch {
2501
- return raw;
2502
- }
2503
- }
2504
- function loadConfigFile(path3) {
2505
- const abs = (0, import_node_path3.resolve)(process.cwd(), path3);
2506
- try {
2507
- const contents = (0, import_node_fs3.readFileSync)(abs, "utf8");
2508
- const parsed = JSON.parse(contents);
2509
- if (typeof parsed !== "object" || Array.isArray(parsed) || parsed === null) {
2510
- console.error(
2511
- `\u274C Flags config must be a JSON object, got ${Array.isArray(parsed) ? "array" : typeof parsed}`
2512
- );
2513
- process.exit(1);
2514
- }
2515
- return parsed;
2516
- } catch (err) {
2517
- console.error(`\u274C Could not read or parse flags config "${path3}": ${err.message}`);
2518
- process.exit(1);
2519
- }
2520
- }
2521
- function extractOverrides(argv) {
2522
- const cleanedArgv2 = [];
2523
- const overrides2 = {};
2524
- let configPath = null;
2525
- let hasCliFlags = false;
2526
- let configPathCount = 0;
2527
- for (let i = 0; i < argv.length; i++) {
2528
- const token = argv[i];
2529
- const configMatch = token.match(CONFIG_RE);
2530
- const flagMatch = token.match(FLAG_RE);
2531
- if (configMatch) {
2532
- configPathCount++;
2533
- if (configPathCount > 1) {
2534
- console.error("\u274C Only one --flags-config can be supplied.");
2535
- process.exit(1);
2536
- }
2537
- const value = configMatch[1];
2538
- const nextToken = argv.length > i + 1 ? argv[i + 1] : void 0;
2539
- ensureNoSpaceSeparatedSyntax("flags-config", value, nextToken, "config");
2540
- if (!value) {
2541
- console.error("\u274C --flags-config requires a file path");
2542
- console.error("\u{1F4A1} Use: --flags-config=path/to/config.json");
2543
- process.exit(1);
2544
- }
2545
- configPath = value;
2546
- } else if (flagMatch) {
2547
- hasCliFlags = true;
2548
- const key = flagMatch[1];
2549
- const value = flagMatch[2];
2550
- const nextToken = argv.length > i + 1 ? argv[i + 1] : void 0;
2551
- ensureNoSpaceSeparatedSyntax(key, value, nextToken, "flag");
2552
- const finalValue = value === void 0 ? "true" : value;
2553
- overrides2[key] = coerceValue(finalValue);
2554
- } else {
2555
- cleanedArgv2.push(token);
2556
- }
2557
- }
2558
- if (configPath && hasCliFlags) {
2559
- console.error("\u274C Cannot use both --flags-config and --flag.* arguments together.");
2560
- console.error("Choose one approach:");
2561
- console.error(" \u2022 Config file: --flags-config=my-flags.json");
2562
- console.error(" \u2022 CLI flags: --flag.temperature=0.9 --flag.model=gpt-4o");
2563
- process.exit(1);
2564
- }
2565
- if (configPath) {
2566
- const configOverrides = loadConfigFile(configPath);
2567
- return { cleanedArgv: cleanedArgv2, overrides: configOverrides };
2568
- }
2569
- return { cleanedArgv: cleanedArgv2, overrides: overrides2 };
2570
- }
2571
-
2572
2751
  // src/bin.ts
2573
2752
  var import_env = __toESM(require("@next/env"), 1);
2574
2753
 
@@ -2578,7 +2757,7 @@ var import_commander2 = require("commander");
2578
2757
  var loadVersionCommand = (program2) => {
2579
2758
  return program2.addCommand(
2580
2759
  new import_commander2.Command("version").description("cli version").action(() => {
2581
- console.log("0.31.0");
2760
+ console.log("0.32.0");
2582
2761
  })
2583
2762
  );
2584
2763
  };
@@ -2588,7 +2767,7 @@ var { loadEnvConfig } = import_env.default;
2588
2767
  loadEnvConfig(process.cwd());
2589
2768
  var { cleanedArgv, overrides } = extractOverrides(process.argv.slice(2));
2590
2769
  var program = new import_commander3.Command();
2591
- program.name("axiom").description("Axiom's CLI to manage your objects and run evals").version("0.31.0");
2770
+ program.name("axiom").description("Axiom's CLI to manage your objects and run evals").version("0.32.0");
2592
2771
  program.hook("preAction", async (_, actionCommand) => {
2593
2772
  const commandName = actionCommand.name();
2594
2773
  const parentCommand = actionCommand.parent;