axiom 0.31.1 → 0.32.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/bin.cjs CHANGED
@@ -250,6 +250,101 @@ var import_zod3 = require("zod");
250
250
  // src/cli/utils/format-zod-errors.ts
251
251
  init_cjs_shims();
252
252
  var import_zod = require("zod");
253
/**
 * Render every issue of a Zod validation error as one bulleted line.
 *
 * Each issue's path is joined with "." and handed, together with the issue,
 * to `formatIssueMessage`; the resulting lines are joined with newlines.
 *
 * @param {{ issues: Array<{ path: Array<string|number> }> }} error - Error object exposing an `issues` array.
 * @returns {string} Bullet list, one line per issue ("" when there are no issues).
 */
function formatZodErrors(error) {
  return error.issues
    .map((entry) => {
      const dottedPath = entry.path.join(".");
      const text = formatIssueMessage(entry, dottedPath);
      return ` \u2022 ${text}`;
    })
    .join("\n");
}
263
/**
 * Turn a single validation issue into a human-readable message about a CLI flag.
 *
 * The function reads fields from two issue layouts: one that carries
 * `type` / `received` / `invalid_enum_value`, and one that carries
 * `origin` / `invalid_value`. Fields that are absent are simply left out of
 * the message instead of being printed as "undefined".
 *
 * @param {object} issue - Issue object; `issue.code` selects the message format.
 * @param {string} path3 - Dot-joined path of the offending flag.
 * @returns {string} Message describing the problem.
 */
function formatIssueMessage(issue, path3) {
  // Numeric bound issues are tagged via `type` or `origin`, so check both
  // for too_small AND too_big.
  const isNumeric = issue.type === "number" || issue.origin === "number";
  // Only mention the received value when the issue actually carries one.
  const gotSuffix = issue.received !== void 0 ? `, got ${issue.received}` : "";
  const fallback = `flag '${path3}': ${issue.message}`;
  switch (issue.code) {
    case "invalid_type": {
      if (issue.received === void 0) {
        // No `received` field: the issue's own message is the best description.
        return fallback;
      }
      return `flag '${path3}' expected ${issue.expected}, got ${JSON.stringify(issue.received)} (${typeof issue.received})`;
    }
    case "too_small": {
      if (isNumeric) {
        // An exclusive bound must not be reported as ">=".
        const op = issue.inclusive === false ? ">" : ">=";
        return `flag '${path3}' must be ${op} ${issue.minimum}${gotSuffix}`;
      }
      return `flag '${path3}' is too small: ${issue.message}`;
    }
    case "too_big": {
      if (isNumeric) {
        const op = issue.inclusive === false ? "<" : "<=";
        return `flag '${path3}' must be ${op} ${issue.maximum}${gotSuffix}`;
      }
      return `flag '${path3}' is too big: ${issue.message}`;
    }
    case "invalid_enum_value": {
      const options = issue.options.map((opt) => `"${opt}"`).join(", ");
      return `flag '${path3}' must be one of: ${options}, got "${issue.received}"`;
    }
    case "invalid_value": {
      if (issue.values && Array.isArray(issue.values)) {
        const values = issue.values.map((val) => `"${val}"`).join(", ");
        return `flag '${path3}' must be one of: ${values}`;
      }
      return fallback;
    }
    case "unrecognized_keys": {
      const keys = issue.keys || [];
      if (keys.length === 1) {
        return `unrecognized flag '${keys[0]}'`;
      }
      if (keys.length > 1) {
        const keysList = keys.map((key) => `'${key}'`).join(", ");
        return `unrecognized flags ${keysList}`;
      }
      return `unrecognized keys in flags`;
    }
    default:
      // Covers "custom" and every other issue code.
      return fallback;
  }
}
301
/**
 * Collect up to three distinct example invocations for the issues of a
 * validation error.
 *
 * Each issue is passed to `generateExampleForIssue` with its dot-joined path;
 * empty results and duplicates are dropped, first-seen order is kept.
 *
 * @param {{ issues: Array<{ path: Array<string|number> }> }} error - Error object exposing an `issues` array.
 * @returns {string[]} At most three example strings.
 */
function generateFlagExamples(error) {
  const unique = [];
  for (const entry of error.issues) {
    const suggestion = generateExampleForIssue(entry, entry.path.join("."));
    if (!suggestion || unique.includes(suggestion)) {
      continue;
    }
    unique.push(suggestion);
  }
  return unique.slice(0, 3);
}
312
/**
 * Suggest a corrected `--flag.<path>=<value>` invocation for one issue.
 *
 * @param {object} issue - Issue object; `issue.code` selects the kind of example.
 * @param {string} path3 - Dot-joined path of the offending flag.
 * @returns {string|null} Example command-line fragment, or null when no
 *   sensible example can be derived.
 */
function generateExampleForIssue(issue, path3) {
  const isNumberLike = (candidate) => typeof candidate === "number" || typeof candidate === "bigint";
  // For strings/arrays/etc. the bound is a length or size, not a valid flag
  // value, so only numeric bounds become an example. Issues that carry
  // neither `origin` nor `type` keep the previous behavior.
  const boundKind = issue.origin ?? issue.type;
  const isNumericBound = boundKind === void 0 || boundKind === "number" || boundKind === "int" || boundKind === "bigint";
  switch (issue.code) {
    case "invalid_type":
      if (issue.expected === "number") {
        return `--flag.${path3}=0.7`;
      }
      if (issue.expected === "boolean") {
        return `--flag.${path3}=true`;
      }
      if (issue.expected === "string") {
        return `--flag.${path3}="value"`;
      }
      break;
    case "too_small":
      if (isNumericBound && isNumberLike(issue.minimum)) {
        return `--flag.${path3}=${issue.minimum}`;
      }
      break;
    case "too_big":
      if (isNumericBound && isNumberLike(issue.maximum)) {
        return `--flag.${path3}=${issue.maximum}`;
      }
      break;
    case "invalid_enum_value":
      // Guard against a missing `options` list instead of throwing.
      if (Array.isArray(issue.options) && issue.options.length > 0) {
        return `--flag.${path3}=${issue.options[0]}`;
      }
      break;
    case "invalid_value":
      if (Array.isArray(issue.values) && issue.values.length > 0) {
        return `--flag.${path3}=${String(issue.values[0])}`;
      }
      break;
  }
  return null;
}
253
348
 
254
349
  // src/util/dot-path.ts
255
350
  init_cjs_shims();
@@ -288,6 +383,27 @@ function flattenObject(obj, prefix = "") {
288
383
  }
289
384
  return result;
290
385
  }
386
/**
 * Check whether a sequence of path segments addresses a field of an object
 * schema.
 *
 * Every segment must be an OWN key of the current schema's `shape`. For every
 * segment except the last, the field's schema is unwrapped through
 * `_def.innerType` / `_def.schema` and must then report `_def.type === "object"`
 * so the walk can descend into it.
 *
 * @param {object} schema - Schema exposing a `shape` object (presumably a Zod object schema — confirm against caller).
 * @param {string[]} segments - Path segments, outermost first.
 * @returns {boolean} True when the whole path resolves; an empty path is valid.
 */
function isValidPath(schema, segments) {
  let currentSchema = schema;
  for (let i = 0; i < segments.length; i++) {
    const segment = segments[i];
    const shape = currentSchema?.shape;
    // Own-property check: the `in` operator would also accept inherited names
    // such as "constructor" or "toString" as valid flag paths.
    if (!shape || !Object.prototype.hasOwnProperty.call(shape, segment)) {
      return false;
    }
    if (i === segments.length - 1) {
      // The final segment only has to exist; it may be of any type.
      break;
    }
    // Strip wrapper layers that expose `_def.innerType` or `_def.schema`
    // before looking for nested fields.
    let unwrappedSchema = shape[segment];
    while (unwrappedSchema?._def?.innerType || unwrappedSchema?._def?.schema) {
      unwrappedSchema = unwrappedSchema._def.innerType || unwrappedSchema._def.schema;
    }
    if (unwrappedSchema?._def?.type !== "object") {
      return false;
    }
    currentSchema = unwrappedSchema;
  }
  return true;
}
291
407
 
292
408
  // src/app-scope.ts
293
409
  var import_api8 = require("@opentelemetry/api");
@@ -332,7 +448,7 @@ var import_api4 = require("@opentelemetry/api");
332
448
  // package.json
333
449
  var package_default = {
334
450
  name: "axiom",
335
- version: "0.31.1",
451
+ version: "0.32.0",
336
452
  type: "module",
337
453
  author: "Axiom, Inc.",
338
454
  contributors: [
@@ -939,22 +1055,17 @@ function printSuiteBox({
939
1055
  const avg = scorerAverages[scorerName];
940
1056
  const paddedName = scorerName.padEnd(maxNameLength);
941
1057
  const hasAllErrors = allCasesErrored(scorerName);
942
- if (suite.baseline) {
943
- const baselineAvg = calculateBaselineScorerAverage2(suite.baseline, scorerName);
944
- if (baselineAvg !== null) {
945
- const currentPercent = hasAllErrors ? u.dim("N/A") : formatPercentage(avg);
946
- const baselinePercent = formatPercentage(baselineAvg);
947
- const { text: diffText, color: diffColor } = formatDiff(avg, baselineAvg);
948
- const paddedBaseline = baselinePercent.padStart(7);
949
- const paddedCurrent = hasAllErrors ? currentPercent : currentPercent.padStart(7);
950
- const paddedDiff = hasAllErrors ? u.dim("(all cases failed)") : diffText.padStart(8);
951
- logger(
952
- `\u2502 ${paddedName} ${u.blueBright(paddedBaseline)} \u2192 ${hasAllErrors ? paddedCurrent : u.magentaBright(paddedCurrent)} (${hasAllErrors ? paddedDiff : diffColor(paddedDiff)})`
953
- );
954
- } else {
955
- const currentPercent = hasAllErrors ? u.red("N/A (all cases failed)") : formatPercentage(avg);
956
- logger(`\u2502 \u2022 ${paddedName} ${currentPercent}`);
957
- }
1058
+ const baselineAvg = suite.baseline ? calculateBaselineScorerAverage2(suite.baseline, scorerName) : null;
1059
+ if (baselineAvg !== null) {
1060
+ const currentPercent = hasAllErrors ? u.dim("N/A") : formatPercentage(avg);
1061
+ const baselinePercent = formatPercentage(baselineAvg);
1062
+ const { text: diffText, color: diffColor } = formatDiff(avg, baselineAvg);
1063
+ const paddedBaseline = baselinePercent.padStart(7);
1064
+ const paddedCurrent = hasAllErrors ? currentPercent : currentPercent.padStart(7);
1065
+ const diffDisplay = hasAllErrors ? u.dim("all cases failed") : diffColor(diffText.padStart(8));
1066
+ logger(
1067
+ `\u2502 ${paddedName} ${u.blueBright(paddedBaseline)} \u2192 ${hasAllErrors ? paddedCurrent : u.magentaBright(paddedCurrent)} (${diffDisplay})`
1068
+ );
958
1069
  } else {
959
1070
  const currentPercent = hasAllErrors ? u.red("N/A (all cases failed)") : formatPercentage(avg);
960
1071
  logger(`\u2502 \u2022 ${paddedName} ${currentPercent}`);
@@ -969,14 +1080,16 @@ function printSuiteBox({
969
1080
  } else {
970
1081
  logger(`\u2502 Baseline: ${u.gray("(none)")}`);
971
1082
  }
972
- if (suite.baseline) {
973
- const hasConfigChanges = flagDiff.length > 0;
974
- logger("\u2502 Config changes:", hasConfigChanges ? "" : u.gray("(none)"));
975
- if (hasConfigChanges) {
976
- for (const { flag, current, baseline } of flagDiff) {
977
- logger(
978
- `\u2502 \u2022 ${flag}: ${current ?? "<not set>"} ${u.gray(`(baseline: ${baseline ?? "<not set>"})`)}`
979
- );
1083
+ const hasConfigChanges = flagDiff.length > 0;
1084
+ logger("\u2502 Config changes:", hasConfigChanges ? "" : u.gray("(none)"));
1085
+ if (hasConfigChanges) {
1086
+ for (const { flag, current, baseline, default: defaultVal } of flagDiff) {
1087
+ logger(`\u2502 \u2022 ${flag}: ${current ?? "<not set>"}`);
1088
+ if (defaultVal !== void 0) {
1089
+ logger(`\u2502 ${u.gray(`default: ${defaultVal}`)}`);
1090
+ }
1091
+ if (suite.baseline) {
1092
+ logger(`\u2502 ${u.gray(`baseline: ${baseline ?? "<not set>"}`)}`);
980
1093
  }
981
1094
  }
982
1095
  }
@@ -1026,25 +1139,38 @@ function calculateBaselineScorerAverage(baseline, scorerName) {
1026
1139
  return sum / scores.length;
1027
1140
  }
1028
1141
  function calculateFlagDiff(suite) {
1029
- if (!suite.baseline || !suite.configFlags || suite.configFlags.length === 0) {
1142
+ if (!suite.configFlags || suite.configFlags.length === 0) {
1030
1143
  return [];
1031
1144
  }
1032
1145
  const diffs = [];
1033
1146
  const currentConfig = suite.flagConfig || {};
1034
- const baselineConfig = suite.baseline.flagConfig || {};
1147
+ const baselineConfig = suite.baseline?.flagConfig || {};
1148
+ const defaultConfig = suite.defaultFlagConfig || {};
1035
1149
  const currentFlat = flattenObject(currentConfig);
1036
1150
  const baselineFlat = flattenObject(baselineConfig);
1037
- const allKeys = /* @__PURE__ */ new Set([...Object.keys(currentFlat), ...Object.keys(baselineFlat)]);
1151
+ const defaultFlat = flattenObject(defaultConfig);
1152
+ const allKeys = /* @__PURE__ */ new Set([
1153
+ ...Object.keys(currentFlat),
1154
+ ...Object.keys(baselineFlat),
1155
+ ...Object.keys(defaultFlat)
1156
+ ]);
1038
1157
  for (const key of allKeys) {
1039
1158
  const isInScope = suite.configFlags.some((pattern) => key.startsWith(pattern));
1040
1159
  if (!isInScope) continue;
1041
1160
  const currentValue = currentFlat[key];
1042
1161
  const baselineValue = baselineFlat[key];
1043
- if (JSON.stringify(currentValue) !== JSON.stringify(baselineValue)) {
1162
+ const defaultValue = defaultFlat[key];
1163
+ const currentStr = currentValue !== void 0 ? JSON.stringify(currentValue) : void 0;
1164
+ const baselineStr = baselineValue !== void 0 ? JSON.stringify(baselineValue) : void 0;
1165
+ const defaultStr = defaultValue !== void 0 ? JSON.stringify(defaultValue) : void 0;
1166
+ const diffFromBaseline = suite.baseline && currentStr !== baselineStr;
1167
+ const diffFromDefault = currentStr !== defaultStr;
1168
+ if (diffFromBaseline || diffFromDefault) {
1044
1169
  diffs.push({
1045
1170
  flag: key,
1046
- current: currentValue !== void 0 ? JSON.stringify(currentValue) : void 0,
1047
- baseline: baselineValue !== void 0 ? JSON.stringify(baselineValue) : void 0
1171
+ current: currentStr,
1172
+ baseline: suite.baseline ? baselineStr : void 0,
1173
+ default: defaultStr
1048
1174
  });
1049
1175
  }
1050
1176
  }
@@ -1062,7 +1188,7 @@ function printFinalReport({
1062
1188
  logger("");
1063
1189
  for (const suite of suiteData) {
1064
1190
  const scorerAverages = calculateScorerAverages(suite);
1065
- const flagDiff = suite.baseline ? calculateFlagDiff(suite) : [];
1191
+ const flagDiff = calculateFlagDiff(suite);
1066
1192
  printSuiteBox({ suite, scorerAverages, calculateBaselineScorerAverage, flagDiff, logger });
1067
1193
  logger("");
1068
1194
  }
@@ -1071,8 +1197,17 @@ function printFinalReport({
1071
1197
  const anyRegistered = registrationStatus.some((s2) => s2.registered);
1072
1198
  const anyFailed = registrationStatus.some((s2) => !s2.registered);
1073
1199
  if (anyRegistered && orgId && config?.consoleEndpointUrl) {
1074
- logger("View full report:");
1075
- logger(`${config.consoleEndpointUrl}/${orgId}/ai-engineering/evaluations?runId=${runId}`);
1200
+ if (suiteData.length === 1) {
1201
+ const suite = suiteData[0];
1202
+ const baselineParam = suite.baseline?.traceId ? `?baselineId=${suite.baseline.traceId}` : "";
1203
+ logger("View eval result:");
1204
+ logger(
1205
+ `${config.consoleEndpointUrl}/${orgId}/ai-engineering/evaluations/${suite.name}/${suite.version}${baselineParam}`
1206
+ );
1207
+ } else {
1208
+ logger("View full report:");
1209
+ logger(`${config.consoleEndpointUrl}/${orgId}/ai-engineering/evaluations?runId=${runId}`);
1210
+ }
1076
1211
  } else if (isDebug) {
1077
1212
  logger(u.dim("Results not uploaded to Axiom (debug mode)"));
1078
1213
  } else {
@@ -1181,13 +1316,16 @@ var AxiomReporter = class {
1181
1316
  const overridesFlat = flattenObject(overrides2);
1182
1317
  flagConfig = dotNotationToNested({ ...defaultsFlat, ...overridesFlat });
1183
1318
  }
1319
+ const defaultFlagConfig = meta.evaluation.configEnd?.flags;
1184
1320
  this._suiteData.push({
1321
+ version: meta.evaluation.version,
1185
1322
  name: meta.evaluation.name,
1186
1323
  file: relativePath,
1187
1324
  duration: durationSeconds + "s",
1188
1325
  baseline: suiteBaseline || null,
1189
1326
  configFlags: meta.evaluation.configFlags,
1190
1327
  flagConfig,
1328
+ defaultFlagConfig,
1191
1329
  runId: meta.evaluation.runId,
1192
1330
  orgId: meta.evaluation.orgId,
1193
1331
  cases,
@@ -1295,6 +1433,7 @@ var import_defu = require("defu");
1295
1433
 
1296
1434
  // src/config/index.ts
1297
1435
  init_cjs_shims();
1436
+ var import_zod5 = require("zod");
1298
1437
 
1299
1438
  // src/cli/auth/index.ts
1300
1439
  init_cjs_shims();
@@ -1646,6 +1785,7 @@ function createPartialDefaults() {
1646
1785
  orgId,
1647
1786
  token,
1648
1787
  dataset: process.env.AXIOM_DATASET,
1788
+ flagSchema: void 0,
1649
1789
  instrumentation: null,
1650
1790
  include: [...DEFAULT_EVAL_INCLUDE],
1651
1791
  exclude: [],
@@ -1689,6 +1829,9 @@ function customMerger(target, source) {
1689
1829
  if (source?.eval && "include" in source.eval) {
1690
1830
  merged.eval.include = source.eval.include;
1691
1831
  }
1832
+ if (source?.eval && "flagSchema" in source.eval) {
1833
+ merged.eval.flagSchema = source.eval.flagSchema;
1834
+ }
1692
1835
  return merged;
1693
1836
  }
1694
1837
  async function loadConfig(cwd = process.cwd()) {
@@ -1768,11 +1911,11 @@ function setupEvalProvider(connection) {
1768
1911
  axiomProvider = new import_sdk_trace_node.NodeTracerProvider({
1769
1912
  resource: (0, import_resources.resourceFromAttributes)({
1770
1913
  ["service.name"]: "axiom",
1771
- ["service.version"]: "0.31.1"
1914
+ ["service.version"]: "0.32.0"
1772
1915
  }),
1773
1916
  spanProcessors: [processor]
1774
1917
  });
1775
- axiomTracer = axiomProvider.getTracer("axiom", "0.31.1");
1918
+ axiomTracer = axiomProvider.getTracer("axiom", "0.32.0");
1776
1919
  }
1777
1920
  async function initInstrumentation(config) {
1778
1921
  if (initialized) {
@@ -1784,7 +1927,7 @@ async function initInstrumentation(config) {
1784
1927
  }
1785
1928
  initializationPromise = (async () => {
1786
1929
  if (!config.enabled) {
1787
- axiomTracer = import_api10.trace.getTracer("axiom", "0.31.1");
1930
+ axiomTracer = import_api10.trace.getTracer("axiom", "0.32.0");
1788
1931
  initialized = true;
1789
1932
  return;
1790
1933
  }
@@ -1880,8 +2023,9 @@ var runVitest = async (dir, opts) => {
1880
2023
  ...opts.config,
1881
2024
  eval: {
1882
2025
  ...opts.config.eval,
1883
- // function can't be serialized, so we need to remove it
1884
- instrumentation: null
2026
+ // These can't be serialized, so we need to remove them
2027
+ instrumentation: null,
2028
+ flagSchema: null
1885
2029
  }
1886
2030
  };
1887
2031
  if (opts.debug) {
@@ -1959,7 +2103,7 @@ var runVitest = async (dir, opts) => {
1959
2103
  };
1960
2104
 
1961
2105
  // src/cli/commands/eval.command.ts
1962
- var import_node_fs2 = require("fs");
2106
+ var import_node_fs3 = require("fs");
1963
2107
 
1964
2108
  // src/cli/utils/eval-context-runner.ts
1965
2109
  init_cjs_shims();
@@ -2001,6 +2145,133 @@ async function runEvalWithContext(overrides2, runFn) {
2001
2145
  });
2002
2146
  }
2003
2147
 
2148
+ // src/cli/utils/parse-flag-overrides.ts
2149
+ init_cjs_shims();
2150
+ var import_zod6 = require("zod");
2151
+ var import_node_fs2 = require("fs");
2152
+ var import_node_path3 = require("path");
2153
+ var FLAG_RE = /^--flag\.([^=]+)(?:=(.*))?$/;
2154
+ var CONFIG_RE = /^--flags-config(?:=(.*))?$/;
2155
/**
 * Reject `--flag.x value` / `--flags-config file` (space-separated) syntax.
 *
 * When the option carried no `=value` and the following token looks like the
 * intended value, two lines are printed to stderr (the offending syntax and
 * the suggested `=` form) and the process exits with status 1. For
 * `--flag.*`, a following "true"/"false" token is tolerated.
 *
 * @param {string} flagName - Flag name used in the message for `--flag.*` options.
 * @param {string|undefined} value - Value parsed from the `=` form, if any.
 * @param {string|undefined} nextToken - The argv token following the option, if any.
 * @param {"flag"|"config"} flagType - Which option kind is being checked.
 * @returns {void}
 */
function ensureNoSpaceSeparatedSyntax(flagName, value, nextToken, flagType) {
  if (value !== void 0 || nextToken === void 0) {
    return;
  }
  let report = null;
  switch (flagType) {
    case "flag":
      if (!nextToken.startsWith("-") && nextToken !== "true" && nextToken !== "false") {
        report = [
          `\u274C Invalid syntax: --flag.${flagName} ${nextToken}`,
          `\u{1F4A1} Use: --flag.${flagName}=${nextToken}`
        ];
      }
      break;
    case "config":
      if (!nextToken.startsWith("-")) {
        report = [
          `\u274C Invalid syntax: --flags-config ${nextToken}`,
          `\u{1F4A1} Use: --flags-config=${nextToken}`
        ];
      }
      break;
  }
  if (report === null) {
    return;
  }
  for (const line of report) {
    console.error(line);
  }
  process.exit(1);
}
2168
/**
 * Validate CLI flag overrides against the configured flag schema.
 *
 * Does nothing when no schema is configured or no overrides were given.
 * Otherwise every override path must pass `isValidPath`, and the nested
 * override object must parse against `flagSchema.strict().partial()`. On any
 * failure the problems (plus up to a few example invocations) are printed to
 * stderr and the process exits with status 1.
 *
 * @param {Record<string, unknown>} overrides2 - Overrides keyed by dot-notation path.
 * @param {object|undefined|null} flagSchema - Schema exposing `strict().partial().safeParse()`.
 * @returns {void}
 */
function validateFlagOverrides(overrides2, flagSchema) {
  const overridePaths = flagSchema ? Object.keys(overrides2) : [];
  if (overridePaths.length === 0) {
    return;
  }
  // Pass 1: every dotted path has to address a field the schema knows about.
  overridePaths.forEach((dotPath) => {
    const segments = parsePath(dotPath);
    if (isValidPath(flagSchema, segments)) {
      return;
    }
    console.error("\u274C Invalid CLI flags:");
    console.error(` \u2022 flag '${dotPath}': Invalid flag path`);
    process.exit(1);
  });
  // Pass 2: value validation of the nested override object.
  const nestedObject = dotNotationToNested(overrides2);
  const outcome = flagSchema.strict().partial().safeParse(nestedObject);
  if (outcome.success) {
    return;
  }
  console.error("\u274C Invalid CLI flags:");
  console.error(formatZodErrors(outcome.error));
  const suggestions = generateFlagExamples(outcome.error);
  if (suggestions.length > 0) {
    console.error("\n\u{1F4A1} Valid examples:");
    for (const suggestion of suggestions) {
      console.error(` ${suggestion}`);
    }
  }
  process.exit(1);
}
2194
/**
 * Convert the raw string value of a `--flag.*` option into a typed value.
 *
 * Order of attempts: the literals "true"/"false" become booleans; a string
 * whose trimmed form is exactly the canonical rendering of a FINITE number
 * becomes that number; anything `JSON.parse` accepts becomes the parsed
 * value; everything else stays a string.
 *
 * @param {string} raw - Text after the `=` sign.
 * @returns {*} Coerced value.
 */
function coerceValue(raw) {
  if (raw === "true") return true;
  if (raw === "false") return false;
  const num = Number(raw);
  // Number.isFinite rejects NaN and also ±Infinity: "Infinity" would otherwise
  // become a non-finite number, which JSON.stringify serializes as null.
  if (Number.isFinite(num) && raw.trim() === num.toString()) {
    return num;
  }
  try {
    return JSON.parse(raw);
  } catch {
    // Not JSON: treat the value as a plain string.
    return raw;
  }
}
2207
/**
 * Read a flags-config JSON file and return its parsed content.
 *
 * The path is resolved against the current working directory. If the file
 * cannot be read or parsed, or the top-level JSON value is not an object,
 * an error is printed to stderr and the process exits with status 1.
 *
 * @param {string} path3 - Path to the JSON file as given on the command line.
 * @returns {object} Parsed top-level JSON object.
 */
function loadConfigFile(path3) {
  const absolutePath = (0, import_node_path3.resolve)(process.cwd(), path3);
  try {
    const rawText = (0, import_node_fs2.readFileSync)(absolutePath, "utf8");
    const parsed = JSON.parse(rawText);
    const isObjectValue = parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
    if (!isObjectValue) {
      const actualKind = Array.isArray(parsed) ? "array" : typeof parsed;
      console.error(
        `\u274C Flags config must be a JSON object, got ${actualKind}`
      );
      process.exit(1);
    }
    return parsed;
  } catch (readError) {
    console.error(`\u274C Could not read or parse flags config "${path3}": ${readError.message}`);
    process.exit(1);
  }
}
2224
/**
 * Split raw CLI arguments into flag overrides and the remaining arguments.
 *
 * `--flag.<key>[=value]` tokens are collected as overrides (a missing value
 * means "true") and `--flags-config=<file>` selects a JSON file to load the
 * overrides from instead; all other tokens are passed through unchanged.
 * Misuse — more than one `--flags-config`, a missing file path,
 * space-separated values, or mixing both styles — prints an error to stderr
 * and exits with status 1.
 *
 * @param {string[]} argv - Command-line arguments (without node/script entries).
 * @returns {{ cleanedArgv: string[], overrides: object }} Remaining arguments and the overrides.
 */
function extractOverrides(argv) {
  const passthrough = [];
  const cliOverrides = {};
  let configPath = null;
  let sawCliFlag = false;
  let configOptionCount = 0;
  for (const [index, token] of argv.entries()) {
    const configMatch = token.match(CONFIG_RE);
    const flagMatch = token.match(FLAG_RE);
    const following = argv.length > index + 1 ? argv[index + 1] : void 0;
    if (configMatch) {
      configOptionCount += 1;
      if (configOptionCount > 1) {
        console.error("\u274C Only one --flags-config can be supplied.");
        process.exit(1);
      }
      const [, filePath] = configMatch;
      ensureNoSpaceSeparatedSyntax("flags-config", filePath, following, "config");
      if (!filePath) {
        console.error("\u274C --flags-config requires a file path");
        console.error("\u{1F4A1} Use: --flags-config=path/to/config.json");
        process.exit(1);
      }
      configPath = filePath;
      continue;
    }
    if (flagMatch) {
      sawCliFlag = true;
      const [, key, rawValue] = flagMatch;
      ensureNoSpaceSeparatedSyntax(key, rawValue, following, "flag");
      // A bare `--flag.x` is shorthand for `--flag.x=true`.
      cliOverrides[key] = coerceValue(rawValue === void 0 ? "true" : rawValue);
      continue;
    }
    passthrough.push(token);
  }
  if (configPath && sawCliFlag) {
    console.error("\u274C Cannot use both --flags-config and --flag.* arguments together.");
    console.error("Choose one approach:");
    console.error(" \u2022 Config file: --flags-config=my-flags.json");
    console.error(" \u2022 CLI flags: --flag.temperature=0.9 --flag.model=gpt-4o");
    process.exit(1);
  }
  const overrides2 = configPath ? loadConfigFile(configPath) : cliOverrides;
  return { cleanedArgv: passthrough, overrides: overrides2 };
}
2274
+
2004
2275
  // src/cli/utils/glob-utils.ts
2005
2276
  init_cjs_shims();
2006
2277
  function isGlob(str) {
@@ -2051,6 +2322,7 @@ var loadEvalCommand = (program2, flagOverrides = {}) => {
2051
2322
  let testNamePattern;
2052
2323
  const isGlobPattern = isGlob(target);
2053
2324
  const { config: loadedConfig } = await loadConfig(".");
2325
+ validateFlagOverrides(flagOverrides, loadedConfig.eval.flagSchema);
2054
2326
  const config = {
2055
2327
  ...loadedConfig,
2056
2328
  eval: {
@@ -2065,7 +2337,7 @@ var loadEvalCommand = (program2, flagOverrides = {}) => {
2065
2337
  include = [target];
2066
2338
  } else {
2067
2339
  try {
2068
- const stat = (0, import_node_fs2.lstatSync)(target);
2340
+ const stat = (0, import_node_fs3.lstatSync)(target);
2069
2341
  if (stat.isDirectory()) {
2070
2342
  include = config?.eval?.include || [];
2071
2343
  } else {
@@ -2476,107 +2748,6 @@ function loadAuthCommand(program2) {
2476
2748
  loadAuthSwitchCommand(auth, program2);
2477
2749
  }
2478
2750
 
2479
- // src/cli/utils/parse-flag-overrides.ts
2480
- init_cjs_shims();
2481
- var import_zod5 = require("zod");
2482
- var import_node_fs3 = require("fs");
2483
- var import_node_path3 = require("path");
2484
- var FLAG_RE = /^--flag\.([^=]+)(?:=(.*))?$/;
2485
- var CONFIG_RE = /^--flags-config(?:=(.*))?$/;
2486
- function ensureNoSpaceSeparatedSyntax(flagName, value, nextToken, flagType) {
2487
- if (value === void 0 && nextToken !== void 0) {
2488
- if (flagType === "flag" && !nextToken.startsWith("-") && nextToken !== "true" && nextToken !== "false") {
2489
- console.error(`\u274C Invalid syntax: --flag.${flagName} ${nextToken}`);
2490
- console.error(`\u{1F4A1} Use: --flag.${flagName}=${nextToken}`);
2491
- process.exit(1);
2492
- } else if (flagType === "config" && !nextToken.startsWith("-")) {
2493
- console.error(`\u274C Invalid syntax: --flags-config ${nextToken}`);
2494
- console.error(`\u{1F4A1} Use: --flags-config=${nextToken}`);
2495
- process.exit(1);
2496
- }
2497
- }
2498
- }
2499
- function coerceValue(raw) {
2500
- if (raw === "true") return true;
2501
- if (raw === "false") return false;
2502
- const num = Number(raw);
2503
- if (!Number.isNaN(num) && raw.trim() === num.toString()) {
2504
- return num;
2505
- }
2506
- try {
2507
- return JSON.parse(raw);
2508
- } catch {
2509
- return raw;
2510
- }
2511
- }
2512
- function loadConfigFile(path3) {
2513
- const abs = (0, import_node_path3.resolve)(process.cwd(), path3);
2514
- try {
2515
- const contents = (0, import_node_fs3.readFileSync)(abs, "utf8");
2516
- const parsed = JSON.parse(contents);
2517
- if (typeof parsed !== "object" || Array.isArray(parsed) || parsed === null) {
2518
- console.error(
2519
- `\u274C Flags config must be a JSON object, got ${Array.isArray(parsed) ? "array" : typeof parsed}`
2520
- );
2521
- process.exit(1);
2522
- }
2523
- return parsed;
2524
- } catch (err) {
2525
- console.error(`\u274C Could not read or parse flags config "${path3}": ${err.message}`);
2526
- process.exit(1);
2527
- }
2528
- }
2529
- function extractOverrides(argv) {
2530
- const cleanedArgv2 = [];
2531
- const overrides2 = {};
2532
- let configPath = null;
2533
- let hasCliFlags = false;
2534
- let configPathCount = 0;
2535
- for (let i = 0; i < argv.length; i++) {
2536
- const token = argv[i];
2537
- const configMatch = token.match(CONFIG_RE);
2538
- const flagMatch = token.match(FLAG_RE);
2539
- if (configMatch) {
2540
- configPathCount++;
2541
- if (configPathCount > 1) {
2542
- console.error("\u274C Only one --flags-config can be supplied.");
2543
- process.exit(1);
2544
- }
2545
- const value = configMatch[1];
2546
- const nextToken = argv.length > i + 1 ? argv[i + 1] : void 0;
2547
- ensureNoSpaceSeparatedSyntax("flags-config", value, nextToken, "config");
2548
- if (!value) {
2549
- console.error("\u274C --flags-config requires a file path");
2550
- console.error("\u{1F4A1} Use: --flags-config=path/to/config.json");
2551
- process.exit(1);
2552
- }
2553
- configPath = value;
2554
- } else if (flagMatch) {
2555
- hasCliFlags = true;
2556
- const key = flagMatch[1];
2557
- const value = flagMatch[2];
2558
- const nextToken = argv.length > i + 1 ? argv[i + 1] : void 0;
2559
- ensureNoSpaceSeparatedSyntax(key, value, nextToken, "flag");
2560
- const finalValue = value === void 0 ? "true" : value;
2561
- overrides2[key] = coerceValue(finalValue);
2562
- } else {
2563
- cleanedArgv2.push(token);
2564
- }
2565
- }
2566
- if (configPath && hasCliFlags) {
2567
- console.error("\u274C Cannot use both --flags-config and --flag.* arguments together.");
2568
- console.error("Choose one approach:");
2569
- console.error(" \u2022 Config file: --flags-config=my-flags.json");
2570
- console.error(" \u2022 CLI flags: --flag.temperature=0.9 --flag.model=gpt-4o");
2571
- process.exit(1);
2572
- }
2573
- if (configPath) {
2574
- const configOverrides = loadConfigFile(configPath);
2575
- return { cleanedArgv: cleanedArgv2, overrides: configOverrides };
2576
- }
2577
- return { cleanedArgv: cleanedArgv2, overrides: overrides2 };
2578
- }
2579
-
2580
2751
  // src/bin.ts
2581
2752
  var import_env = __toESM(require("@next/env"), 1);
2582
2753
 
@@ -2586,7 +2757,7 @@ var import_commander2 = require("commander");
2586
2757
  var loadVersionCommand = (program2) => {
2587
2758
  return program2.addCommand(
2588
2759
  new import_commander2.Command("version").description("cli version").action(() => {
2589
- console.log("0.31.1");
2760
+ console.log("0.32.0");
2590
2761
  })
2591
2762
  );
2592
2763
  };
@@ -2596,7 +2767,7 @@ var { loadEnvConfig } = import_env.default;
2596
2767
  loadEnvConfig(process.cwd());
2597
2768
  var { cleanedArgv, overrides } = extractOverrides(process.argv.slice(2));
2598
2769
  var program = new import_commander3.Command();
2599
- program.name("axiom").description("Axiom's CLI to manage your objects and run evals").version("0.31.1");
2770
+ program.name("axiom").description("Axiom's CLI to manage your objects and run evals").version("0.32.0");
2600
2771
  program.hook("preAction", async (_, actionCommand) => {
2601
2772
  const commandName = actionCommand.name();
2602
2773
  const parentCommand = actionCommand.parent;