axiom 0.31.0 → 0.32.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bin.cjs +352 -173
- package/dist/bin.cjs.map +1 -1
- package/dist/bin.js +12 -112
- package/dist/bin.js.map +1 -1
- package/dist/{chunk-DI3RSTOC.js → chunk-BKMC455O.js} +235 -69
- package/dist/chunk-BKMC455O.js.map +1 -0
- package/dist/{chunk-H4WXWPZO.js → chunk-C5XDEFQ5.js} +3 -1
- package/dist/chunk-C5XDEFQ5.js.map +1 -0
- package/dist/{chunk-JP7CME2X.js → chunk-RVLENV5V.js} +10 -3
- package/dist/chunk-RVLENV5V.js.map +1 -0
- package/dist/{config-DPgFsZ62.d.cts → config-Amf-IRuD.d.cts} +23 -0
- package/dist/{config-DPgFsZ62.d.ts → config-Amf-IRuD.d.ts} +23 -0
- package/dist/config.cjs +1 -0
- package/dist/config.cjs.map +1 -1
- package/dist/config.d.cts +2 -1
- package/dist/config.d.ts +2 -1
- package/dist/config.js +1 -1
- package/dist/evals.cjs +95 -48
- package/dist/evals.cjs.map +1 -1
- package/dist/evals.d.cts +29 -25
- package/dist/evals.d.ts +29 -25
- package/dist/evals.js +22 -12
- package/dist/evals.js.map +1 -1
- package/dist/index.cjs +5 -2
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +1 -1
- package/package.json +4 -3
- package/dist/chunk-DI3RSTOC.js.map +0 -1
- package/dist/chunk-H4WXWPZO.js.map +0 -1
- package/dist/chunk-JP7CME2X.js.map +0 -1
package/dist/bin.cjs
CHANGED
|
@@ -153,6 +153,7 @@ var import_node_path = require("path");
|
|
|
153
153
|
var import_node_fs = require("fs");
|
|
154
154
|
var import_node_os = require("os");
|
|
155
155
|
var import_node_path2 = __toESM(require("path"), 1);
|
|
156
|
+
var import_vite_tsconfig_paths = __toESM(require("vite-tsconfig-paths"), 1);
|
|
156
157
|
var import_node = require("vitest/node");
|
|
157
158
|
|
|
158
159
|
// src/evals/reporter.ts
|
|
@@ -249,6 +250,101 @@ var import_zod3 = require("zod");
|
|
|
249
250
|
// src/cli/utils/format-zod-errors.ts
|
|
250
251
|
init_cjs_shims();
|
|
251
252
|
var import_zod = require("zod");
|
|
253
|
+
/**
 * Render every issue of a validation error as a bulleted, newline-separated list.
 *
 * Each issue's `path` segments are joined with "." and handed, together with
 * the issue, to `formatIssueMessage`.
 *
 * @param {{ issues: Array<{ path: Array<string|number> }> }} error - Error whose issues are rendered.
 * @returns {string} One bullet line per issue; an empty string when there are no issues.
 */
function formatZodErrors(error) {
  return error.issues
    .map((entry) => {
      const dottedPath = entry.path.join(".");
      return ` \u2022 ${formatIssueMessage(entry, dottedPath)}`;
    })
    .join("\n");
}
|
|
263
|
+
/**
 * Turn a single validation issue into a human-readable message about a CLI flag.
 *
 * Bound issues (`too_small` / `too_big`) get a numeric "must be >= / <=" message
 * when the issue is tagged as numeric through either its `type` or its `origin`
 * field. `too_small` already accepted both tags; `too_big` now applies the same
 * test so the two bounds are reported consistently.
 *
 * @param {object} issue - Issue object; `issue.code` selects the message shape.
 * @param {string} path3 - Dotted flag path the issue refers to.
 * @returns {string} The formatted message (without bullet or indentation).
 */
function formatIssueMessage(issue, path3) {
  // An issue counts as numeric when either tag says so.
  const isNumericBound = issue.type === "number" || issue.origin === "number";
  // Shared fallback used by several codes and by unknown codes.
  const genericMessage = `flag '${path3}': ${issue.message}`;

  switch (issue.code) {
    case "invalid_type":
      return `flag '${path3}' expected ${issue.expected}, got ${JSON.stringify(issue.received)} (${typeof issue.received})`;

    case "too_small":
      if (isNumericBound) {
        return `flag '${path3}' must be >= ${issue.minimum}, got ${issue.received}`;
      }
      return `flag '${path3}' is too small: ${issue.message}`;

    case "too_big":
      if (isNumericBound) {
        return `flag '${path3}' must be <= ${issue.maximum}, got ${issue.received}`;
      }
      return `flag '${path3}' is too big: ${issue.message}`;

    // Braces keep the `const` declarations scoped to their own case.
    case "invalid_enum_value": {
      const options = issue.options.map((opt) => `"${opt}"`).join(", ");
      return `flag '${path3}' must be one of: ${options}, got "${issue.received}"`;
    }

    case "invalid_value": {
      if (issue.values && Array.isArray(issue.values)) {
        const values = issue.values.map((val) => `"${val}"`).join(", ");
        return `flag '${path3}' must be one of: ${values}`;
      }
      return genericMessage;
    }

    case "unrecognized_keys": {
      const keys = issue.keys || [];
      if (keys.length === 1) {
        return `unrecognized flag '${keys[0]}'`;
      }
      if (keys.length > 1) {
        const keysList = keys.map((key) => `'${key}'`).join(", ");
        return `unrecognized flags ${keysList}`;
      }
      return `unrecognized keys in flags`;
    }

    case "custom":
    default:
      return genericMessage;
  }
}
|
|
301
|
+
/**
 * Collect up to three distinct example CLI invocations for the issues of a
 * validation error.
 *
 * Each issue is passed to `generateExampleForIssue`; falsy results are ignored
 * and duplicates are kept only once, in first-seen order.
 *
 * @param {{ issues: Array<{ path: Array<string|number> }> }} error - Error whose issues are inspected.
 * @returns {string[]} At most three unique example strings.
 */
function generateFlagExamples(error) {
  // A Set preserves insertion order, so it dedupes without changing the order.
  const unique = new Set();
  for (const entry of error.issues) {
    const suggestion = generateExampleForIssue(entry, entry.path.join("."));
    if (suggestion) {
      unique.add(suggestion);
    }
  }
  return [...unique].slice(0, 3);
}
|
|
312
|
+
/**
 * Suggest one valid-looking `--flag.<path>=<value>` invocation for an issue.
 *
 * - `invalid_type`: a sample literal for an expected number, boolean or string.
 * - `too_small` / `too_big`: the bound itself, when it is a number or bigint.
 * - `invalid_enum_value`: the first listed option.
 * - `invalid_value`: the first listed value, stringified.
 *
 * @param {object} issue - Issue object; `issue.code` selects the suggestion.
 * @param {string} path3 - Dotted flag path used in the suggestion.
 * @returns {string|null} The example, or `null` when none can be derived.
 */
function generateExampleForIssue(issue, path3) {
  const { code } = issue;
  const isNumeric = (candidate) => typeof candidate === "number" || typeof candidate === "bigint";

  if (code === "invalid_type") {
    // Sample literal per expected primitive type.
    const samples = new Map([
      ["number", "0.7"],
      ["boolean", "true"],
      ["string", '"value"'],
    ]);
    const sample = samples.get(issue.expected);
    return sample === void 0 ? null : `--flag.${path3}=${sample}`;
  }

  if (code === "too_small") {
    return isNumeric(issue.minimum) ? `--flag.${path3}=${issue.minimum}` : null;
  }

  if (code === "too_big") {
    return isNumeric(issue.maximum) ? `--flag.${path3}=${issue.maximum}` : null;
  }

  if (code === "invalid_enum_value") {
    return issue.options.length > 0 ? `--flag.${path3}=${issue.options[0]}` : null;
  }

  if (code === "invalid_value") {
    const hasValues = Array.isArray(issue.values) && issue.values.length > 0;
    return hasValues ? `--flag.${path3}=${String(issue.values[0])}` : null;
  }

  return null;
}
|
|
252
348
|
|
|
253
349
|
// src/util/dot-path.ts
|
|
254
350
|
init_cjs_shims();
|
|
@@ -287,6 +383,27 @@ function flattenObject(obj, prefix = "") {
|
|
|
287
383
|
}
|
|
288
384
|
return result;
|
|
289
385
|
}
|
|
386
|
+
/**
 * Check that a list of path segments addresses a key that exists in a nested
 * object schema.
 *
 * Every segment must be an OWN key of the current level's `shape`. Inherited
 * keys such as `constructor` or `toString` are rejected (the previous `in`
 * test accepted them). For every segment except the last, the child schema is
 * unwrapped through `_def.innerType` / `_def.schema` and must report
 * `_def.type === "object"` so the walk can descend into its `shape`.
 *
 * @param {object} schema - Root schema exposing a `shape` map (presumably a Zod object schema — confirm against caller).
 * @param {string[]} segments - Path segments, outermost first.
 * @returns {boolean} `true` when every segment resolves; an empty segment list is trivially valid.
 */
function isValidPath(schema, segments) {
  const hasOwnKey = (target, key) => Object.prototype.hasOwnProperty.call(target, key);
  let currentSchema = schema;

  for (let i = 0; i < segments.length; i++) {
    const segment = segments[i];
    if (!currentSchema.shape || !hasOwnKey(currentSchema.shape, segment)) {
      return false;
    }

    // The final segment only needs to exist; intermediate ones must be objects.
    if (i === segments.length - 1) {
      continue;
    }

    // Peel wrapper schemas until the underlying schema is reached.
    let unwrappedSchema = currentSchema.shape[segment];
    while (unwrappedSchema?._def?.innerType || unwrappedSchema?._def?.schema) {
      unwrappedSchema = unwrappedSchema._def.innerType || unwrappedSchema._def.schema;
    }

    if (!unwrappedSchema || unwrappedSchema._def?.type !== "object") {
      return false;
    }
    currentSchema = unwrappedSchema;
  }

  return true;
}
|
|
290
407
|
|
|
291
408
|
// src/app-scope.ts
|
|
292
409
|
var import_api8 = require("@opentelemetry/api");
|
|
@@ -331,7 +448,7 @@ var import_api4 = require("@opentelemetry/api");
|
|
|
331
448
|
// package.json
|
|
332
449
|
var package_default = {
|
|
333
450
|
name: "axiom",
|
|
334
|
-
version: "0.
|
|
451
|
+
version: "0.32.0",
|
|
335
452
|
type: "module",
|
|
336
453
|
author: "Axiom, Inc.",
|
|
337
454
|
contributors: [
|
|
@@ -415,7 +532,8 @@ var package_default = {
|
|
|
415
532
|
defu: "^6.1.4",
|
|
416
533
|
handlebars: "^4.7.8",
|
|
417
534
|
nanoid: "^5.1.5",
|
|
418
|
-
open: "^10.1.0"
|
|
535
|
+
open: "^10.1.0",
|
|
536
|
+
"vite-tsconfig-paths": "^5.1.4"
|
|
419
537
|
},
|
|
420
538
|
peerDependencies: {
|
|
421
539
|
"@opentelemetry/api": "^1.9.0",
|
|
@@ -937,22 +1055,17 @@ function printSuiteBox({
|
|
|
937
1055
|
const avg = scorerAverages[scorerName];
|
|
938
1056
|
const paddedName = scorerName.padEnd(maxNameLength);
|
|
939
1057
|
const hasAllErrors = allCasesErrored(scorerName);
|
|
940
|
-
|
|
941
|
-
|
|
942
|
-
|
|
943
|
-
|
|
944
|
-
|
|
945
|
-
|
|
946
|
-
|
|
947
|
-
|
|
948
|
-
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
);
|
|
952
|
-
} else {
|
|
953
|
-
const currentPercent = hasAllErrors ? u.red("N/A (all cases failed)") : formatPercentage(avg);
|
|
954
|
-
logger(`\u2502 \u2022 ${paddedName} ${currentPercent}`);
|
|
955
|
-
}
|
|
1058
|
+
const baselineAvg = suite.baseline ? calculateBaselineScorerAverage2(suite.baseline, scorerName) : null;
|
|
1059
|
+
if (baselineAvg !== null) {
|
|
1060
|
+
const currentPercent = hasAllErrors ? u.dim("N/A") : formatPercentage(avg);
|
|
1061
|
+
const baselinePercent = formatPercentage(baselineAvg);
|
|
1062
|
+
const { text: diffText, color: diffColor } = formatDiff(avg, baselineAvg);
|
|
1063
|
+
const paddedBaseline = baselinePercent.padStart(7);
|
|
1064
|
+
const paddedCurrent = hasAllErrors ? currentPercent : currentPercent.padStart(7);
|
|
1065
|
+
const diffDisplay = hasAllErrors ? u.dim("all cases failed") : diffColor(diffText.padStart(8));
|
|
1066
|
+
logger(
|
|
1067
|
+
`\u2502 ${paddedName} ${u.blueBright(paddedBaseline)} \u2192 ${hasAllErrors ? paddedCurrent : u.magentaBright(paddedCurrent)} (${diffDisplay})`
|
|
1068
|
+
);
|
|
956
1069
|
} else {
|
|
957
1070
|
const currentPercent = hasAllErrors ? u.red("N/A (all cases failed)") : formatPercentage(avg);
|
|
958
1071
|
logger(`\u2502 \u2022 ${paddedName} ${currentPercent}`);
|
|
@@ -967,14 +1080,16 @@ function printSuiteBox({
|
|
|
967
1080
|
} else {
|
|
968
1081
|
logger(`\u2502 Baseline: ${u.gray("(none)")}`);
|
|
969
1082
|
}
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
|
|
1083
|
+
const hasConfigChanges = flagDiff.length > 0;
|
|
1084
|
+
logger("\u2502 Config changes:", hasConfigChanges ? "" : u.gray("(none)"));
|
|
1085
|
+
if (hasConfigChanges) {
|
|
1086
|
+
for (const { flag, current, baseline, default: defaultVal } of flagDiff) {
|
|
1087
|
+
logger(`\u2502 \u2022 ${flag}: ${current ?? "<not set>"}`);
|
|
1088
|
+
if (defaultVal !== void 0) {
|
|
1089
|
+
logger(`\u2502 ${u.gray(`default: ${defaultVal}`)}`);
|
|
1090
|
+
}
|
|
1091
|
+
if (suite.baseline) {
|
|
1092
|
+
logger(`\u2502 ${u.gray(`baseline: ${baseline ?? "<not set>"}`)}`);
|
|
978
1093
|
}
|
|
979
1094
|
}
|
|
980
1095
|
}
|
|
@@ -1024,25 +1139,38 @@ function calculateBaselineScorerAverage(baseline, scorerName) {
|
|
|
1024
1139
|
return sum / scores.length;
|
|
1025
1140
|
}
|
|
1026
1141
|
function calculateFlagDiff(suite) {
|
|
1027
|
-
if (!suite.
|
|
1142
|
+
if (!suite.configFlags || suite.configFlags.length === 0) {
|
|
1028
1143
|
return [];
|
|
1029
1144
|
}
|
|
1030
1145
|
const diffs = [];
|
|
1031
1146
|
const currentConfig = suite.flagConfig || {};
|
|
1032
|
-
const baselineConfig = suite.baseline
|
|
1147
|
+
const baselineConfig = suite.baseline?.flagConfig || {};
|
|
1148
|
+
const defaultConfig = suite.defaultFlagConfig || {};
|
|
1033
1149
|
const currentFlat = flattenObject(currentConfig);
|
|
1034
1150
|
const baselineFlat = flattenObject(baselineConfig);
|
|
1035
|
-
const
|
|
1151
|
+
const defaultFlat = flattenObject(defaultConfig);
|
|
1152
|
+
const allKeys = /* @__PURE__ */ new Set([
|
|
1153
|
+
...Object.keys(currentFlat),
|
|
1154
|
+
...Object.keys(baselineFlat),
|
|
1155
|
+
...Object.keys(defaultFlat)
|
|
1156
|
+
]);
|
|
1036
1157
|
for (const key of allKeys) {
|
|
1037
1158
|
const isInScope = suite.configFlags.some((pattern) => key.startsWith(pattern));
|
|
1038
1159
|
if (!isInScope) continue;
|
|
1039
1160
|
const currentValue = currentFlat[key];
|
|
1040
1161
|
const baselineValue = baselineFlat[key];
|
|
1041
|
-
|
|
1162
|
+
const defaultValue = defaultFlat[key];
|
|
1163
|
+
const currentStr = currentValue !== void 0 ? JSON.stringify(currentValue) : void 0;
|
|
1164
|
+
const baselineStr = baselineValue !== void 0 ? JSON.stringify(baselineValue) : void 0;
|
|
1165
|
+
const defaultStr = defaultValue !== void 0 ? JSON.stringify(defaultValue) : void 0;
|
|
1166
|
+
const diffFromBaseline = suite.baseline && currentStr !== baselineStr;
|
|
1167
|
+
const diffFromDefault = currentStr !== defaultStr;
|
|
1168
|
+
if (diffFromBaseline || diffFromDefault) {
|
|
1042
1169
|
diffs.push({
|
|
1043
1170
|
flag: key,
|
|
1044
|
-
current:
|
|
1045
|
-
baseline:
|
|
1171
|
+
current: currentStr,
|
|
1172
|
+
baseline: suite.baseline ? baselineStr : void 0,
|
|
1173
|
+
default: defaultStr
|
|
1046
1174
|
});
|
|
1047
1175
|
}
|
|
1048
1176
|
}
|
|
@@ -1060,7 +1188,7 @@ function printFinalReport({
|
|
|
1060
1188
|
logger("");
|
|
1061
1189
|
for (const suite of suiteData) {
|
|
1062
1190
|
const scorerAverages = calculateScorerAverages(suite);
|
|
1063
|
-
const flagDiff =
|
|
1191
|
+
const flagDiff = calculateFlagDiff(suite);
|
|
1064
1192
|
printSuiteBox({ suite, scorerAverages, calculateBaselineScorerAverage, flagDiff, logger });
|
|
1065
1193
|
logger("");
|
|
1066
1194
|
}
|
|
@@ -1069,8 +1197,17 @@ function printFinalReport({
|
|
|
1069
1197
|
const anyRegistered = registrationStatus.some((s2) => s2.registered);
|
|
1070
1198
|
const anyFailed = registrationStatus.some((s2) => !s2.registered);
|
|
1071
1199
|
if (anyRegistered && orgId && config?.consoleEndpointUrl) {
|
|
1072
|
-
|
|
1073
|
-
|
|
1200
|
+
if (suiteData.length === 1) {
|
|
1201
|
+
const suite = suiteData[0];
|
|
1202
|
+
const baselineParam = suite.baseline?.traceId ? `?baselineId=${suite.baseline.traceId}` : "";
|
|
1203
|
+
logger("View eval result:");
|
|
1204
|
+
logger(
|
|
1205
|
+
`${config.consoleEndpointUrl}/${orgId}/ai-engineering/evaluations/${suite.name}/${suite.version}${baselineParam}`
|
|
1206
|
+
);
|
|
1207
|
+
} else {
|
|
1208
|
+
logger("View full report:");
|
|
1209
|
+
logger(`${config.consoleEndpointUrl}/${orgId}/ai-engineering/evaluations?runId=${runId}`);
|
|
1210
|
+
}
|
|
1074
1211
|
} else if (isDebug) {
|
|
1075
1212
|
logger(u.dim("Results not uploaded to Axiom (debug mode)"));
|
|
1076
1213
|
} else {
|
|
@@ -1179,13 +1316,16 @@ var AxiomReporter = class {
|
|
|
1179
1316
|
const overridesFlat = flattenObject(overrides2);
|
|
1180
1317
|
flagConfig = dotNotationToNested({ ...defaultsFlat, ...overridesFlat });
|
|
1181
1318
|
}
|
|
1319
|
+
const defaultFlagConfig = meta.evaluation.configEnd?.flags;
|
|
1182
1320
|
this._suiteData.push({
|
|
1321
|
+
version: meta.evaluation.version,
|
|
1183
1322
|
name: meta.evaluation.name,
|
|
1184
1323
|
file: relativePath,
|
|
1185
1324
|
duration: durationSeconds + "s",
|
|
1186
1325
|
baseline: suiteBaseline || null,
|
|
1187
1326
|
configFlags: meta.evaluation.configFlags,
|
|
1188
1327
|
flagConfig,
|
|
1328
|
+
defaultFlagConfig,
|
|
1189
1329
|
runId: meta.evaluation.runId,
|
|
1190
1330
|
orgId: meta.evaluation.orgId,
|
|
1191
1331
|
cases,
|
|
@@ -1293,6 +1433,7 @@ var import_defu = require("defu");
|
|
|
1293
1433
|
|
|
1294
1434
|
// src/config/index.ts
|
|
1295
1435
|
init_cjs_shims();
|
|
1436
|
+
var import_zod5 = require("zod");
|
|
1296
1437
|
|
|
1297
1438
|
// src/cli/auth/index.ts
|
|
1298
1439
|
init_cjs_shims();
|
|
@@ -1644,6 +1785,7 @@ function createPartialDefaults() {
|
|
|
1644
1785
|
orgId,
|
|
1645
1786
|
token,
|
|
1646
1787
|
dataset: process.env.AXIOM_DATASET,
|
|
1788
|
+
flagSchema: void 0,
|
|
1647
1789
|
instrumentation: null,
|
|
1648
1790
|
include: [...DEFAULT_EVAL_INCLUDE],
|
|
1649
1791
|
exclude: [],
|
|
@@ -1687,6 +1829,9 @@ function customMerger(target, source) {
|
|
|
1687
1829
|
if (source?.eval && "include" in source.eval) {
|
|
1688
1830
|
merged.eval.include = source.eval.include;
|
|
1689
1831
|
}
|
|
1832
|
+
if (source?.eval && "flagSchema" in source.eval) {
|
|
1833
|
+
merged.eval.flagSchema = source.eval.flagSchema;
|
|
1834
|
+
}
|
|
1690
1835
|
return merged;
|
|
1691
1836
|
}
|
|
1692
1837
|
async function loadConfig(cwd = process.cwd()) {
|
|
@@ -1766,11 +1911,11 @@ function setupEvalProvider(connection) {
|
|
|
1766
1911
|
axiomProvider = new import_sdk_trace_node.NodeTracerProvider({
|
|
1767
1912
|
resource: (0, import_resources.resourceFromAttributes)({
|
|
1768
1913
|
["service.name"]: "axiom",
|
|
1769
|
-
["service.version"]: "0.
|
|
1914
|
+
["service.version"]: "0.32.0"
|
|
1770
1915
|
}),
|
|
1771
1916
|
spanProcessors: [processor]
|
|
1772
1917
|
});
|
|
1773
|
-
axiomTracer = axiomProvider.getTracer("axiom", "0.
|
|
1918
|
+
axiomTracer = axiomProvider.getTracer("axiom", "0.32.0");
|
|
1774
1919
|
}
|
|
1775
1920
|
async function initInstrumentation(config) {
|
|
1776
1921
|
if (initialized) {
|
|
@@ -1782,7 +1927,7 @@ async function initInstrumentation(config) {
|
|
|
1782
1927
|
}
|
|
1783
1928
|
initializationPromise = (async () => {
|
|
1784
1929
|
if (!config.enabled) {
|
|
1785
|
-
axiomTracer = import_api10.trace.getTracer("axiom", "0.
|
|
1930
|
+
axiomTracer = import_api10.trace.getTracer("axiom", "0.32.0");
|
|
1786
1931
|
initialized = true;
|
|
1787
1932
|
return;
|
|
1788
1933
|
}
|
|
@@ -1878,8 +2023,9 @@ var runVitest = async (dir, opts) => {
|
|
|
1878
2023
|
...opts.config,
|
|
1879
2024
|
eval: {
|
|
1880
2025
|
...opts.config.eval,
|
|
1881
|
-
//
|
|
1882
|
-
instrumentation: null
|
|
2026
|
+
// These can't be serialized, so we need to remove them
|
|
2027
|
+
instrumentation: null,
|
|
2028
|
+
flagSchema: null
|
|
1883
2029
|
}
|
|
1884
2030
|
};
|
|
1885
2031
|
if (opts.debug) {
|
|
@@ -1898,35 +2044,41 @@ var runVitest = async (dir, opts) => {
|
|
|
1898
2044
|
if (opts.list) {
|
|
1899
2045
|
console.log(u.bgWhite(u.blackBright(" List mode ")));
|
|
1900
2046
|
}
|
|
1901
|
-
const vi = await (0, import_node.createVitest)(
|
|
1902
|
-
|
|
1903
|
-
|
|
1904
|
-
|
|
1905
|
-
|
|
1906
|
-
|
|
1907
|
-
|
|
1908
|
-
|
|
1909
|
-
|
|
1910
|
-
|
|
1911
|
-
|
|
1912
|
-
|
|
1913
|
-
|
|
1914
|
-
|
|
1915
|
-
|
|
1916
|
-
|
|
1917
|
-
|
|
1918
|
-
|
|
1919
|
-
|
|
1920
|
-
|
|
1921
|
-
|
|
1922
|
-
|
|
1923
|
-
|
|
1924
|
-
|
|
1925
|
-
|
|
1926
|
-
|
|
1927
|
-
|
|
2047
|
+
const vi = await (0, import_node.createVitest)(
|
|
2048
|
+
"test",
|
|
2049
|
+
{
|
|
2050
|
+
root: dir ? dir : process.cwd(),
|
|
2051
|
+
mode: "test",
|
|
2052
|
+
include: opts.include,
|
|
2053
|
+
exclude: opts.exclude,
|
|
2054
|
+
testNamePattern: opts.testNamePattern,
|
|
2055
|
+
reporters: ["verbose", new AxiomReporter()],
|
|
2056
|
+
environment: "node",
|
|
2057
|
+
browser: void 0,
|
|
2058
|
+
watch: opts.watch,
|
|
2059
|
+
setupFiles: [],
|
|
2060
|
+
// ignore user vitest.config.ts etc
|
|
2061
|
+
name: "axiom:eval",
|
|
2062
|
+
printConsoleTrace: true,
|
|
2063
|
+
silent: false,
|
|
2064
|
+
disableConsoleIntercept: true,
|
|
2065
|
+
testTimeout: opts.config?.eval?.timeoutMs || 6e4,
|
|
2066
|
+
globals: true,
|
|
2067
|
+
runner: (0, import_node_path.resolve)(__dirname, "evals", "custom-runner.js"),
|
|
2068
|
+
provide: {
|
|
2069
|
+
baseline: opts.baseline,
|
|
2070
|
+
debug: opts.debug,
|
|
2071
|
+
list: opts.list,
|
|
2072
|
+
overrides: opts.overrides,
|
|
2073
|
+
axiomConfig: providedConfig,
|
|
2074
|
+
runId: opts.runId,
|
|
2075
|
+
consoleUrl: opts.consoleUrl
|
|
2076
|
+
}
|
|
2077
|
+
},
|
|
2078
|
+
{
|
|
2079
|
+
plugins: [(0, import_vite_tsconfig_paths.default)({ root: dir || process.cwd() })]
|
|
1928
2080
|
}
|
|
1929
|
-
|
|
2081
|
+
);
|
|
1930
2082
|
if (opts.list) {
|
|
1931
2083
|
const result = await vi.collect();
|
|
1932
2084
|
printCollectedEvals(result, dir || process.cwd());
|
|
@@ -1951,7 +2103,7 @@ var runVitest = async (dir, opts) => {
|
|
|
1951
2103
|
};
|
|
1952
2104
|
|
|
1953
2105
|
// src/cli/commands/eval.command.ts
|
|
1954
|
-
var
|
|
2106
|
+
var import_node_fs3 = require("fs");
|
|
1955
2107
|
|
|
1956
2108
|
// src/cli/utils/eval-context-runner.ts
|
|
1957
2109
|
init_cjs_shims();
|
|
@@ -1993,6 +2145,133 @@ async function runEvalWithContext(overrides2, runFn) {
|
|
|
1993
2145
|
});
|
|
1994
2146
|
}
|
|
1995
2147
|
|
|
2148
|
+
// src/cli/utils/parse-flag-overrides.ts
|
|
2149
|
+
init_cjs_shims();
|
|
2150
|
+
var import_zod6 = require("zod");
|
|
2151
|
+
var import_node_fs2 = require("fs");
|
|
2152
|
+
var import_node_path3 = require("path");
|
|
2153
|
+
var FLAG_RE = /^--flag\.([^=]+)(?:=(.*))?$/;
|
|
2154
|
+
var CONFIG_RE = /^--flags-config(?:=(.*))?$/;
|
|
2155
|
+
/**
 * Reject the space-separated form `--flag.name value` / `--flags-config path`.
 *
 * Only applies when the flag itself carried no `=value` and a following token
 * exists. For `"flag"`, a following token that starts with "-" or is the
 * literal `true` / `false` is tolerated. For `"config"`, only a following
 * token that starts with "-" is tolerated. Otherwise an error and a hint are
 * printed and the process exits with status 1.
 *
 * @param {string} flagName - Name shown in the error for `"flag"` usage.
 * @param {string|undefined} value - Value captured after `=`, if any.
 * @param {string|undefined} nextToken - The argv token following the flag, if any.
 * @param {"flag"|"config"} flagType - Which syntax is being checked.
 * @returns {void}
 */
function ensureNoSpaceSeparatedSyntax(flagName, value, nextToken, flagType) {
  // Nothing to check when a value was given inline or no token follows.
  if (value !== void 0 || nextToken === void 0) {
    return;
  }

  switch (flagType) {
    case "flag": {
      const tolerated = nextToken.startsWith("-") || nextToken === "true" || nextToken === "false";
      if (!tolerated) {
        console.error(`\u274C Invalid syntax: --flag.${flagName} ${nextToken}`);
        console.error(`\u{1F4A1} Use: --flag.${flagName}=${nextToken}`);
        process.exit(1);
      }
      break;
    }
    case "config": {
      if (!nextToken.startsWith("-")) {
        console.error(`\u274C Invalid syntax: --flags-config ${nextToken}`);
        console.error(`\u{1F4A1} Use: --flags-config=${nextToken}`);
        process.exit(1);
      }
      break;
    }
    default:
      break;
  }
}
|
|
2168
|
+
/**
 * Validate CLI flag overrides against the configured flag schema.
 *
 * Does nothing when no schema is configured or no overrides were supplied.
 * Otherwise every dotted key must resolve to a path in the schema, and the
 * nested form of the overrides must pass `schema.strict().partial()`. On any
 * failure the problems (and, where available, example invocations) are
 * printed to stderr and the process exits with status 1.
 *
 * @param {Record<string, unknown>} overrides2 - Overrides keyed by dotted flag path.
 * @param {object|null|undefined} flagSchema - Schema to validate against.
 * @returns {void}
 */
function validateFlagOverrides(overrides2, flagSchema) {
  if (!flagSchema || Object.keys(overrides2).length === 0) {
    return;
  }

  // First pass: every dotted key must point at a real path in the schema.
  Object.keys(overrides2).forEach((dotPath) => {
    if (isValidPath(flagSchema, parsePath(dotPath))) {
      return;
    }
    console.error("\u274C Invalid CLI flags:");
    console.error(` \u2022 flag '${dotPath}': Invalid flag path`);
    process.exit(1);
  });

  // Second pass: validate the values themselves.
  const outcome = flagSchema.strict().partial().safeParse(dotNotationToNested(overrides2));
  if (outcome.success) {
    return;
  }

  console.error("\u274C Invalid CLI flags:");
  console.error(formatZodErrors(outcome.error));
  const suggestions = generateFlagExamples(outcome.error);
  if (suggestions.length > 0) {
    console.error("\n\u{1F4A1} Valid examples:");
    for (const example of suggestions) {
      console.error(` ${example}`);
    }
  }
  process.exit(1);
}
|
|
2194
|
+
/**
 * Convert a raw CLI string into the most specific JavaScript value.
 *
 * Order of attempts:
 *   1. the literals `true` / `false` become booleans;
 *   2. a string whose trimmed form equals the canonical text of its numeric
 *      value becomes that number;
 *   3. anything `JSON.parse` accepts becomes the parsed value;
 *   4. otherwise the raw string is returned unchanged.
 *
 * @param {string} raw - Text that followed `=` on the command line.
 * @returns {unknown} The coerced value.
 */
function coerceValue(raw) {
  switch (raw) {
    case "true":
      return true;
    case "false":
      return false;
    default:
      break;
  }

  const numeric = Number(raw);
  // Round-trip check: only accept input already in canonical numeric form.
  if (!Number.isNaN(numeric) && raw.trim() === numeric.toString()) {
    return numeric;
  }

  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch {
    // Not JSON: keep the text exactly as given.
    return raw;
  }
  return parsed;
}
|
|
2207
|
+
/**
 * Read a flags config file and return its contents as an object.
 *
 * The path is resolved against the current working directory and the file is
 * read as UTF-8 and parsed as JSON. The parsed value must be a non-null,
 * non-array object. Any failure is reported on stderr and the process exits
 * with status 1.
 *
 * @param {string} path3 - Path to the JSON file, as given on the command line.
 * @returns {object} The parsed config object.
 */
function loadConfigFile(path3) {
  const absolutePath = (0, import_node_path3.resolve)(process.cwd(), path3);
  try {
    const fileText = (0, import_node_fs2.readFileSync)(absolutePath, "utf8");
    const parsed = JSON.parse(fileText);
    const isPlainObject = typeof parsed === "object" && parsed !== null && !Array.isArray(parsed);
    if (!isPlainObject) {
      // Arrays are named explicitly; everything else reports its typeof.
      const actualKind = Array.isArray(parsed) ? "array" : typeof parsed;
      console.error(
        `\u274C Flags config must be a JSON object, got ${actualKind}`
      );
      process.exit(1);
    }
    return parsed;
  } catch (err) {
    console.error(`\u274C Could not read or parse flags config "${path3}": ${err.message}`);
    process.exit(1);
  }
}
|
|
2224
|
+
/**
 * Split argv into flag overrides and the remaining arguments.
 *
 * Tokens matching `CONFIG_RE` (`--flags-config[=path]`) select a config file;
 * tokens matching `FLAG_RE` (`--flag.<key>[=value]`) become overrides, with a
 * missing value treated as `"true"` before coercion. All other tokens are
 * passed through untouched. Supplying more than one config flag, a config
 * flag without a path, or both styles together prints an error and exits with
 * status 1.
 *
 * @param {string[]} argv - Command-line tokens (without node and script path).
 * @returns {{ cleanedArgv: string[], overrides: object }} Remaining tokens and
 *   the overrides, taken from the config file when one was given and from the
 *   `--flag.*` tokens otherwise.
 */
function extractOverrides(argv) {
  const passthrough = [];
  const cliOverrides = {};
  let configFile = null;
  let sawCliFlag = false;
  let configFlagsSeen = 0;

  for (const [index, token] of argv.entries()) {
    const configMatch = token.match(CONFIG_RE);
    const flagMatch = token.match(FLAG_RE);

    if (!configMatch && !flagMatch) {
      passthrough.push(token);
      continue;
    }

    // `undefined` when this is the last token.
    const following = argv[index + 1];

    if (configMatch) {
      configFlagsSeen += 1;
      if (configFlagsSeen > 1) {
        console.error("\u274C Only one --flags-config can be supplied.");
        process.exit(1);
      }
      const filePath = configMatch[1];
      ensureNoSpaceSeparatedSyntax("flags-config", filePath, following, "config");
      if (!filePath) {
        console.error("\u274C --flags-config requires a file path");
        console.error("\u{1F4A1} Use: --flags-config=path/to/config.json");
        process.exit(1);
      }
      configFile = filePath;
      continue;
    }

    sawCliFlag = true;
    const key = flagMatch[1];
    const rawValue = flagMatch[2];
    ensureNoSpaceSeparatedSyntax(key, rawValue, following, "flag");
    // A bare `--flag.key` is shorthand for `--flag.key=true`.
    cliOverrides[key] = coerceValue(rawValue === void 0 ? "true" : rawValue);
  }

  if (configFile && sawCliFlag) {
    console.error("\u274C Cannot use both --flags-config and --flag.* arguments together.");
    console.error("Choose one approach:");
    console.error(" \u2022 Config file: --flags-config=my-flags.json");
    console.error(" \u2022 CLI flags: --flag.temperature=0.9 --flag.model=gpt-4o");
    process.exit(1);
  }

  const overrides2 = configFile ? loadConfigFile(configFile) : cliOverrides;
  return { cleanedArgv: passthrough, overrides: overrides2 };
}
|
|
2274
|
+
|
|
1996
2275
|
// src/cli/utils/glob-utils.ts
|
|
1997
2276
|
init_cjs_shims();
|
|
1998
2277
|
function isGlob(str) {
|
|
@@ -2043,6 +2322,7 @@ var loadEvalCommand = (program2, flagOverrides = {}) => {
|
|
|
2043
2322
|
let testNamePattern;
|
|
2044
2323
|
const isGlobPattern = isGlob(target);
|
|
2045
2324
|
const { config: loadedConfig } = await loadConfig(".");
|
|
2325
|
+
validateFlagOverrides(flagOverrides, loadedConfig.eval.flagSchema);
|
|
2046
2326
|
const config = {
|
|
2047
2327
|
...loadedConfig,
|
|
2048
2328
|
eval: {
|
|
@@ -2057,7 +2337,7 @@ var loadEvalCommand = (program2, flagOverrides = {}) => {
|
|
|
2057
2337
|
include = [target];
|
|
2058
2338
|
} else {
|
|
2059
2339
|
try {
|
|
2060
|
-
const stat = (0,
|
|
2340
|
+
const stat = (0, import_node_fs3.lstatSync)(target);
|
|
2061
2341
|
if (stat.isDirectory()) {
|
|
2062
2342
|
include = config?.eval?.include || [];
|
|
2063
2343
|
} else {
|
|
@@ -2468,107 +2748,6 @@ function loadAuthCommand(program2) {
|
|
|
2468
2748
|
loadAuthSwitchCommand(auth, program2);
|
|
2469
2749
|
}
|
|
2470
2750
|
|
|
2471
|
-
// src/cli/utils/parse-flag-overrides.ts
|
|
2472
|
-
init_cjs_shims();
|
|
2473
|
-
var import_zod5 = require("zod");
|
|
2474
|
-
var import_node_fs3 = require("fs");
|
|
2475
|
-
var import_node_path3 = require("path");
|
|
2476
|
-
var FLAG_RE = /^--flag\.([^=]+)(?:=(.*))?$/;
|
|
2477
|
-
var CONFIG_RE = /^--flags-config(?:=(.*))?$/;
|
|
2478
|
-
function ensureNoSpaceSeparatedSyntax(flagName, value, nextToken, flagType) {
|
|
2479
|
-
if (value === void 0 && nextToken !== void 0) {
|
|
2480
|
-
if (flagType === "flag" && !nextToken.startsWith("-") && nextToken !== "true" && nextToken !== "false") {
|
|
2481
|
-
console.error(`\u274C Invalid syntax: --flag.${flagName} ${nextToken}`);
|
|
2482
|
-
console.error(`\u{1F4A1} Use: --flag.${flagName}=${nextToken}`);
|
|
2483
|
-
process.exit(1);
|
|
2484
|
-
} else if (flagType === "config" && !nextToken.startsWith("-")) {
|
|
2485
|
-
console.error(`\u274C Invalid syntax: --flags-config ${nextToken}`);
|
|
2486
|
-
console.error(`\u{1F4A1} Use: --flags-config=${nextToken}`);
|
|
2487
|
-
process.exit(1);
|
|
2488
|
-
}
|
|
2489
|
-
}
|
|
2490
|
-
}
|
|
2491
|
-
function coerceValue(raw) {
|
|
2492
|
-
if (raw === "true") return true;
|
|
2493
|
-
if (raw === "false") return false;
|
|
2494
|
-
const num = Number(raw);
|
|
2495
|
-
if (!Number.isNaN(num) && raw.trim() === num.toString()) {
|
|
2496
|
-
return num;
|
|
2497
|
-
}
|
|
2498
|
-
try {
|
|
2499
|
-
return JSON.parse(raw);
|
|
2500
|
-
} catch {
|
|
2501
|
-
return raw;
|
|
2502
|
-
}
|
|
2503
|
-
}
|
|
2504
|
-
function loadConfigFile(path3) {
|
|
2505
|
-
const abs = (0, import_node_path3.resolve)(process.cwd(), path3);
|
|
2506
|
-
try {
|
|
2507
|
-
const contents = (0, import_node_fs3.readFileSync)(abs, "utf8");
|
|
2508
|
-
const parsed = JSON.parse(contents);
|
|
2509
|
-
if (typeof parsed !== "object" || Array.isArray(parsed) || parsed === null) {
|
|
2510
|
-
console.error(
|
|
2511
|
-
`\u274C Flags config must be a JSON object, got ${Array.isArray(parsed) ? "array" : typeof parsed}`
|
|
2512
|
-
);
|
|
2513
|
-
process.exit(1);
|
|
2514
|
-
}
|
|
2515
|
-
return parsed;
|
|
2516
|
-
} catch (err) {
|
|
2517
|
-
console.error(`\u274C Could not read or parse flags config "${path3}": ${err.message}`);
|
|
2518
|
-
process.exit(1);
|
|
2519
|
-
}
|
|
2520
|
-
}
|
|
2521
|
-
function extractOverrides(argv) {
|
|
2522
|
-
const cleanedArgv2 = [];
|
|
2523
|
-
const overrides2 = {};
|
|
2524
|
-
let configPath = null;
|
|
2525
|
-
let hasCliFlags = false;
|
|
2526
|
-
let configPathCount = 0;
|
|
2527
|
-
for (let i = 0; i < argv.length; i++) {
|
|
2528
|
-
const token = argv[i];
|
|
2529
|
-
const configMatch = token.match(CONFIG_RE);
|
|
2530
|
-
const flagMatch = token.match(FLAG_RE);
|
|
2531
|
-
if (configMatch) {
|
|
2532
|
-
configPathCount++;
|
|
2533
|
-
if (configPathCount > 1) {
|
|
2534
|
-
console.error("\u274C Only one --flags-config can be supplied.");
|
|
2535
|
-
process.exit(1);
|
|
2536
|
-
}
|
|
2537
|
-
const value = configMatch[1];
|
|
2538
|
-
const nextToken = argv.length > i + 1 ? argv[i + 1] : void 0;
|
|
2539
|
-
ensureNoSpaceSeparatedSyntax("flags-config", value, nextToken, "config");
|
|
2540
|
-
if (!value) {
|
|
2541
|
-
console.error("\u274C --flags-config requires a file path");
|
|
2542
|
-
console.error("\u{1F4A1} Use: --flags-config=path/to/config.json");
|
|
2543
|
-
process.exit(1);
|
|
2544
|
-
}
|
|
2545
|
-
configPath = value;
|
|
2546
|
-
} else if (flagMatch) {
|
|
2547
|
-
hasCliFlags = true;
|
|
2548
|
-
const key = flagMatch[1];
|
|
2549
|
-
const value = flagMatch[2];
|
|
2550
|
-
const nextToken = argv.length > i + 1 ? argv[i + 1] : void 0;
|
|
2551
|
-
ensureNoSpaceSeparatedSyntax(key, value, nextToken, "flag");
|
|
2552
|
-
const finalValue = value === void 0 ? "true" : value;
|
|
2553
|
-
overrides2[key] = coerceValue(finalValue);
|
|
2554
|
-
} else {
|
|
2555
|
-
cleanedArgv2.push(token);
|
|
2556
|
-
}
|
|
2557
|
-
}
|
|
2558
|
-
if (configPath && hasCliFlags) {
|
|
2559
|
-
console.error("\u274C Cannot use both --flags-config and --flag.* arguments together.");
|
|
2560
|
-
console.error("Choose one approach:");
|
|
2561
|
-
console.error(" \u2022 Config file: --flags-config=my-flags.json");
|
|
2562
|
-
console.error(" \u2022 CLI flags: --flag.temperature=0.9 --flag.model=gpt-4o");
|
|
2563
|
-
process.exit(1);
|
|
2564
|
-
}
|
|
2565
|
-
if (configPath) {
|
|
2566
|
-
const configOverrides = loadConfigFile(configPath);
|
|
2567
|
-
return { cleanedArgv: cleanedArgv2, overrides: configOverrides };
|
|
2568
|
-
}
|
|
2569
|
-
return { cleanedArgv: cleanedArgv2, overrides: overrides2 };
|
|
2570
|
-
}
|
|
2571
|
-
|
|
2572
2751
|
// src/bin.ts
|
|
2573
2752
|
var import_env = __toESM(require("@next/env"), 1);
|
|
2574
2753
|
|
|
@@ -2578,7 +2757,7 @@ var import_commander2 = require("commander");
|
|
|
2578
2757
|
var loadVersionCommand = (program2) => {
|
|
2579
2758
|
return program2.addCommand(
|
|
2580
2759
|
new import_commander2.Command("version").description("cli version").action(() => {
|
|
2581
|
-
console.log("0.
|
|
2760
|
+
console.log("0.32.0");
|
|
2582
2761
|
})
|
|
2583
2762
|
);
|
|
2584
2763
|
};
|
|
@@ -2588,7 +2767,7 @@ var { loadEnvConfig } = import_env.default;
|
|
|
2588
2767
|
loadEnvConfig(process.cwd());
|
|
2589
2768
|
var { cleanedArgv, overrides } = extractOverrides(process.argv.slice(2));
|
|
2590
2769
|
var program = new import_commander3.Command();
|
|
2591
|
-
program.name("axiom").description("Axiom's CLI to manage your objects and run evals").version("0.
|
|
2770
|
+
program.name("axiom").description("Axiom's CLI to manage your objects and run evals").version("0.32.0");
|
|
2592
2771
|
program.hook("preAction", async (_, actionCommand) => {
|
|
2593
2772
|
const commandName = actionCommand.name();
|
|
2594
2773
|
const parentCommand = actionCommand.parent;
|