axiom 0.23.0 → 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bin.cjs +74 -38
- package/dist/bin.cjs.map +1 -1
- package/dist/bin.js +11 -6
- package/dist/bin.js.map +1 -1
- package/dist/{chunk-B2XK7HHK.js → chunk-6E6HEZTE.js} +7 -2
- package/dist/chunk-6E6HEZTE.js.map +1 -0
- package/dist/{chunk-CSMTIO7U.js → chunk-CW7MNTNT.js} +85 -62
- package/dist/chunk-CW7MNTNT.js.map +1 -0
- package/dist/evals.cjs +126 -67
- package/dist/evals.cjs.map +1 -1
- package/dist/evals.d.cts +11 -0
- package/dist/evals.d.ts +11 -0
- package/dist/evals.js +41 -9
- package/dist/evals.js.map +1 -1
- package/dist/index.cjs +6 -1
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +1 -1
- package/package.json +1 -1
- package/dist/chunk-B2XK7HHK.js.map +0 -1
- package/dist/chunk-CSMTIO7U.js.map +0 -1
package/dist/bin.cjs
CHANGED
|
@@ -390,6 +390,7 @@ var loadPullCommand = (program2) => {
|
|
|
390
390
|
|
|
391
391
|
// src/cli/commands/eval.command.ts
|
|
392
392
|
var import_commander3 = require("commander");
|
|
393
|
+
var import_nanoid = require("nanoid");
|
|
393
394
|
|
|
394
395
|
// ../../node_modules/.pnpm/tinyrainbow@2.0.0/node_modules/tinyrainbow/dist/chunk-BVHSVHOK.js
|
|
395
396
|
var f = {
|
|
@@ -604,7 +605,7 @@ var import_api4 = require("@opentelemetry/api");
|
|
|
604
605
|
// package.json
|
|
605
606
|
var package_default = {
|
|
606
607
|
name: "axiom",
|
|
607
|
-
version: "0.23.0",
|
|
608
|
+
version: "0.24.0",
|
|
608
609
|
type: "module",
|
|
609
610
|
author: "Axiom, Inc.",
|
|
610
611
|
contributors: [
|
|
@@ -826,30 +827,22 @@ function resolveAxiomConnection(config) {
|
|
|
826
827
|
|
|
827
828
|
// src/evals/eval.service.ts
|
|
828
829
|
var findEvaluationCases = async (evalId, config) => {
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
console.log(payload);
|
|
844
|
-
return void 0;
|
|
845
|
-
}
|
|
846
|
-
if (payload.matches.length) {
|
|
847
|
-
return buildSpanTree(payload.matches);
|
|
848
|
-
}
|
|
849
|
-
} catch (err) {
|
|
850
|
-
console.log(err);
|
|
851
|
-
return void 0;
|
|
830
|
+
const { dataset, url, token } = resolveAxiomConnection(config);
|
|
831
|
+
const apl = `['${dataset}'] | where trace_id == "${evalId}" | order by _time`;
|
|
832
|
+
const headers = new Headers({
|
|
833
|
+
Authorization: `Bearer ${token}`,
|
|
834
|
+
"Content-Type": "application/json"
|
|
835
|
+
});
|
|
836
|
+
const resp = await fetch(`${url}/v1/datasets/_apl?format=legacy`, {
|
|
837
|
+
headers,
|
|
838
|
+
method: "POST",
|
|
839
|
+
body: JSON.stringify({ apl })
|
|
840
|
+
});
|
|
841
|
+
const payload = await resp.json();
|
|
842
|
+
if (!resp.ok) {
|
|
843
|
+
throw new Error(`Failed to query evaluation cases: ${payload.message || resp.statusText}`);
|
|
852
844
|
}
|
|
845
|
+
return payload.matches.length ? buildSpanTree(payload.matches) : null;
|
|
853
846
|
};
|
|
854
847
|
var mapSpanToEval = (span) => {
|
|
855
848
|
const flagConfigRaw = span.data.attributes["eval.config.flags"] ?? span.data.attributes.custom["eval.config.flags"];
|
|
@@ -1394,7 +1387,11 @@ function calculateFlagDiff(suite) {
|
|
|
1394
1387
|
}
|
|
1395
1388
|
return diffs;
|
|
1396
1389
|
}
|
|
1397
|
-
function printFinalReport({ suiteData }) {
|
|
1390
|
+
function printFinalReport({
|
|
1391
|
+
suiteData,
|
|
1392
|
+
config,
|
|
1393
|
+
registrationStatus
|
|
1394
|
+
}) {
|
|
1398
1395
|
console.log("");
|
|
1399
1396
|
console.log(u.bgBlue(u.white(" FINAL EVALUATION REPORT ")));
|
|
1400
1397
|
console.log("");
|
|
@@ -1404,8 +1401,28 @@ function printFinalReport({ suiteData }) {
|
|
|
1404
1401
|
printSuiteBox({ suite, scorerAverages, calculateBaselineScorerAverage, flagDiff });
|
|
1405
1402
|
console.log("");
|
|
1406
1403
|
}
|
|
1407
|
-
|
|
1408
|
-
|
|
1404
|
+
const runId = suiteData[0]?.runId;
|
|
1405
|
+
const orgId = suiteData[0]?.orgId;
|
|
1406
|
+
const anyRegistered = registrationStatus.some((s2) => s2.registered);
|
|
1407
|
+
const anyFailed = registrationStatus.some((s2) => !s2.registered);
|
|
1408
|
+
if (anyRegistered && orgId && config?.consoleEndpointUrl) {
|
|
1409
|
+
console.log("View full report:");
|
|
1410
|
+
console.log(`${config.consoleEndpointUrl}/${orgId}/ai-engineering/evaluations?runId=${runId}`);
|
|
1411
|
+
} else {
|
|
1412
|
+
console.log("Results not available in Axiom UI (registration failed)");
|
|
1413
|
+
}
|
|
1414
|
+
if (anyFailed) {
|
|
1415
|
+
console.log("");
|
|
1416
|
+
for (const status of registrationStatus) {
|
|
1417
|
+
if (!status.registered) {
|
|
1418
|
+
console.log(u.yellow(`\u26A0\uFE0F Warning: Failed to register "${status.name}" with Axiom`));
|
|
1419
|
+
if (status.error) {
|
|
1420
|
+
console.log(u.dim(` Error: ${status.error}`));
|
|
1421
|
+
}
|
|
1422
|
+
console.log(u.dim(` Results for this evaluation will not be available in the Axiom UI.`));
|
|
1423
|
+
}
|
|
1424
|
+
}
|
|
1425
|
+
}
|
|
1409
1426
|
}
|
|
1410
1427
|
|
|
1411
1428
|
// src/cli/errors.ts
|
|
@@ -1434,14 +1451,19 @@ var AxiomReporter = class {
|
|
|
1434
1451
|
__publicField(this, "_suiteData", []);
|
|
1435
1452
|
__publicField(this, "_baselines", /* @__PURE__ */ new Map());
|
|
1436
1453
|
__publicField(this, "_printedFlagOverrides", false);
|
|
1454
|
+
__publicField(this, "_config");
|
|
1437
1455
|
}
|
|
1438
1456
|
onTestRunStart() {
|
|
1439
1457
|
this.start = performance.now();
|
|
1440
1458
|
this.startTime = (/* @__PURE__ */ new Date()).getTime();
|
|
1459
|
+
const config = getAxiomConfig();
|
|
1460
|
+
if (config) {
|
|
1461
|
+
this._config = resolveAxiomConnection(config);
|
|
1462
|
+
}
|
|
1441
1463
|
}
|
|
1442
1464
|
async onTestSuiteReady(_testSuite) {
|
|
1443
1465
|
const meta = _testSuite.meta();
|
|
1444
|
-
if (_testSuite.state() === "skipped") {
|
|
1466
|
+
if (_testSuite.state() === "skipped" || !meta?.evaluation) {
|
|
1445
1467
|
return;
|
|
1446
1468
|
}
|
|
1447
1469
|
if (!this._printedFlagOverrides) {
|
|
@@ -1473,7 +1495,7 @@ var AxiomReporter = class {
|
|
|
1473
1495
|
}
|
|
1474
1496
|
async onTestSuiteResult(testSuite) {
|
|
1475
1497
|
const meta = testSuite.meta();
|
|
1476
|
-
if (testSuite.state() === "skipped") {
|
|
1498
|
+
if (testSuite.state() === "skipped" || !meta?.evaluation) {
|
|
1477
1499
|
return;
|
|
1478
1500
|
}
|
|
1479
1501
|
const durationSeconds = Number((performance.now() - this.start) / 1e3).toFixed(2);
|
|
@@ -1509,8 +1531,11 @@ var AxiomReporter = class {
|
|
|
1509
1531
|
baseline: suiteBaseline || null,
|
|
1510
1532
|
configFlags: meta.evaluation.configFlags,
|
|
1511
1533
|
flagConfig: meta.evaluation.flagConfig,
|
|
1534
|
+
runId: meta.evaluation.runId,
|
|
1535
|
+
orgId: meta.evaluation.orgId,
|
|
1512
1536
|
cases,
|
|
1513
|
-
outOfScopeFlags: meta.evaluation.outOfScopeFlags
|
|
1537
|
+
outOfScopeFlags: meta.evaluation.outOfScopeFlags,
|
|
1538
|
+
registrationStatus: meta.evaluation.registrationStatus
|
|
1514
1539
|
});
|
|
1515
1540
|
printEvalNameAndFileName(testSuite, meta);
|
|
1516
1541
|
printBaselineNameAndVersion(meta);
|
|
@@ -1526,8 +1551,15 @@ var AxiomReporter = class {
|
|
|
1526
1551
|
if (shouldClear) {
|
|
1527
1552
|
process.stdout.write("\x1B[2J\x1B[0f");
|
|
1528
1553
|
}
|
|
1554
|
+
const registrationStatus = this._suiteData.map((suite) => ({
|
|
1555
|
+
name: suite.name,
|
|
1556
|
+
registered: suite.registrationStatus?.status === "success",
|
|
1557
|
+
error: suite.registrationStatus?.status === "failed" ? suite.registrationStatus.error : void 0
|
|
1558
|
+
}));
|
|
1529
1559
|
printFinalReport({
|
|
1530
|
-
suiteData: this._suiteData
|
|
1560
|
+
suiteData: this._suiteData,
|
|
1561
|
+
config: this._config,
|
|
1562
|
+
registrationStatus
|
|
1531
1563
|
});
|
|
1532
1564
|
const DEBUG = process.env.AXIOM_DEBUG === "true";
|
|
1533
1565
|
if (DEBUG && this._endOfRunConfigEnd) {
|
|
@@ -1693,11 +1725,11 @@ function setupEvalProvider(connection) {
|
|
|
1693
1725
|
axiomProvider = new import_sdk_trace_node.NodeTracerProvider({
|
|
1694
1726
|
resource: (0, import_resources.resourceFromAttributes)({
|
|
1695
1727
|
["service.name"]: "axiom",
|
|
1696
|
-
["service.version"]: "0.23.0"
|
|
1728
|
+
["service.version"]: "0.24.0"
|
|
1697
1729
|
}),
|
|
1698
1730
|
spanProcessors: [processor]
|
|
1699
1731
|
});
|
|
1700
|
-
axiomTracer = axiomProvider.getTracer("axiom", "0.23.0");
|
|
1732
|
+
axiomTracer = axiomProvider.getTracer("axiom", "0.24.0");
|
|
1701
1733
|
}
|
|
1702
1734
|
async function initInstrumentation(config) {
|
|
1703
1735
|
if (initialized) {
|
|
@@ -1709,7 +1741,7 @@ async function initInstrumentation(config) {
|
|
|
1709
1741
|
}
|
|
1710
1742
|
initializationPromise = (async () => {
|
|
1711
1743
|
if (!config.enabled) {
|
|
1712
|
-
axiomTracer = import_api10.trace.getTracer("axiom", "0.23.0");
|
|
1744
|
+
axiomTracer = import_api10.trace.getTracer("axiom", "0.24.0");
|
|
1713
1745
|
initialized = true;
|
|
1714
1746
|
return;
|
|
1715
1747
|
}
|
|
@@ -1809,7 +1841,8 @@ var runVitest = async (dir, opts) => {
|
|
|
1809
1841
|
baseline: opts.baseline,
|
|
1810
1842
|
debug: opts.debug,
|
|
1811
1843
|
overrides: opts.overrides,
|
|
1812
|
-
axiomConfig: providedConfig
|
|
1844
|
+
axiomConfig: providedConfig,
|
|
1845
|
+
runId: opts.runId
|
|
1813
1846
|
}
|
|
1814
1847
|
});
|
|
1815
1848
|
await vi.start();
|
|
@@ -1868,6 +1901,7 @@ function isGlob(str) {
|
|
|
1868
1901
|
}
|
|
1869
1902
|
|
|
1870
1903
|
// src/cli/commands/eval.command.ts
|
|
1904
|
+
var createRunId = (0, import_nanoid.customAlphabet)("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ", 10);
|
|
1871
1905
|
var loadEvalCommand = (program2, flagOverrides = {}) => {
|
|
1872
1906
|
return program2.addCommand(
|
|
1873
1907
|
new import_commander3.Command("eval").description("run evals locally").addArgument(
|
|
@@ -1909,6 +1943,7 @@ var loadEvalCommand = (program2, flagOverrides = {}) => {
|
|
|
1909
1943
|
);
|
|
1910
1944
|
console.log("");
|
|
1911
1945
|
}
|
|
1946
|
+
const runId = createRunId();
|
|
1912
1947
|
await runEvalWithContext(flagOverrides, async () => {
|
|
1913
1948
|
return runVitest(".", {
|
|
1914
1949
|
watch: options.watch,
|
|
@@ -1918,7 +1953,8 @@ var loadEvalCommand = (program2, flagOverrides = {}) => {
|
|
|
1918
1953
|
testNamePattern,
|
|
1919
1954
|
debug: options.debug,
|
|
1920
1955
|
overrides: flagOverrides,
|
|
1921
|
-
config
|
|
1956
|
+
config,
|
|
1957
|
+
runId
|
|
1922
1958
|
});
|
|
1923
1959
|
});
|
|
1924
1960
|
} catch (error) {
|
|
@@ -2042,7 +2078,7 @@ var import_commander4 = require("commander");
|
|
|
2042
2078
|
var loadVersionCommand = (program2) => {
|
|
2043
2079
|
return program2.addCommand(
|
|
2044
2080
|
new import_commander4.Command("version").description("cli version").action(() => {
|
|
2045
|
-
console.log("0.23.0");
|
|
2081
|
+
console.log("0.24.0");
|
|
2046
2082
|
})
|
|
2047
2083
|
);
|
|
2048
2084
|
};
|
|
@@ -2052,7 +2088,7 @@ var { loadEnvConfig } = import_env.default;
|
|
|
2052
2088
|
loadEnvConfig(process.cwd());
|
|
2053
2089
|
var { cleanedArgv, overrides } = extractOverrides(process.argv.slice(2));
|
|
2054
2090
|
var program = new import_commander5.Command();
|
|
2055
|
-
program.name("axiom").description("Axiom's CLI to manage your objects and run evals").version("0.23.0");
|
|
2091
|
+
program.name("axiom").description("Axiom's CLI to manage your objects and run evals").version("0.24.0");
|
|
2056
2092
|
loadPushCommand(program);
|
|
2057
2093
|
loadPullCommand(program);
|
|
2058
2094
|
loadEvalCommand(program, overrides);
|