axiom 0.13.0 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bin.cjs +536 -46
- package/dist/bin.cjs.map +1 -1
- package/dist/bin.d.cts +6 -0
- package/dist/bin.d.ts +6 -0
- package/dist/bin.js +28 -40
- package/dist/bin.js.map +1 -1
- package/dist/chunk-EJO7BPCZ.js +464 -0
- package/dist/chunk-EJO7BPCZ.js.map +1 -0
- package/dist/{chunk-EFEYUIIG.js → chunk-J2CSXIYU.js} +8 -16
- package/dist/chunk-J2CSXIYU.js.map +1 -0
- package/dist/chunk-O7C3VVO7.js +166 -0
- package/dist/chunk-O7C3VVO7.js.map +1 -0
- package/dist/evals.cjs +641 -124
- package/dist/evals.cjs.map +1 -1
- package/dist/evals.d.cts +136 -29
- package/dist/evals.d.ts +136 -29
- package/dist/evals.js +107 -66
- package/dist/evals.js.map +1 -1
- package/dist/index.cjs +9 -16
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +4 -101
- package/dist/index.d.ts +4 -101
- package/dist/index.js +14 -166
- package/dist/index.js.map +1 -1
- package/dist/types-DGiZzDAy.d.cts +113 -0
- package/dist/types-DGiZzDAy.d.ts +113 -0
- package/package.json +2 -1
- package/dist/chunk-EFEYUIIG.js.map +0 -1
- package/dist/chunk-MNOTFSB6.js +0 -79
- package/dist/chunk-MNOTFSB6.js.map +0 -1
- package/dist/scorer.types-D6mnSKTJ.d.cts +0 -12
- package/dist/scorer.types-D6mnSKTJ.d.ts +0 -12
package/dist/bin.cjs
CHANGED
|
@@ -6,6 +6,11 @@ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
|
6
6
|
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
7
7
|
var __getProtoOf = Object.getPrototypeOf;
|
|
8
8
|
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
9
|
+
// Bundler-generated helper: store `value` under `key` on `obj`.
// When the key is already reachable on the object it is redefined through __defProp as an
// enumerable, configurable, writable property; otherwise a plain assignment is used.
var __defNormalProp = (obj, key, value) => {
  if (key in obj) {
    return __defProp(obj, key, { enumerable: true, configurable: true, writable: true, value });
  }
  return obj[key] = value;
};
|
|
10
|
+
// Bundler-generated helper: for every key in `all`, define an enumerable getter of the
// same name on `target`, using the function stored in `all` as the getter.
var __export = (target, all) => {
  for (var name in all)
    __defProp(target, name, { get: all[name], enumerable: true });
};
|
|
9
14
|
var __copyProps = (to, from, except, desc) => {
|
|
10
15
|
if (from && typeof from === "object" || typeof from === "function") {
|
|
11
16
|
for (let key of __getOwnPropNames(from))
|
|
@@ -22,12 +27,19 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
|
|
|
22
27
|
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
|
|
23
28
|
mod
|
|
24
29
|
));
|
|
30
|
+
// Bundler-generated helper: create a fresh object flagged with `__esModule: true`
// and copy the properties of `mod` onto it via __copyProps.
var __toCommonJS = (mod) => {
  const flagged = __defProp({}, "__esModule", { value: true });
  return __copyProps(flagged, mod);
};
|
|
31
|
+
// Bundler-generated helper used by lowered class fields: symbol keys are passed through
// unchanged, every other key is coerced to a string before delegating to __defNormalProp.
var __publicField = (obj, key, value) => {
  const normalizedKey = typeof key === "symbol" ? key : key + "";
  return __defNormalProp(obj, normalizedKey, value);
};
|
|
25
32
|
|
|
26
33
|
// src/bin.ts
|
|
27
|
-
var
|
|
34
|
+
var bin_exports = {};
|
|
35
|
+
__export(bin_exports, {
|
|
36
|
+
evalcmd: () => evalcmd,
|
|
37
|
+
program: () => program
|
|
38
|
+
});
|
|
39
|
+
module.exports = __toCommonJS(bin_exports);
|
|
28
40
|
var import_commander4 = require("commander");
|
|
29
41
|
|
|
30
|
-
// src/commands/push.command.ts
|
|
42
|
+
// src/cli/commands/push.command.ts
|
|
31
43
|
var import_commander = require("commander");
|
|
32
44
|
|
|
33
45
|
// src/transpiler.ts
|
|
@@ -236,7 +248,7 @@ ${options ? Object.entries(options).map(([key, value]) => ` ${key}: ${value}`
|
|
|
236
248
|
`;
|
|
237
249
|
}
|
|
238
250
|
|
|
239
|
-
// src/commands/push.command.ts
|
|
251
|
+
// src/cli/commands/push.command.ts
|
|
240
252
|
var import_promises2 = __toESM(require("fs/promises"), 1);
|
|
241
253
|
var import_node_readline = __toESM(require("readline"), 1);
|
|
242
254
|
async function askConfirmation(message) {
|
|
@@ -327,7 +339,7 @@ var loadPushCommand = (program2) => {
|
|
|
327
339
|
program2.addCommand(push);
|
|
328
340
|
};
|
|
329
341
|
|
|
330
|
-
// src/commands/pull.command.ts
|
|
342
|
+
// src/cli/commands/pull.command.ts
|
|
331
343
|
var import_commander2 = require("commander");
|
|
332
344
|
var fs3 = __toESM(require("fs/promises"), 1);
|
|
333
345
|
var path2 = __toESM(require("path"), 1);
|
|
@@ -338,8 +350,8 @@ var loadPullCommand = (program2) => {
|
|
|
338
350
|
).option("--version <version>", "The version to pull, default: latest", "latest").option("--output <path>", "Output file path (optional, defaults to <slug>.prompt.ts)").action(async (slug, options) => {
|
|
339
351
|
try {
|
|
340
352
|
console.log(`Pulling prompt: ${slug} (version: ${options.version})`);
|
|
341
|
-
const
|
|
342
|
-
const response = await fetch(
|
|
353
|
+
const url2 = `${process.env.AXIOM_URL}/v1/prompts/${slug}`;
|
|
354
|
+
const response = await fetch(url2, {
|
|
343
355
|
method: "GET",
|
|
344
356
|
headers: {
|
|
345
357
|
Authorization: `Bearer ${process.env.AXIOM_TOKEN}`,
|
|
@@ -377,50 +389,502 @@ var loadPullCommand = (program2) => {
|
|
|
377
389
|
program2.addCommand(pull);
|
|
378
390
|
};
|
|
379
391
|
|
|
380
|
-
// src/commands/eval.command.ts
|
|
392
|
+
// src/cli/commands/eval.command.ts
|
|
381
393
|
var import_commander3 = require("commander");
|
|
382
394
|
|
|
383
395
|
// src/evals/run-vitest.ts
|
|
384
396
|
var import_node = require("vitest/node");
|
|
385
397
|
|
|
398
|
+
// ../../node_modules/.pnpm/tinyrainbow@2.0.0/node_modules/tinyrainbow/dist/chunk-BVHSVHOK.js
|
|
399
|
+
// Style table consumed by p(): style name -> [open code, close code, optional replacement].
// p() wraps the two numeric codes as `\x1B[<code>m` sequences; the optional third entry is
// substituted for close sequences that already occur inside the text being styled.
var f = {
  reset: [0, 0],
  bold: [1, 22, "\x1B[22m\x1B[1m"],
  dim: [2, 22, "\x1B[22m\x1B[2m"],
  italic: [3, 23],
  underline: [4, 24],
  inverse: [7, 27],
  hidden: [8, 28],
  strikethrough: [9, 29],
  black: [30, 39],
  red: [31, 39],
  green: [32, 39],
  yellow: [33, 39],
  blue: [34, 39],
  magenta: [35, 39],
  cyan: [36, 39],
  white: [37, 39],
  gray: [90, 39],
  bgBlack: [40, 49],
  bgRed: [41, 49],
  bgGreen: [42, 49],
  bgYellow: [43, 49],
  bgBlue: [44, 49],
  bgMagenta: [45, 49],
  bgCyan: [46, 49],
  bgWhite: [47, 49],
  blackBright: [90, 39],
  redBright: [91, 39],
  greenBright: [92, 39],
  yellowBright: [93, 39],
  blueBright: [94, 39],
  magentaBright: [95, 39],
  cyanBright: [96, 39],
  whiteBright: [97, 39],
  bgBlackBright: [100, 49],
  bgRedBright: [101, 49],
  bgGreenBright: [102, 49],
  bgYellowBright: [103, 49],
  bgBlueBright: [104, 49],
  bgMagentaBright: [105, 49],
  bgCyanBright: [106, 49],
  bgWhiteBright: [107, 49]
};
// [name, codes] pairs, iterated by p() to build one formatter per style.
var h = Object.entries(f);
|
|
443
|
+
// No-op formatter used for every style when colors are disabled: it only stringifies
// its argument and exposes empty `open` / `close` markers.
function a(n) {
  return String(n);
}
a.open = "";
a.close = "";
|
|
448
|
+
// Decide whether colored output should be produced.
//   n - caller's hint that the output is a TTY (p() forwards its own argument here).
// Colors are off when NO_COLOR is set or `--no-color` is passed. Otherwise they are on when
// FORCE_COLOR is set, `--color` is passed, the platform is win32, `n` is truthy and TERM is
// not "dumb", or CI is set. Independently of all that, a `window.chrome` global enables them.
function C(n = false) {
  const proc = typeof process != "undefined" ? process : void 0;
  const env = (proc == null ? void 0 : proc.env) || {};
  const argv = (proc == null ? void 0 : proc.argv) || [];
  const disabled = "NO_COLOR" in env || argv.includes("--no-color");
  const enabled = "FORCE_COLOR" in env || argv.includes("--color") || (proc == null ? void 0 : proc.platform) === "win32" || n && env.TERM !== "dumb" || "CI" in env;
  return !disabled && enabled || typeof window != "undefined" && !!window.chrome;
}
|
|
452
|
+
// Build the color palette object: one formatter per entry of `h`, plus `isColorSupported`.
//   n - forwarded to C() as the "output is a TTY" hint.
// When C() reports that colors are unsupported, every style maps to the no-op formatter `a`.
function p(n = false) {
  let e = C(n), i = (r2, t, c, o) => {
    // Replace every occurrence of the close sequence `t` in `r2`, starting with the one at
    // index `o`, by `c`, so that an inner style's close does not end the outer style.
    let l = "", s2 = 0;
    do
      l += r2.substring(s2, o) + c, s2 = o + t.length, o = r2.indexOf(t, s2);
    while (~o);
    return l + r2.substring(s2);
  }, g = (r2, t, c = r2) => {
    // Formatter factory: r2 = open sequence, t = close sequence, c = replacement for nested
    // closes (defaults to re-emitting the open sequence).
    let o = (l) => {
      let s2 = String(l), b = s2.indexOf(t, r2.length);
      return ~b ? r2 + i(s2, t, c, b) + t : r2 + s2 + t;
    };
    return o.open = r2, o.close = t, o;
  }, u2 = {
    isColorSupported: e
  }, d = (r2) => `\x1B[${r2}m`;
  for (let [r2, t] of h)
    u2[r2] = e ? g(
      d(t[0]),
      d(t[1]),
      t[2]
    ) : a;
  return u2;
}
|
|
476
|
+
|
|
477
|
+
// ../../node_modules/.pnpm/tinyrainbow@2.0.0/node_modules/tinyrainbow/dist/node.js
|
|
478
|
+
var import_tty = require("tty");
|
|
479
|
+
var r = process.env.FORCE_TTY !== void 0 || (0, import_tty.isatty)(1);
|
|
480
|
+
var u = p(r);
|
|
481
|
+
|
|
482
|
+
// src/evals/eval.service.ts
|
|
483
|
+
// Connection settings for the eval service, read from the environment once, when this part
// of the bundle is evaluated.
// NOTE(review): in this bundle these lines run before src/bin.ts calls
// loadEnvConfig(process.cwd()), so values that exist only in .env files are not visible
// here — confirm whether that is intended.
var datasetName = process.env.AXIOM_DATASET ?? "";
var url = process.env.AXIOM_URL ?? "https://api.axiom.co";
var token = process.env.AXIOM_TOKEN;
|
|
486
|
+
/**
 * Load every span recorded for one evaluation trace and assemble them with buildSpanTree.
 *
 * Sends an APL query (`where trace_id == "<evalId>"`, ordered by `_time`) to
 * `<AXIOM_URL>/v1/datasets/_apl?format=legacy`.
 *
 * @param {string} evalId - trace id of the evaluation run to load.
 * @returns {Promise<object|null|undefined>} the result of buildSpanTree, or `undefined` when
 *   the request fails, the response is not ok, or no matching spans are returned.
 */
var findEvaluationCases = async (evalId) => {
  // Read the connection settings at call time rather than relying on values captured when
  // the bundle was loaded: the bundle's entry point only runs loadEnvConfig(process.cwd())
  // after module evaluation, so settings supplied through .env files are visible here but
  // not in load-time constants.
  const dataset = process.env.AXIOM_DATASET ?? "";
  const baseUrl = process.env.AXIOM_URL ?? "https://api.axiom.co";
  const authToken = process.env.AXIOM_TOKEN;
  try {
    const apl = `['${dataset}'] | where trace_id == "${evalId}" | order by _time`;
    const headers = new Headers({
      Authorization: `Bearer ${authToken}`,
      "Content-Type": "application/json"
    });
    const resp = await fetch(`${baseUrl}/v1/datasets/_apl?format=legacy`, {
      headers,
      method: "POST",
      body: JSON.stringify({ apl })
    });
    const payload = await resp.json();
    if (!resp.ok) {
      // The error body is printed as-is so the user can see why the lookup failed.
      console.log(payload);
      return void 0;
    }
    // `matches` may be absent from the payload; treat that the same as "no spans found"
    // instead of raising a TypeError that would only be logged by the catch below.
    if (payload.matches?.length) {
      return buildSpanTree(payload.matches);
    }
    return void 0;
  } catch (err) {
    // Best effort: a failed lookup is logged and reported as "nothing found".
    console.log(err);
    return void 0;
  }
};
|
|
511
|
+
/**
 * Convert the root "eval" span of a trace into the evaluation summary object.
 *
 * Reads the `eval.*` keys from `span.data.attributes.custom` plus duration, status code,
 * trace id and `_time`. `cases` starts empty and is filled by the caller.
 *
 * @param {object} span - a match returned by the APL query (`{ _time, data }`).
 * @returns {object} evaluation summary with an empty `cases` array.
 * @throws {SyntaxError} when `eval.tags` is a non-empty string that is not valid JSON.
 */
var mapSpanToEval = (span) => {
  const data = span.data;
  const custom = data.attributes.custom;
  // `eval.tags` may be missing; previously that raised a TypeError on `.length`.
  // A JSON string is parsed, an array is passed through, anything else yields [].
  const rawTags = custom["eval.tags"];
  let tags = [];
  if (Array.isArray(rawTags)) {
    tags = rawTags;
  } else if (typeof rawTags === "string" && rawTags.length) {
    tags = JSON.parse(rawTags);
  }
  return {
    id: custom["eval.id"],
    name: custom["eval.name"],
    type: custom["eval.type"],
    version: custom["eval.version"],
    collection: {
      name: custom["eval.collection.name"],
      size: custom["eval.collection.size"]
    },
    baseline: {
      id: custom["eval.baseline.id"],
      name: custom["eval.baseline.name"]
    },
    prompt: {
      model: custom["eval.prompt.model"],
      params: custom["eval.prompt.params"]
    },
    duration: data.duration,
    status: data.status.code,
    traceId: data.trace_id,
    runAt: span._time,
    tags,
    user: {
      name: custom["eval.user.name"],
      email: custom["eval.user.email"]
    },
    cases: []
  };
};
|
|
541
|
+
/**
 * Convert a "case" span into the case record used by the reporter.
 *
 * @param {object} item - a match returned by the APL query (`{ _time, data }`).
 * @returns {object} case record; `duration` is a display string, `scores` is parsed from the
 *   `eval.case.scores` JSON attribute, or `{}` when that attribute is absent.
 * @throws {SyntaxError} when `eval.case.scores` is present but not valid JSON.
 */
var mapSpanToCase = (item) => {
  const data = item.data;
  const custom = data.attributes.custom;
  const d = data.duration;
  // "-" is the placeholder shown when the span carries no duration at all.
  let duration = "-";
  if (typeof d === "string") {
    // Only a plain "<number>s" value is rounded to two decimals. Other strings that merely
    // end in "s" (e.g. "150ms", "1m30s") used to be rendered as "NaNs"; they are now
    // passed through unchanged.
    const seconds = d.endsWith("s") && d.length > 1 ? Number(d.slice(0, -1)) : NaN;
    duration = Number.isFinite(seconds) ? `${seconds.toFixed(2)}s` : d;
  } else if (d != null) {
    duration = d;
  }
  return {
    index: custom["eval.case.index"],
    input: custom["eval.case.input"],
    output: custom["eval.case.output"],
    expected: custom["eval.case.expected"],
    duration,
    status: data.status.code,
    scores: custom["eval.case.scores"] ? JSON.parse(custom["eval.case.scores"]) : {},
    runAt: item._time,
    spanId: data.span_id,
    traceId: data.trace_id
  };
};
|
|
563
|
+
/**
 * Assemble the flat span list of one evaluation trace into an evaluation object.
 *
 * The span whose `gen_ai.operation.name` is "eval" becomes the root (via mapSpanToEval).
 * Every span whose name starts with "case" becomes a case (via mapSpanToCase) and receives:
 *   - `task`: built from its first child span named "task…", including that task's first
 *     child span named "chat…" (or an empty placeholder chat when there is none);
 *   - `scores`: rebuilt from its child spans whose operation is "eval.score", keyed by name.
 * Cases are returned sorted by `index`.
 *
 * @param {object[]} spans - matches returned by the APL query.
 * @returns {object|null} the evaluation, or null when `spans` is empty or has no eval span.
 */
var buildSpanTree = (spans) => {
  if (!spans.length) {
    return null;
  }
  // A trace can contain spans without gen_ai attributes or without a name; read both
  // defensively so one such span does not abort the whole tree with a TypeError.
  const operationOf = (span) => span.data.attributes?.gen_ai?.operation?.name;
  const isNamed = (span, prefix) => typeof span.data.name === "string" && span.data.name.startsWith(prefix);
  const evalSpan = spans.find((span) => operationOf(span) === "eval");
  if (!evalSpan) {
    return null;
  }
  const rootSpan = mapSpanToEval(evalSpan);
  // Index children by parent id once instead of rescanning every span for every case.
  const childrenByParent = new Map();
  for (const span of spans) {
    const parentId = span.data.parent_span_id;
    const siblings = childrenByParent.get(parentId);
    if (siblings) {
      siblings.push(span);
    } else {
      childrenByParent.set(parentId, [span]);
    }
  }
  const childrenOf = (span) => childrenByParent.get(span.data.span_id) ?? [];
  const emptyChat = () => ({
    operation: "",
    capability: "",
    step: "",
    request: { max_token: "", model: "", temperature: 0 },
    response: { finish_reasons: "" },
    usage: { input_tokens: 0, output_tokens: 0 }
  });
  const mapChat = (chatSpan) => {
    const attributes = chatSpan.data.attributes;
    const custom = attributes?.custom;
    return {
      operation: custom?.operation || "",
      capability: custom?.capability || "",
      step: custom?.step || "",
      request: {
        max_token: custom?.["request.max_token"] || "",
        model: custom?.["request.model"] || "",
        temperature: custom?.["request.temperature"] || 0
      },
      response: {
        finish_reasons: custom?.["response.finish_reasons"] || ""
      },
      usage: {
        input_tokens: attributes?.gen_ai?.usage?.input_tokens || 0,
        output_tokens: attributes?.gen_ai?.usage?.output_tokens || 0
      }
    };
  };
  for (const caseSpan of spans) {
    if (!isNamed(caseSpan, "case")) {
      continue;
    }
    const caseData = mapSpanToCase(caseSpan);
    const caseChildren = childrenOf(caseSpan);
    // Only the first task span of a case, and the first chat span of that task, are kept.
    const taskSpan = caseChildren.find((span) => isNamed(span, "task"));
    if (taskSpan) {
      const taskCustom = taskSpan.data.attributes?.custom;
      const chatSpan = childrenOf(taskSpan).find((span) => isNamed(span, "chat"));
      caseData.task = {
        name: taskSpan.data.name,
        output: taskCustom?.output || "",
        trial: taskCustom?.trial || 0,
        type: taskCustom?.type || "",
        error: taskCustom?.error,
        chat: chatSpan ? mapChat(chatSpan) : emptyChat()
      };
    }
    // Scores parsed by mapSpanToCase are replaced by the ones recorded on score spans.
    caseData.scores = {};
    for (const score of caseChildren) {
      if (operationOf(score) !== "eval.score") {
        continue;
      }
      const name = score.data.attributes.custom["eval.score.name"];
      caseData.scores[name] = {
        name,
        value: score.data.attributes.custom["eval.score.value"],
        metadata: {
          error: score.data.attributes.error
        }
      };
    }
    rootSpan.cases.push(caseData);
  }
  rootSpan.cases.sort((a2, b) => a2.index - b.index);
  return rootSpan;
};
|
|
636
|
+
|
|
386
637
|
// src/evals/reporter.ts
|
|
387
|
-
var import_console_table_printer = require("console-table-printer");
|
|
388
|
-
var prRed = (s) => `\x1B[91m ${s}\x1B[00m`;
|
|
389
638
|
/**
 * Reporter handed to Vitest by runVitest. Prints a header per eval suite, a line per case
 * as it starts, and a per-case score summary once the suite finishes. When the suite meta
 * names a baseline evaluation, its cases are loaded with findEvaluationCases and each score
 * is printed next to the baseline value and the difference.
 */
var AxiomReporter = class {
  constructor() {
    // Baseline evaluation loaded in onTestSuiteReady (stays undefined when none is set).
    __publicField(this, "baseline");
    // Wall-clock start in epoch milliseconds, used for the "Start at" line.
    __publicField(this, "startTime", 0);
    // performance.now() reading at run start, used for the "Duration" line.
    __publicField(this, "start", 0);
  }
  onTestRunStart() {
    this.start = performance.now();
    this.startTime = (/* @__PURE__ */ new Date()).getTime();
  }
  /** Load the baseline (if any) and print the suite header. */
  async onTestSuiteReady(_testSuite) {
    const meta = _testSuite.meta();
    const baseline = meta.evaluation.baseline;
    if (baseline) {
      this.baseline = await findEvaluationCases(baseline.id);
    }
    const cwd = process.cwd();
    console.log(
      " ",
      u.bgCyan(u.black(` ${_testSuite.project.name} `)),
      u.bgBlue(u.black(` ${meta.evaluation.name}-${meta.evaluation.version} `)),
      u.dim(`(${_testSuite.children.size} cases)`)
    );
    console.log(" ", u.dim(_testSuite.module.moduleId.replace(cwd, "")));
    if (meta.evaluation.baseline) {
      console.log(
        " ",
        " baseline ",
        u.bgMagenta(
          u.black(` ${meta.evaluation.baseline.name}-${meta.evaluation.baseline.version} `)
        )
      );
    } else {
      console.log(" ", u.bgWhite(u.blackBright(" baseline: ")), "none");
    }
    console.log("");
  }
  onTestCaseReady(test) {
    const meta = test.meta();
    console.log(u.blue(` \u2713 evaluating case ${meta.case.index}`));
  }
  /** Print the run summary, every case result, and the link to the results page. */
  onTestSuiteResult(testSuite) {
    const duration = Number((performance.now() - this.start) / 1e3).toFixed(2);
    console.log(" ");
    console.log(" ", u.dim("Cases"), testSuite.children.size);
    console.log(" ", u.dim("Start at"), new Date(this.startTime).toTimeString());
    console.log(" ", u.dim("Duration"), `${duration}s`);
    const meta = testSuite.meta();
    const url2 = `https://app.axiom.co/evaluations/${meta.evaluation.name}/${meta.evaluation.id}`;
    for (const test of testSuite.children) {
      // Skip children that are not test cases. This used to `return`, which ended the whole
      // method at the first such child and dropped the remaining cases and the results link.
      if (test.type !== "test") continue;
      this.printCaseResult(test);
    }
    console.log("");
    console.log(
      " ",
      `see results for ${meta.evaluation.name}-${meta.evaluation.version} at ${url2}`
    );
    console.log(
      " ",
      u.cyanBright("=== === === === === === === === === === === === === === === ===")
    );
    console.log("");
  }
  async onTestRunEnd(_testModules, _errors, _reason) {
  }
  /** Print one case: pass/fail line, collected error messages, then one line per score. */
  printCaseResult(test) {
    const ok = test.ok();
    const testMeta = test.meta();
    if (!testMeta || !testMeta.case) {
      return;
    }
    const index = testMeta.case.index;
    if (ok) {
      console.log(" ", u.yellow(` \u2714 case ${index}:`));
    } else {
      console.log(" ", u.red(` \u2716 case ${index}: failed`));
      for (const e of testMeta.case.errors ?? []) {
        console.log("", e.message);
      }
    }
    // A case that failed before scoring may carry no scores at all.
    const scores = testMeta.case.scores ?? {};
    // Match the baseline case by its recorded index rather than by array position, so a
    // baseline with missing cases or a non-zero first index still lines up correctly.
    const baselineCase = this.baseline?.cases?.find(
      (candidate) => Number(candidate.index) === Number(index)
    );
    for (const k of Object.keys(scores)) {
      const v = scores[k].score ? scores[k].score : 0;
      const scoreValue = Number(v * 100).toFixed(2) + "%";
      const baselineScore = baselineCase?.scores?.[k];
      if (baselineScore) {
        const baselineScoreValue = baselineScore.value;
        const diff = v - baselineScoreValue;
        const diffText = Number(diff * 100).toFixed(2) + "%";
        const blScoreText = Number(baselineScoreValue * 100).toFixed(2) + "%";
        console.log(
          " ",
          k,
          u.magentaBright(blScoreText),
          "->",
          u.blueBright(scoreValue),
          diff > 0 ? u.green("+" + diffText) : u.red(diffText)
        );
      } else {
        console.log(" ", k, u.blueBright(scoreValue));
      }
    }
  }
};
|
|
418
742
|
|
|
419
743
|
// src/evals/instrument.ts
|
|
420
744
|
var import_sdk_trace_node = require("@opentelemetry/sdk-trace-node");
|
|
421
745
|
var import_resources = require("@opentelemetry/resources");
|
|
422
746
|
var import_exporter_trace_otlp_http = require("@opentelemetry/exporter-trace-otlp-http");
|
|
747
|
+
var import_api2 = require("@opentelemetry/api");
|
|
748
|
+
|
|
749
|
+
// src/otel/initAxiomAI.ts
|
|
423
750
|
var import_api = require("@opentelemetry/api");
|
|
751
|
+
|
|
752
|
+
// package.json
|
|
753
|
+
// Copy of the package's package.json inlined by the bundler. In this file only `name` and
// `version` are read (by extractTracerScope, as fallbacks for the tracer scope).
var package_default = {
  name: "axiom",
  version: "0.14.0",
  type: "module",
  author: "Axiom, Inc.",
  contributors: [
    "Islam Shehata <islam@axiom.co>",
    "Chris Ehrlich <chris@axiom.co>",
    "Gabriel de Andrade <gabriel@axiom.co>"
  ],
  scripts: {
    dev: "tsup --watch",
    build: "tsup && chmod +x dist/bin.js",
    format: "prettier --write .",
    "format:check": "prettier --check .",
    lint: "eslint './**/*.{js,ts}'",
    typecheck: "tsc --noEmit",
    test: "vitest run",
    "test:watch": "vitest --watch",
    publint: "npx publint"
  },
  types: "./dist/index.d.ts",
  main: "./dist/index.cjs",
  module: "./dist/index.js",
  bin: {
    axiom: "./dist/bin.js"
  },
  exports: {
    "./ai": {
      import: {
        types: "./dist/index.d.ts",
        default: "./dist/index.js"
      },
      require: {
        types: "./dist/index.d.cts",
        default: "./dist/index.cjs"
      }
    },
    "./ai/evals": {
      import: {
        types: "./dist/evals.d.ts",
        default: "./dist/evals.js"
      },
      require: {
        types: "./dist/evals.d.cts",
        default: "./dist/evals.cjs"
      }
    }
  },
  keywords: [
    "axiom",
    "logging",
    "ai",
    "otel",
    "opentelemetry"
  ],
  repository: {
    type: "git",
    url: "git+https://github.com/axiomhq/ai.git",
    directory: "packages/ai"
  },
  license: "MIT",
  dependencies: {
    "@next/env": "^15.4.2",
    "@opentelemetry/auto-instrumentations-node": "^0.60.1",
    "@opentelemetry/context-async-hooks": "^2.0.1",
    "@opentelemetry/exporter-trace-otlp-http": "^0.202.0",
    "@opentelemetry/resources": "^2.0.1",
    "@opentelemetry/sdk-node": "^0.202.0",
    "@opentelemetry/sdk-trace-node": "^2.0.1",
    "@opentelemetry/semantic-conventions": "^1.36.0",
    "@sinclair/typebox": "^0.34.37",
    commander: "^14.0.0",
    "console-table-printer": "^2.14.6",
    esbuild: "^0.25.8",
    handlebars: "^4.7.8",
    nanoid: "^5.1.5",
    vitest: "catalog:",
    zod: "catalog:"
  },
  peerDependencies: {
    "@opentelemetry/api": "^1.9.0"
  },
  devDependencies: {
    "@ai-sdk/anthropicv1": "npm:@ai-sdk/anthropic@^1.2.12",
    "@ai-sdk/anthropicv2": "npm:@ai-sdk/anthropic@2.0.0-beta.9",
    "@ai-sdk/openaiv1": "npm:@ai-sdk/openai@^1.3.23",
    "@ai-sdk/openaiv2": "npm:@ai-sdk/openai@2.0.0-beta.12",
    "@ai-sdk/providerv1": "npm:@ai-sdk/provider@^1.1.3",
    "@ai-sdk/providerv2": "npm:@ai-sdk/provider@2.0.0-beta.1",
    "@opentelemetry/api": "^1.9.0",
    "@opentelemetry/core": "^2.0.1",
    "@opentelemetry/sdk-trace-base": "^2.0.1",
    "@opentelemetry/sdk-trace-node": "^2.0.1",
    "@repo/eslint-config": "workspace:*",
    "@types/node": "^22.15.29",
    "@vitest/coverage-v8": "^3.2.4",
    aiv4: "npm:ai@^4.3.19",
    aiv5: "npm:ai@^5.0.0",
    eslint: "catalog:",
    prettier: "catalog:",
    tinyrainbow: "^2.0.0",
    tsup: "catalog:",
    typescript: "catalog:",
    vitest: "catalog:"
  },
  files: [
    "dist"
  ],
  packageManager: "pnpm@10.11.1"
};
|
|
864
|
+
|
|
865
|
+
// src/otel/initAxiomAI.ts
|
|
866
|
+
var AXIOM_AI_SCOPE_KEY = Symbol.for("__axiom_ai_scope__");
|
|
867
|
+
/**
 * Read the instrumentation scope (name and version) off a tracer object.
 *
 * Looks at `_instrumentationScope` first, then `instrumentationLibrary`, and finally falls
 * back to this package's own name / version. Empty strings count as missing (`||`).
 *
 * @param {object} tracer2 - the tracer to inspect.
 * @returns {{ name: string, version: string }}
 */
function extractTracerScope(tracer2) {
  const tracerAny = tracer2;
  const name = tracerAny._instrumentationScope?.name || tracerAny.instrumentationLibrary?.name || package_default.name;
  const version = tracerAny._instrumentationScope?.version || tracerAny.instrumentationLibrary?.version || package_default.version;
  return { name, version };
}
|
|
873
|
+
/**
 * Record the tracer scope used by Axiom AI in a process-wide slot on `globalThis`.
 *
 * Calling it again with the same scope (same name and version) is a no-op. Calling it with
 * a different scope logs a warning and then overwrites the stored scope.
 *
 * @param {{ tracer: object }} config - `config.tracer` is passed to extractTracerScope.
 */
function initAxiomAI(config) {
  const newScope = extractTracerScope(config.tracer);
  const existingScope = globalThis[AXIOM_AI_SCOPE_KEY];
  if (existingScope && existingScope.name === newScope.name && existingScope.version === newScope.version) {
    return;
  }
  if (existingScope) {
    console.warn(
      `[AxiomAI] initAxiomAI() called multiple times with different scopes. Previous: ${existingScope.name}@${existingScope.version}, New: ${newScope.name}@${newScope.version}`
    );
  }
  globalThis[AXIOM_AI_SCOPE_KEY] = newScope;
}
|
|
886
|
+
|
|
887
|
+
// src/evals/instrument.ts
|
|
424
888
|
var collectorOptions = {
|
|
425
889
|
url: process.env.AXIOM_URL ? `${process.env.AXIOM_URL}/v1/traces` : "https://api.axiom.co/v1/traces",
|
|
426
890
|
// Axiom API endpoint for trace data
|
|
@@ -442,26 +906,38 @@ var processor = new import_sdk_trace_node.BatchSpanProcessor(exporter, {
|
|
|
442
906
|
});
|
|
443
907
|
var provider = new import_sdk_trace_node.NodeTracerProvider({
|
|
444
908
|
resource: (0, import_resources.resourceFromAttributes)({
|
|
445
|
-
["service.name"]: "axiom
|
|
446
|
-
["service.version"]: "0.
|
|
909
|
+
["service.name"]: "axiom",
|
|
910
|
+
["service.version"]: "0.14.0"
|
|
447
911
|
}),
|
|
448
912
|
spanProcessors: [processor]
|
|
449
913
|
});
|
|
450
914
|
provider.register();
|
|
451
|
-
var tracer =
|
|
915
|
+
var tracer = import_api2.trace.getTracer("axiom", "0.14.0");
|
|
452
916
|
var flush = async () => {
|
|
453
917
|
await provider.forceFlush();
|
|
454
918
|
};
|
|
919
|
+
initAxiomAI({ tracer });
|
|
455
920
|
|
|
456
921
|
// src/evals/run-vitest.ts
|
|
457
|
-
var
|
|
922
|
+
var DEFAULT_TIMEOUT = 1e4;
|
|
923
|
+
var runVitest = async (dir, opts) => {
|
|
458
924
|
const vi = await (0, import_node.createVitest)("test", {
|
|
459
|
-
|
|
925
|
+
root: dir ? dir : process.cwd(),
|
|
460
926
|
mode: "test",
|
|
461
|
-
include: [
|
|
462
|
-
reporters: [
|
|
927
|
+
include: ["**/*.eval.ts"],
|
|
928
|
+
reporters: [new AxiomReporter()],
|
|
463
929
|
environment: "node",
|
|
464
|
-
browser: void 0
|
|
930
|
+
browser: void 0,
|
|
931
|
+
watch: opts.watch,
|
|
932
|
+
name: "axiom:eval",
|
|
933
|
+
printConsoleTrace: true,
|
|
934
|
+
silent: false,
|
|
935
|
+
disableConsoleIntercept: true,
|
|
936
|
+
testTimeout: DEFAULT_TIMEOUT,
|
|
937
|
+
globals: true,
|
|
938
|
+
provide: {
|
|
939
|
+
baseline: opts.baseline
|
|
940
|
+
}
|
|
465
941
|
});
|
|
466
942
|
await vi.start();
|
|
467
943
|
const dispose = (0, import_node.registerConsoleShortcuts)(vi, process.stdin, process.stdout);
|
|
@@ -474,25 +950,39 @@ var runVitest = async (file) => {
|
|
|
474
950
|
await flush();
|
|
475
951
|
};
|
|
476
952
|
|
|
477
|
-
// src/commands/eval.command.ts
|
|
478
|
-
var
|
|
953
|
+
// src/cli/commands/eval.command.ts
|
|
954
|
+
/**
 * Register the `eval` sub-command on the CLI program.
 *
 * `eval [dir]` runs the `*.eval.ts` files found under `dir` (default ".") through runVitest.
 * Token, dataset and URL default to the AXIOM_TOKEN / AXIOM_DATASET / AXIOM_URL environment
 * variables; token and dataset are required.
 *
 * @param {object} program2 - the commander program to extend.
 * @returns {object} whatever `program2.addCommand` returns.
 */
var loadEvalCommand = (program2) => {
  return program2.addCommand(
    new import_commander3.Command("eval").description("run evals locally").addArgument(
      new import_commander3.Argument("[dir]", "path of base directory to scan for *.eval.ts files").default(
        ".",
        "any eval file in current directory"
      )
    ).option("-w, --watch true", "keep server running and watch for changes", false).option("-t, --token <TOKEN>", "axiom token", process.env.AXIOM_TOKEN).option("-d, --dataset <DATASET>", "axiom dataset name", process.env.AXIOM_DATASET).option("-u, --url <AXIOM URL>", "axiom url", process.env.AXIOM_URL ?? "https://api.axiom.co").option("-b, --baseline <BASELINE ID>", "id of baseline evaluation to compare against").action(async (dir, options) => {
      if (!options.token || !options.dataset) {
        throw new Error("AXIOM_TOKEN, and AXIOM_DATASET must be set");
      }
      // The options above were validated but never passed on, so values given only on the
      // command line had no effect on the run. Publish them through the environment, which
      // is where the rest of this bundle reads its Axiom settings.
      // NOTE(review): code that captured these variables at load time will not see the
      // update — confirm against the exporter / eval service setup.
      process.env.AXIOM_TOKEN = options.token;
      process.env.AXIOM_DATASET = options.dataset;
      if (options.url) {
        process.env.AXIOM_URL = options.url;
      }
      await runVitest(dir, {
        watch: options.watch,
        baseline: options.baseline
      });
    })
  );
};
|
|
488
972
|
|
|
489
973
|
// src/bin.ts
|
|
974
|
+
var import_env = __toESM(require("@next/env"), 1);
|
|
490
975
|
var { loadEnvConfig } = import_env.default;
|
|
491
976
|
loadEnvConfig(process.cwd());
|
|
492
977
|
var program = new import_commander4.Command();
|
|
493
|
-
program.name("axiom").description("Axiom's CLI to manage your objects and run evals").version("0.
|
|
978
|
+
program.name("axiom").description("Axiom's CLI to manage your objects and run evals").version("0.14.0");
|
|
494
979
|
loadPushCommand(program);
|
|
495
980
|
loadPullCommand(program);
|
|
496
|
-
|
|
981
|
+
var evalcmd = loadEvalCommand(program);
|
|
497
982
|
program.parse();
|
|
983
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
984
|
+
0 && (module.exports = {
|
|
985
|
+
evalcmd,
|
|
986
|
+
program
|
|
987
|
+
});
|
|
498
988
|
//# sourceMappingURL=bin.cjs.map
|