@ls-stack/agent-eval 0.36.0 → 0.37.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-BlNzXWDM.mjs → app-C7ON9Wdh.mjs} +39 -4
- package/dist/apps/web/dist/assets/index-BiwYbMem.js +140 -0
- package/dist/apps/web/dist/assets/{index-D0rC5MSS.css → index-CKdoOah2.css} +1 -1
- package/dist/apps/web/dist/index.html +2 -2
- package/dist/bin.mjs +1 -1
- package/dist/{cli-Dg3abrOv.mjs → cli-CwGcJYWe.mjs} +57 -8
- package/dist/index.d.mts +41 -35
- package/dist/index.mjs +3 -3
- package/dist/runChild.mjs +44 -2
- package/dist/{runOrchestration-V1TxX8es.mjs → runOrchestration-C4o5TcIu.mjs} +42 -7
- package/dist/{runner-BCs5rzej.mjs → runner-BTH8m_Er.mjs} +2 -2
- package/dist/{runner-znY6PY1M.mjs → runner-LqeHPID6.mjs} +1 -1
- package/dist/src--13_4uDG.mjs +3 -0
- package/package.json +3 -3
- package/skills/agent-eval/SKILL.md +9 -1
- package/dist/apps/web/dist/assets/index-BYtcGddU.js +0 -140
- package/dist/src-DBypR4TV.mjs +0 -3
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { C as
|
|
1
|
+
import { C as parseEvalDiscovery, D as normalizeScoreDef, E as buildDeclaredColumnDefs, G as deriveScopedSummaryFromCases, O as validateCharts, S as loadEvalModule, St as getCaseRowEvalKey, T as loadConfig, U as getEvalTitle, V as applyDerivedCallAttributes, W as getEvalDisplayStatus, Y as runSummarySchema, _ as resolveTracePresentation, a as generateRunId, b as parseManualInputValues, bt as buildEvalKey, c as loadPersistedRunSnapshot, d as persistCaseDetail, f as deleteTemporaryRuns, g as runTouchesEval, gt as resolveLlmCallsConfig, h as recomputePersistedCaseStatus, hr as getEvalRegistry, ht as resolveApiCallsConfig, i as stripTerminalControlCodes, k as createFsCacheStore, l as loadPersistedRunSnapshots, m as recomputeEvalStatusesInRuns, n as getTargetEvalKeys, o as getLastRunStatuses, p as persistRunState, s as getLatestRunInfos, u as nextShortIdFromSnapshots, v as resolveArtifactPath, w as resolveEvalDefaultConfig, x as deriveEvalFreshness, xt as getCaseRowCaseKey, y as buildManualInputDescriptor } from "./runOrchestration-C4o5TcIu.mjs";
|
|
2
2
|
import { createHash, randomUUID } from "node:crypto";
|
|
3
3
|
import { copyFile, mkdir, readFile, rm, writeFile } from "node:fs/promises";
|
|
4
4
|
import { basename, dirname, extname, isAbsolute, join, relative, resolve, sep } from "node:path";
|
|
@@ -536,6 +536,7 @@ function isRunChildMessage(value) {
|
|
|
536
536
|
const runChildInspectArgEnv = "AGENT_EVALS_RUN_CHILD_INSPECT_ARG";
|
|
537
537
|
const inspectFlagPrefix = "--inspect";
|
|
538
538
|
const inspectBrkFlagPrefix = "--inspect-brk";
|
|
539
|
+
const childOutputTailMaxLength = 12e3;
|
|
539
540
|
function startRunChild(params) {
|
|
540
541
|
const child = spawn(process.execPath, [
|
|
541
542
|
...getRunChildExecArgv(),
|
|
@@ -546,11 +547,12 @@ function startRunChild(params) {
|
|
|
546
547
|
env: process.env,
|
|
547
548
|
stdio: [
|
|
548
549
|
"ignore",
|
|
549
|
-
"
|
|
550
|
-
"
|
|
550
|
+
"pipe",
|
|
551
|
+
"pipe",
|
|
551
552
|
"ipc"
|
|
552
553
|
]
|
|
553
554
|
});
|
|
555
|
+
const outputTail = createRunChildOutputTail(child);
|
|
554
556
|
params.runState.childProcess = child;
|
|
555
557
|
child.on("message", (message) => {
|
|
556
558
|
if (!isRunChildMessage(message)) return;
|
|
@@ -564,8 +566,39 @@ function startRunChild(params) {
|
|
|
564
566
|
if (params.runState.childProcess === child) params.runState.childProcess = void 0;
|
|
565
567
|
if (params.runState.manifest.status !== "running" || params.runState.childTerminalReceived) return;
|
|
566
568
|
const reason = signal !== null ? `Run child exited with signal ${signal}` : `Run child exited with code ${String(code)}`;
|
|
567
|
-
markRunErrored(params.runState, reason, params.managerContext);
|
|
569
|
+
markRunErrored(params.runState, formatUnexpectedRunChildExit(reason, outputTail), params.managerContext);
|
|
570
|
+
});
|
|
571
|
+
}
|
|
572
|
+
function createRunChildOutputTail(child) {
|
|
573
|
+
const tail = {
|
|
574
|
+
stdout: "",
|
|
575
|
+
stderr: ""
|
|
576
|
+
};
|
|
577
|
+
child.stdout?.on("data", (chunk) => {
|
|
578
|
+
process.stdout.write(chunk);
|
|
579
|
+
tail.stdout = appendOutputTail(tail.stdout, chunkToText(chunk));
|
|
580
|
+
});
|
|
581
|
+
child.stderr?.on("data", (chunk) => {
|
|
582
|
+
process.stderr.write(chunk);
|
|
583
|
+
tail.stderr = appendOutputTail(tail.stderr, chunkToText(chunk));
|
|
568
584
|
});
|
|
585
|
+
return tail;
|
|
586
|
+
}
|
|
587
|
+
function chunkToText(chunk) {
|
|
588
|
+
return typeof chunk === "string" ? chunk : chunk.toString("utf-8");
|
|
589
|
+
}
|
|
590
|
+
function appendOutputTail(current, next) {
|
|
591
|
+
const combined = current + next;
|
|
592
|
+
if (combined.length <= childOutputTailMaxLength) return combined;
|
|
593
|
+
return combined.slice(combined.length - childOutputTailMaxLength);
|
|
594
|
+
}
|
|
595
|
+
function formatUnexpectedRunChildExit(reason, outputTail) {
|
|
596
|
+
const sections = [reason];
|
|
597
|
+
const stderr = stripTerminalControlCodes(outputTail.stderr).trim();
|
|
598
|
+
const stdout = stripTerminalControlCodes(outputTail.stdout).trim();
|
|
599
|
+
if (stderr.length > 0) sections.push(`Child stderr (last ${String(stderr.length)} chars):\n${stderr}`);
|
|
600
|
+
if (stdout.length > 0) sections.push(`Child stdout (last ${String(stdout.length)} chars):\n${stdout}`);
|
|
601
|
+
return sections.join("\n\n");
|
|
569
602
|
}
|
|
570
603
|
function getRunChildExecArgv() {
|
|
571
604
|
const execArgv = [];
|
|
@@ -647,6 +680,7 @@ function applyChildEvalMetas(evals, childMetas) {
|
|
|
647
680
|
}
|
|
648
681
|
evalMeta.columnDefs = childMeta.columnDefs;
|
|
649
682
|
evalMeta.caseCount = childMeta.caseCount;
|
|
683
|
+
evalMeta.caseIds = childMeta.caseIds;
|
|
650
684
|
evalMeta.stats = childMeta.stats;
|
|
651
685
|
evalMeta.charts = childMeta.charts;
|
|
652
686
|
evalMeta.sourceFingerprint = childMeta.sourceFingerprint;
|
|
@@ -1064,6 +1098,7 @@ function createRunner({ watchForChanges = true } = {}) {
|
|
|
1064
1098
|
sourceFingerprint,
|
|
1065
1099
|
columnDefs,
|
|
1066
1100
|
caseCount: null,
|
|
1101
|
+
caseIds: void 0,
|
|
1067
1102
|
stats,
|
|
1068
1103
|
charts,
|
|
1069
1104
|
manualInputDescriptor,
|
|
@@ -1074,18 +1109,24 @@ function createRunner({ watchForChanges = true } = {}) {
|
|
|
1074
1109
|
emitDiscoveryEvent();
|
|
1075
1110
|
},
|
|
1076
1111
|
async startRun(request) {
|
|
1112
|
+
const deletedTemporaryRuns = await deleteTemporaryRuns({
|
|
1113
|
+
runs,
|
|
1114
|
+
cancelRunningRun: killRunChild
|
|
1115
|
+
});
|
|
1077
1116
|
const runId = generateRunId();
|
|
1078
1117
|
const shortId = `r${String(nextShortIdNum++)}`;
|
|
1079
1118
|
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
1080
1119
|
const cacheMode = request.cache?.mode ?? "use";
|
|
1081
1120
|
const runDir = join(localStateDir, "runs", runId);
|
|
1121
|
+
const gitState = readGitWorktreeState(workspaceRoot);
|
|
1082
1122
|
const manifest = {
|
|
1083
1123
|
id: runId,
|
|
1084
1124
|
shortId,
|
|
1085
1125
|
status: "running",
|
|
1126
|
+
temporary: request.temporary === true,
|
|
1086
1127
|
startedAt: now,
|
|
1087
1128
|
endedAt: null,
|
|
1088
|
-
commitSha:
|
|
1129
|
+
commitSha: gitState.commitSha,
|
|
1089
1130
|
evalSourceFingerprints: {},
|
|
1090
1131
|
target: request.target,
|
|
1091
1132
|
trials: request.trials,
|
|
@@ -1162,6 +1203,7 @@ function createRunner({ watchForChanges = true } = {}) {
|
|
|
1162
1203
|
emitDiscoveryEvent
|
|
1163
1204
|
}
|
|
1164
1205
|
});
|
|
1206
|
+
if (deletedTemporaryRuns > 0) emitDiscoveryEvent();
|
|
1165
1207
|
return {
|
|
1166
1208
|
manifest,
|
|
1167
1209
|
summary,
|
|
@@ -1445,6 +1487,7 @@ Flags:
|
|
|
1445
1487
|
--no-cache Shortcut for --cache bypass
|
|
1446
1488
|
--refresh-cache Shortcut for --cache refresh
|
|
1447
1489
|
--clear-cache Clear the cache before starting the run
|
|
1490
|
+
--temporary Persist until the next run starts, then delete
|
|
1448
1491
|
--input <json> Manual input value for a single targeted eval
|
|
1449
1492
|
that declares manualInput
|
|
1450
1493
|
--input-file <path> JSON object keyed by eval key (or eval id) with
|
|
@@ -1515,6 +1558,7 @@ Options:
|
|
|
1515
1558
|
--no-cache Shortcut for --cache bypass
|
|
1516
1559
|
--refresh-cache Shortcut for --cache refresh
|
|
1517
1560
|
--clear-cache Clear the cache before starting the run
|
|
1561
|
+
--temporary Persist until the next run starts, then delete
|
|
1518
1562
|
--no-env Disable automatic .env loading
|
|
1519
1563
|
--help, -h Show help
|
|
1520
1564
|
`);
|
|
@@ -1753,6 +1797,7 @@ function parseArgs(argv) {
|
|
|
1753
1797
|
port: 4100,
|
|
1754
1798
|
cacheMode: "use",
|
|
1755
1799
|
clearCache: false,
|
|
1800
|
+
temporary: false,
|
|
1756
1801
|
all: false,
|
|
1757
1802
|
loadEnv: normalizedArgv.length === argv.length,
|
|
1758
1803
|
inputJson: void 0,
|
|
@@ -1803,6 +1848,7 @@ function parseArgs(argv) {
|
|
|
1803
1848
|
} else if (arg === "--no-cache") args.cacheMode = "bypass";
|
|
1804
1849
|
else if (arg === "--refresh-cache") args.cacheMode = "refresh";
|
|
1805
1850
|
else if (arg === "--clear-cache") args.clearCache = true;
|
|
1851
|
+
else if (arg === "--temporary") args.temporary = true;
|
|
1806
1852
|
else if (arg === "--input" && next !== void 0) {
|
|
1807
1853
|
args.inputJson = next;
|
|
1808
1854
|
i++;
|
|
@@ -1940,8 +1986,8 @@ async function commandApp(args) {
|
|
|
1940
1986
|
const { serve } = await import("@hono/node-server");
|
|
1941
1987
|
const bundledWebDist = resolve(currentDir, "apps/web/dist");
|
|
1942
1988
|
if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
|
|
1943
|
-
const appModule = await import("./app-
|
|
1944
|
-
const runnerModule = await import("./runner-
|
|
1989
|
+
const appModule = await import("./app-C7ON9Wdh.mjs");
|
|
1990
|
+
const runnerModule = await import("./runner-LqeHPID6.mjs");
|
|
1945
1991
|
if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
|
|
1946
1992
|
if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
|
|
1947
1993
|
await runnerModule.initRunner();
|
|
@@ -2030,6 +2076,7 @@ async function commandRun(args) {
|
|
|
2030
2076
|
const run = await runner.startRun({
|
|
2031
2077
|
target,
|
|
2032
2078
|
trials: args.trials,
|
|
2079
|
+
temporary: args.temporary,
|
|
2033
2080
|
cache: { mode: args.cacheMode },
|
|
2034
2081
|
manualInputs: manualInputsResult.value
|
|
2035
2082
|
});
|
|
@@ -2037,6 +2084,7 @@ async function commandRun(args) {
|
|
|
2037
2084
|
console.info(`Run started: ${run.manifest.id}`);
|
|
2038
2085
|
console.info(`Trials: ${String(args.trials)}`);
|
|
2039
2086
|
if (args.cacheMode !== "use") console.info(`Cache mode: ${args.cacheMode}`);
|
|
2087
|
+
if (args.temporary) console.info("Temporary: yes");
|
|
2040
2088
|
console.info("");
|
|
2041
2089
|
}
|
|
2042
2090
|
await waitForRunCompletion(runner, run.manifest.id);
|
|
@@ -2152,6 +2200,7 @@ function buildRunFileIndex(workspaceRoot, run) {
|
|
|
2152
2200
|
id: run.manifest.id,
|
|
2153
2201
|
shortId: run.manifest.shortId,
|
|
2154
2202
|
status: run.manifest.status,
|
|
2203
|
+
temporary: run.manifest.temporary,
|
|
2155
2204
|
startedAt: run.manifest.startedAt,
|
|
2156
2205
|
endedAt: run.manifest.endedAt,
|
|
2157
2206
|
target: run.manifest.target,
|
|
@@ -2212,7 +2261,7 @@ function printRunFileIndexes(indexes) {
|
|
|
2212
2261
|
}
|
|
2213
2262
|
}
|
|
2214
2263
|
function printRunFileIndex(index) {
|
|
2215
|
-
console.info(`${index.shortId} (${index.id}) ${index.status} ${formatCaseCounts(index.summary)}`);
|
|
2264
|
+
console.info(`${index.shortId} (${index.id}) ${index.status}${index.temporary ? " temporary" : ""} ${formatCaseCounts(index.summary)}`);
|
|
2216
2265
|
console.info(` dir: ${index.files.dir}`);
|
|
2217
2266
|
console.info(` run: ${index.files.run}`);
|
|
2218
2267
|
console.info(` summary: ${index.files.summary}`);
|
package/dist/index.d.mts
CHANGED
|
@@ -127,6 +127,7 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
127
127
|
}>>;
|
|
128
128
|
}, z$1.core.$strip>>;
|
|
129
129
|
caseCount: z$1.ZodNullable<z$1.ZodNumber>;
|
|
130
|
+
caseIds: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
130
131
|
lastRunStatus: z$1.ZodNullable<z$1.ZodEnum<{
|
|
131
132
|
error: "error";
|
|
132
133
|
pass: "pass";
|
|
@@ -617,6 +618,7 @@ declare const runManifestSchema$1: z$1.ZodObject<{
|
|
|
617
618
|
cancelled: "cancelled";
|
|
618
619
|
error: "error";
|
|
619
620
|
}>;
|
|
621
|
+
temporary: z$1.ZodDefault<z$1.ZodOptional<z$1.ZodBoolean>>;
|
|
620
622
|
startedAt: z$1.ZodString;
|
|
621
623
|
endedAt: z$1.ZodNullable<z$1.ZodString>;
|
|
622
624
|
commitSha: z$1.ZodDefault<z$1.ZodOptional<z$1.ZodNullable<z$1.ZodString>>>;
|
|
@@ -963,6 +965,7 @@ declare const createRunRequestSchema$1: z$1.ZodObject<{
|
|
|
963
965
|
caseIds: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
964
966
|
}, z$1.core.$strip>;
|
|
965
967
|
trials: z$1.ZodNumber;
|
|
968
|
+
temporary: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
966
969
|
cache: z$1.ZodOptional<z$1.ZodObject<{
|
|
967
970
|
mode: z$1.ZodDefault<z$1.ZodEnum<{
|
|
968
971
|
use: "use";
|
|
@@ -2916,12 +2919,12 @@ declare const columnFormatSchema: z$1.ZodEnum<{
|
|
|
2916
2919
|
number: "number";
|
|
2917
2920
|
boolean: "boolean";
|
|
2918
2921
|
file: "file";
|
|
2919
|
-
duration: "duration";
|
|
2920
2922
|
markdown: "markdown";
|
|
2921
2923
|
json: "json";
|
|
2922
2924
|
image: "image";
|
|
2923
2925
|
audio: "audio";
|
|
2924
2926
|
video: "video";
|
|
2927
|
+
duration: "duration";
|
|
2925
2928
|
percent: "percent";
|
|
2926
2929
|
passFail: "passFail";
|
|
2927
2930
|
stars: "stars";
|
|
@@ -2941,12 +2944,12 @@ declare const columnDefSchema: z$1.ZodObject<{
|
|
|
2941
2944
|
number: "number";
|
|
2942
2945
|
boolean: "boolean";
|
|
2943
2946
|
file: "file";
|
|
2944
|
-
duration: "duration";
|
|
2945
2947
|
markdown: "markdown";
|
|
2946
2948
|
json: "json";
|
|
2947
2949
|
image: "image";
|
|
2948
2950
|
audio: "audio";
|
|
2949
2951
|
video: "video";
|
|
2952
|
+
duration: "duration";
|
|
2950
2953
|
percent: "percent";
|
|
2951
2954
|
passFail: "passFail";
|
|
2952
2955
|
stars: "stars";
|
|
@@ -2991,8 +2994,8 @@ declare const traceSpanKindSchema: z$1.ZodString;
|
|
|
2991
2994
|
declare const traceAttributeDisplayFormatSchema: z$1.ZodEnum<{
|
|
2992
2995
|
string: "string";
|
|
2993
2996
|
number: "number";
|
|
2994
|
-
duration: "duration";
|
|
2995
2997
|
json: "json";
|
|
2998
|
+
duration: "duration";
|
|
2996
2999
|
}>;
|
|
2997
3000
|
/**
|
|
2998
3001
|
* Formatting hint for trace attribute values rendered by the UI.
|
|
@@ -3016,8 +3019,8 @@ declare const traceAttributeDisplaySchema: z$1.ZodObject<{
|
|
|
3016
3019
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3017
3020
|
string: "string";
|
|
3018
3021
|
number: "number";
|
|
3019
|
-
duration: "duration";
|
|
3020
3022
|
json: "json";
|
|
3023
|
+
duration: "duration";
|
|
3021
3024
|
}>>;
|
|
3022
3025
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
3023
3026
|
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
@@ -3052,8 +3055,8 @@ declare const traceDisplayConfigSchema: z$1.ZodObject<{
|
|
|
3052
3055
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3053
3056
|
string: "string";
|
|
3054
3057
|
number: "number";
|
|
3055
|
-
duration: "duration";
|
|
3056
3058
|
json: "json";
|
|
3059
|
+
duration: "duration";
|
|
3057
3060
|
}>>;
|
|
3058
3061
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
3059
3062
|
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
@@ -3092,8 +3095,8 @@ declare const traceAttributeDisplayInputSchema: z$1.ZodObject<{
|
|
|
3092
3095
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3093
3096
|
string: "string";
|
|
3094
3097
|
number: "number";
|
|
3095
|
-
duration: "duration";
|
|
3096
3098
|
json: "json";
|
|
3099
|
+
duration: "duration";
|
|
3097
3100
|
}>>;
|
|
3098
3101
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
3099
3102
|
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
@@ -3130,8 +3133,8 @@ declare const traceDisplayInputConfigSchema: z$1.ZodObject<{
|
|
|
3130
3133
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3131
3134
|
string: "string";
|
|
3132
3135
|
number: "number";
|
|
3133
|
-
duration: "duration";
|
|
3134
3136
|
json: "json";
|
|
3137
|
+
duration: "duration";
|
|
3135
3138
|
}>>;
|
|
3136
3139
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
3137
3140
|
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
@@ -3263,12 +3266,12 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
3263
3266
|
number: "number";
|
|
3264
3267
|
boolean: "boolean";
|
|
3265
3268
|
file: "file";
|
|
3266
|
-
duration: "duration";
|
|
3267
3269
|
markdown: "markdown";
|
|
3268
3270
|
json: "json";
|
|
3269
3271
|
image: "image";
|
|
3270
3272
|
audio: "audio";
|
|
3271
3273
|
video: "video";
|
|
3274
|
+
duration: "duration";
|
|
3272
3275
|
percent: "percent";
|
|
3273
3276
|
passFail: "passFail";
|
|
3274
3277
|
stars: "stars";
|
|
@@ -3305,12 +3308,12 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
|
|
|
3305
3308
|
number: "number";
|
|
3306
3309
|
boolean: "boolean";
|
|
3307
3310
|
file: "file";
|
|
3308
|
-
duration: "duration";
|
|
3309
3311
|
markdown: "markdown";
|
|
3310
3312
|
json: "json";
|
|
3311
3313
|
image: "image";
|
|
3312
3314
|
audio: "audio";
|
|
3313
3315
|
video: "video";
|
|
3316
|
+
duration: "duration";
|
|
3314
3317
|
percent: "percent";
|
|
3315
3318
|
passFail: "passFail";
|
|
3316
3319
|
stars: "stars";
|
|
@@ -3348,12 +3351,12 @@ declare const evalSummarySchema: z$1.ZodObject<{
|
|
|
3348
3351
|
number: "number";
|
|
3349
3352
|
boolean: "boolean";
|
|
3350
3353
|
file: "file";
|
|
3351
|
-
duration: "duration";
|
|
3352
3354
|
markdown: "markdown";
|
|
3353
3355
|
json: "json";
|
|
3354
3356
|
image: "image";
|
|
3355
3357
|
audio: "audio";
|
|
3356
3358
|
video: "video";
|
|
3359
|
+
duration: "duration";
|
|
3357
3360
|
percent: "percent";
|
|
3358
3361
|
passFail: "passFail";
|
|
3359
3362
|
stars: "stars";
|
|
@@ -3372,6 +3375,7 @@ declare const evalSummarySchema: z$1.ZodObject<{
|
|
|
3372
3375
|
}>>;
|
|
3373
3376
|
}, z$1.core.$strip>>;
|
|
3374
3377
|
caseCount: z$1.ZodNullable<z$1.ZodNumber>;
|
|
3378
|
+
caseIds: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
3375
3379
|
lastRunStatus: z$1.ZodNullable<z$1.ZodEnum<{
|
|
3376
3380
|
error: "error";
|
|
3377
3381
|
pass: "pass";
|
|
@@ -3406,12 +3410,12 @@ declare const evalSummarySchema: z$1.ZodObject<{
|
|
|
3406
3410
|
number: "number";
|
|
3407
3411
|
boolean: "boolean";
|
|
3408
3412
|
file: "file";
|
|
3409
|
-
duration: "duration";
|
|
3410
3413
|
markdown: "markdown";
|
|
3411
3414
|
json: "json";
|
|
3412
3415
|
image: "image";
|
|
3413
3416
|
audio: "audio";
|
|
3414
3417
|
video: "video";
|
|
3418
|
+
duration: "duration";
|
|
3415
3419
|
percent: "percent";
|
|
3416
3420
|
passFail: "passFail";
|
|
3417
3421
|
stars: "stars";
|
|
@@ -3437,8 +3441,8 @@ declare const evalSummarySchema: z$1.ZodObject<{
|
|
|
3437
3441
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
3438
3442
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3439
3443
|
success: "success";
|
|
3440
|
-
accent: "accent";
|
|
3441
3444
|
error: "error";
|
|
3445
|
+
accent: "accent";
|
|
3442
3446
|
accentDim: "accentDim";
|
|
3443
3447
|
warning: "warning";
|
|
3444
3448
|
textMuted: "textMuted";
|
|
@@ -3461,8 +3465,8 @@ declare const evalSummarySchema: z$1.ZodObject<{
|
|
|
3461
3465
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
3462
3466
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3463
3467
|
success: "success";
|
|
3464
|
-
accent: "accent";
|
|
3465
3468
|
error: "error";
|
|
3469
|
+
accent: "accent";
|
|
3466
3470
|
accentDim: "accentDim";
|
|
3467
3471
|
warning: "warning";
|
|
3468
3472
|
textMuted: "textMuted";
|
|
@@ -3725,8 +3729,8 @@ declare const scoreTraceSchema: z$1.ZodObject<{
|
|
|
3725
3729
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3726
3730
|
string: "string";
|
|
3727
3731
|
number: "number";
|
|
3728
|
-
duration: "duration";
|
|
3729
3732
|
json: "json";
|
|
3733
|
+
duration: "duration";
|
|
3730
3734
|
}>>;
|
|
3731
3735
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
3732
3736
|
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
@@ -3811,8 +3815,8 @@ declare const caseDetailSchema: z$1.ZodObject<{
|
|
|
3811
3815
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3812
3816
|
string: "string";
|
|
3813
3817
|
number: "number";
|
|
3814
|
-
duration: "duration";
|
|
3815
3818
|
json: "json";
|
|
3819
|
+
duration: "duration";
|
|
3816
3820
|
}>>;
|
|
3817
3821
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
3818
3822
|
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
@@ -3880,8 +3884,8 @@ declare const caseDetailSchema: z$1.ZodObject<{
|
|
|
3880
3884
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3881
3885
|
string: "string";
|
|
3882
3886
|
number: "number";
|
|
3883
|
-
duration: "duration";
|
|
3884
3887
|
json: "json";
|
|
3888
|
+
duration: "duration";
|
|
3885
3889
|
}>>;
|
|
3886
3890
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
3887
3891
|
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
@@ -4045,8 +4049,8 @@ type EvalChartAggregate = z$1.infer<typeof evalChartAggregateSchema>;
|
|
|
4045
4049
|
*/
|
|
4046
4050
|
declare const evalChartColorSchema: z$1.ZodEnum<{
|
|
4047
4051
|
success: "success";
|
|
4048
|
-
accent: "accent";
|
|
4049
4052
|
error: "error";
|
|
4053
|
+
accent: "accent";
|
|
4050
4054
|
accentDim: "accentDim";
|
|
4051
4055
|
warning: "warning";
|
|
4052
4056
|
textMuted: "textMuted";
|
|
@@ -4074,8 +4078,8 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
4074
4078
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
4075
4079
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4076
4080
|
success: "success";
|
|
4077
|
-
accent: "accent";
|
|
4078
4081
|
error: "error";
|
|
4082
|
+
accent: "accent";
|
|
4079
4083
|
accentDim: "accentDim";
|
|
4080
4084
|
warning: "warning";
|
|
4081
4085
|
textMuted: "textMuted";
|
|
@@ -4098,8 +4102,8 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
4098
4102
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
4099
4103
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4100
4104
|
success: "success";
|
|
4101
|
-
accent: "accent";
|
|
4102
4105
|
error: "error";
|
|
4106
|
+
accent: "accent";
|
|
4103
4107
|
accentDim: "accentDim";
|
|
4104
4108
|
warning: "warning";
|
|
4105
4109
|
textMuted: "textMuted";
|
|
@@ -4157,8 +4161,8 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
|
|
|
4157
4161
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
4158
4162
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4159
4163
|
success: "success";
|
|
4160
|
-
accent: "accent";
|
|
4161
4164
|
error: "error";
|
|
4165
|
+
accent: "accent";
|
|
4162
4166
|
accentDim: "accentDim";
|
|
4163
4167
|
warning: "warning";
|
|
4164
4168
|
textMuted: "textMuted";
|
|
@@ -4181,8 +4185,8 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
|
|
|
4181
4185
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
4182
4186
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4183
4187
|
success: "success";
|
|
4184
|
-
accent: "accent";
|
|
4185
4188
|
error: "error";
|
|
4189
|
+
accent: "accent";
|
|
4186
4190
|
accentDim: "accentDim";
|
|
4187
4191
|
warning: "warning";
|
|
4188
4192
|
textMuted: "textMuted";
|
|
@@ -4247,8 +4251,8 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
|
|
|
4247
4251
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
4248
4252
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4249
4253
|
success: "success";
|
|
4250
|
-
accent: "accent";
|
|
4251
4254
|
error: "error";
|
|
4255
|
+
accent: "accent";
|
|
4252
4256
|
accentDim: "accentDim";
|
|
4253
4257
|
warning: "warning";
|
|
4254
4258
|
textMuted: "textMuted";
|
|
@@ -4271,8 +4275,8 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
|
|
|
4271
4275
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
4272
4276
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4273
4277
|
success: "success";
|
|
4274
|
-
accent: "accent";
|
|
4275
4278
|
error: "error";
|
|
4279
|
+
accent: "accent";
|
|
4276
4280
|
accentDim: "accentDim";
|
|
4277
4281
|
warning: "warning";
|
|
4278
4282
|
textMuted: "textMuted";
|
|
@@ -4327,15 +4331,16 @@ declare const runManifestSchema: z$1.ZodObject<{
|
|
|
4327
4331
|
pending: "pending";
|
|
4328
4332
|
completed: "completed";
|
|
4329
4333
|
}>;
|
|
4334
|
+
temporary: z$1.ZodDefault<z$1.ZodOptional<z$1.ZodBoolean>>;
|
|
4330
4335
|
startedAt: z$1.ZodString;
|
|
4331
4336
|
endedAt: z$1.ZodNullable<z$1.ZodString>;
|
|
4332
4337
|
commitSha: z$1.ZodDefault<z$1.ZodOptional<z$1.ZodNullable<z$1.ZodString>>>;
|
|
4333
4338
|
evalSourceFingerprints: z$1.ZodDefault<z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodString>>>;
|
|
4334
4339
|
target: z$1.ZodObject<{
|
|
4335
4340
|
mode: z$1.ZodEnum<{
|
|
4341
|
+
caseIds: "caseIds";
|
|
4336
4342
|
all: "all";
|
|
4337
4343
|
evalIds: "evalIds";
|
|
4338
|
-
caseIds: "caseIds";
|
|
4339
4344
|
}>;
|
|
4340
4345
|
evalKeys: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
4341
4346
|
files: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
@@ -4581,8 +4586,8 @@ declare const llmCallMetricFormatSchema: z$1.ZodEnum<{
|
|
|
4581
4586
|
string: "string";
|
|
4582
4587
|
number: "number";
|
|
4583
4588
|
boolean: "boolean";
|
|
4584
|
-
duration: "duration";
|
|
4585
4589
|
json: "json";
|
|
4590
|
+
duration: "duration";
|
|
4586
4591
|
}>;
|
|
4587
4592
|
/** Render format applied to an LLM-call metric value. */
|
|
4588
4593
|
type LlmCallMetricFormat = z$1.infer<typeof llmCallMetricFormatSchema>;
|
|
@@ -4591,8 +4596,8 @@ declare const apiCallMetricFormatSchema: z$1.ZodEnum<{
|
|
|
4591
4596
|
string: "string";
|
|
4592
4597
|
number: "number";
|
|
4593
4598
|
boolean: "boolean";
|
|
4594
|
-
duration: "duration";
|
|
4595
4599
|
json: "json";
|
|
4600
|
+
duration: "duration";
|
|
4596
4601
|
}>;
|
|
4597
4602
|
/** Render format applied to an API-call metric value. */
|
|
4598
4603
|
type ApiCallMetricFormat = z$1.infer<typeof apiCallMetricFormatSchema>;
|
|
@@ -4661,8 +4666,8 @@ declare const llmCallMetricSchema: z$1.ZodObject<{
|
|
|
4661
4666
|
string: "string";
|
|
4662
4667
|
number: "number";
|
|
4663
4668
|
boolean: "boolean";
|
|
4664
|
-
duration: "duration";
|
|
4665
4669
|
json: "json";
|
|
4670
|
+
duration: "duration";
|
|
4666
4671
|
}>>;
|
|
4667
4672
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
4668
4673
|
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
@@ -4690,8 +4695,8 @@ declare const apiCallMetricSchema: z$1.ZodObject<{
|
|
|
4690
4695
|
string: "string";
|
|
4691
4696
|
number: "number";
|
|
4692
4697
|
boolean: "boolean";
|
|
4693
|
-
duration: "duration";
|
|
4694
4698
|
json: "json";
|
|
4699
|
+
duration: "duration";
|
|
4695
4700
|
}>>;
|
|
4696
4701
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
4697
4702
|
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
@@ -4804,8 +4809,8 @@ declare const llmCallsConfigSchema: z$1.ZodObject<{
|
|
|
4804
4809
|
string: "string";
|
|
4805
4810
|
number: "number";
|
|
4806
4811
|
boolean: "boolean";
|
|
4807
|
-
duration: "duration";
|
|
4808
4812
|
json: "json";
|
|
4813
|
+
duration: "duration";
|
|
4809
4814
|
}>>;
|
|
4810
4815
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
4811
4816
|
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
@@ -4840,8 +4845,8 @@ declare const apiCallsConfigSchema: z$1.ZodObject<{
|
|
|
4840
4845
|
string: "string";
|
|
4841
4846
|
number: "number";
|
|
4842
4847
|
boolean: "boolean";
|
|
4843
|
-
duration: "duration";
|
|
4844
4848
|
json: "json";
|
|
4849
|
+
duration: "duration";
|
|
4845
4850
|
}>>;
|
|
4846
4851
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
4847
4852
|
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
@@ -5142,8 +5147,8 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
|
|
|
5142
5147
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
5143
5148
|
string: "string";
|
|
5144
5149
|
number: "number";
|
|
5145
|
-
duration: "duration";
|
|
5146
5150
|
json: "json";
|
|
5151
|
+
duration: "duration";
|
|
5147
5152
|
}>>;
|
|
5148
5153
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
5149
5154
|
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
@@ -5191,12 +5196,12 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
|
|
|
5191
5196
|
number: "number";
|
|
5192
5197
|
boolean: "boolean";
|
|
5193
5198
|
file: "file";
|
|
5194
|
-
duration: "duration";
|
|
5195
5199
|
markdown: "markdown";
|
|
5196
5200
|
json: "json";
|
|
5197
5201
|
image: "image";
|
|
5198
5202
|
audio: "audio";
|
|
5199
5203
|
video: "video";
|
|
5204
|
+
duration: "duration";
|
|
5200
5205
|
percent: "percent";
|
|
5201
5206
|
passFail: "passFail";
|
|
5202
5207
|
stars: "stars";
|
|
@@ -5255,8 +5260,8 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
|
|
|
5255
5260
|
string: "string";
|
|
5256
5261
|
number: "number";
|
|
5257
5262
|
boolean: "boolean";
|
|
5258
|
-
duration: "duration";
|
|
5259
5263
|
json: "json";
|
|
5264
|
+
duration: "duration";
|
|
5260
5265
|
}>>;
|
|
5261
5266
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
5262
5267
|
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
@@ -5300,8 +5305,8 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
|
|
|
5300
5305
|
string: "string";
|
|
5301
5306
|
number: "number";
|
|
5302
5307
|
boolean: "boolean";
|
|
5303
|
-
duration: "duration";
|
|
5304
5308
|
json: "json";
|
|
5309
|
+
duration: "duration";
|
|
5305
5310
|
}>>;
|
|
5306
5311
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
5307
5312
|
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
@@ -6084,9 +6089,9 @@ type ConfigReloadState = z$1.infer<typeof configReloadStateSchema>;
|
|
|
6084
6089
|
declare const createRunRequestSchema: z$1.ZodObject<{
|
|
6085
6090
|
target: z$1.ZodObject<{
|
|
6086
6091
|
mode: z$1.ZodEnum<{
|
|
6092
|
+
caseIds: "caseIds";
|
|
6087
6093
|
all: "all";
|
|
6088
6094
|
evalIds: "evalIds";
|
|
6089
|
-
caseIds: "caseIds";
|
|
6090
6095
|
}>;
|
|
6091
6096
|
evalKeys: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
6092
6097
|
files: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
@@ -6094,6 +6099,7 @@ declare const createRunRequestSchema: z$1.ZodObject<{
|
|
|
6094
6099
|
caseIds: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
6095
6100
|
}, z$1.core.$strip>;
|
|
6096
6101
|
trials: z$1.ZodNumber;
|
|
6102
|
+
temporary: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
6097
6103
|
cache: z$1.ZodOptional<z$1.ZodObject<{
|
|
6098
6104
|
mode: z$1.ZodDefault<z$1.ZodEnum<{
|
|
6099
6105
|
refresh: "refresh";
|
package/dist/index.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { $ as
|
|
2
|
-
import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-
|
|
3
|
-
import "./src
|
|
1
|
+
import { $ as apiCallMetricFormatSchema, $n as evalAssert, $t as evalChartTypeSchema, A as configReloadStateSchema, An as jsonCellSchema, At as evalStatsConfigSchema, B as simulateTokenAllocation, Bn as hashCacheKeySync, Bt as manualInputJsonFieldSchema, Cn as traceSpanSchema, Ct as assertionFailureSchema, Dn as columnFormatSchema, Dt as evalFreshnessStatusSchema, En as columnDefSchema, Et as discoveryIssueSchema, F as extractCacheEntries, Fn as buildTraceTree, Ft as runLogPhaseSchema, G as deriveScopedSummaryFromCases, Gn as repoFile, Gt as manualInputTextFieldSchema, H as getNestedAttribute, Hn as deserializeCacheValue, Ht as manualInputNumberFieldSchema, I as extractCacheHits, In as captureEvalSpanError, It as scoreTraceSchema, J as runManifestSchema, Jn as evalExpect, Jt as evalChartBuiltinMetricSchema, K as deriveStatusFromCaseRows, Kn as manualInputFileValueSchema, Kt as evalChartAggregateSchema, L as extractApiCalls, Ln as evalSpan, Lt as manualInputBooleanFieldSchema, M as createRunRequestSchema, Mn as repoFileRefSchema, Mt as runLogEntrySchema, N as updateManualScoreRequestSchema, Nn as runArtifactRefSchema, Nt as runLogLevelSchema, On as columnKindSchema, Ot as evalStatAggregateSchema, P as sseEnvelopeSchema, Pn as z, Pt as runLogLocationSchema, Q as agentEvalsConfigSchema, Qt as evalChartTooltipExtraSchema, R as extractLlmCalls, Rn as evalTracer, Rt as manualInputDescriptorSchema, Sn as traceSpanKindSchema, St as getCaseRowEvalKey, Tn as cellValueSchema, Tt as caseRowSchema, U as getEvalTitle, Un as serializeCacheRecording, Ut as manualInputSelectFieldSchema, V as applyDerivedCallAttributes, Vn as deserializeCacheRecording, Vt as manualInputMultilineFieldSchema, W as getEvalDisplayStatus, Wn as serializeCacheValue, Wt as manualInputSelectOptionSchema, X as DEFAULT_API_CALLS_CONFIG, Xn as advanceEvalTime, Xt as evalChartConfigSchema, Y as runSummarySchema, Yn as EvalAssertionError, Yt as evalChartColorSchema, Z as DEFAULT_LLM_CALLS_CONFIG, Zn as appendToEvalOutput, Zt as evalChartMetricSchema, _n as traceAttributeDisplayPlacementSchema, _t as runLogsConfigSchema, an as cacheFileSchema, ar as isInEvalScope, at as evalColumnsSchema, bn as traceDisplayInputConfigSchema, bt as buildEvalKey, cn as cacheOperationTypeSchema, cr as runInEvalRuntimeScope, ct as llmCallMetricFormatSchema, dn as cacheStatusSchema, dr as setEvalOutput, dt as llmCallPricingRateSchema, en as evalChartsConfigSchema, er as evalLog, et as apiCallMetricPlacementSchema, fn as serializedCacheSpanSchema, fr as setScopeCacheContext, ft as llmCallPricingSchema, gn as traceAttributeDisplayInputSchema, gt as resolveLlmCallsConfig, hn as traceAttributeDisplayFormatSchema, hr as getEvalRegistry, ht as resolveApiCallsConfig, in as cacheEntryWithDebugKeySchema, ir as incrementEvalOutput, it as evalColumnOverrideSchema, j as configReloadStatusSchema, jn as numberDisplayOptionsSchema, jt as evalSummarySchema, kn as fileRefSchema, kt as evalStatItemSchema, ln as cacheRecordingOpSchema, lr as runInEvalScope, lt as llmCallMetricPlacementSchema, mn as traceCacheRefSchema, mr as defineEval, mt as removeDefaultConfigSchema, nn as cacheDebugKeyFileSchema, nr as getEvalCaseInput, nt as apiCallsConfigSchema, on as cacheListItemSchema, or as mergeEvalOutput, ot as evalDeriveConfigSchema, pn as spanCacheOptionsSchema, pr as startEvalBackgroundJob, pt as llmCallsConfigSchema, q as deriveStatusFromChildStatuses, qn as readManualInputFile, qt as evalChartAxisSchema, rn as cacheEntrySchema, rr as getEvalStartTime, rt as defaultConfigKeySchema, sn as cacheModeSchema, sr as nextEvalId, st as llmCallCostCurrencySchema, tn as cacheDebugKeyEntrySchema, tr as getCurrentScope, tt as apiCallMetricSchema, un as cacheRecordingSchema, ur as runInExistingEvalScope, ut as llmCallMetricSchema, vn as traceAttributeDisplaySchema, vt as trialSelectionModeSchema, wn as traceSpanWarningSchema, wt as caseDetailSchema, xn as traceSpanErrorSchema, xt as getCaseRowCaseKey, yn as traceDisplayConfigSchema, yt as buildCaseKey, z as simulateLlmCallCost, zn as hashCacheKey, zt as manualInputFieldDescriptorSchema } from "./runOrchestration-C4o5TcIu.mjs";
|
|
2
|
+
import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-CwGcJYWe.mjs";
|
|
3
|
+
import "./src--13_4uDG.mjs";
|
|
4
4
|
export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, advanceEvalTime, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, applyDerivedCallAttributes, assertionFailureSchema, buildCaseKey, buildEvalKey, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, cleanupStagedManualInputFiles, columnDefSchema, columnFormatSchema, columnKindSchema, configReloadStateSchema, configReloadStatusSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, discoveryIssueSchema, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalColumnOverrideSchema, evalColumnsSchema, evalDeriveConfigSchema, evalExpect, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCaseRowCaseKey, getCaseRowEvalKey, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalStartTime, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, jsonCellSchema, llmCallCostCurrencySchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingRateSchema, llmCallPricingSchema, llmCallsConfigSchema, manualInputBooleanFieldSchema, manualInputDescriptorSchema, manualInputFieldDescriptorSchema, manualInputFileValueSchema, manualInputJsonFieldSchema, manualInputMultilineFieldSchema, manualInputNumberFieldSchema, manualInputSelectFieldSchema, manualInputSelectOptionSchema, manualInputTextFieldSchema, materializeManualInputFiles, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, readManualInputFile, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, spanCacheOptionsSchema, sseEnvelopeSchema, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
|