@ls-stack/agent-eval 0.60.2 → 0.60.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-DPCFFkyQ.mjs → app-gg10KvzS.mjs} +4 -4
- package/dist/bin.mjs +1 -1
- package/dist/caseChild.mjs +1 -1
- package/dist/{cli-CbePEEua.mjs → cli-OLZIjQpx.mjs} +41 -30
- package/dist/index.d.mts +82 -82
- package/dist/index.mjs +3 -3
- package/dist/runChild.mjs +2 -2
- package/dist/{runExecution-Bq0Y3y_1.mjs → runExecution-Bu9yfdUS.mjs} +1 -1
- package/dist/{runOrchestration-BpwW0AmB.mjs → runOrchestration-mpgZmEZ6.mjs} +41 -8
- package/dist/{runner-XEP21_u9.mjs → runner-C4Y0lWb1.mjs} +1 -1
- package/dist/{runner-Kp0JqxrU.mjs → runner-SxtKn-Xh.mjs} +2 -2
- package/dist/{src-CVM_FqPx.mjs → src-Cy3OxoZW.mjs} +2 -2
- package/package.json +3 -3
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { o as stageManualInputFile } from "./cli-
|
|
3
|
-
import "./src-
|
|
4
|
-
import { t as getRunnerInstance } from "./runner-
|
|
1
|
+
import { Dt as caseRowSchema, Tt as getCaseRowCaseKey, et as createRunRequestSchema, nt as extractCacheEntries, tt as updateManualScoreRequestSchema, ut as getEvalTitle } from "./runExecution-Bu9yfdUS.mjs";
|
|
2
|
+
import { o as stageManualInputFile } from "./cli-OLZIjQpx.mjs";
|
|
3
|
+
import "./src-Cy3OxoZW.mjs";
|
|
4
|
+
import { t as getRunnerInstance } from "./runner-SxtKn-Xh.mjs";
|
|
5
5
|
import { z } from "zod";
|
|
6
6
|
import { resultify } from "t-result";
|
|
7
7
|
import { readFile } from "node:fs/promises";
|
package/dist/bin.mjs
CHANGED
package/dist/caseChild.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { Ft as runWithEvalRegistry, I as configureEvalRunLogs, St as resolveLlmCallsConfig, _ as createFsCacheStore, a as isCaseChildParentMessage, d as loadEvalModule, g as createBufferedCacheStore, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, q as runInEvalRuntimeScope, r as runCase, v as getCacheRetentionOptions, xt as resolveApiCallsConfig } from "./runExecution-Bu9yfdUS.mjs";
|
|
2
2
|
//#region ../runner/src/caseChild.ts
|
|
3
3
|
let fatalErrorReported = false;
|
|
4
4
|
let disconnectExpected = false;
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { C as
|
|
1
|
+
import { Dt as caseRowSchema, Pt as getEvalRegistry, St as resolveLlmCallsConfig, Tt as getCaseRowCaseKey, _ as createFsCacheStore, bt as runSummarySchema, c as resolveArtifactPath, ct as applyDerivedCallAttributes, dt as getEvalDisplayStatus, f as resolveEvalDefaultConfig, ft as deriveScopedSummaryFromCases, gt as matchesTagsFilter, m as buildDeclaredColumnDefs, o as stripTerminalControlCodes, p as loadConfig, s as resolveTracePresentation, ut as getEvalTitle, v as getCacheRetentionOptions, wt as buildEvalKey, xt as resolveApiCallsConfig } from "./runExecution-Bu9yfdUS.mjs";
|
|
2
|
+
import { C as parseEvalDiscovery, S as loadIsolatedEvalRegistry, _ as recomputePersistedCaseStatus, a as validateTagsFilters, b as parseManualInputValues, c as getLatestRunInfos, d as loadPersistedRunSnapshots, f as nextShortIdFromSnapshots, g as recomputeEvalStatusesInRuns, h as persistRunState, i as resolveEvalTags, l as loadPersistedCaseDetail, m as deleteTemporaryRuns, n as getTargetEvalKeys, o as generateRunId, p as persistCaseDetail, s as getLastRunStatuses, u as loadPersistedRunSnapshot, v as runTouchesEval, w as validateCharts, x as deriveEvalFreshness, y as buildManualInputDescriptor } from "./runOrchestration-mpgZmEZ6.mjs";
|
|
3
3
|
import { parseEnv } from "node:util";
|
|
4
4
|
import { resultify } from "t-result";
|
|
5
5
|
import { copyFile, mkdir, readFile, rm, writeFile } from "node:fs/promises";
|
|
@@ -28,10 +28,14 @@ function resolveCaseDetailLookup(run, caseId) {
|
|
|
28
28
|
const caseDetail = run.caseDetails.get(lookupId);
|
|
29
29
|
if (caseDetail) return caseDetail;
|
|
30
30
|
}
|
|
31
|
-
const matchingCaseRow = run
|
|
31
|
+
const matchingCaseRow = resolveCaseRowForCaseDetailLookup(run, caseId);
|
|
32
32
|
if (matchingCaseRow === void 0) return void 0;
|
|
33
33
|
return run.caseDetails.get(getCaseRowCaseKey(matchingCaseRow));
|
|
34
34
|
}
|
|
35
|
+
function resolveCaseRowForCaseDetailLookup(run, caseId) {
|
|
36
|
+
const lookupIds = new Set(getCaseLookupIds(caseId));
|
|
37
|
+
return run.cases.find((caseRow) => lookupIds.has(getCaseRowCaseKey(caseRow)) || lookupIds.has(caseRow.caseId));
|
|
38
|
+
}
|
|
35
39
|
//#endregion
|
|
36
40
|
//#region ../runner/src/configReload.ts
|
|
37
41
|
/** Coordinates idle-only reloads for workspace config and `.env` in app mode. */
|
|
@@ -823,7 +827,7 @@ async function markRunTerminalFromChild(runState, event, managerContext) {
|
|
|
823
827
|
runState.manifest = snapshot.manifest;
|
|
824
828
|
runState.summary = snapshot.summary;
|
|
825
829
|
runState.cases = snapshot.cases;
|
|
826
|
-
runState.caseDetails =
|
|
830
|
+
runState.caseDetails = /* @__PURE__ */ new Map();
|
|
827
831
|
} else if (event.type === "run.finished") {
|
|
828
832
|
runState.manifest.status = "completed";
|
|
829
833
|
runState.manifest.endedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
@@ -969,7 +973,6 @@ function createRunner({ watchForChanges = true, loadEnv = true } = {}) {
|
|
|
969
973
|
let discoveryRefreshTimer;
|
|
970
974
|
let runHistoryRefreshTimer;
|
|
971
975
|
let cachePruneIdleTimer;
|
|
972
|
-
let registryLoadCounter = 0;
|
|
973
976
|
const configReload = createConfigReloadController({
|
|
974
977
|
getActiveRunCount,
|
|
975
978
|
closeRunnerWatchers: closeWatchers,
|
|
@@ -995,9 +998,28 @@ function createRunner({ watchForChanges = true, loadEnv = true } = {}) {
|
|
|
995
998
|
if (typeof config.concurrency !== "number" || !Number.isFinite(config.concurrency)) return 1;
|
|
996
999
|
return Math.max(1, Math.floor(config.concurrency));
|
|
997
1000
|
}
|
|
998
|
-
function
|
|
999
|
-
|
|
1000
|
-
return
|
|
1001
|
+
function getCaseDetailFileId(run, caseRow) {
|
|
1002
|
+
const caseKey = getCaseRowCaseKey(caseRow);
|
|
1003
|
+
return run.cases.some((existing) => existing.caseId === caseRow.caseId && getCaseRowCaseKey(existing) !== caseKey) ? caseKey : caseRow.caseId;
|
|
1004
|
+
}
|
|
1005
|
+
function hydrateCaseDetailForRow(run, caseRow) {
|
|
1006
|
+
const caseKey = getCaseRowCaseKey(caseRow);
|
|
1007
|
+
const cached = run.caseDetails.get(caseKey);
|
|
1008
|
+
if (cached !== void 0) return cached;
|
|
1009
|
+
const detail = loadPersistedCaseDetail(run.runDir, getCaseDetailFileId(run, caseRow));
|
|
1010
|
+
if (detail === null) return void 0;
|
|
1011
|
+
run.caseDetails.set(detail.caseKey ?? detail.caseId, detail);
|
|
1012
|
+
return detail;
|
|
1013
|
+
}
|
|
1014
|
+
function hydrateCaseDetailForLookup(run, caseId) {
|
|
1015
|
+
const cached = resolveCaseDetailLookup(run, caseId);
|
|
1016
|
+
if (cached !== void 0) return cached;
|
|
1017
|
+
const caseRow = resolveCaseRowForCaseDetailLookup(run, caseId);
|
|
1018
|
+
if (caseRow === void 0) return void 0;
|
|
1019
|
+
return hydrateCaseDetailForRow(run, caseRow);
|
|
1020
|
+
}
|
|
1021
|
+
function getDiscoveryModuleIsolationKey(filePath) {
|
|
1022
|
+
return `discovery:${filePath}`;
|
|
1001
1023
|
}
|
|
1002
1024
|
const runner = {
|
|
1003
1025
|
async init() {
|
|
@@ -1018,29 +1040,17 @@ function createRunner({ watchForChanges = true, loadEnv = true } = {}) {
|
|
|
1018
1040
|
async recomputeStatusesForEval(evalKey) {
|
|
1019
1041
|
const evalMeta = resolveEvalMeta(evalKey);
|
|
1020
1042
|
if (!evalMeta) return { updatedRuns: 0 };
|
|
1021
|
-
const entry = (await loadIsolatedEvalRegistry({
|
|
1022
|
-
evalFilePath: evalMeta.sourceFilePath,
|
|
1023
|
-
sourceFingerprint: evalMeta.sourceFingerprint ?? void 0,
|
|
1024
|
-
moduleIsolation: {
|
|
1025
|
-
key: nextRegistryLoadIsolationKey("recompute-status", evalMeta.sourceFilePath),
|
|
1026
|
-
workspaceRoot
|
|
1027
|
-
},
|
|
1028
|
-
runtimeScope: "env"
|
|
1029
|
-
})).get(evalMeta.id);
|
|
1030
|
-
if (!entry) return { updatedRuns: 0 };
|
|
1031
1043
|
const scoreThresholds = /* @__PURE__ */ new Map();
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
}
|
|
1037
|
-
for (const [key, def] of Object.entries(evalDef.manualScores ?? {})) if (def.passThreshold !== void 0) scoreThresholds.set(key, def.passThreshold);
|
|
1038
|
-
});
|
|
1044
|
+
for (const columnDef of evalMeta.columnDefs) {
|
|
1045
|
+
if (columnDef.isScore !== true || columnDef.passThreshold === void 0) continue;
|
|
1046
|
+
scoreThresholds.set(columnDef.key, columnDef.passThreshold);
|
|
1047
|
+
}
|
|
1039
1048
|
const updatedRuns = await recomputeEvalStatusesInRuns({
|
|
1040
1049
|
runs: runs.values(),
|
|
1041
1050
|
evalKey: evalMeta.key,
|
|
1042
1051
|
evalExists: evals.has(evalMeta.key),
|
|
1043
1052
|
scoreThresholds,
|
|
1053
|
+
getCaseDetail: hydrateCaseDetailForRow,
|
|
1044
1054
|
persistCaseDetail
|
|
1045
1055
|
});
|
|
1046
1056
|
emitDiscoveryEvent();
|
|
@@ -1052,6 +1062,7 @@ function createRunner({ watchForChanges = true, loadEnv = true } = {}) {
|
|
|
1052
1062
|
updated: false,
|
|
1053
1063
|
reason: "Run not found"
|
|
1054
1064
|
};
|
|
1065
|
+
hydrateCaseDetailForLookup(run, caseId);
|
|
1055
1066
|
return recalculateDerivedAttributesForCase({
|
|
1056
1067
|
run,
|
|
1057
1068
|
caseId,
|
|
@@ -1107,7 +1118,7 @@ function createRunner({ watchForChanges = true, loadEnv = true } = {}) {
|
|
|
1107
1118
|
updated: false,
|
|
1108
1119
|
reason: "Manual score not found"
|
|
1109
1120
|
};
|
|
1110
|
-
const caseDetail = run
|
|
1121
|
+
const caseDetail = hydrateCaseDetailForRow(run, caseRow);
|
|
1111
1122
|
if (!caseDetail) return {
|
|
1112
1123
|
updated: false,
|
|
1113
1124
|
reason: "Case detail not found"
|
|
@@ -1238,7 +1249,7 @@ function createRunner({ watchForChanges = true, loadEnv = true } = {}) {
|
|
|
1238
1249
|
evalFilePath: filePath,
|
|
1239
1250
|
sourceFingerprint,
|
|
1240
1251
|
moduleIsolation: {
|
|
1241
|
-
key:
|
|
1252
|
+
key: getDiscoveryModuleIsolationKey(filePath),
|
|
1242
1253
|
workspaceRoot
|
|
1243
1254
|
},
|
|
1244
1255
|
runtimeScope: "env"
|
|
@@ -1480,7 +1491,7 @@ function createRunner({ watchForChanges = true, loadEnv = true } = {}) {
|
|
|
1480
1491
|
getCaseDetail(runId, caseId) {
|
|
1481
1492
|
const run = runs.get(runId);
|
|
1482
1493
|
if (!run) return void 0;
|
|
1483
|
-
return
|
|
1494
|
+
return hydrateCaseDetailForLookup(run, caseId);
|
|
1484
1495
|
},
|
|
1485
1496
|
subscribe(runId, listener) {
|
|
1486
1497
|
const run = runs.get(runId);
|
|
@@ -2232,8 +2243,8 @@ async function commandApp(args) {
|
|
|
2232
2243
|
const { serve } = await import("@hono/node-server");
|
|
2233
2244
|
const bundledWebDist = resolve(currentDir, "apps/web/dist");
|
|
2234
2245
|
if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
|
|
2235
|
-
const appModule = await import("./app-
|
|
2236
|
-
const runnerModule = await import("./runner-
|
|
2246
|
+
const appModule = await import("./app-gg10KvzS.mjs");
|
|
2247
|
+
const runnerModule = await import("./runner-C4Y0lWb1.mjs");
|
|
2237
2248
|
if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
|
|
2238
2249
|
if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
|
|
2239
2250
|
await runnerModule.initRunner({ loadEnv: args.loadEnv });
|
package/dist/index.d.mts
CHANGED
|
@@ -1942,7 +1942,6 @@ declare const columnFormatSchema: z.ZodEnum<{
|
|
|
1942
1942
|
number: "number";
|
|
1943
1943
|
boolean: "boolean";
|
|
1944
1944
|
file: "file";
|
|
1945
|
-
duration: "duration";
|
|
1946
1945
|
markdown: "markdown";
|
|
1947
1946
|
json: "json";
|
|
1948
1947
|
image: "image";
|
|
@@ -1950,6 +1949,7 @@ declare const columnFormatSchema: z.ZodEnum<{
|
|
|
1950
1949
|
pdf: "pdf";
|
|
1951
1950
|
audio: "audio";
|
|
1952
1951
|
video: "video";
|
|
1952
|
+
duration: "duration";
|
|
1953
1953
|
percent: "percent";
|
|
1954
1954
|
passFail: "passFail";
|
|
1955
1955
|
stars: "stars";
|
|
@@ -1969,7 +1969,6 @@ declare const columnDefSchema: z.ZodObject<{
|
|
|
1969
1969
|
number: "number";
|
|
1970
1970
|
boolean: "boolean";
|
|
1971
1971
|
file: "file";
|
|
1972
|
-
duration: "duration";
|
|
1973
1972
|
markdown: "markdown";
|
|
1974
1973
|
json: "json";
|
|
1975
1974
|
image: "image";
|
|
@@ -1977,6 +1976,7 @@ declare const columnDefSchema: z.ZodObject<{
|
|
|
1977
1976
|
pdf: "pdf";
|
|
1978
1977
|
audio: "audio";
|
|
1979
1978
|
video: "video";
|
|
1979
|
+
duration: "duration";
|
|
1980
1980
|
percent: "percent";
|
|
1981
1981
|
passFail: "passFail";
|
|
1982
1982
|
stars: "stars";
|
|
@@ -2022,8 +2022,8 @@ type CellValue = z.infer<typeof cellValueSchema>; //#endregion
|
|
|
2022
2022
|
declare const traceAttributeDisplayFormatSchema: z.ZodEnum<{
|
|
2023
2023
|
string: "string";
|
|
2024
2024
|
number: "number";
|
|
2025
|
-
duration: "duration";
|
|
2026
2025
|
json: "json";
|
|
2026
|
+
duration: "duration";
|
|
2027
2027
|
}>;
|
|
2028
2028
|
/**
|
|
2029
2029
|
* Formatting hint for trace attribute values rendered by the UI.
|
|
@@ -2047,8 +2047,8 @@ declare const traceAttributeDisplaySchema: z.ZodObject<{
|
|
|
2047
2047
|
format: z.ZodOptional<z.ZodEnum<{
|
|
2048
2048
|
string: "string";
|
|
2049
2049
|
number: "number";
|
|
2050
|
-
duration: "duration";
|
|
2051
2050
|
json: "json";
|
|
2051
|
+
duration: "duration";
|
|
2052
2052
|
}>>;
|
|
2053
2053
|
numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
2054
2054
|
placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
|
|
@@ -2083,8 +2083,8 @@ declare const traceDisplayConfigSchema: z.ZodObject<{
|
|
|
2083
2083
|
format: z.ZodOptional<z.ZodEnum<{
|
|
2084
2084
|
string: "string";
|
|
2085
2085
|
number: "number";
|
|
2086
|
-
duration: "duration";
|
|
2087
2086
|
json: "json";
|
|
2087
|
+
duration: "duration";
|
|
2088
2088
|
}>>;
|
|
2089
2089
|
numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
2090
2090
|
placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
|
|
@@ -2123,8 +2123,8 @@ declare const traceAttributeDisplayInputSchema: z.ZodObject<{
|
|
|
2123
2123
|
format: z.ZodOptional<z.ZodEnum<{
|
|
2124
2124
|
string: "string";
|
|
2125
2125
|
number: "number";
|
|
2126
|
-
duration: "duration";
|
|
2127
2126
|
json: "json";
|
|
2127
|
+
duration: "duration";
|
|
2128
2128
|
}>>;
|
|
2129
2129
|
numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
2130
2130
|
placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
|
|
@@ -2161,8 +2161,8 @@ declare const traceDisplayInputConfigSchema: z.ZodObject<{
|
|
|
2161
2161
|
format: z.ZodOptional<z.ZodEnum<{
|
|
2162
2162
|
string: "string";
|
|
2163
2163
|
number: "number";
|
|
2164
|
-
duration: "duration";
|
|
2165
2164
|
json: "json";
|
|
2165
|
+
duration: "duration";
|
|
2166
2166
|
}>>;
|
|
2167
2167
|
numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
2168
2168
|
placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
|
|
@@ -2214,8 +2214,8 @@ declare const traceSpanSchema$1: z.ZodObject<{
|
|
|
2214
2214
|
status: z.ZodEnum<{
|
|
2215
2215
|
error: "error";
|
|
2216
2216
|
running: "running";
|
|
2217
|
-
cancelled: "cancelled";
|
|
2218
2217
|
ok: "ok";
|
|
2218
|
+
cancelled: "cancelled";
|
|
2219
2219
|
}>;
|
|
2220
2220
|
attributes: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
2221
2221
|
error: z.ZodOptional<z.ZodObject<{
|
|
@@ -2327,7 +2327,6 @@ declare const evalStatItemSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
2327
2327
|
number: "number";
|
|
2328
2328
|
boolean: "boolean";
|
|
2329
2329
|
file: "file";
|
|
2330
|
-
duration: "duration";
|
|
2331
2330
|
markdown: "markdown";
|
|
2332
2331
|
json: "json";
|
|
2333
2332
|
image: "image";
|
|
@@ -2335,6 +2334,7 @@ declare const evalStatItemSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
2335
2334
|
pdf: "pdf";
|
|
2336
2335
|
audio: "audio";
|
|
2337
2336
|
video: "video";
|
|
2337
|
+
duration: "duration";
|
|
2338
2338
|
percent: "percent";
|
|
2339
2339
|
passFail: "passFail";
|
|
2340
2340
|
stars: "stars";
|
|
@@ -2391,7 +2391,6 @@ declare const evalStatsConfigSchema: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodOb
|
|
|
2391
2391
|
number: "number";
|
|
2392
2392
|
boolean: "boolean";
|
|
2393
2393
|
file: "file";
|
|
2394
|
-
duration: "duration";
|
|
2395
2394
|
markdown: "markdown";
|
|
2396
2395
|
json: "json";
|
|
2397
2396
|
image: "image";
|
|
@@ -2399,6 +2398,7 @@ declare const evalStatsConfigSchema: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodOb
|
|
|
2399
2398
|
pdf: "pdf";
|
|
2400
2399
|
audio: "audio";
|
|
2401
2400
|
video: "video";
|
|
2401
|
+
duration: "duration";
|
|
2402
2402
|
percent: "percent";
|
|
2403
2403
|
passFail: "passFail";
|
|
2404
2404
|
stars: "stars";
|
|
@@ -2437,7 +2437,6 @@ declare const evalSummarySchema$1: z.ZodObject<{
|
|
|
2437
2437
|
number: "number";
|
|
2438
2438
|
boolean: "boolean";
|
|
2439
2439
|
file: "file";
|
|
2440
|
-
duration: "duration";
|
|
2441
2440
|
markdown: "markdown";
|
|
2442
2441
|
json: "json";
|
|
2443
2442
|
image: "image";
|
|
@@ -2445,6 +2444,7 @@ declare const evalSummarySchema$1: z.ZodObject<{
|
|
|
2445
2444
|
pdf: "pdf";
|
|
2446
2445
|
audio: "audio";
|
|
2447
2446
|
video: "video";
|
|
2447
|
+
duration: "duration";
|
|
2448
2448
|
percent: "percent";
|
|
2449
2449
|
passFail: "passFail";
|
|
2450
2450
|
stars: "stars";
|
|
@@ -2466,10 +2466,10 @@ declare const evalSummarySchema$1: z.ZodObject<{
|
|
|
2466
2466
|
caseIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
2467
2467
|
lastRunStatus: z.ZodNullable<z.ZodEnum<{
|
|
2468
2468
|
error: "error";
|
|
2469
|
-
pass: "pass";
|
|
2470
|
-
fail: "fail";
|
|
2471
2469
|
running: "running";
|
|
2472
2470
|
cancelled: "cancelled";
|
|
2471
|
+
pass: "pass";
|
|
2472
|
+
fail: "fail";
|
|
2473
2473
|
unscored: "unscored";
|
|
2474
2474
|
}>>;
|
|
2475
2475
|
stats: z.ZodOptional<z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
@@ -2518,7 +2518,6 @@ declare const evalSummarySchema$1: z.ZodObject<{
|
|
|
2518
2518
|
number: "number";
|
|
2519
2519
|
boolean: "boolean";
|
|
2520
2520
|
file: "file";
|
|
2521
|
-
duration: "duration";
|
|
2522
2521
|
markdown: "markdown";
|
|
2523
2522
|
json: "json";
|
|
2524
2523
|
image: "image";
|
|
@@ -2526,6 +2525,7 @@ declare const evalSummarySchema$1: z.ZodObject<{
|
|
|
2526
2525
|
pdf: "pdf";
|
|
2527
2526
|
audio: "audio";
|
|
2528
2527
|
video: "video";
|
|
2528
|
+
duration: "duration";
|
|
2529
2529
|
percent: "percent";
|
|
2530
2530
|
passFail: "passFail";
|
|
2531
2531
|
stars: "stars";
|
|
@@ -2558,9 +2558,9 @@ declare const evalSummarySchema$1: z.ZodObject<{
|
|
|
2558
2558
|
}>;
|
|
2559
2559
|
label: z.ZodOptional<z.ZodString>;
|
|
2560
2560
|
color: z.ZodOptional<z.ZodEnum<{
|
|
2561
|
+
error: "error";
|
|
2561
2562
|
success: "success";
|
|
2562
2563
|
accent: "accent";
|
|
2563
|
-
error: "error";
|
|
2564
2564
|
accentDim: "accentDim";
|
|
2565
2565
|
warning: "warning";
|
|
2566
2566
|
textMuted: "textMuted";
|
|
@@ -2582,9 +2582,9 @@ declare const evalSummarySchema$1: z.ZodObject<{
|
|
|
2582
2582
|
}>;
|
|
2583
2583
|
label: z.ZodOptional<z.ZodString>;
|
|
2584
2584
|
color: z.ZodOptional<z.ZodEnum<{
|
|
2585
|
+
error: "error";
|
|
2585
2586
|
success: "success";
|
|
2586
2587
|
accent: "accent";
|
|
2587
|
-
error: "error";
|
|
2588
2588
|
accentDim: "accentDim";
|
|
2589
2589
|
warning: "warning";
|
|
2590
2590
|
textMuted: "textMuted";
|
|
@@ -2715,10 +2715,10 @@ declare const caseRowSchema$1: z.ZodObject<{
|
|
|
2715
2715
|
tags: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
2716
2716
|
status: z.ZodEnum<{
|
|
2717
2717
|
error: "error";
|
|
2718
|
-
pass: "pass";
|
|
2719
|
-
fail: "fail";
|
|
2720
2718
|
running: "running";
|
|
2721
2719
|
cancelled: "cancelled";
|
|
2720
|
+
pass: "pass";
|
|
2721
|
+
fail: "fail";
|
|
2722
2722
|
pending: "pending";
|
|
2723
2723
|
}>;
|
|
2724
2724
|
durationMs: z.ZodNullable<z.ZodNumber>;
|
|
@@ -2749,7 +2749,6 @@ declare const caseRowSchema$1: z.ZodObject<{
|
|
|
2749
2749
|
number: "number";
|
|
2750
2750
|
boolean: "boolean";
|
|
2751
2751
|
file: "file";
|
|
2752
|
-
duration: "duration";
|
|
2753
2752
|
markdown: "markdown";
|
|
2754
2753
|
json: "json";
|
|
2755
2754
|
image: "image";
|
|
@@ -2757,6 +2756,7 @@ declare const caseRowSchema$1: z.ZodObject<{
|
|
|
2757
2756
|
pdf: "pdf";
|
|
2758
2757
|
audio: "audio";
|
|
2759
2758
|
video: "video";
|
|
2759
|
+
duration: "duration";
|
|
2760
2760
|
percent: "percent";
|
|
2761
2761
|
passFail: "passFail";
|
|
2762
2762
|
stars: "stars";
|
|
@@ -2857,8 +2857,8 @@ declare const scoreTraceSchema: z.ZodObject<{
|
|
|
2857
2857
|
status: z.ZodEnum<{
|
|
2858
2858
|
error: "error";
|
|
2859
2859
|
running: "running";
|
|
2860
|
-
cancelled: "cancelled";
|
|
2861
2860
|
ok: "ok";
|
|
2861
|
+
cancelled: "cancelled";
|
|
2862
2862
|
}>;
|
|
2863
2863
|
attributes: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
2864
2864
|
error: z.ZodOptional<z.ZodObject<{
|
|
@@ -2894,8 +2894,8 @@ declare const scoreTraceSchema: z.ZodObject<{
|
|
|
2894
2894
|
format: z.ZodOptional<z.ZodEnum<{
|
|
2895
2895
|
string: "string";
|
|
2896
2896
|
number: "number";
|
|
2897
|
-
duration: "duration";
|
|
2898
2897
|
json: "json";
|
|
2898
|
+
duration: "duration";
|
|
2899
2899
|
}>>;
|
|
2900
2900
|
numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
2901
2901
|
placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
|
|
@@ -2920,10 +2920,10 @@ declare const scoreTraceSchema: z.ZodObject<{
|
|
|
2920
2920
|
namespace: z.ZodString;
|
|
2921
2921
|
key: z.ZodString;
|
|
2922
2922
|
status: z.ZodEnum<{
|
|
2923
|
+
bypass: "bypass";
|
|
2924
|
+
refresh: "refresh";
|
|
2923
2925
|
hit: "hit";
|
|
2924
2926
|
miss: "miss";
|
|
2925
|
-
refresh: "refresh";
|
|
2926
|
-
bypass: "bypass";
|
|
2927
2927
|
}>;
|
|
2928
2928
|
read: z.ZodOptional<z.ZodBoolean>;
|
|
2929
2929
|
stored: z.ZodOptional<z.ZodBoolean>;
|
|
@@ -2942,10 +2942,10 @@ declare const caseDetailSchema$1: z.ZodObject<{
|
|
|
2942
2942
|
tags: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
2943
2943
|
status: z.ZodEnum<{
|
|
2944
2944
|
error: "error";
|
|
2945
|
-
pass: "pass";
|
|
2946
|
-
fail: "fail";
|
|
2947
2945
|
running: "running";
|
|
2948
2946
|
cancelled: "cancelled";
|
|
2947
|
+
pass: "pass";
|
|
2948
|
+
fail: "fail";
|
|
2949
2949
|
pending: "pending";
|
|
2950
2950
|
}>;
|
|
2951
2951
|
input: z.ZodUnknown;
|
|
@@ -2960,8 +2960,8 @@ declare const caseDetailSchema$1: z.ZodObject<{
|
|
|
2960
2960
|
status: z.ZodEnum<{
|
|
2961
2961
|
error: "error";
|
|
2962
2962
|
running: "running";
|
|
2963
|
-
cancelled: "cancelled";
|
|
2964
2963
|
ok: "ok";
|
|
2964
|
+
cancelled: "cancelled";
|
|
2965
2965
|
}>;
|
|
2966
2966
|
attributes: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
2967
2967
|
error: z.ZodOptional<z.ZodObject<{
|
|
@@ -2997,8 +2997,8 @@ declare const caseDetailSchema$1: z.ZodObject<{
|
|
|
2997
2997
|
format: z.ZodOptional<z.ZodEnum<{
|
|
2998
2998
|
string: "string";
|
|
2999
2999
|
number: "number";
|
|
3000
|
-
duration: "duration";
|
|
3001
3000
|
json: "json";
|
|
3001
|
+
duration: "duration";
|
|
3002
3002
|
}>>;
|
|
3003
3003
|
numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
3004
3004
|
placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
|
|
@@ -3029,8 +3029,8 @@ declare const caseDetailSchema$1: z.ZodObject<{
|
|
|
3029
3029
|
status: z.ZodEnum<{
|
|
3030
3030
|
error: "error";
|
|
3031
3031
|
running: "running";
|
|
3032
|
-
cancelled: "cancelled";
|
|
3033
3032
|
ok: "ok";
|
|
3033
|
+
cancelled: "cancelled";
|
|
3034
3034
|
}>;
|
|
3035
3035
|
attributes: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
3036
3036
|
error: z.ZodOptional<z.ZodObject<{
|
|
@@ -3066,8 +3066,8 @@ declare const caseDetailSchema$1: z.ZodObject<{
|
|
|
3066
3066
|
format: z.ZodOptional<z.ZodEnum<{
|
|
3067
3067
|
string: "string";
|
|
3068
3068
|
number: "number";
|
|
3069
|
-
duration: "duration";
|
|
3070
3069
|
json: "json";
|
|
3070
|
+
duration: "duration";
|
|
3071
3071
|
}>>;
|
|
3072
3072
|
numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
3073
3073
|
placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
|
|
@@ -3092,10 +3092,10 @@ declare const caseDetailSchema$1: z.ZodObject<{
|
|
|
3092
3092
|
namespace: z.ZodString;
|
|
3093
3093
|
key: z.ZodString;
|
|
3094
3094
|
status: z.ZodEnum<{
|
|
3095
|
+
bypass: "bypass";
|
|
3096
|
+
refresh: "refresh";
|
|
3095
3097
|
hit: "hit";
|
|
3096
3098
|
miss: "miss";
|
|
3097
|
-
refresh: "refresh";
|
|
3098
|
-
bypass: "bypass";
|
|
3099
3099
|
}>;
|
|
3100
3100
|
read: z.ZodOptional<z.ZodBoolean>;
|
|
3101
3101
|
stored: z.ZodOptional<z.ZodBoolean>;
|
|
@@ -3127,7 +3127,6 @@ declare const caseDetailSchema$1: z.ZodObject<{
|
|
|
3127
3127
|
number: "number";
|
|
3128
3128
|
boolean: "boolean";
|
|
3129
3129
|
file: "file";
|
|
3130
|
-
duration: "duration";
|
|
3131
3130
|
markdown: "markdown";
|
|
3132
3131
|
json: "json";
|
|
3133
3132
|
image: "image";
|
|
@@ -3135,6 +3134,7 @@ declare const caseDetailSchema$1: z.ZodObject<{
|
|
|
3135
3134
|
pdf: "pdf";
|
|
3136
3135
|
audio: "audio";
|
|
3137
3136
|
video: "video";
|
|
3137
|
+
duration: "duration";
|
|
3138
3138
|
percent: "percent";
|
|
3139
3139
|
passFail: "passFail";
|
|
3140
3140
|
stars: "stars";
|
|
@@ -3213,10 +3213,10 @@ declare const caseDetailSchema$1: z.ZodObject<{
|
|
|
3213
3213
|
namespace: z.ZodString;
|
|
3214
3214
|
key: z.ZodString;
|
|
3215
3215
|
status: z.ZodEnum<{
|
|
3216
|
+
bypass: "bypass";
|
|
3217
|
+
refresh: "refresh";
|
|
3216
3218
|
hit: "hit";
|
|
3217
3219
|
miss: "miss";
|
|
3218
|
-
refresh: "refresh";
|
|
3219
|
-
bypass: "bypass";
|
|
3220
3220
|
}>;
|
|
3221
3221
|
read: z.ZodOptional<z.ZodBoolean>;
|
|
3222
3222
|
stored: z.ZodOptional<z.ZodBoolean>;
|
|
@@ -3283,9 +3283,9 @@ type EvalChartAggregate = z.infer<typeof evalChartAggregateSchema>;
|
|
|
3283
3283
|
* not emit raw hex so authored evals stay decoupled from the web theme.
|
|
3284
3284
|
*/
|
|
3285
3285
|
declare const evalChartColorSchema: z.ZodEnum<{
|
|
3286
|
+
error: "error";
|
|
3286
3287
|
success: "success";
|
|
3287
3288
|
accent: "accent";
|
|
3288
|
-
error: "error";
|
|
3289
3289
|
accentDim: "accentDim";
|
|
3290
3290
|
warning: "warning";
|
|
3291
3291
|
textMuted: "textMuted";
|
|
@@ -3312,9 +3312,9 @@ declare const evalChartMetricSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
3312
3312
|
}>;
|
|
3313
3313
|
label: z.ZodOptional<z.ZodString>;
|
|
3314
3314
|
color: z.ZodOptional<z.ZodEnum<{
|
|
3315
|
+
error: "error";
|
|
3315
3316
|
success: "success";
|
|
3316
3317
|
accent: "accent";
|
|
3317
|
-
error: "error";
|
|
3318
3318
|
accentDim: "accentDim";
|
|
3319
3319
|
warning: "warning";
|
|
3320
3320
|
textMuted: "textMuted";
|
|
@@ -3336,9 +3336,9 @@ declare const evalChartMetricSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
3336
3336
|
}>;
|
|
3337
3337
|
label: z.ZodOptional<z.ZodString>;
|
|
3338
3338
|
color: z.ZodOptional<z.ZodEnum<{
|
|
3339
|
+
error: "error";
|
|
3339
3340
|
success: "success";
|
|
3340
3341
|
accent: "accent";
|
|
3341
|
-
error: "error";
|
|
3342
3342
|
accentDim: "accentDim";
|
|
3343
3343
|
warning: "warning";
|
|
3344
3344
|
textMuted: "textMuted";
|
|
@@ -3395,9 +3395,9 @@ declare const evalChartConfigSchema: z.ZodObject<{
|
|
|
3395
3395
|
}>;
|
|
3396
3396
|
label: z.ZodOptional<z.ZodString>;
|
|
3397
3397
|
color: z.ZodOptional<z.ZodEnum<{
|
|
3398
|
+
error: "error";
|
|
3398
3399
|
success: "success";
|
|
3399
3400
|
accent: "accent";
|
|
3400
|
-
error: "error";
|
|
3401
3401
|
accentDim: "accentDim";
|
|
3402
3402
|
warning: "warning";
|
|
3403
3403
|
textMuted: "textMuted";
|
|
@@ -3419,9 +3419,9 @@ declare const evalChartConfigSchema: z.ZodObject<{
|
|
|
3419
3419
|
}>;
|
|
3420
3420
|
label: z.ZodOptional<z.ZodString>;
|
|
3421
3421
|
color: z.ZodOptional<z.ZodEnum<{
|
|
3422
|
+
error: "error";
|
|
3422
3423
|
success: "success";
|
|
3423
3424
|
accent: "accent";
|
|
3424
|
-
error: "error";
|
|
3425
3425
|
accentDim: "accentDim";
|
|
3426
3426
|
warning: "warning";
|
|
3427
3427
|
textMuted: "textMuted";
|
|
@@ -3485,9 +3485,9 @@ declare const evalChartsConfigSchema: z.ZodArray<z.ZodObject<{
|
|
|
3485
3485
|
}>;
|
|
3486
3486
|
label: z.ZodOptional<z.ZodString>;
|
|
3487
3487
|
color: z.ZodOptional<z.ZodEnum<{
|
|
3488
|
+
error: "error";
|
|
3488
3489
|
success: "success";
|
|
3489
3490
|
accent: "accent";
|
|
3490
|
-
error: "error";
|
|
3491
3491
|
accentDim: "accentDim";
|
|
3492
3492
|
warning: "warning";
|
|
3493
3493
|
textMuted: "textMuted";
|
|
@@ -3509,9 +3509,9 @@ declare const evalChartsConfigSchema: z.ZodArray<z.ZodObject<{
|
|
|
3509
3509
|
}>;
|
|
3510
3510
|
label: z.ZodOptional<z.ZodString>;
|
|
3511
3511
|
color: z.ZodOptional<z.ZodEnum<{
|
|
3512
|
+
error: "error";
|
|
3512
3513
|
success: "success";
|
|
3513
3514
|
accent: "accent";
|
|
3514
|
-
error: "error";
|
|
3515
3515
|
accentDim: "accentDim";
|
|
3516
3516
|
warning: "warning";
|
|
3517
3517
|
textMuted: "textMuted";
|
|
@@ -3589,9 +3589,9 @@ declare const runManifestSchema$1: z.ZodObject<{
|
|
|
3589
3589
|
median: "median";
|
|
3590
3590
|
}>>>;
|
|
3591
3591
|
cacheMode: z.ZodOptional<z.ZodEnum<{
|
|
3592
|
-
refresh: "refresh";
|
|
3593
|
-
bypass: "bypass";
|
|
3594
3592
|
use: "use";
|
|
3593
|
+
bypass: "bypass";
|
|
3594
|
+
refresh: "refresh";
|
|
3595
3595
|
}>>;
|
|
3596
3596
|
}, z.core.$strip>;
|
|
3597
3597
|
/** Persisted lifecycle metadata for a single eval run. */
|
|
@@ -3808,8 +3808,8 @@ declare const llmCallMetricFormatSchema$1: z.ZodEnum<{
|
|
|
3808
3808
|
string: "string";
|
|
3809
3809
|
number: "number";
|
|
3810
3810
|
boolean: "boolean";
|
|
3811
|
-
duration: "duration";
|
|
3812
3811
|
json: "json";
|
|
3812
|
+
duration: "duration";
|
|
3813
3813
|
}>;
|
|
3814
3814
|
/** Render format applied to an LLM-call metric value. */
|
|
3815
3815
|
type LlmCallMetricFormat = z.infer<typeof llmCallMetricFormatSchema$1>;
|
|
@@ -3818,8 +3818,8 @@ declare const apiCallMetricFormatSchema$1: z.ZodEnum<{
|
|
|
3818
3818
|
string: "string";
|
|
3819
3819
|
number: "number";
|
|
3820
3820
|
boolean: "boolean";
|
|
3821
|
-
duration: "duration";
|
|
3822
3821
|
json: "json";
|
|
3822
|
+
duration: "duration";
|
|
3823
3823
|
}>;
|
|
3824
3824
|
/** Render format applied to an API-call metric value. */
|
|
3825
3825
|
type ApiCallMetricFormat = z.infer<typeof apiCallMetricFormatSchema$1>;
|
|
@@ -3888,8 +3888,8 @@ declare const llmCallMetricSchema: z.ZodObject<{
|
|
|
3888
3888
|
string: "string";
|
|
3889
3889
|
number: "number";
|
|
3890
3890
|
boolean: "boolean";
|
|
3891
|
-
duration: "duration";
|
|
3892
3891
|
json: "json";
|
|
3892
|
+
duration: "duration";
|
|
3893
3893
|
}>>;
|
|
3894
3894
|
numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
3895
3895
|
placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
|
|
@@ -3917,8 +3917,8 @@ declare const apiCallMetricSchema: z.ZodObject<{
|
|
|
3917
3917
|
string: "string";
|
|
3918
3918
|
number: "number";
|
|
3919
3919
|
boolean: "boolean";
|
|
3920
|
-
duration: "duration";
|
|
3921
3920
|
json: "json";
|
|
3921
|
+
duration: "duration";
|
|
3922
3922
|
}>>;
|
|
3923
3923
|
numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
3924
3924
|
placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
|
|
@@ -4031,8 +4031,8 @@ declare const llmCallsConfigSchema: z.ZodObject<{
|
|
|
4031
4031
|
string: "string";
|
|
4032
4032
|
number: "number";
|
|
4033
4033
|
boolean: "boolean";
|
|
4034
|
-
duration: "duration";
|
|
4035
4034
|
json: "json";
|
|
4035
|
+
duration: "duration";
|
|
4036
4036
|
}>>;
|
|
4037
4037
|
numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
4038
4038
|
placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
|
|
@@ -4067,8 +4067,8 @@ declare const apiCallsConfigSchema: z.ZodObject<{
|
|
|
4067
4067
|
string: "string";
|
|
4068
4068
|
number: "number";
|
|
4069
4069
|
boolean: "boolean";
|
|
4070
|
-
duration: "duration";
|
|
4071
4070
|
json: "json";
|
|
4071
|
+
duration: "duration";
|
|
4072
4072
|
}>>;
|
|
4073
4073
|
numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
4074
4074
|
placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
|
|
@@ -4567,9 +4567,9 @@ declare function extractApiCalls(spans: EvalTraceSpan$1[], config: ResolvedApiCa
|
|
|
4567
4567
|
* - `refresh`: never read, always write (forces re-execution and overwrites).
|
|
4568
4568
|
*/
|
|
4569
4569
|
declare const cacheModeSchema: z.ZodEnum<{
|
|
4570
|
-
refresh: "refresh";
|
|
4571
|
-
bypass: "bypass";
|
|
4572
4570
|
use: "use";
|
|
4571
|
+
bypass: "bypass";
|
|
4572
|
+
refresh: "refresh";
|
|
4573
4573
|
}>;
|
|
4574
4574
|
/** Mode controlling how cached spans behave during a run. */
|
|
4575
4575
|
type CacheMode = z.infer<typeof cacheModeSchema>;
|
|
@@ -4583,17 +4583,17 @@ declare const spanCacheOptionsSchema: z.ZodObject<{
|
|
|
4583
4583
|
type SpanCacheOptions = z.infer<typeof spanCacheOptionsSchema>;
|
|
4584
4584
|
/** Category of operation stored in the eval cache. */
|
|
4585
4585
|
declare const cacheOperationTypeSchema: z.ZodEnum<{
|
|
4586
|
-
value: "value";
|
|
4587
4586
|
span: "span";
|
|
4587
|
+
value: "value";
|
|
4588
4588
|
}>;
|
|
4589
4589
|
/** Category of operation stored in the eval cache. */
|
|
4590
4590
|
type CacheOperationType = z.infer<typeof cacheOperationTypeSchema>;
|
|
4591
4591
|
/** Status of a cache lookup recorded on a span or case scope. */
|
|
4592
4592
|
declare const cacheStatusSchema: z.ZodEnum<{
|
|
4593
|
+
bypass: "bypass";
|
|
4594
|
+
refresh: "refresh";
|
|
4593
4595
|
hit: "hit";
|
|
4594
4596
|
miss: "miss";
|
|
4595
|
-
refresh: "refresh";
|
|
4596
|
-
bypass: "bypass";
|
|
4597
4597
|
}>;
|
|
4598
4598
|
/** Status of a cache lookup recorded on a span or case scope. */
|
|
4599
4599
|
type CacheStatus = z.infer<typeof cacheStatusSchema>;
|
|
@@ -4610,10 +4610,10 @@ declare const traceCacheRefSchema: z.ZodObject<{
|
|
|
4610
4610
|
namespace: z.ZodString;
|
|
4611
4611
|
key: z.ZodString;
|
|
4612
4612
|
status: z.ZodEnum<{
|
|
4613
|
+
bypass: "bypass";
|
|
4614
|
+
refresh: "refresh";
|
|
4613
4615
|
hit: "hit";
|
|
4614
4616
|
miss: "miss";
|
|
4615
|
-
refresh: "refresh";
|
|
4616
|
-
bypass: "bypass";
|
|
4617
4617
|
}>;
|
|
4618
4618
|
read: z.ZodOptional<z.ZodBoolean>;
|
|
4619
4619
|
stored: z.ZodOptional<z.ZodBoolean>;
|
|
@@ -4670,7 +4670,6 @@ declare const cacheRecordingOpSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
4670
4670
|
number: "number";
|
|
4671
4671
|
boolean: "boolean";
|
|
4672
4672
|
file: "file";
|
|
4673
|
-
duration: "duration";
|
|
4674
4673
|
markdown: "markdown";
|
|
4675
4674
|
json: "json";
|
|
4676
4675
|
image: "image";
|
|
@@ -4678,6 +4677,7 @@ declare const cacheRecordingOpSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
4678
4677
|
pdf: "pdf";
|
|
4679
4678
|
audio: "audio";
|
|
4680
4679
|
video: "video";
|
|
4680
|
+
duration: "duration";
|
|
4681
4681
|
percent: "percent";
|
|
4682
4682
|
passFail: "passFail";
|
|
4683
4683
|
stars: "stars";
|
|
@@ -4721,8 +4721,8 @@ declare const cacheRecordingSchema: z.ZodObject<{
|
|
|
4721
4721
|
finalStatus: z.ZodOptional<z.ZodEnum<{
|
|
4722
4722
|
error: "error";
|
|
4723
4723
|
running: "running";
|
|
4724
|
-
cancelled: "cancelled";
|
|
4725
4724
|
ok: "ok";
|
|
4725
|
+
cancelled: "cancelled";
|
|
4726
4726
|
}>>;
|
|
4727
4727
|
finalError: z.ZodOptional<z.ZodObject<{
|
|
4728
4728
|
name: z.ZodOptional<z.ZodString>;
|
|
@@ -4758,7 +4758,6 @@ declare const cacheRecordingSchema: z.ZodObject<{
|
|
|
4758
4758
|
number: "number";
|
|
4759
4759
|
boolean: "boolean";
|
|
4760
4760
|
file: "file";
|
|
4761
|
-
duration: "duration";
|
|
4762
4761
|
markdown: "markdown";
|
|
4763
4762
|
json: "json";
|
|
4764
4763
|
image: "image";
|
|
@@ -4766,6 +4765,7 @@ declare const cacheRecordingSchema: z.ZodObject<{
|
|
|
4766
4765
|
pdf: "pdf";
|
|
4767
4766
|
audio: "audio";
|
|
4768
4767
|
video: "video";
|
|
4768
|
+
duration: "duration";
|
|
4769
4769
|
percent: "percent";
|
|
4770
4770
|
passFail: "passFail";
|
|
4771
4771
|
stars: "stars";
|
|
@@ -4809,8 +4809,8 @@ declare const cacheEntrySchema: z.ZodObject<{
|
|
|
4809
4809
|
key: z.ZodString;
|
|
4810
4810
|
namespace: z.ZodString;
|
|
4811
4811
|
operationType: z.ZodOptional<z.ZodEnum<{
|
|
4812
|
-
value: "value";
|
|
4813
4812
|
span: "span";
|
|
4813
|
+
value: "value";
|
|
4814
4814
|
}>>;
|
|
4815
4815
|
operationName: z.ZodOptional<z.ZodString>;
|
|
4816
4816
|
spanName: z.ZodOptional<z.ZodString>;
|
|
@@ -4822,8 +4822,8 @@ declare const cacheEntrySchema: z.ZodObject<{
|
|
|
4822
4822
|
finalStatus: z.ZodOptional<z.ZodEnum<{
|
|
4823
4823
|
error: "error";
|
|
4824
4824
|
running: "running";
|
|
4825
|
-
cancelled: "cancelled";
|
|
4826
4825
|
ok: "ok";
|
|
4826
|
+
cancelled: "cancelled";
|
|
4827
4827
|
}>>;
|
|
4828
4828
|
finalError: z.ZodOptional<z.ZodObject<{
|
|
4829
4829
|
name: z.ZodOptional<z.ZodString>;
|
|
@@ -4859,7 +4859,6 @@ declare const cacheEntrySchema: z.ZodObject<{
|
|
|
4859
4859
|
number: "number";
|
|
4860
4860
|
boolean: "boolean";
|
|
4861
4861
|
file: "file";
|
|
4862
|
-
duration: "duration";
|
|
4863
4862
|
markdown: "markdown";
|
|
4864
4863
|
json: "json";
|
|
4865
4864
|
image: "image";
|
|
@@ -4867,6 +4866,7 @@ declare const cacheEntrySchema: z.ZodObject<{
|
|
|
4867
4866
|
pdf: "pdf";
|
|
4868
4867
|
audio: "audio";
|
|
4869
4868
|
video: "video";
|
|
4869
|
+
duration: "duration";
|
|
4870
4870
|
percent: "percent";
|
|
4871
4871
|
passFail: "passFail";
|
|
4872
4872
|
stars: "stars";
|
|
@@ -4916,8 +4916,8 @@ declare const cacheDebugKeyEntrySchema: z.ZodObject<{
|
|
|
4916
4916
|
key: z.ZodString;
|
|
4917
4917
|
namespace: z.ZodString;
|
|
4918
4918
|
operationType: z.ZodEnum<{
|
|
4919
|
-
value: "value";
|
|
4920
4919
|
span: "span";
|
|
4920
|
+
value: "value";
|
|
4921
4921
|
}>;
|
|
4922
4922
|
operationName: z.ZodString;
|
|
4923
4923
|
storedAt: z.ZodString;
|
|
@@ -4927,8 +4927,8 @@ declare const cacheDebugKeyEntrySchema: z.ZodObject<{
|
|
|
4927
4927
|
key: z.ZodString;
|
|
4928
4928
|
namespace: z.ZodString;
|
|
4929
4929
|
operationType: z.ZodOptional<z.ZodEnum<{
|
|
4930
|
-
value: "value";
|
|
4931
4930
|
span: "span";
|
|
4931
|
+
value: "value";
|
|
4932
4932
|
}>>;
|
|
4933
4933
|
operationName: z.ZodOptional<z.ZodString>;
|
|
4934
4934
|
spanName: z.ZodOptional<z.ZodString>;
|
|
@@ -4940,8 +4940,8 @@ declare const cacheDebugKeyEntrySchema: z.ZodObject<{
|
|
|
4940
4940
|
finalStatus: z.ZodOptional<z.ZodEnum<{
|
|
4941
4941
|
error: "error";
|
|
4942
4942
|
running: "running";
|
|
4943
|
-
cancelled: "cancelled";
|
|
4944
4943
|
ok: "ok";
|
|
4944
|
+
cancelled: "cancelled";
|
|
4945
4945
|
}>>;
|
|
4946
4946
|
finalError: z.ZodOptional<z.ZodObject<{
|
|
4947
4947
|
name: z.ZodOptional<z.ZodString>;
|
|
@@ -4977,7 +4977,6 @@ declare const cacheDebugKeyEntrySchema: z.ZodObject<{
|
|
|
4977
4977
|
number: "number";
|
|
4978
4978
|
boolean: "boolean";
|
|
4979
4979
|
file: "file";
|
|
4980
|
-
duration: "duration";
|
|
4981
4980
|
markdown: "markdown";
|
|
4982
4981
|
json: "json";
|
|
4983
4982
|
image: "image";
|
|
@@ -4985,6 +4984,7 @@ declare const cacheDebugKeyEntrySchema: z.ZodObject<{
|
|
|
4985
4984
|
pdf: "pdf";
|
|
4986
4985
|
audio: "audio";
|
|
4987
4986
|
video: "video";
|
|
4987
|
+
duration: "duration";
|
|
4988
4988
|
percent: "percent";
|
|
4989
4989
|
passFail: "passFail";
|
|
4990
4990
|
stars: "stars";
|
|
@@ -5034,8 +5034,8 @@ declare const cacheEntryWithDebugKeySchema$1: z.ZodObject<{
|
|
|
5034
5034
|
key: z.ZodString;
|
|
5035
5035
|
namespace: z.ZodString;
|
|
5036
5036
|
operationType: z.ZodOptional<z.ZodEnum<{
|
|
5037
|
-
value: "value";
|
|
5038
5037
|
span: "span";
|
|
5038
|
+
value: "value";
|
|
5039
5039
|
}>>;
|
|
5040
5040
|
operationName: z.ZodOptional<z.ZodString>;
|
|
5041
5041
|
spanName: z.ZodOptional<z.ZodString>;
|
|
@@ -5047,8 +5047,8 @@ declare const cacheEntryWithDebugKeySchema$1: z.ZodObject<{
|
|
|
5047
5047
|
finalStatus: z.ZodOptional<z.ZodEnum<{
|
|
5048
5048
|
error: "error";
|
|
5049
5049
|
running: "running";
|
|
5050
|
-
cancelled: "cancelled";
|
|
5051
5050
|
ok: "ok";
|
|
5051
|
+
cancelled: "cancelled";
|
|
5052
5052
|
}>>;
|
|
5053
5053
|
finalError: z.ZodOptional<z.ZodObject<{
|
|
5054
5054
|
name: z.ZodOptional<z.ZodString>;
|
|
@@ -5084,7 +5084,6 @@ declare const cacheEntryWithDebugKeySchema$1: z.ZodObject<{
|
|
|
5084
5084
|
number: "number";
|
|
5085
5085
|
boolean: "boolean";
|
|
5086
5086
|
file: "file";
|
|
5087
|
-
duration: "duration";
|
|
5088
5087
|
markdown: "markdown";
|
|
5089
5088
|
json: "json";
|
|
5090
5089
|
image: "image";
|
|
@@ -5092,6 +5091,7 @@ declare const cacheEntryWithDebugKeySchema$1: z.ZodObject<{
|
|
|
5092
5091
|
pdf: "pdf";
|
|
5093
5092
|
audio: "audio";
|
|
5094
5093
|
video: "video";
|
|
5094
|
+
duration: "duration";
|
|
5095
5095
|
percent: "percent";
|
|
5096
5096
|
passFail: "passFail";
|
|
5097
5097
|
stars: "stars";
|
|
@@ -5132,8 +5132,8 @@ declare const cacheEntryWithDebugKeySchema$1: z.ZodObject<{
|
|
|
5132
5132
|
key: z.ZodString;
|
|
5133
5133
|
namespace: z.ZodString;
|
|
5134
5134
|
operationType: z.ZodEnum<{
|
|
5135
|
-
value: "value";
|
|
5136
5135
|
span: "span";
|
|
5136
|
+
value: "value";
|
|
5137
5137
|
}>;
|
|
5138
5138
|
operationName: z.ZodString;
|
|
5139
5139
|
storedAt: z.ZodString;
|
|
@@ -5143,8 +5143,8 @@ declare const cacheEntryWithDebugKeySchema$1: z.ZodObject<{
|
|
|
5143
5143
|
key: z.ZodString;
|
|
5144
5144
|
namespace: z.ZodString;
|
|
5145
5145
|
operationType: z.ZodOptional<z.ZodEnum<{
|
|
5146
|
-
value: "value";
|
|
5147
5146
|
span: "span";
|
|
5147
|
+
value: "value";
|
|
5148
5148
|
}>>;
|
|
5149
5149
|
operationName: z.ZodOptional<z.ZodString>;
|
|
5150
5150
|
spanName: z.ZodOptional<z.ZodString>;
|
|
@@ -5156,8 +5156,8 @@ declare const cacheEntryWithDebugKeySchema$1: z.ZodObject<{
|
|
|
5156
5156
|
finalStatus: z.ZodOptional<z.ZodEnum<{
|
|
5157
5157
|
error: "error";
|
|
5158
5158
|
running: "running";
|
|
5159
|
-
cancelled: "cancelled";
|
|
5160
5159
|
ok: "ok";
|
|
5160
|
+
cancelled: "cancelled";
|
|
5161
5161
|
}>>;
|
|
5162
5162
|
finalError: z.ZodOptional<z.ZodObject<{
|
|
5163
5163
|
name: z.ZodOptional<z.ZodString>;
|
|
@@ -5193,7 +5193,6 @@ declare const cacheEntryWithDebugKeySchema$1: z.ZodObject<{
|
|
|
5193
5193
|
number: "number";
|
|
5194
5194
|
boolean: "boolean";
|
|
5195
5195
|
file: "file";
|
|
5196
|
-
duration: "duration";
|
|
5197
5196
|
markdown: "markdown";
|
|
5198
5197
|
json: "json";
|
|
5199
5198
|
image: "image";
|
|
@@ -5201,6 +5200,7 @@ declare const cacheEntryWithDebugKeySchema$1: z.ZodObject<{
|
|
|
5201
5200
|
pdf: "pdf";
|
|
5202
5201
|
audio: "audio";
|
|
5203
5202
|
video: "video";
|
|
5203
|
+
duration: "duration";
|
|
5204
5204
|
percent: "percent";
|
|
5205
5205
|
passFail: "passFail";
|
|
5206
5206
|
stars: "stars";
|
|
@@ -5250,8 +5250,8 @@ declare const cacheFileSchema: z.ZodObject<{
|
|
|
5250
5250
|
key: z.ZodString;
|
|
5251
5251
|
namespace: z.ZodString;
|
|
5252
5252
|
operationType: z.ZodOptional<z.ZodEnum<{
|
|
5253
|
-
value: "value";
|
|
5254
5253
|
span: "span";
|
|
5254
|
+
value: "value";
|
|
5255
5255
|
}>>;
|
|
5256
5256
|
operationName: z.ZodOptional<z.ZodString>;
|
|
5257
5257
|
spanName: z.ZodOptional<z.ZodString>;
|
|
@@ -5263,8 +5263,8 @@ declare const cacheFileSchema: z.ZodObject<{
|
|
|
5263
5263
|
finalStatus: z.ZodOptional<z.ZodEnum<{
|
|
5264
5264
|
error: "error";
|
|
5265
5265
|
running: "running";
|
|
5266
|
-
cancelled: "cancelled";
|
|
5267
5266
|
ok: "ok";
|
|
5267
|
+
cancelled: "cancelled";
|
|
5268
5268
|
}>>;
|
|
5269
5269
|
finalError: z.ZodOptional<z.ZodObject<{
|
|
5270
5270
|
name: z.ZodOptional<z.ZodString>;
|
|
@@ -5300,7 +5300,6 @@ declare const cacheFileSchema: z.ZodObject<{
|
|
|
5300
5300
|
number: "number";
|
|
5301
5301
|
boolean: "boolean";
|
|
5302
5302
|
file: "file";
|
|
5303
|
-
duration: "duration";
|
|
5304
5303
|
markdown: "markdown";
|
|
5305
5304
|
json: "json";
|
|
5306
5305
|
image: "image";
|
|
@@ -5308,6 +5307,7 @@ declare const cacheFileSchema: z.ZodObject<{
|
|
|
5308
5307
|
pdf: "pdf";
|
|
5309
5308
|
audio: "audio";
|
|
5310
5309
|
video: "video";
|
|
5310
|
+
duration: "duration";
|
|
5311
5311
|
percent: "percent";
|
|
5312
5312
|
passFail: "passFail";
|
|
5313
5313
|
stars: "stars";
|
|
@@ -5356,8 +5356,8 @@ declare const cacheDebugKeyFileSchema: z.ZodObject<{
|
|
|
5356
5356
|
key: z.ZodString;
|
|
5357
5357
|
namespace: z.ZodString;
|
|
5358
5358
|
operationType: z.ZodEnum<{
|
|
5359
|
-
value: "value";
|
|
5360
5359
|
span: "span";
|
|
5360
|
+
value: "value";
|
|
5361
5361
|
}>;
|
|
5362
5362
|
operationName: z.ZodString;
|
|
5363
5363
|
storedAt: z.ZodString;
|
|
@@ -5367,8 +5367,8 @@ declare const cacheDebugKeyFileSchema: z.ZodObject<{
|
|
|
5367
5367
|
key: z.ZodString;
|
|
5368
5368
|
namespace: z.ZodString;
|
|
5369
5369
|
operationType: z.ZodOptional<z.ZodEnum<{
|
|
5370
|
-
value: "value";
|
|
5371
5370
|
span: "span";
|
|
5371
|
+
value: "value";
|
|
5372
5372
|
}>>;
|
|
5373
5373
|
operationName: z.ZodOptional<z.ZodString>;
|
|
5374
5374
|
spanName: z.ZodOptional<z.ZodString>;
|
|
@@ -5380,8 +5380,8 @@ declare const cacheDebugKeyFileSchema: z.ZodObject<{
|
|
|
5380
5380
|
finalStatus: z.ZodOptional<z.ZodEnum<{
|
|
5381
5381
|
error: "error";
|
|
5382
5382
|
running: "running";
|
|
5383
|
-
cancelled: "cancelled";
|
|
5384
5383
|
ok: "ok";
|
|
5384
|
+
cancelled: "cancelled";
|
|
5385
5385
|
}>>;
|
|
5386
5386
|
finalError: z.ZodOptional<z.ZodObject<{
|
|
5387
5387
|
name: z.ZodOptional<z.ZodString>;
|
|
@@ -5417,7 +5417,6 @@ declare const cacheDebugKeyFileSchema: z.ZodObject<{
|
|
|
5417
5417
|
number: "number";
|
|
5418
5418
|
boolean: "boolean";
|
|
5419
5419
|
file: "file";
|
|
5420
|
-
duration: "duration";
|
|
5421
5420
|
markdown: "markdown";
|
|
5422
5421
|
json: "json";
|
|
5423
5422
|
image: "image";
|
|
@@ -5425,6 +5424,7 @@ declare const cacheDebugKeyFileSchema: z.ZodObject<{
|
|
|
5425
5424
|
pdf: "pdf";
|
|
5426
5425
|
audio: "audio";
|
|
5427
5426
|
video: "video";
|
|
5427
|
+
duration: "duration";
|
|
5428
5428
|
percent: "percent";
|
|
5429
5429
|
passFail: "passFail";
|
|
5430
5430
|
stars: "stars";
|
|
@@ -5579,9 +5579,9 @@ declare const createRunRequestSchema$1: z.ZodObject<{
|
|
|
5579
5579
|
temporary: z.ZodOptional<z.ZodBoolean>;
|
|
5580
5580
|
cache: z.ZodOptional<z.ZodObject<{
|
|
5581
5581
|
mode: z.ZodDefault<z.ZodEnum<{
|
|
5582
|
-
refresh: "refresh";
|
|
5583
|
-
bypass: "bypass";
|
|
5584
5582
|
use: "use";
|
|
5583
|
+
bypass: "bypass";
|
|
5584
|
+
refresh: "refresh";
|
|
5585
5585
|
}>>;
|
|
5586
5586
|
}, z.core.$strip>>;
|
|
5587
5587
|
manualInputs: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
package/dist/index.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { $ as
|
|
2
|
-
import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-
|
|
3
|
-
import { n as matchesEvalTags, t as defineEval } from "./src-
|
|
1
|
+
import { $ as startEvalBackgroundJob, A as manualInputFileValueSchema, B as getCurrentScope, C as hashCacheKey, D as serializeCacheRecording, E as deserializeCacheValue, F as appendToEvalOutput, G as mergeEvalOutput, H as incrementEvalOutput, J as runInEvalScope, K as nextEvalId, L as evalAssert, M as evalExpect, N as EvalAssertionError, O as serializeCacheValue, P as EvalRuntimeUsageError, Pt as getEvalRegistry, Q as setScopeCacheContext, R as evalLog, S as evalTracer, T as deserializeCacheRecording, U as isInEvalScope, V as getEvalCaseInput, Y as runInExistingEvalScope, Z as setEvalOutput, at as extractLlmCalls, b as captureEvalSpanError, it as extractApiCalls, j as readManualInputFile, k as repoFile, lt as getNestedAttribute, nt as extractCacheEntries, ot as simulateLlmCallCost, q as runInEvalRuntimeScope, rt as extractCacheHits, st as simulateTokenAllocation, w as hashCacheKeySync, x as evalSpan, y as buildTraceTree, z as evalTime } from "./runExecution-Bu9yfdUS.mjs";
|
|
2
|
+
import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-OLZIjQpx.mjs";
|
|
3
|
+
import { n as matchesEvalTags, t as defineEval } from "./src-Cy3OxoZW.mjs";
|
|
4
4
|
export { EvalAssertionError, EvalRuntimeUsageError, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob };
|
package/dist/runChild.mjs
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { At as
|
|
2
|
-
import {
|
|
1
|
+
import { At as manualInputDescriptorSchema, I as configureEvalRunLogs, Mt as columnDefSchema, Ot as evalStatAggregateSchema, _ as createFsCacheStore, bt as runSummarySchema, et as createRunRequestSchema, jt as evalChartsConfigSchema, kt as evalStatsConfigSchema, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, v as getCacheRetentionOptions, wt as buildEvalKey, yt as runManifestSchema } from "./runExecution-Bu9yfdUS.mjs";
|
|
2
|
+
import { C as parseEvalDiscovery, h as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-mpgZmEZ6.mjs";
|
|
3
3
|
import { z } from "zod";
|
|
4
4
|
import { readFile } from "node:fs/promises";
|
|
5
5
|
import { relative } from "node:path";
|
|
@@ -7239,4 +7239,4 @@ function recordAssertionFailure(scope, failure) {
|
|
|
7239
7239
|
});
|
|
7240
7240
|
}
|
|
7241
7241
|
//#endregion
|
|
7242
|
-
export {
|
|
7242
|
+
export { startEvalBackgroundJob as $, manualInputFileValueSchema as A, manualInputDescriptorSchema as At, getCurrentScope as B, hashCacheKey as C, buildCaseKey as Ct, serializeCacheRecording as D, caseRowSchema as Dt, deserializeCacheValue as E, caseDetailSchema as Et, appendToEvalOutput as F, runWithEvalRegistry as Ft, mergeEvalOutput as G, incrementEvalOutput as H, configureEvalRunLogs as I, runInEvalScope as J, nextEvalId as K, evalAssert as L, evalExpect as M, columnDefSchema as Mt, EvalAssertionError as N, defineEval as Nt, serializeCacheValue as O, evalStatAggregateSchema as Ot, EvalRuntimeUsageError as P, getEvalRegistry as Pt, setScopeCacheContext as Q, evalLog as R, evalTracer as S, resolveLlmCallsConfig as St, deserializeCacheRecording as T, getCaseRowCaseKey as Tt, isInEvalScope as U, getEvalCaseInput as V, matchesEvalTags as W, runWithEvalClock as X, runInExistingEvalScope as Y, setEvalOutput as Z, createFsCacheStore as _, validateEvalTagName as _t, isCaseChildParentMessage as a, extractLlmCalls as at, captureEvalSpanError as b, runSummarySchema as bt, resolveArtifactPath as c, applyDerivedCallAttributes as ct, loadEvalModule as d, getEvalDisplayStatus as dt, createRunRequestSchema as et, resolveEvalDefaultConfig as f, deriveScopedSummaryFromCases as ft, createBufferedCacheStore as g, matchesTagsFilter as gt, commitPendingCacheWrites as h, dedupeEvalTags as ht, isCaseChildMessage as i, extractApiCalls as it, readManualInputFile as j, evalChartsConfigSchema as jt, repoFile as k, evalStatsConfigSchema as kt, registerAgentEvalsPackageResolutionHooks as l, getNestedAttribute as lt, buildDeclaredColumnDefs as m, deriveStatusFromChildStatuses as mt, resolveRunnableEvalCases as n, extractCacheEntries as nt, stripTerminalControlCodes as o, simulateLlmCallCost as ot, loadConfig as p, deriveStatusFromCaseRows as pt, runInEvalRuntimeScope as q, runCase as r, extractCacheHits as rt, resolveTracePresentation as s, simulateTokenAllocation as st, filterEvalCases as t, updateManualScoreRequestSchema as tt, runWithModuleIsolation as u, getEvalTitle as ut, getCacheRetentionOptions as v, validateTagsFilterExpression as vt, hashCacheKeySync as w, buildEvalKey as wt, evalSpan as x, resolveApiCallsConfig as xt, buildTraceTree as y, runManifestSchema as yt, evalTime as z };
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { Ct as buildCaseKey, Dt as caseRowSchema, Et as caseDetailSchema, Ft as runWithEvalRegistry, Tt as getCaseRowCaseKey, X as runWithEvalClock, _t as validateEvalTagName, bt as runSummarySchema, d as loadEvalModule, f as resolveEvalDefaultConfig, ft as deriveScopedSummaryFromCases, gt as matchesTagsFilter, h as commitPendingCacheWrites, ht as dedupeEvalTags, i as isCaseChildMessage, m as buildDeclaredColumnDefs, mt as deriveStatusFromChildStatuses, n as resolveRunnableEvalCases, o as stripTerminalControlCodes, pt as deriveStatusFromCaseRows, q as runInEvalRuntimeScope, t as filterEvalCases, u as runWithModuleIsolation, vt as validateTagsFilterExpression, yt as runManifestSchema } from "./runExecution-Bu9yfdUS.mjs";
|
|
2
2
|
import { Result, resultify } from "t-result";
|
|
3
3
|
import { readFile, readdir, rm, writeFile } from "node:fs/promises";
|
|
4
4
|
import { dirname, join } from "node:path";
|
|
5
|
-
import { existsSync } from "node:fs";
|
|
5
|
+
import { existsSync, readFileSync } from "node:fs";
|
|
6
6
|
import { fileURLToPath } from "node:url";
|
|
7
7
|
import { spawn } from "node:child_process";
|
|
8
8
|
//#region ../runner/src/chartValidation.ts
|
|
@@ -670,7 +670,7 @@ async function recomputeEvalStatusesInRuns(params) {
|
|
|
670
670
|
let changed = false;
|
|
671
671
|
for (const caseRow of run.cases) {
|
|
672
672
|
if (caseRow.evalKey !== params.evalKey) continue;
|
|
673
|
-
const caseDetail = run.caseDetails.get(getCaseRowCaseKey(caseRow));
|
|
673
|
+
const caseDetail = params.getCaseDetail?.(run, caseRow) ?? run.caseDetails.get(getCaseRowCaseKey(caseRow));
|
|
674
674
|
const nextStatus = recomputePersistedCaseStatus(caseRow, caseDetail, params.scoreThresholds);
|
|
675
675
|
if (caseRow.status === nextStatus) continue;
|
|
676
676
|
caseRow.status = nextStatus;
|
|
@@ -725,14 +725,22 @@ function nextShortIdFromSnapshots(snapshots) {
|
|
|
725
725
|
}
|
|
726
726
|
return maxNum + 1;
|
|
727
727
|
}
|
|
728
|
-
|
|
728
|
+
/**
|
|
729
|
+
* Load persisted run metadata from the local state directory.
|
|
730
|
+
*
|
|
731
|
+
* Case details are skipped by default so long-running app processes can keep
|
|
732
|
+
* run history in memory without retaining every trace payload. Pass
|
|
733
|
+
* `includeCaseDetails` only for narrow maintenance flows that need full
|
|
734
|
+
* details for every case.
|
|
735
|
+
*/
|
|
736
|
+
async function loadPersistedRunSnapshots(localStateDir, options = {}) {
|
|
729
737
|
const runsDir = join(localStateDir, "runs");
|
|
730
738
|
const entriesResult = await resultify(() => readdir(runsDir, { withFileTypes: true }));
|
|
731
739
|
if (entriesResult.error) return [];
|
|
732
740
|
const snapshots = [];
|
|
733
741
|
const runDirs = entriesResult.value.filter((entry) => entry.isDirectory()).map((entry) => join(runsDir, entry.name)).toSorted();
|
|
734
742
|
for (const runDir of runDirs) {
|
|
735
|
-
const snapshot = await loadPersistedRunSnapshot(runDir);
|
|
743
|
+
const snapshot = await loadPersistedRunSnapshot(runDir, options);
|
|
736
744
|
if (!snapshot) continue;
|
|
737
745
|
snapshots.push(snapshot);
|
|
738
746
|
}
|
|
@@ -766,7 +774,14 @@ function getLatestRunInfos(params) {
|
|
|
766
774
|
function toLastRunStatus$1(status) {
|
|
767
775
|
return status === "pending" ? null : status;
|
|
768
776
|
}
|
|
769
|
-
|
|
777
|
+
/**
|
|
778
|
+
* Load one persisted run snapshot from disk.
|
|
779
|
+
*
|
|
780
|
+
* The returned snapshot includes manifest, summary, and case rows. Case
|
|
781
|
+
* details are loaded only when `includeCaseDetails` is true; otherwise callers
|
|
782
|
+
* should use `loadPersistedCaseDetail` for the specific case being inspected.
|
|
783
|
+
*/
|
|
784
|
+
async function loadPersistedRunSnapshot(runDir, options = {}) {
|
|
770
785
|
const manifest = await readParsedJsonFile(join(runDir, "run.json"), { safeParse: runManifestSchema.safeParse.bind(runManifestSchema) });
|
|
771
786
|
if (!manifest) return null;
|
|
772
787
|
const summary = await readParsedJsonFile(join(runDir, "summary.json"), { safeParse: runSummarySchema.safeParse.bind(runSummarySchema) });
|
|
@@ -776,9 +791,18 @@ async function loadPersistedRunSnapshot(runDir) {
|
|
|
776
791
|
manifest,
|
|
777
792
|
summary,
|
|
778
793
|
cases: await readCaseRows(runDir),
|
|
779
|
-
caseDetails: await readCaseDetails(runDir)
|
|
794
|
+
caseDetails: options.includeCaseDetails === true ? await readCaseDetails(runDir) : /* @__PURE__ */ new Map()
|
|
780
795
|
};
|
|
781
796
|
}
|
|
797
|
+
/**
|
|
798
|
+
* Load one persisted case detail by its artifact file id.
|
|
799
|
+
*
|
|
800
|
+
* Returns `null` when the file is missing, invalid JSON, or no longer matches
|
|
801
|
+
* the current case-detail schema.
|
|
802
|
+
*/
|
|
803
|
+
function loadPersistedCaseDetail(runDir, fileId) {
|
|
804
|
+
return readParsedJsonFileSync(join(runDir, "case-details", `${encodeCaseDetailFileName(fileId)}.json`), { safeParse: caseDetailSchema.safeParse.bind(caseDetailSchema) });
|
|
805
|
+
}
|
|
782
806
|
async function readParsedJsonFile(filePath, schema) {
|
|
783
807
|
const fileResult = await resultify(() => readFile(filePath, "utf-8"));
|
|
784
808
|
if (fileResult.error) return null;
|
|
@@ -788,6 +812,15 @@ async function readParsedJsonFile(filePath, schema) {
|
|
|
788
812
|
if (!parsed.success) return null;
|
|
789
813
|
return parsed.data;
|
|
790
814
|
}
|
|
815
|
+
function readParsedJsonFileSync(filePath, schema) {
|
|
816
|
+
const fileResult = resultify(() => readFileSync(filePath, "utf-8"));
|
|
817
|
+
if (fileResult.error) return null;
|
|
818
|
+
const jsonResult = resultify(() => JSON.parse(fileResult.value));
|
|
819
|
+
if (jsonResult.error) return null;
|
|
820
|
+
const parsed = schema.safeParse(jsonResult.value);
|
|
821
|
+
if (!parsed.success) return null;
|
|
822
|
+
return parsed.data;
|
|
823
|
+
}
|
|
791
824
|
async function readCaseRows(runDir) {
|
|
792
825
|
const fileResult = await resultify(() => readFile(join(runDir, "cases.jsonl"), "utf-8"));
|
|
793
826
|
if (fileResult.error) return [];
|
|
@@ -1660,4 +1693,4 @@ function toLastRunStatus(status) {
|
|
|
1660
1693
|
return status === "pending" ? null : status;
|
|
1661
1694
|
}
|
|
1662
1695
|
//#endregion
|
|
1663
|
-
export {
|
|
1696
|
+
export { parseEvalDiscovery as C, loadIsolatedEvalRegistry as S, recomputePersistedCaseStatus as _, validateTagsFilters as a, parseManualInputValues as b, getLatestRunInfos as c, loadPersistedRunSnapshots as d, nextShortIdFromSnapshots as f, recomputeEvalStatusesInRuns as g, persistRunState as h, resolveEvalTags as i, loadPersistedCaseDetail as l, deleteTemporaryRuns as m, getTargetEvalKeys as n, generateRunId as o, persistCaseDetail as p, getTargetEvals as r, getLastRunStatuses as s, executeRun as t, loadPersistedRunSnapshot as u, runTouchesEval as v, validateCharts as w, deriveEvalFreshness as x, buildManualInputDescriptor as y };
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import { n as initRunner, t as getRunnerInstance } from "./runner-
|
|
1
|
+
import { n as initRunner, t as getRunnerInstance } from "./runner-SxtKn-Xh.mjs";
|
|
2
2
|
export { getRunnerInstance, initRunner };
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { n as createRunner } from "./cli-
|
|
2
|
-
import "./src-
|
|
1
|
+
import { n as createRunner } from "./cli-OLZIjQpx.mjs";
|
|
2
|
+
import "./src-Cy3OxoZW.mjs";
|
|
3
3
|
//#region ../../apps/server/src/runner.ts
|
|
4
4
|
let runnerInstance = null;
|
|
5
5
|
function getRunnerInstance({ loadEnv = true } = {}) {
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import "./cli-
|
|
1
|
+
import { Nt as defineEval$1, W as matchesEvalTags$1 } from "./runExecution-Bu9yfdUS.mjs";
|
|
2
|
+
import "./cli-OLZIjQpx.mjs";
|
|
3
3
|
//#region src/index.ts
|
|
4
4
|
/** Register an eval definition with typed tag support. */
|
|
5
5
|
function defineEval(definition) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ls-stack/agent-eval",
|
|
3
|
-
"version": "0.60.
|
|
3
|
+
"version": "0.60.4",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"bin": {
|
|
6
6
|
"agent-evals": "./dist/bin.mjs"
|
|
@@ -32,9 +32,9 @@
|
|
|
32
32
|
"devDependencies": {
|
|
33
33
|
"@types/node": "^24.7.2",
|
|
34
34
|
"typescript": "^5.9.2",
|
|
35
|
-
"@agent-evals/runner": "0.0.1",
|
|
36
35
|
"@agent-evals/shared": "0.0.1",
|
|
37
|
-
"@agent-evals/sdk": "0.0.1"
|
|
36
|
+
"@agent-evals/sdk": "0.0.1",
|
|
37
|
+
"@agent-evals/runner": "0.0.1"
|
|
38
38
|
},
|
|
39
39
|
"scripts": {
|
|
40
40
|
"build": "pnpm --filter @agent-evals/web build && pnpm --filter @agent-evals/shared build && pnpm --filter @agent-evals/sdk build && pnpm --filter @agent-evals/runner build && tsdown --filter cli-js && tsdown --filter cli-types",
|