@ls-stack/agent-eval 0.60.2 → 0.60.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-DPCFFkyQ.mjs → app-l3ynaNsb.mjs} +3 -3
- package/dist/bin.mjs +1 -1
- package/dist/{cli-CbePEEua.mjs → cli-BSVUCUxr.mjs} +33 -7
- package/dist/index.d.mts +39 -39
- package/dist/index.mjs +2 -2
- package/dist/runChild.mjs +1 -1
- package/dist/{runOrchestration-BpwW0AmB.mjs → runOrchestration-C7qQISz2.mjs} +40 -7
- package/dist/{runner-XEP21_u9.mjs → runner-C9xNJHt3.mjs} +1 -1
- package/dist/{runner-Kp0JqxrU.mjs → runner-DmkSq-QG.mjs} +2 -2
- package/dist/{src-CVM_FqPx.mjs → src-D5vGo2iv.mjs} +1 -1
- package/package.json +1 -1
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { Et as getCaseRowCaseKey, Ot as caseRowSchema, dt as getEvalTitle, nt as updateManualScoreRequestSchema, rt as extractCacheEntries, tt as createRunRequestSchema } from "./runExecution-Bq0Y3y_1.mjs";
|
|
2
|
-
import { o as stageManualInputFile } from "./cli-CbePEEua.mjs";
|
|
3
|
-
import "./src-CVM_FqPx.mjs";
|
|
4
|
-
import { t as getRunnerInstance } from "./runner-Kp0JqxrU.mjs";
|
|
2
|
+
import { o as stageManualInputFile } from "./cli-BSVUCUxr.mjs";
|
|
3
|
+
import "./src-D5vGo2iv.mjs";
|
|
4
|
+
import { t as getRunnerInstance } from "./runner-DmkSq-QG.mjs";
|
|
5
5
|
import { z } from "zod";
|
|
6
6
|
import { resultify } from "t-result";
|
|
7
7
|
import { readFile } from "node:fs/promises";
|
package/dist/bin.mjs
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { Ct as resolveLlmCallsConfig, Et as getCaseRowCaseKey, Ft as getEvalRegistry, Ot as caseRowSchema, St as resolveApiCallsConfig, Tt as buildEvalKey, _t as matchesTagsFilter, c as resolveArtifactPath, dt as getEvalTitle, f as resolveEvalDefaultConfig, ft as getEvalDisplayStatus, h as normalizeScoreDef, lt as applyDerivedCallAttributes, m as buildDeclaredColumnDefs, o as stripTerminalControlCodes, p as loadConfig, pt as deriveScopedSummaryFromCases, s as resolveTracePresentation, v as createFsCacheStore, xt as runSummarySchema, y as getCacheRetentionOptions } from "./runExecution-Bq0Y3y_1.mjs";
|
|
2
|
-
import { C as
|
|
2
|
+
import { C as parseEvalDiscovery, S as loadIsolatedEvalRegistry, _ as recomputePersistedCaseStatus, a as validateTagsFilters, b as parseManualInputValues, c as getLatestRunInfos, d as loadPersistedRunSnapshots, f as nextShortIdFromSnapshots, g as recomputeEvalStatusesInRuns, h as persistRunState, i as resolveEvalTags, l as loadPersistedCaseDetail, m as deleteTemporaryRuns, n as getTargetEvalKeys, o as generateRunId, p as persistCaseDetail, s as getLastRunStatuses, u as loadPersistedRunSnapshot, v as runTouchesEval, w as validateCharts, x as deriveEvalFreshness, y as buildManualInputDescriptor } from "./runOrchestration-C7qQISz2.mjs";
|
|
3
3
|
import { parseEnv } from "node:util";
|
|
4
4
|
import { resultify } from "t-result";
|
|
5
5
|
import { copyFile, mkdir, readFile, rm, writeFile } from "node:fs/promises";
|
|
@@ -28,10 +28,14 @@ function resolveCaseDetailLookup(run, caseId) {
|
|
|
28
28
|
const caseDetail = run.caseDetails.get(lookupId);
|
|
29
29
|
if (caseDetail) return caseDetail;
|
|
30
30
|
}
|
|
31
|
-
const matchingCaseRow = run
|
|
31
|
+
const matchingCaseRow = resolveCaseRowForCaseDetailLookup(run, caseId);
|
|
32
32
|
if (matchingCaseRow === void 0) return void 0;
|
|
33
33
|
return run.caseDetails.get(getCaseRowCaseKey(matchingCaseRow));
|
|
34
34
|
}
|
|
35
|
+
function resolveCaseRowForCaseDetailLookup(run, caseId) {
|
|
36
|
+
const lookupIds = new Set(getCaseLookupIds(caseId));
|
|
37
|
+
return run.cases.find((caseRow) => lookupIds.has(getCaseRowCaseKey(caseRow)) || lookupIds.has(caseRow.caseId));
|
|
38
|
+
}
|
|
35
39
|
//#endregion
|
|
36
40
|
//#region ../runner/src/configReload.ts
|
|
37
41
|
/** Coordinates idle-only reloads for workspace config and `.env` in app mode. */
|
|
@@ -823,7 +827,7 @@ async function markRunTerminalFromChild(runState, event, managerContext) {
|
|
|
823
827
|
runState.manifest = snapshot.manifest;
|
|
824
828
|
runState.summary = snapshot.summary;
|
|
825
829
|
runState.cases = snapshot.cases;
|
|
826
|
-
runState.caseDetails =
|
|
830
|
+
runState.caseDetails = /* @__PURE__ */ new Map();
|
|
827
831
|
} else if (event.type === "run.finished") {
|
|
828
832
|
runState.manifest.status = "completed";
|
|
829
833
|
runState.manifest.endedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
@@ -995,6 +999,26 @@ function createRunner({ watchForChanges = true, loadEnv = true } = {}) {
|
|
|
995
999
|
if (typeof config.concurrency !== "number" || !Number.isFinite(config.concurrency)) return 1;
|
|
996
1000
|
return Math.max(1, Math.floor(config.concurrency));
|
|
997
1001
|
}
|
|
1002
|
+
function getCaseDetailFileId(run, caseRow) {
|
|
1003
|
+
const caseKey = getCaseRowCaseKey(caseRow);
|
|
1004
|
+
return run.cases.some((existing) => existing.caseId === caseRow.caseId && getCaseRowCaseKey(existing) !== caseKey) ? caseKey : caseRow.caseId;
|
|
1005
|
+
}
|
|
1006
|
+
function hydrateCaseDetailForRow(run, caseRow) {
|
|
1007
|
+
const caseKey = getCaseRowCaseKey(caseRow);
|
|
1008
|
+
const cached = run.caseDetails.get(caseKey);
|
|
1009
|
+
if (cached !== void 0) return cached;
|
|
1010
|
+
const detail = loadPersistedCaseDetail(run.runDir, getCaseDetailFileId(run, caseRow));
|
|
1011
|
+
if (detail === null) return void 0;
|
|
1012
|
+
run.caseDetails.set(detail.caseKey ?? detail.caseId, detail);
|
|
1013
|
+
return detail;
|
|
1014
|
+
}
|
|
1015
|
+
function hydrateCaseDetailForLookup(run, caseId) {
|
|
1016
|
+
const cached = resolveCaseDetailLookup(run, caseId);
|
|
1017
|
+
if (cached !== void 0) return cached;
|
|
1018
|
+
const caseRow = resolveCaseRowForCaseDetailLookup(run, caseId);
|
|
1019
|
+
if (caseRow === void 0) return void 0;
|
|
1020
|
+
return hydrateCaseDetailForRow(run, caseRow);
|
|
1021
|
+
}
|
|
998
1022
|
function nextRegistryLoadIsolationKey(prefix, filePath) {
|
|
999
1023
|
registryLoadCounter++;
|
|
1000
1024
|
return `${prefix}:${String(registryLoadCounter)}:${filePath}`;
|
|
@@ -1041,6 +1065,7 @@ function createRunner({ watchForChanges = true, loadEnv = true } = {}) {
|
|
|
1041
1065
|
evalKey: evalMeta.key,
|
|
1042
1066
|
evalExists: evals.has(evalMeta.key),
|
|
1043
1067
|
scoreThresholds,
|
|
1068
|
+
getCaseDetail: hydrateCaseDetailForRow,
|
|
1044
1069
|
persistCaseDetail
|
|
1045
1070
|
});
|
|
1046
1071
|
emitDiscoveryEvent();
|
|
@@ -1052,6 +1077,7 @@ function createRunner({ watchForChanges = true, loadEnv = true } = {}) {
|
|
|
1052
1077
|
updated: false,
|
|
1053
1078
|
reason: "Run not found"
|
|
1054
1079
|
};
|
|
1080
|
+
hydrateCaseDetailForLookup(run, caseId);
|
|
1055
1081
|
return recalculateDerivedAttributesForCase({
|
|
1056
1082
|
run,
|
|
1057
1083
|
caseId,
|
|
@@ -1107,7 +1133,7 @@ function createRunner({ watchForChanges = true, loadEnv = true } = {}) {
|
|
|
1107
1133
|
updated: false,
|
|
1108
1134
|
reason: "Manual score not found"
|
|
1109
1135
|
};
|
|
1110
|
-
const caseDetail = run
|
|
1136
|
+
const caseDetail = hydrateCaseDetailForRow(run, caseRow);
|
|
1111
1137
|
if (!caseDetail) return {
|
|
1112
1138
|
updated: false,
|
|
1113
1139
|
reason: "Case detail not found"
|
|
@@ -1480,7 +1506,7 @@ function createRunner({ watchForChanges = true, loadEnv = true } = {}) {
|
|
|
1480
1506
|
getCaseDetail(runId, caseId) {
|
|
1481
1507
|
const run = runs.get(runId);
|
|
1482
1508
|
if (!run) return void 0;
|
|
1483
|
-
return
|
|
1509
|
+
return hydrateCaseDetailForLookup(run, caseId);
|
|
1484
1510
|
},
|
|
1485
1511
|
subscribe(runId, listener) {
|
|
1486
1512
|
const run = runs.get(runId);
|
|
@@ -2232,8 +2258,8 @@ async function commandApp(args) {
|
|
|
2232
2258
|
const { serve } = await import("@hono/node-server");
|
|
2233
2259
|
const bundledWebDist = resolve(currentDir, "apps/web/dist");
|
|
2234
2260
|
if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
|
|
2235
|
-
const appModule = await import("./app-DPCFFkyQ.mjs");
|
|
2236
|
-
const runnerModule = await import("./runner-XEP21_u9.mjs");
|
|
2261
|
+
const appModule = await import("./app-l3ynaNsb.mjs");
|
|
2262
|
+
const runnerModule = await import("./runner-C9xNJHt3.mjs");
|
|
2237
2263
|
if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
|
|
2238
2264
|
if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
|
|
2239
2265
|
await runnerModule.initRunner({ loadEnv: args.loadEnv });
|
package/dist/index.d.mts
CHANGED
|
@@ -1942,7 +1942,6 @@ declare const columnFormatSchema: z.ZodEnum<{
|
|
|
1942
1942
|
number: "number";
|
|
1943
1943
|
boolean: "boolean";
|
|
1944
1944
|
file: "file";
|
|
1945
|
-
duration: "duration";
|
|
1946
1945
|
markdown: "markdown";
|
|
1947
1946
|
json: "json";
|
|
1948
1947
|
image: "image";
|
|
@@ -1950,6 +1949,7 @@ declare const columnFormatSchema: z.ZodEnum<{
|
|
|
1950
1949
|
pdf: "pdf";
|
|
1951
1950
|
audio: "audio";
|
|
1952
1951
|
video: "video";
|
|
1952
|
+
duration: "duration";
|
|
1953
1953
|
percent: "percent";
|
|
1954
1954
|
passFail: "passFail";
|
|
1955
1955
|
stars: "stars";
|
|
@@ -1969,7 +1969,6 @@ declare const columnDefSchema: z.ZodObject<{
|
|
|
1969
1969
|
number: "number";
|
|
1970
1970
|
boolean: "boolean";
|
|
1971
1971
|
file: "file";
|
|
1972
|
-
duration: "duration";
|
|
1973
1972
|
markdown: "markdown";
|
|
1974
1973
|
json: "json";
|
|
1975
1974
|
image: "image";
|
|
@@ -1977,6 +1976,7 @@ declare const columnDefSchema: z.ZodObject<{
|
|
|
1977
1976
|
pdf: "pdf";
|
|
1978
1977
|
audio: "audio";
|
|
1979
1978
|
video: "video";
|
|
1979
|
+
duration: "duration";
|
|
1980
1980
|
percent: "percent";
|
|
1981
1981
|
passFail: "passFail";
|
|
1982
1982
|
stars: "stars";
|
|
@@ -2022,8 +2022,8 @@ type CellValue = z.infer<typeof cellValueSchema>; //#endregion
|
|
|
2022
2022
|
declare const traceAttributeDisplayFormatSchema: z.ZodEnum<{
|
|
2023
2023
|
string: "string";
|
|
2024
2024
|
number: "number";
|
|
2025
|
-
duration: "duration";
|
|
2026
2025
|
json: "json";
|
|
2026
|
+
duration: "duration";
|
|
2027
2027
|
}>;
|
|
2028
2028
|
/**
|
|
2029
2029
|
* Formatting hint for trace attribute values rendered by the UI.
|
|
@@ -2047,8 +2047,8 @@ declare const traceAttributeDisplaySchema: z.ZodObject<{
|
|
|
2047
2047
|
format: z.ZodOptional<z.ZodEnum<{
|
|
2048
2048
|
string: "string";
|
|
2049
2049
|
number: "number";
|
|
2050
|
-
duration: "duration";
|
|
2051
2050
|
json: "json";
|
|
2051
|
+
duration: "duration";
|
|
2052
2052
|
}>>;
|
|
2053
2053
|
numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
2054
2054
|
placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
|
|
@@ -2083,8 +2083,8 @@ declare const traceDisplayConfigSchema: z.ZodObject<{
|
|
|
2083
2083
|
format: z.ZodOptional<z.ZodEnum<{
|
|
2084
2084
|
string: "string";
|
|
2085
2085
|
number: "number";
|
|
2086
|
-
duration: "duration";
|
|
2087
2086
|
json: "json";
|
|
2087
|
+
duration: "duration";
|
|
2088
2088
|
}>>;
|
|
2089
2089
|
numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
2090
2090
|
placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
|
|
@@ -2123,8 +2123,8 @@ declare const traceAttributeDisplayInputSchema: z.ZodObject<{
|
|
|
2123
2123
|
format: z.ZodOptional<z.ZodEnum<{
|
|
2124
2124
|
string: "string";
|
|
2125
2125
|
number: "number";
|
|
2126
|
-
duration: "duration";
|
|
2127
2126
|
json: "json";
|
|
2127
|
+
duration: "duration";
|
|
2128
2128
|
}>>;
|
|
2129
2129
|
numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
2130
2130
|
placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
|
|
@@ -2161,8 +2161,8 @@ declare const traceDisplayInputConfigSchema: z.ZodObject<{
|
|
|
2161
2161
|
format: z.ZodOptional<z.ZodEnum<{
|
|
2162
2162
|
string: "string";
|
|
2163
2163
|
number: "number";
|
|
2164
|
-
duration: "duration";
|
|
2165
2164
|
json: "json";
|
|
2165
|
+
duration: "duration";
|
|
2166
2166
|
}>>;
|
|
2167
2167
|
numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
2168
2168
|
placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
|
|
@@ -2327,7 +2327,6 @@ declare const evalStatItemSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
2327
2327
|
number: "number";
|
|
2328
2328
|
boolean: "boolean";
|
|
2329
2329
|
file: "file";
|
|
2330
|
-
duration: "duration";
|
|
2331
2330
|
markdown: "markdown";
|
|
2332
2331
|
json: "json";
|
|
2333
2332
|
image: "image";
|
|
@@ -2335,6 +2334,7 @@ declare const evalStatItemSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
2335
2334
|
pdf: "pdf";
|
|
2336
2335
|
audio: "audio";
|
|
2337
2336
|
video: "video";
|
|
2337
|
+
duration: "duration";
|
|
2338
2338
|
percent: "percent";
|
|
2339
2339
|
passFail: "passFail";
|
|
2340
2340
|
stars: "stars";
|
|
@@ -2391,7 +2391,6 @@ declare const evalStatsConfigSchema: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodOb
|
|
|
2391
2391
|
number: "number";
|
|
2392
2392
|
boolean: "boolean";
|
|
2393
2393
|
file: "file";
|
|
2394
|
-
duration: "duration";
|
|
2395
2394
|
markdown: "markdown";
|
|
2396
2395
|
json: "json";
|
|
2397
2396
|
image: "image";
|
|
@@ -2399,6 +2398,7 @@ declare const evalStatsConfigSchema: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodOb
|
|
|
2399
2398
|
pdf: "pdf";
|
|
2400
2399
|
audio: "audio";
|
|
2401
2400
|
video: "video";
|
|
2401
|
+
duration: "duration";
|
|
2402
2402
|
percent: "percent";
|
|
2403
2403
|
passFail: "passFail";
|
|
2404
2404
|
stars: "stars";
|
|
@@ -2437,7 +2437,6 @@ declare const evalSummarySchema$1: z.ZodObject<{
|
|
|
2437
2437
|
number: "number";
|
|
2438
2438
|
boolean: "boolean";
|
|
2439
2439
|
file: "file";
|
|
2440
|
-
duration: "duration";
|
|
2441
2440
|
markdown: "markdown";
|
|
2442
2441
|
json: "json";
|
|
2443
2442
|
image: "image";
|
|
@@ -2445,6 +2444,7 @@ declare const evalSummarySchema$1: z.ZodObject<{
|
|
|
2445
2444
|
pdf: "pdf";
|
|
2446
2445
|
audio: "audio";
|
|
2447
2446
|
video: "video";
|
|
2447
|
+
duration: "duration";
|
|
2448
2448
|
percent: "percent";
|
|
2449
2449
|
passFail: "passFail";
|
|
2450
2450
|
stars: "stars";
|
|
@@ -2518,7 +2518,6 @@ declare const evalSummarySchema$1: z.ZodObject<{
|
|
|
2518
2518
|
number: "number";
|
|
2519
2519
|
boolean: "boolean";
|
|
2520
2520
|
file: "file";
|
|
2521
|
-
duration: "duration";
|
|
2522
2521
|
markdown: "markdown";
|
|
2523
2522
|
json: "json";
|
|
2524
2523
|
image: "image";
|
|
@@ -2526,6 +2525,7 @@ declare const evalSummarySchema$1: z.ZodObject<{
|
|
|
2526
2525
|
pdf: "pdf";
|
|
2527
2526
|
audio: "audio";
|
|
2528
2527
|
video: "video";
|
|
2528
|
+
duration: "duration";
|
|
2529
2529
|
percent: "percent";
|
|
2530
2530
|
passFail: "passFail";
|
|
2531
2531
|
stars: "stars";
|
|
@@ -2559,8 +2559,8 @@ declare const evalSummarySchema$1: z.ZodObject<{
|
|
|
2559
2559
|
label: z.ZodOptional<z.ZodString>;
|
|
2560
2560
|
color: z.ZodOptional<z.ZodEnum<{
|
|
2561
2561
|
success: "success";
|
|
2562
|
-
accent: "accent";
|
|
2563
2562
|
error: "error";
|
|
2563
|
+
accent: "accent";
|
|
2564
2564
|
accentDim: "accentDim";
|
|
2565
2565
|
warning: "warning";
|
|
2566
2566
|
textMuted: "textMuted";
|
|
@@ -2583,8 +2583,8 @@ declare const evalSummarySchema$1: z.ZodObject<{
|
|
|
2583
2583
|
label: z.ZodOptional<z.ZodString>;
|
|
2584
2584
|
color: z.ZodOptional<z.ZodEnum<{
|
|
2585
2585
|
success: "success";
|
|
2586
|
-
accent: "accent";
|
|
2587
2586
|
error: "error";
|
|
2587
|
+
accent: "accent";
|
|
2588
2588
|
accentDim: "accentDim";
|
|
2589
2589
|
warning: "warning";
|
|
2590
2590
|
textMuted: "textMuted";
|
|
@@ -2749,7 +2749,6 @@ declare const caseRowSchema$1: z.ZodObject<{
|
|
|
2749
2749
|
number: "number";
|
|
2750
2750
|
boolean: "boolean";
|
|
2751
2751
|
file: "file";
|
|
2752
|
-
duration: "duration";
|
|
2753
2752
|
markdown: "markdown";
|
|
2754
2753
|
json: "json";
|
|
2755
2754
|
image: "image";
|
|
@@ -2757,6 +2756,7 @@ declare const caseRowSchema$1: z.ZodObject<{
|
|
|
2757
2756
|
pdf: "pdf";
|
|
2758
2757
|
audio: "audio";
|
|
2759
2758
|
video: "video";
|
|
2759
|
+
duration: "duration";
|
|
2760
2760
|
percent: "percent";
|
|
2761
2761
|
passFail: "passFail";
|
|
2762
2762
|
stars: "stars";
|
|
@@ -2894,8 +2894,8 @@ declare const scoreTraceSchema: z.ZodObject<{
|
|
|
2894
2894
|
format: z.ZodOptional<z.ZodEnum<{
|
|
2895
2895
|
string: "string";
|
|
2896
2896
|
number: "number";
|
|
2897
|
-
duration: "duration";
|
|
2898
2897
|
json: "json";
|
|
2898
|
+
duration: "duration";
|
|
2899
2899
|
}>>;
|
|
2900
2900
|
numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
2901
2901
|
placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
|
|
@@ -2997,8 +2997,8 @@ declare const caseDetailSchema$1: z.ZodObject<{
|
|
|
2997
2997
|
format: z.ZodOptional<z.ZodEnum<{
|
|
2998
2998
|
string: "string";
|
|
2999
2999
|
number: "number";
|
|
3000
|
-
duration: "duration";
|
|
3001
3000
|
json: "json";
|
|
3001
|
+
duration: "duration";
|
|
3002
3002
|
}>>;
|
|
3003
3003
|
numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
3004
3004
|
placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
|
|
@@ -3066,8 +3066,8 @@ declare const caseDetailSchema$1: z.ZodObject<{
|
|
|
3066
3066
|
format: z.ZodOptional<z.ZodEnum<{
|
|
3067
3067
|
string: "string";
|
|
3068
3068
|
number: "number";
|
|
3069
|
-
duration: "duration";
|
|
3070
3069
|
json: "json";
|
|
3070
|
+
duration: "duration";
|
|
3071
3071
|
}>>;
|
|
3072
3072
|
numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
3073
3073
|
placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
|
|
@@ -3127,7 +3127,6 @@ declare const caseDetailSchema$1: z.ZodObject<{
|
|
|
3127
3127
|
number: "number";
|
|
3128
3128
|
boolean: "boolean";
|
|
3129
3129
|
file: "file";
|
|
3130
|
-
duration: "duration";
|
|
3131
3130
|
markdown: "markdown";
|
|
3132
3131
|
json: "json";
|
|
3133
3132
|
image: "image";
|
|
@@ -3135,6 +3134,7 @@ declare const caseDetailSchema$1: z.ZodObject<{
|
|
|
3135
3134
|
pdf: "pdf";
|
|
3136
3135
|
audio: "audio";
|
|
3137
3136
|
video: "video";
|
|
3137
|
+
duration: "duration";
|
|
3138
3138
|
percent: "percent";
|
|
3139
3139
|
passFail: "passFail";
|
|
3140
3140
|
stars: "stars";
|
|
@@ -3284,8 +3284,8 @@ type EvalChartAggregate = z.infer<typeof evalChartAggregateSchema>;
|
|
|
3284
3284
|
*/
|
|
3285
3285
|
declare const evalChartColorSchema: z.ZodEnum<{
|
|
3286
3286
|
success: "success";
|
|
3287
|
-
accent: "accent";
|
|
3288
3287
|
error: "error";
|
|
3288
|
+
accent: "accent";
|
|
3289
3289
|
accentDim: "accentDim";
|
|
3290
3290
|
warning: "warning";
|
|
3291
3291
|
textMuted: "textMuted";
|
|
@@ -3313,8 +3313,8 @@ declare const evalChartMetricSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
3313
3313
|
label: z.ZodOptional<z.ZodString>;
|
|
3314
3314
|
color: z.ZodOptional<z.ZodEnum<{
|
|
3315
3315
|
success: "success";
|
|
3316
|
-
accent: "accent";
|
|
3317
3316
|
error: "error";
|
|
3317
|
+
accent: "accent";
|
|
3318
3318
|
accentDim: "accentDim";
|
|
3319
3319
|
warning: "warning";
|
|
3320
3320
|
textMuted: "textMuted";
|
|
@@ -3337,8 +3337,8 @@ declare const evalChartMetricSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
3337
3337
|
label: z.ZodOptional<z.ZodString>;
|
|
3338
3338
|
color: z.ZodOptional<z.ZodEnum<{
|
|
3339
3339
|
success: "success";
|
|
3340
|
-
accent: "accent";
|
|
3341
3340
|
error: "error";
|
|
3341
|
+
accent: "accent";
|
|
3342
3342
|
accentDim: "accentDim";
|
|
3343
3343
|
warning: "warning";
|
|
3344
3344
|
textMuted: "textMuted";
|
|
@@ -3396,8 +3396,8 @@ declare const evalChartConfigSchema: z.ZodObject<{
|
|
|
3396
3396
|
label: z.ZodOptional<z.ZodString>;
|
|
3397
3397
|
color: z.ZodOptional<z.ZodEnum<{
|
|
3398
3398
|
success: "success";
|
|
3399
|
-
accent: "accent";
|
|
3400
3399
|
error: "error";
|
|
3400
|
+
accent: "accent";
|
|
3401
3401
|
accentDim: "accentDim";
|
|
3402
3402
|
warning: "warning";
|
|
3403
3403
|
textMuted: "textMuted";
|
|
@@ -3420,8 +3420,8 @@ declare const evalChartConfigSchema: z.ZodObject<{
|
|
|
3420
3420
|
label: z.ZodOptional<z.ZodString>;
|
|
3421
3421
|
color: z.ZodOptional<z.ZodEnum<{
|
|
3422
3422
|
success: "success";
|
|
3423
|
-
accent: "accent";
|
|
3424
3423
|
error: "error";
|
|
3424
|
+
accent: "accent";
|
|
3425
3425
|
accentDim: "accentDim";
|
|
3426
3426
|
warning: "warning";
|
|
3427
3427
|
textMuted: "textMuted";
|
|
@@ -3486,8 +3486,8 @@ declare const evalChartsConfigSchema: z.ZodArray<z.ZodObject<{
|
|
|
3486
3486
|
label: z.ZodOptional<z.ZodString>;
|
|
3487
3487
|
color: z.ZodOptional<z.ZodEnum<{
|
|
3488
3488
|
success: "success";
|
|
3489
|
-
accent: "accent";
|
|
3490
3489
|
error: "error";
|
|
3490
|
+
accent: "accent";
|
|
3491
3491
|
accentDim: "accentDim";
|
|
3492
3492
|
warning: "warning";
|
|
3493
3493
|
textMuted: "textMuted";
|
|
@@ -3510,8 +3510,8 @@ declare const evalChartsConfigSchema: z.ZodArray<z.ZodObject<{
|
|
|
3510
3510
|
label: z.ZodOptional<z.ZodString>;
|
|
3511
3511
|
color: z.ZodOptional<z.ZodEnum<{
|
|
3512
3512
|
success: "success";
|
|
3513
|
-
accent: "accent";
|
|
3514
3513
|
error: "error";
|
|
3514
|
+
accent: "accent";
|
|
3515
3515
|
accentDim: "accentDim";
|
|
3516
3516
|
warning: "warning";
|
|
3517
3517
|
textMuted: "textMuted";
|
|
@@ -3808,8 +3808,8 @@ declare const llmCallMetricFormatSchema$1: z.ZodEnum<{
|
|
|
3808
3808
|
string: "string";
|
|
3809
3809
|
number: "number";
|
|
3810
3810
|
boolean: "boolean";
|
|
3811
|
-
duration: "duration";
|
|
3812
3811
|
json: "json";
|
|
3812
|
+
duration: "duration";
|
|
3813
3813
|
}>;
|
|
3814
3814
|
/** Render format applied to an LLM-call metric value. */
|
|
3815
3815
|
type LlmCallMetricFormat = z.infer<typeof llmCallMetricFormatSchema$1>;
|
|
@@ -3818,8 +3818,8 @@ declare const apiCallMetricFormatSchema$1: z.ZodEnum<{
|
|
|
3818
3818
|
string: "string";
|
|
3819
3819
|
number: "number";
|
|
3820
3820
|
boolean: "boolean";
|
|
3821
|
-
duration: "duration";
|
|
3822
3821
|
json: "json";
|
|
3822
|
+
duration: "duration";
|
|
3823
3823
|
}>;
|
|
3824
3824
|
/** Render format applied to an API-call metric value. */
|
|
3825
3825
|
type ApiCallMetricFormat = z.infer<typeof apiCallMetricFormatSchema$1>;
|
|
@@ -3888,8 +3888,8 @@ declare const llmCallMetricSchema: z.ZodObject<{
|
|
|
3888
3888
|
string: "string";
|
|
3889
3889
|
number: "number";
|
|
3890
3890
|
boolean: "boolean";
|
|
3891
|
-
duration: "duration";
|
|
3892
3891
|
json: "json";
|
|
3892
|
+
duration: "duration";
|
|
3893
3893
|
}>>;
|
|
3894
3894
|
numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
3895
3895
|
placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
|
|
@@ -3917,8 +3917,8 @@ declare const apiCallMetricSchema: z.ZodObject<{
|
|
|
3917
3917
|
string: "string";
|
|
3918
3918
|
number: "number";
|
|
3919
3919
|
boolean: "boolean";
|
|
3920
|
-
duration: "duration";
|
|
3921
3920
|
json: "json";
|
|
3921
|
+
duration: "duration";
|
|
3922
3922
|
}>>;
|
|
3923
3923
|
numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
3924
3924
|
placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
|
|
@@ -4031,8 +4031,8 @@ declare const llmCallsConfigSchema: z.ZodObject<{
|
|
|
4031
4031
|
string: "string";
|
|
4032
4032
|
number: "number";
|
|
4033
4033
|
boolean: "boolean";
|
|
4034
|
-
duration: "duration";
|
|
4035
4034
|
json: "json";
|
|
4035
|
+
duration: "duration";
|
|
4036
4036
|
}>>;
|
|
4037
4037
|
numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
4038
4038
|
placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
|
|
@@ -4067,8 +4067,8 @@ declare const apiCallsConfigSchema: z.ZodObject<{
|
|
|
4067
4067
|
string: "string";
|
|
4068
4068
|
number: "number";
|
|
4069
4069
|
boolean: "boolean";
|
|
4070
|
-
duration: "duration";
|
|
4071
4070
|
json: "json";
|
|
4071
|
+
duration: "duration";
|
|
4072
4072
|
}>>;
|
|
4073
4073
|
numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
4074
4074
|
placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
|
|
@@ -4670,7 +4670,6 @@ declare const cacheRecordingOpSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
4670
4670
|
number: "number";
|
|
4671
4671
|
boolean: "boolean";
|
|
4672
4672
|
file: "file";
|
|
4673
|
-
duration: "duration";
|
|
4674
4673
|
markdown: "markdown";
|
|
4675
4674
|
json: "json";
|
|
4676
4675
|
image: "image";
|
|
@@ -4678,6 +4677,7 @@ declare const cacheRecordingOpSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
4678
4677
|
pdf: "pdf";
|
|
4679
4678
|
audio: "audio";
|
|
4680
4679
|
video: "video";
|
|
4680
|
+
duration: "duration";
|
|
4681
4681
|
percent: "percent";
|
|
4682
4682
|
passFail: "passFail";
|
|
4683
4683
|
stars: "stars";
|
|
@@ -4758,7 +4758,6 @@ declare const cacheRecordingSchema: z.ZodObject<{
|
|
|
4758
4758
|
number: "number";
|
|
4759
4759
|
boolean: "boolean";
|
|
4760
4760
|
file: "file";
|
|
4761
|
-
duration: "duration";
|
|
4762
4761
|
markdown: "markdown";
|
|
4763
4762
|
json: "json";
|
|
4764
4763
|
image: "image";
|
|
@@ -4766,6 +4765,7 @@ declare const cacheRecordingSchema: z.ZodObject<{
|
|
|
4766
4765
|
pdf: "pdf";
|
|
4767
4766
|
audio: "audio";
|
|
4768
4767
|
video: "video";
|
|
4768
|
+
duration: "duration";
|
|
4769
4769
|
percent: "percent";
|
|
4770
4770
|
passFail: "passFail";
|
|
4771
4771
|
stars: "stars";
|
|
@@ -4859,7 +4859,6 @@ declare const cacheEntrySchema: z.ZodObject<{
|
|
|
4859
4859
|
number: "number";
|
|
4860
4860
|
boolean: "boolean";
|
|
4861
4861
|
file: "file";
|
|
4862
|
-
duration: "duration";
|
|
4863
4862
|
markdown: "markdown";
|
|
4864
4863
|
json: "json";
|
|
4865
4864
|
image: "image";
|
|
@@ -4867,6 +4866,7 @@ declare const cacheEntrySchema: z.ZodObject<{
|
|
|
4867
4866
|
pdf: "pdf";
|
|
4868
4867
|
audio: "audio";
|
|
4869
4868
|
video: "video";
|
|
4869
|
+
duration: "duration";
|
|
4870
4870
|
percent: "percent";
|
|
4871
4871
|
passFail: "passFail";
|
|
4872
4872
|
stars: "stars";
|
|
@@ -4977,7 +4977,6 @@ declare const cacheDebugKeyEntrySchema: z.ZodObject<{
|
|
|
4977
4977
|
number: "number";
|
|
4978
4978
|
boolean: "boolean";
|
|
4979
4979
|
file: "file";
|
|
4980
|
-
duration: "duration";
|
|
4981
4980
|
markdown: "markdown";
|
|
4982
4981
|
json: "json";
|
|
4983
4982
|
image: "image";
|
|
@@ -4985,6 +4984,7 @@ declare const cacheDebugKeyEntrySchema: z.ZodObject<{
|
|
|
4985
4984
|
pdf: "pdf";
|
|
4986
4985
|
audio: "audio";
|
|
4987
4986
|
video: "video";
|
|
4987
|
+
duration: "duration";
|
|
4988
4988
|
percent: "percent";
|
|
4989
4989
|
passFail: "passFail";
|
|
4990
4990
|
stars: "stars";
|
|
@@ -5084,7 +5084,6 @@ declare const cacheEntryWithDebugKeySchema$1: z.ZodObject<{
|
|
|
5084
5084
|
number: "number";
|
|
5085
5085
|
boolean: "boolean";
|
|
5086
5086
|
file: "file";
|
|
5087
|
-
duration: "duration";
|
|
5088
5087
|
markdown: "markdown";
|
|
5089
5088
|
json: "json";
|
|
5090
5089
|
image: "image";
|
|
@@ -5092,6 +5091,7 @@ declare const cacheEntryWithDebugKeySchema$1: z.ZodObject<{
|
|
|
5092
5091
|
pdf: "pdf";
|
|
5093
5092
|
audio: "audio";
|
|
5094
5093
|
video: "video";
|
|
5094
|
+
duration: "duration";
|
|
5095
5095
|
percent: "percent";
|
|
5096
5096
|
passFail: "passFail";
|
|
5097
5097
|
stars: "stars";
|
|
@@ -5193,7 +5193,6 @@ declare const cacheEntryWithDebugKeySchema$1: z.ZodObject<{
|
|
|
5193
5193
|
number: "number";
|
|
5194
5194
|
boolean: "boolean";
|
|
5195
5195
|
file: "file";
|
|
5196
|
-
duration: "duration";
|
|
5197
5196
|
markdown: "markdown";
|
|
5198
5197
|
json: "json";
|
|
5199
5198
|
image: "image";
|
|
@@ -5201,6 +5200,7 @@ declare const cacheEntryWithDebugKeySchema$1: z.ZodObject<{
|
|
|
5201
5200
|
pdf: "pdf";
|
|
5202
5201
|
audio: "audio";
|
|
5203
5202
|
video: "video";
|
|
5203
|
+
duration: "duration";
|
|
5204
5204
|
percent: "percent";
|
|
5205
5205
|
passFail: "passFail";
|
|
5206
5206
|
stars: "stars";
|
|
@@ -5300,7 +5300,6 @@ declare const cacheFileSchema: z.ZodObject<{
|
|
|
5300
5300
|
number: "number";
|
|
5301
5301
|
boolean: "boolean";
|
|
5302
5302
|
file: "file";
|
|
5303
|
-
duration: "duration";
|
|
5304
5303
|
markdown: "markdown";
|
|
5305
5304
|
json: "json";
|
|
5306
5305
|
image: "image";
|
|
@@ -5308,6 +5307,7 @@ declare const cacheFileSchema: z.ZodObject<{
|
|
|
5308
5307
|
pdf: "pdf";
|
|
5309
5308
|
audio: "audio";
|
|
5310
5309
|
video: "video";
|
|
5310
|
+
duration: "duration";
|
|
5311
5311
|
percent: "percent";
|
|
5312
5312
|
passFail: "passFail";
|
|
5313
5313
|
stars: "stars";
|
|
@@ -5417,7 +5417,6 @@ declare const cacheDebugKeyFileSchema: z.ZodObject<{
|
|
|
5417
5417
|
number: "number";
|
|
5418
5418
|
boolean: "boolean";
|
|
5419
5419
|
file: "file";
|
|
5420
|
-
duration: "duration";
|
|
5421
5420
|
markdown: "markdown";
|
|
5422
5421
|
json: "json";
|
|
5423
5422
|
image: "image";
|
|
@@ -5425,6 +5424,7 @@ declare const cacheDebugKeyFileSchema: z.ZodObject<{
|
|
|
5425
5424
|
pdf: "pdf";
|
|
5426
5425
|
audio: "audio";
|
|
5427
5426
|
video: "video";
|
|
5427
|
+
duration: "duration";
|
|
5428
5428
|
percent: "percent";
|
|
5429
5429
|
passFail: "passFail";
|
|
5430
5430
|
stars: "stars";
|
package/dist/index.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
1
|
import { $ as setScopeCacheContext, A as repoFile, B as evalTime, C as evalTracer, D as deserializeCacheValue, E as deserializeCacheRecording, F as EvalRuntimeUsageError, Ft as getEvalRegistry, H as getEvalCaseInput, I as appendToEvalOutput, J as runInEvalRuntimeScope, K as mergeEvalOutput, M as readManualInputFile, N as evalExpect, O as serializeCacheRecording, P as EvalAssertionError, Q as setEvalOutput, R as evalAssert, S as evalSpan, T as hashCacheKeySync, U as incrementEvalOutput, V as getCurrentScope, W as isInEvalScope, X as runInExistingEvalScope, Y as runInEvalScope, at as extractApiCalls, b as buildTraceTree, ct as simulateTokenAllocation, et as startEvalBackgroundJob, it as extractCacheHits, j as manualInputFileValueSchema, k as serializeCacheValue, ot as extractLlmCalls, q as nextEvalId, rt as extractCacheEntries, st as simulateLlmCallCost, ut as getNestedAttribute, w as hashCacheKey, x as captureEvalSpanError, z as evalLog } from "./runExecution-Bq0Y3y_1.mjs";
|
|
2
|
-
import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-CbePEEua.mjs";
|
|
3
|
-
import { n as matchesEvalTags, t as defineEval } from "./src-CVM_FqPx.mjs";
|
|
2
|
+
import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-BSVUCUxr.mjs";
|
|
3
|
+
import { n as matchesEvalTags, t as defineEval } from "./src-D5vGo2iv.mjs";
|
|
4
4
|
export { EvalAssertionError, EvalRuntimeUsageError, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob };
|
package/dist/runChild.mjs
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { At as evalStatsConfigSchema, L as configureEvalRunLogs, Mt as evalChartsConfigSchema, Nt as columnDefSchema, Tt as buildEvalKey, bt as runManifestSchema, jt as manualInputDescriptorSchema, kt as evalStatAggregateSchema, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, tt as createRunRequestSchema, v as createFsCacheStore, xt as runSummarySchema, y as getCacheRetentionOptions } from "./runExecution-Bq0Y3y_1.mjs";
|
|
2
|
-
import {
|
|
2
|
+
import { C as parseEvalDiscovery, h as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-C7qQISz2.mjs";
|
|
3
3
|
import { z } from "zod";
|
|
4
4
|
import { readFile } from "node:fs/promises";
|
|
5
5
|
import { relative } from "node:path";
|
|
@@ -2,7 +2,7 @@ import { Dt as caseDetailSchema, Et as getCaseRowCaseKey, It as runWithEvalRegis
|
|
|
2
2
|
import { Result, resultify } from "t-result";
|
|
3
3
|
import { readFile, readdir, rm, writeFile } from "node:fs/promises";
|
|
4
4
|
import { dirname, join } from "node:path";
|
|
5
|
-
import { existsSync } from "node:fs";
|
|
5
|
+
import { existsSync, readFileSync } from "node:fs";
|
|
6
6
|
import { fileURLToPath } from "node:url";
|
|
7
7
|
import { spawn } from "node:child_process";
|
|
8
8
|
//#region ../runner/src/chartValidation.ts
|
|
@@ -670,7 +670,7 @@ async function recomputeEvalStatusesInRuns(params) {
|
|
|
670
670
|
let changed = false;
|
|
671
671
|
for (const caseRow of run.cases) {
|
|
672
672
|
if (caseRow.evalKey !== params.evalKey) continue;
|
|
673
|
-
const caseDetail = run.caseDetails.get(getCaseRowCaseKey(caseRow));
|
|
673
|
+
const caseDetail = params.getCaseDetail?.(run, caseRow) ?? run.caseDetails.get(getCaseRowCaseKey(caseRow));
|
|
674
674
|
const nextStatus = recomputePersistedCaseStatus(caseRow, caseDetail, params.scoreThresholds);
|
|
675
675
|
if (caseRow.status === nextStatus) continue;
|
|
676
676
|
caseRow.status = nextStatus;
|
|
@@ -725,14 +725,22 @@ function nextShortIdFromSnapshots(snapshots) {
|
|
|
725
725
|
}
|
|
726
726
|
return maxNum + 1;
|
|
727
727
|
}
|
|
728
|
-
|
|
728
|
+
/**
|
|
729
|
+
* Load persisted run metadata from the local state directory.
|
|
730
|
+
*
|
|
731
|
+
* Case details are skipped by default so long-running app processes can keep
|
|
732
|
+
* run history in memory without retaining every trace payload. Pass
|
|
733
|
+
* `includeCaseDetails` only for narrow maintenance flows that need full
|
|
734
|
+
* details for every case.
|
|
735
|
+
*/
|
|
736
|
+
async function loadPersistedRunSnapshots(localStateDir, options = {}) {
|
|
729
737
|
const runsDir = join(localStateDir, "runs");
|
|
730
738
|
const entriesResult = await resultify(() => readdir(runsDir, { withFileTypes: true }));
|
|
731
739
|
if (entriesResult.error) return [];
|
|
732
740
|
const snapshots = [];
|
|
733
741
|
const runDirs = entriesResult.value.filter((entry) => entry.isDirectory()).map((entry) => join(runsDir, entry.name)).toSorted();
|
|
734
742
|
for (const runDir of runDirs) {
|
|
735
|
-
const snapshot = await loadPersistedRunSnapshot(runDir);
|
|
743
|
+
const snapshot = await loadPersistedRunSnapshot(runDir, options);
|
|
736
744
|
if (!snapshot) continue;
|
|
737
745
|
snapshots.push(snapshot);
|
|
738
746
|
}
|
|
@@ -766,7 +774,14 @@ function getLatestRunInfos(params) {
|
|
|
766
774
|
function toLastRunStatus$1(status) {
|
|
767
775
|
return status === "pending" ? null : status;
|
|
768
776
|
}
|
|
769
|
-
|
|
777
|
+
/**
|
|
778
|
+
* Load one persisted run snapshot from disk.
|
|
779
|
+
*
|
|
780
|
+
* The returned snapshot includes manifest, summary, and case rows. Case
|
|
781
|
+
* details are loaded only when `includeCaseDetails` is true; otherwise callers
|
|
782
|
+
* should use `loadPersistedCaseDetail` for the specific case being inspected.
|
|
783
|
+
*/
|
|
784
|
+
async function loadPersistedRunSnapshot(runDir, options = {}) {
|
|
770
785
|
const manifest = await readParsedJsonFile(join(runDir, "run.json"), { safeParse: runManifestSchema.safeParse.bind(runManifestSchema) });
|
|
771
786
|
if (!manifest) return null;
|
|
772
787
|
const summary = await readParsedJsonFile(join(runDir, "summary.json"), { safeParse: runSummarySchema.safeParse.bind(runSummarySchema) });
|
|
@@ -776,9 +791,18 @@ async function loadPersistedRunSnapshot(runDir) {
|
|
|
776
791
|
manifest,
|
|
777
792
|
summary,
|
|
778
793
|
cases: await readCaseRows(runDir),
|
|
779
|
-
caseDetails: await readCaseDetails(runDir)
|
|
794
|
+
caseDetails: options.includeCaseDetails === true ? await readCaseDetails(runDir) : /* @__PURE__ */ new Map()
|
|
780
795
|
};
|
|
781
796
|
}
|
|
797
|
+
/**
|
|
798
|
+
* Load one persisted case detail by its artifact file id.
|
|
799
|
+
*
|
|
800
|
+
* Returns `null` when the file is missing, invalid JSON, or no longer matches
|
|
801
|
+
* the current case-detail schema.
|
|
802
|
+
*/
|
|
803
|
+
function loadPersistedCaseDetail(runDir, fileId) {
|
|
804
|
+
return readParsedJsonFileSync(join(runDir, "case-details", `${encodeCaseDetailFileName(fileId)}.json`), { safeParse: caseDetailSchema.safeParse.bind(caseDetailSchema) });
|
|
805
|
+
}
|
|
782
806
|
async function readParsedJsonFile(filePath, schema) {
|
|
783
807
|
const fileResult = await resultify(() => readFile(filePath, "utf-8"));
|
|
784
808
|
if (fileResult.error) return null;
|
|
@@ -788,6 +812,15 @@ async function readParsedJsonFile(filePath, schema) {
|
|
|
788
812
|
if (!parsed.success) return null;
|
|
789
813
|
return parsed.data;
|
|
790
814
|
}
|
|
815
|
+
function readParsedJsonFileSync(filePath, schema) {
|
|
816
|
+
const fileResult = resultify(() => readFileSync(filePath, "utf-8"));
|
|
817
|
+
if (fileResult.error) return null;
|
|
818
|
+
const jsonResult = resultify(() => JSON.parse(fileResult.value));
|
|
819
|
+
if (jsonResult.error) return null;
|
|
820
|
+
const parsed = schema.safeParse(jsonResult.value);
|
|
821
|
+
if (!parsed.success) return null;
|
|
822
|
+
return parsed.data;
|
|
823
|
+
}
|
|
791
824
|
async function readCaseRows(runDir) {
|
|
792
825
|
const fileResult = await resultify(() => readFile(join(runDir, "cases.jsonl"), "utf-8"));
|
|
793
826
|
if (fileResult.error) return [];
|
|
@@ -1660,4 +1693,4 @@ function toLastRunStatus(status) {
|
|
|
1660
1693
|
return status === "pending" ? null : status;
|
|
1661
1694
|
}
|
|
1662
1695
|
//#endregion
|
|
1663
|
-
export {
|
|
1696
|
+
export { parseEvalDiscovery as C, loadIsolatedEvalRegistry as S, recomputePersistedCaseStatus as _, validateTagsFilters as a, parseManualInputValues as b, getLatestRunInfos as c, loadPersistedRunSnapshots as d, nextShortIdFromSnapshots as f, recomputeEvalStatusesInRuns as g, persistRunState as h, resolveEvalTags as i, loadPersistedCaseDetail as l, deleteTemporaryRuns as m, getTargetEvalKeys as n, generateRunId as o, persistCaseDetail as p, getTargetEvals as r, getLastRunStatuses as s, executeRun as t, loadPersistedRunSnapshot as u, runTouchesEval as v, validateCharts as w, deriveEvalFreshness as x, buildManualInputDescriptor as y };
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import { n as initRunner, t as getRunnerInstance } from "./runner-Kp0JqxrU.mjs";
|
|
1
|
+
import { n as initRunner, t as getRunnerInstance } from "./runner-DmkSq-QG.mjs";
|
|
2
2
|
export { getRunnerInstance, initRunner };
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { n as createRunner } from "./cli-CbePEEua.mjs";
|
|
2
|
-
import "./src-CVM_FqPx.mjs";
|
|
1
|
+
import { n as createRunner } from "./cli-BSVUCUxr.mjs";
|
|
2
|
+
import "./src-D5vGo2iv.mjs";
|
|
3
3
|
//#region ../../apps/server/src/runner.ts
|
|
4
4
|
let runnerInstance = null;
|
|
5
5
|
function getRunnerInstance({ loadEnv = true } = {}) {
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { G as matchesEvalTags$1, Pt as defineEval$1 } from "./runExecution-Bq0Y3y_1.mjs";
|
|
2
|
-
import "./cli-CbePEEua.mjs";
|
|
2
|
+
import "./cli-BSVUCUxr.mjs";
|
|
3
3
|
//#region src/index.ts
|
|
4
4
|
/** Register an eval definition with typed tag support. */
|
|
5
5
|
function defineEval(definition) {
|