@ls-stack/agent-eval 0.42.1 → 0.42.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-mOYjX9zq.mjs → app-DPamBr5R.mjs} +4 -4
- package/dist/apps/web/dist/assets/{index-eFM9VIsz.css → index-S3J5Nm0o.css} +1 -1
- package/dist/apps/web/dist/assets/{index-CANDLTsq.js → index-XLJByNnS.js} +2 -2
- package/dist/apps/web/dist/index.html +2 -2
- package/dist/bin.mjs +1 -1
- package/dist/{cli-DbVfkr9T.mjs → cli-BeJCJMQo.mjs} +49 -23
- package/dist/index.d.mts +42 -43
- package/dist/index.mjs +3 -3
- package/dist/runChild.mjs +1 -1
- package/dist/{runOrchestration-SPaHx-SC.mjs → runOrchestration-OVUFw1fL.mjs} +12 -23
- package/dist/{runner-DiCQ57JQ.mjs → runner-B1KygirW.mjs} +2 -2
- package/dist/{runner-BYOdLBle.mjs → runner-BJQq7cpd.mjs} +1 -1
- package/dist/{src-CANi3gpd.mjs → src-D7_xKo7h.mjs} +2 -2
- package/package.json +3 -3
- package/skills/agent-eval/SKILL.md +15 -8
|
@@ -25,8 +25,8 @@
|
|
|
25
25
|
href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600&family=JetBrains+Mono:wght@400;500&display=swap"
|
|
26
26
|
rel="stylesheet"
|
|
27
27
|
/>
|
|
28
|
-
<script type="module" crossorigin src="/assets/index-CANDLTsq.js"></script>
|
|
29
|
-
<link rel="stylesheet" crossorigin href="/assets/index-eFM9VIsz.css">
|
|
28
|
+
<script type="module" crossorigin src="/assets/index-XLJByNnS.js"></script>
|
|
29
|
+
<link rel="stylesheet" crossorigin href="/assets/index-S3J5Nm0o.css">
|
|
30
30
|
</head>
|
|
31
31
|
<body>
|
|
32
32
|
<div id="root"></div>
|
package/dist/bin.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { A as validateCharts, At as buildEvalKey, C as deriveEvalFreshness, Ct as getEvalDisplayStatus, D as loadConfig, Dt as runSummarySchema, E as resolveEvalDefaultConfig,
|
|
1
|
+
import { A as validateCharts, At as buildEvalKey, C as deriveEvalFreshness, Ct as getEvalDisplayStatus, D as loadConfig, Dt as runSummarySchema, E as resolveEvalDefaultConfig, Lt as getEvalRegistry, O as buildDeclaredColumnDefs, Ot as resolveApiCallsConfig, S as parseManualInputValues, St as getEvalTitle, T as parseEvalDiscovery, Tt as matchesTagsFilter, _ as recomputePersistedCaseStatus, a as validateTagsFilters, b as resolveArtifactPath, bt as applyDerivedCallAttributes, c as getLastRunStatuses, d as loadPersistedRunSnapshots, f as nextShortIdFromSnapshots, g as recomputeEvalStatusesInRuns, h as persistRunState, i as resolveEvalTags, j as createFsCacheStore, jt as getCaseRowCaseKey, k as normalizeScoreDef, kt as resolveLlmCallsConfig, l as getLatestRunInfos, m as deleteTemporaryRuns, n as getTargetEvalKeys, o as stripTerminalControlCodes, p as persistCaseDetail, s as generateRunId, u as loadPersistedRunSnapshot, v as runTouchesEval, w as loadEvalModule, wt as deriveScopedSummaryFromCases, x as buildManualInputDescriptor, y as resolveTracePresentation } from "./runOrchestration-OVUFw1fL.mjs";
|
|
2
2
|
import { copyFile, mkdir, readFile, rm, writeFile } from "node:fs/promises";
|
|
3
3
|
import { basename, dirname, extname, isAbsolute, join, relative, resolve, sep } from "node:path";
|
|
4
4
|
import { createHash, randomUUID } from "node:crypto";
|
|
@@ -476,6 +476,11 @@ function validateManualInputsForRequest(params) {
|
|
|
476
476
|
};
|
|
477
477
|
}
|
|
478
478
|
//#endregion
|
|
479
|
+
//#region ../runner/src/objectUtils.ts
|
|
480
|
+
function isRecord(value) {
|
|
481
|
+
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
482
|
+
}
|
|
483
|
+
//#endregion
|
|
479
484
|
//#region ../runner/src/recalculateDerivedAttributes.ts
|
|
480
485
|
function getCaseArtifactFileIdForExistingRun(runState, caseRow) {
|
|
481
486
|
const caseKey = getCaseRowCaseKey(caseRow);
|
|
@@ -505,7 +510,7 @@ async function recalculateDerivedAttributesForCase(params) {
|
|
|
505
510
|
});
|
|
506
511
|
let nextTrace = spansWithDerivedAttributes;
|
|
507
512
|
let nextTraceDisplay = caseDetail.traceDisplay;
|
|
508
|
-
const evalMeta = params.evals.get(
|
|
513
|
+
const evalMeta = caseRow.evalKey === void 0 ? void 0 : params.evals.get(caseRow.evalKey);
|
|
509
514
|
const entry = evalMeta === void 0 ? void 0 : getEvalRegistry().get(evalMeta.id);
|
|
510
515
|
if (entry !== void 0) entry.use((evalDef) => {
|
|
511
516
|
const resolved = resolveTracePresentation(spansWithDerivedAttributes, params.traceDisplayConfig, evalDef.traceDisplay);
|
|
@@ -787,6 +792,36 @@ async function markRunTerminalFromChild(runState, event, managerContext) {
|
|
|
787
792
|
managerContext.emitDiscoveryEvent();
|
|
788
793
|
}
|
|
789
794
|
//#endregion
|
|
795
|
+
//#region ../runner/src/runnerStateHydration.ts
|
|
796
|
+
/** Rehydrate a persisted run while preserving live listeners/process handles. */
|
|
797
|
+
function toRunnerRunState(snapshot, existing) {
|
|
798
|
+
return {
|
|
799
|
+
...snapshot,
|
|
800
|
+
listeners: existing?.listeners ?? /* @__PURE__ */ new Set(),
|
|
801
|
+
childProcess: existing?.childProcess,
|
|
802
|
+
childTerminalReceived: existing?.childTerminalReceived ?? false
|
|
803
|
+
};
|
|
804
|
+
}
|
|
805
|
+
//#endregion
|
|
806
|
+
//#region ../runner/src/runTargetPersistence.ts
|
|
807
|
+
/** Build the exact-key run target persisted in run history. */
|
|
808
|
+
function buildPersistedRunTarget(params) {
|
|
809
|
+
const { target, evalKeys } = params;
|
|
810
|
+
if (target.mode === "all") return { mode: "all" };
|
|
811
|
+
const persistEvalKeys = (target.evalKeys?.length ?? 0) > 0 || (target.evalIds?.length ?? 0) > 0 || (target.files?.length ?? 0) > 0;
|
|
812
|
+
const keyedTarget = {
|
|
813
|
+
mode: target.mode,
|
|
814
|
+
evalKeys: persistEvalKeys && evalKeys.length > 0 ? evalKeys : void 0,
|
|
815
|
+
files: target.files,
|
|
816
|
+
tagsFilter: target.tagsFilter
|
|
817
|
+
};
|
|
818
|
+
if (target.mode === "caseIds") return {
|
|
819
|
+
...keyedTarget,
|
|
820
|
+
caseIds: target.caseIds
|
|
821
|
+
};
|
|
822
|
+
return keyedTarget;
|
|
823
|
+
}
|
|
824
|
+
//#endregion
|
|
790
825
|
//#region ../runner/src/watchRoots.ts
|
|
791
826
|
const globMagicCharacters = new Set([
|
|
792
827
|
"*",
|
|
@@ -823,9 +858,6 @@ function getWatchRootsForIncludePatterns(params) {
|
|
|
823
858
|
}
|
|
824
859
|
//#endregion
|
|
825
860
|
//#region ../runner/src/runner.ts
|
|
826
|
-
function isRecord(value) {
|
|
827
|
-
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
828
|
-
}
|
|
829
861
|
/** Create an in-memory eval runner bound to the current workspace config. */
|
|
830
862
|
function createRunner({ watchForChanges = true } = {}) {
|
|
831
863
|
let config;
|
|
@@ -898,7 +930,6 @@ function createRunner({ watchForChanges = true } = {}) {
|
|
|
898
930
|
const updatedRuns = await recomputeEvalStatusesInRuns({
|
|
899
931
|
runs: runs.values(),
|
|
900
932
|
evalKey: evalMeta.key,
|
|
901
|
-
evalId: evalMeta.id,
|
|
902
933
|
evalExists: evals.has(evalMeta.key),
|
|
903
934
|
scoreThresholds,
|
|
904
935
|
persistCaseDetail
|
|
@@ -930,7 +961,6 @@ function createRunner({ watchForChanges = true } = {}) {
|
|
|
930
961
|
target: run.manifest.target,
|
|
931
962
|
caseRows: run.cases,
|
|
932
963
|
evalKey: evalMeta?.key ?? evalKey,
|
|
933
|
-
evalId: evalMeta?.id,
|
|
934
964
|
evalExists: evalMeta !== void 0
|
|
935
965
|
})) continue;
|
|
936
966
|
if (run.manifest.status === "running") continue;
|
|
@@ -959,7 +989,7 @@ function createRunner({ watchForChanges = true } = {}) {
|
|
|
959
989
|
updated: false,
|
|
960
990
|
reason: "Case not found"
|
|
961
991
|
};
|
|
962
|
-
const evalMeta = evals.get(
|
|
992
|
+
const evalMeta = caseRow.evalKey === void 0 ? void 0 : evals.get(caseRow.evalKey);
|
|
963
993
|
if (!evalMeta) return {
|
|
964
994
|
updated: false,
|
|
965
995
|
reason: "Eval not found"
|
|
@@ -1170,6 +1200,10 @@ function createRunner({ watchForChanges = true } = {}) {
|
|
|
1170
1200
|
const cacheMode = request.cache?.mode ?? "use";
|
|
1171
1201
|
const runDir = join(localStateDir, "runs", runId);
|
|
1172
1202
|
const gitState = readGitWorktreeState(workspaceRoot);
|
|
1203
|
+
const targetEvalKeys = getTargetEvalKeys({
|
|
1204
|
+
request,
|
|
1205
|
+
sortedEvals: getSortedEvalMetas()
|
|
1206
|
+
});
|
|
1173
1207
|
const manifest = {
|
|
1174
1208
|
id: runId,
|
|
1175
1209
|
shortId,
|
|
@@ -1179,7 +1213,10 @@ function createRunner({ watchForChanges = true } = {}) {
|
|
|
1179
1213
|
endedAt: null,
|
|
1180
1214
|
commitSha: gitState.commitSha,
|
|
1181
1215
|
evalSourceFingerprints: {},
|
|
1182
|
-
target:
|
|
1216
|
+
target: buildPersistedRunTarget({
|
|
1217
|
+
target: request.target,
|
|
1218
|
+
evalKeys: targetEvalKeys
|
|
1219
|
+
}),
|
|
1183
1220
|
trials: request.trials,
|
|
1184
1221
|
trialSelection: config.trialSelection ?? "lowestScore",
|
|
1185
1222
|
cacheMode
|
|
@@ -1224,10 +1261,7 @@ function createRunner({ watchForChanges = true } = {}) {
|
|
|
1224
1261
|
runs.set(runId, runState);
|
|
1225
1262
|
setLatestRunInfoMap({
|
|
1226
1263
|
latestRunInfoMap,
|
|
1227
|
-
evalIds:
|
|
1228
|
-
request: materializedRequest,
|
|
1229
|
-
sortedEvals: getSortedEvalMetas()
|
|
1230
|
-
}),
|
|
1264
|
+
evalIds: targetEvalKeys,
|
|
1231
1265
|
info: {
|
|
1232
1266
|
status: "running",
|
|
1233
1267
|
startedAt: now,
|
|
@@ -1478,14 +1512,6 @@ function createRunner({ watchForChanges = true } = {}) {
|
|
|
1478
1512
|
nextShortIdNum = Math.max(nextShortIdNum, nextShortIdFromSnapshots(persistedRuns));
|
|
1479
1513
|
if (changed) emitDiscoveryEvent();
|
|
1480
1514
|
}
|
|
1481
|
-
function toRunnerRunState(snapshot, existing) {
|
|
1482
|
-
return {
|
|
1483
|
-
...snapshot,
|
|
1484
|
-
listeners: existing?.listeners ?? /* @__PURE__ */ new Set(),
|
|
1485
|
-
childProcess: existing?.childProcess,
|
|
1486
|
-
childTerminalReceived: existing?.childTerminalReceived ?? false
|
|
1487
|
-
};
|
|
1488
|
-
}
|
|
1489
1515
|
return runner;
|
|
1490
1516
|
}
|
|
1491
1517
|
//#endregion
|
|
@@ -2046,8 +2072,8 @@ async function commandApp(args) {
|
|
|
2046
2072
|
const { serve } = await import("@hono/node-server");
|
|
2047
2073
|
const bundledWebDist = resolve(currentDir, "apps/web/dist");
|
|
2048
2074
|
if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
|
|
2049
|
-
const appModule = await import("./app-mOYjX9zq.mjs");
|
|
2050
|
-
const runnerModule = await import("./runner-BYOdLBle.mjs");
|
|
2075
|
+
const appModule = await import("./app-DPamBr5R.mjs");
|
|
2076
|
+
const runnerModule = await import("./runner-BJQq7cpd.mjs");
|
|
2051
2077
|
if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
|
|
2052
2078
|
if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
|
|
2053
2079
|
await runnerModule.initRunner();
|
package/dist/index.d.mts
CHANGED
|
@@ -1717,13 +1717,13 @@ type ColumnKind = z$1.infer<typeof columnKindSchema>;
|
|
|
1717
1717
|
declare const columnFormatSchema: z$1.ZodEnum<{
|
|
1718
1718
|
number: "number";
|
|
1719
1719
|
boolean: "boolean";
|
|
1720
|
-
duration: "duration";
|
|
1721
|
-
json: "json";
|
|
1722
1720
|
file: "file";
|
|
1723
1721
|
markdown: "markdown";
|
|
1722
|
+
json: "json";
|
|
1724
1723
|
image: "image";
|
|
1725
1724
|
audio: "audio";
|
|
1726
1725
|
video: "video";
|
|
1726
|
+
duration: "duration";
|
|
1727
1727
|
percent: "percent";
|
|
1728
1728
|
passFail: "passFail";
|
|
1729
1729
|
stars: "stars";
|
|
@@ -1742,13 +1742,13 @@ declare const columnDefSchema: z$1.ZodObject<{
|
|
|
1742
1742
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
1743
1743
|
number: "number";
|
|
1744
1744
|
boolean: "boolean";
|
|
1745
|
-
duration: "duration";
|
|
1746
|
-
json: "json";
|
|
1747
1745
|
file: "file";
|
|
1748
1746
|
markdown: "markdown";
|
|
1747
|
+
json: "json";
|
|
1749
1748
|
image: "image";
|
|
1750
1749
|
audio: "audio";
|
|
1751
1750
|
video: "video";
|
|
1751
|
+
duration: "duration";
|
|
1752
1752
|
percent: "percent";
|
|
1753
1753
|
passFail: "passFail";
|
|
1754
1754
|
stars: "stars";
|
|
@@ -1762,8 +1762,8 @@ declare const columnDefSchema: z$1.ZodObject<{
|
|
|
1762
1762
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
1763
1763
|
align: z$1.ZodOptional<z$1.ZodEnum<{
|
|
1764
1764
|
left: "left";
|
|
1765
|
-
right: "right";
|
|
1766
1765
|
center: "center";
|
|
1766
|
+
right: "right";
|
|
1767
1767
|
}>>;
|
|
1768
1768
|
}, z$1.core.$strip>;
|
|
1769
1769
|
/** Column definition exposed to the UI for eval and case tables. */
|
|
@@ -1792,8 +1792,8 @@ type CellValue = z$1.infer<typeof cellValueSchema>; //#endregion
|
|
|
1792
1792
|
declare const traceAttributeDisplayFormatSchema: z$1.ZodEnum<{
|
|
1793
1793
|
string: "string";
|
|
1794
1794
|
number: "number";
|
|
1795
|
-
duration: "duration";
|
|
1796
1795
|
json: "json";
|
|
1796
|
+
duration: "duration";
|
|
1797
1797
|
}>;
|
|
1798
1798
|
/**
|
|
1799
1799
|
* Formatting hint for trace attribute values rendered by the UI.
|
|
@@ -1817,8 +1817,8 @@ declare const traceAttributeDisplaySchema: z$1.ZodObject<{
|
|
|
1817
1817
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
1818
1818
|
string: "string";
|
|
1819
1819
|
number: "number";
|
|
1820
|
-
duration: "duration";
|
|
1821
1820
|
json: "json";
|
|
1821
|
+
duration: "duration";
|
|
1822
1822
|
}>>;
|
|
1823
1823
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
1824
1824
|
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
@@ -1853,8 +1853,8 @@ declare const traceDisplayConfigSchema: z$1.ZodObject<{
|
|
|
1853
1853
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
1854
1854
|
string: "string";
|
|
1855
1855
|
number: "number";
|
|
1856
|
-
duration: "duration";
|
|
1857
1856
|
json: "json";
|
|
1857
|
+
duration: "duration";
|
|
1858
1858
|
}>>;
|
|
1859
1859
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
1860
1860
|
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
@@ -1893,8 +1893,8 @@ declare const traceAttributeDisplayInputSchema: z$1.ZodObject<{
|
|
|
1893
1893
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
1894
1894
|
string: "string";
|
|
1895
1895
|
number: "number";
|
|
1896
|
-
duration: "duration";
|
|
1897
1896
|
json: "json";
|
|
1897
|
+
duration: "duration";
|
|
1898
1898
|
}>>;
|
|
1899
1899
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
1900
1900
|
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
@@ -1931,8 +1931,8 @@ declare const traceDisplayInputConfigSchema: z$1.ZodObject<{
|
|
|
1931
1931
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
1932
1932
|
string: "string";
|
|
1933
1933
|
number: "number";
|
|
1934
|
-
duration: "duration";
|
|
1935
1934
|
json: "json";
|
|
1935
|
+
duration: "duration";
|
|
1936
1936
|
}>>;
|
|
1937
1937
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
1938
1938
|
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
@@ -2063,13 +2063,13 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
2063
2063
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2064
2064
|
number: "number";
|
|
2065
2065
|
boolean: "boolean";
|
|
2066
|
-
duration: "duration";
|
|
2067
|
-
json: "json";
|
|
2068
2066
|
file: "file";
|
|
2069
2067
|
markdown: "markdown";
|
|
2068
|
+
json: "json";
|
|
2070
2069
|
image: "image";
|
|
2071
2070
|
audio: "audio";
|
|
2072
2071
|
video: "video";
|
|
2072
|
+
duration: "duration";
|
|
2073
2073
|
percent: "percent";
|
|
2074
2074
|
passFail: "passFail";
|
|
2075
2075
|
stars: "stars";
|
|
@@ -2105,13 +2105,13 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
|
|
|
2105
2105
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2106
2106
|
number: "number";
|
|
2107
2107
|
boolean: "boolean";
|
|
2108
|
-
duration: "duration";
|
|
2109
|
-
json: "json";
|
|
2110
2108
|
file: "file";
|
|
2111
2109
|
markdown: "markdown";
|
|
2110
|
+
json: "json";
|
|
2112
2111
|
image: "image";
|
|
2113
2112
|
audio: "audio";
|
|
2114
2113
|
video: "video";
|
|
2114
|
+
duration: "duration";
|
|
2115
2115
|
percent: "percent";
|
|
2116
2116
|
passFail: "passFail";
|
|
2117
2117
|
stars: "stars";
|
|
@@ -2149,13 +2149,13 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2149
2149
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2150
2150
|
number: "number";
|
|
2151
2151
|
boolean: "boolean";
|
|
2152
|
-
duration: "duration";
|
|
2153
|
-
json: "json";
|
|
2154
2152
|
file: "file";
|
|
2155
2153
|
markdown: "markdown";
|
|
2154
|
+
json: "json";
|
|
2156
2155
|
image: "image";
|
|
2157
2156
|
audio: "audio";
|
|
2158
2157
|
video: "video";
|
|
2158
|
+
duration: "duration";
|
|
2159
2159
|
percent: "percent";
|
|
2160
2160
|
passFail: "passFail";
|
|
2161
2161
|
stars: "stars";
|
|
@@ -2169,8 +2169,8 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2169
2169
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2170
2170
|
align: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2171
2171
|
left: "left";
|
|
2172
|
-
right: "right";
|
|
2173
2172
|
center: "center";
|
|
2173
|
+
right: "right";
|
|
2174
2174
|
}>>;
|
|
2175
2175
|
}, z$1.core.$strip>>;
|
|
2176
2176
|
caseCount: z$1.ZodNullable<z$1.ZodNumber>;
|
|
@@ -2208,13 +2208,13 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2208
2208
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2209
2209
|
number: "number";
|
|
2210
2210
|
boolean: "boolean";
|
|
2211
|
-
duration: "duration";
|
|
2212
|
-
json: "json";
|
|
2213
2211
|
file: "file";
|
|
2214
2212
|
markdown: "markdown";
|
|
2213
|
+
json: "json";
|
|
2215
2214
|
image: "image";
|
|
2216
2215
|
audio: "audio";
|
|
2217
2216
|
video: "video";
|
|
2217
|
+
duration: "duration";
|
|
2218
2218
|
percent: "percent";
|
|
2219
2219
|
passFail: "passFail";
|
|
2220
2220
|
stars: "stars";
|
|
@@ -2239,8 +2239,8 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2239
2239
|
}>;
|
|
2240
2240
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2241
2241
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2242
|
-
error: "error";
|
|
2243
2242
|
success: "success";
|
|
2243
|
+
error: "error";
|
|
2244
2244
|
warning: "warning";
|
|
2245
2245
|
accent: "accent";
|
|
2246
2246
|
accentDim: "accentDim";
|
|
@@ -2263,8 +2263,8 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2263
2263
|
}>;
|
|
2264
2264
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2265
2265
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2266
|
-
error: "error";
|
|
2267
2266
|
success: "success";
|
|
2267
|
+
error: "error";
|
|
2268
2268
|
warning: "warning";
|
|
2269
2269
|
accent: "accent";
|
|
2270
2270
|
accentDim: "accentDim";
|
|
@@ -2529,8 +2529,8 @@ declare const scoreTraceSchema: z$1.ZodObject<{
|
|
|
2529
2529
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2530
2530
|
string: "string";
|
|
2531
2531
|
number: "number";
|
|
2532
|
-
duration: "duration";
|
|
2533
2532
|
json: "json";
|
|
2533
|
+
duration: "duration";
|
|
2534
2534
|
}>>;
|
|
2535
2535
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
2536
2536
|
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
@@ -2616,8 +2616,8 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
|
|
|
2616
2616
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2617
2617
|
string: "string";
|
|
2618
2618
|
number: "number";
|
|
2619
|
-
duration: "duration";
|
|
2620
2619
|
json: "json";
|
|
2620
|
+
duration: "duration";
|
|
2621
2621
|
}>>;
|
|
2622
2622
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
2623
2623
|
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
@@ -2685,8 +2685,8 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
|
|
|
2685
2685
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2686
2686
|
string: "string";
|
|
2687
2687
|
number: "number";
|
|
2688
|
-
duration: "duration";
|
|
2689
2688
|
json: "json";
|
|
2689
|
+
duration: "duration";
|
|
2690
2690
|
}>>;
|
|
2691
2691
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
2692
2692
|
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
@@ -2831,8 +2831,8 @@ type EvalChartAggregate = z$1.infer<typeof evalChartAggregateSchema>;
|
|
|
2831
2831
|
* not emit raw hex so authored evals stay decoupled from the web theme.
|
|
2832
2832
|
*/
|
|
2833
2833
|
declare const evalChartColorSchema: z$1.ZodEnum<{
|
|
2834
|
-
error: "error";
|
|
2835
2834
|
success: "success";
|
|
2835
|
+
error: "error";
|
|
2836
2836
|
warning: "warning";
|
|
2837
2837
|
accent: "accent";
|
|
2838
2838
|
accentDim: "accentDim";
|
|
@@ -2860,8 +2860,8 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
2860
2860
|
}>;
|
|
2861
2861
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2862
2862
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2863
|
-
error: "error";
|
|
2864
2863
|
success: "success";
|
|
2864
|
+
error: "error";
|
|
2865
2865
|
warning: "warning";
|
|
2866
2866
|
accent: "accent";
|
|
2867
2867
|
accentDim: "accentDim";
|
|
@@ -2884,8 +2884,8 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
2884
2884
|
}>;
|
|
2885
2885
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2886
2886
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2887
|
-
error: "error";
|
|
2888
2887
|
success: "success";
|
|
2888
|
+
error: "error";
|
|
2889
2889
|
warning: "warning";
|
|
2890
2890
|
accent: "accent";
|
|
2891
2891
|
accentDim: "accentDim";
|
|
@@ -2943,8 +2943,8 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
|
|
|
2943
2943
|
}>;
|
|
2944
2944
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2945
2945
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2946
|
-
error: "error";
|
|
2947
2946
|
success: "success";
|
|
2947
|
+
error: "error";
|
|
2948
2948
|
warning: "warning";
|
|
2949
2949
|
accent: "accent";
|
|
2950
2950
|
accentDim: "accentDim";
|
|
@@ -2967,8 +2967,8 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
|
|
|
2967
2967
|
}>;
|
|
2968
2968
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2969
2969
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2970
|
-
error: "error";
|
|
2971
2970
|
success: "success";
|
|
2971
|
+
error: "error";
|
|
2972
2972
|
warning: "warning";
|
|
2973
2973
|
accent: "accent";
|
|
2974
2974
|
accentDim: "accentDim";
|
|
@@ -3033,8 +3033,8 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
|
|
|
3033
3033
|
}>;
|
|
3034
3034
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
3035
3035
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3036
|
-
error: "error";
|
|
3037
3036
|
success: "success";
|
|
3037
|
+
error: "error";
|
|
3038
3038
|
warning: "warning";
|
|
3039
3039
|
accent: "accent";
|
|
3040
3040
|
accentDim: "accentDim";
|
|
@@ -3057,8 +3057,8 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
|
|
|
3057
3057
|
}>;
|
|
3058
3058
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
3059
3059
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3060
|
-
error: "error";
|
|
3061
3060
|
success: "success";
|
|
3061
|
+
error: "error";
|
|
3062
3062
|
warning: "warning";
|
|
3063
3063
|
accent: "accent";
|
|
3064
3064
|
accentDim: "accentDim";
|
|
@@ -3310,8 +3310,8 @@ declare const llmCallMetricFormatSchema$1: z$1.ZodEnum<{
|
|
|
3310
3310
|
string: "string";
|
|
3311
3311
|
number: "number";
|
|
3312
3312
|
boolean: "boolean";
|
|
3313
|
-
duration: "duration";
|
|
3314
3313
|
json: "json";
|
|
3314
|
+
duration: "duration";
|
|
3315
3315
|
}>;
|
|
3316
3316
|
/** Render format applied to an LLM-call metric value. */
|
|
3317
3317
|
type LlmCallMetricFormat = z$1.infer<typeof llmCallMetricFormatSchema$1>;
|
|
@@ -3320,8 +3320,8 @@ declare const apiCallMetricFormatSchema$1: z$1.ZodEnum<{
|
|
|
3320
3320
|
string: "string";
|
|
3321
3321
|
number: "number";
|
|
3322
3322
|
boolean: "boolean";
|
|
3323
|
-
duration: "duration";
|
|
3324
3323
|
json: "json";
|
|
3324
|
+
duration: "duration";
|
|
3325
3325
|
}>;
|
|
3326
3326
|
/** Render format applied to an API-call metric value. */
|
|
3327
3327
|
type ApiCallMetricFormat = z$1.infer<typeof apiCallMetricFormatSchema$1>;
|
|
@@ -3390,8 +3390,8 @@ declare const llmCallMetricSchema: z$1.ZodObject<{
|
|
|
3390
3390
|
string: "string";
|
|
3391
3391
|
number: "number";
|
|
3392
3392
|
boolean: "boolean";
|
|
3393
|
-
duration: "duration";
|
|
3394
3393
|
json: "json";
|
|
3394
|
+
duration: "duration";
|
|
3395
3395
|
}>>;
|
|
3396
3396
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
3397
3397
|
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
@@ -3419,8 +3419,8 @@ declare const apiCallMetricSchema: z$1.ZodObject<{
|
|
|
3419
3419
|
string: "string";
|
|
3420
3420
|
number: "number";
|
|
3421
3421
|
boolean: "boolean";
|
|
3422
|
-
duration: "duration";
|
|
3423
3422
|
json: "json";
|
|
3423
|
+
duration: "duration";
|
|
3424
3424
|
}>>;
|
|
3425
3425
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
3426
3426
|
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
@@ -3533,8 +3533,8 @@ declare const llmCallsConfigSchema: z$1.ZodObject<{
|
|
|
3533
3533
|
string: "string";
|
|
3534
3534
|
number: "number";
|
|
3535
3535
|
boolean: "boolean";
|
|
3536
|
-
duration: "duration";
|
|
3537
3536
|
json: "json";
|
|
3537
|
+
duration: "duration";
|
|
3538
3538
|
}>>;
|
|
3539
3539
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
3540
3540
|
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
@@ -3569,8 +3569,8 @@ declare const apiCallsConfigSchema: z$1.ZodObject<{
|
|
|
3569
3569
|
string: "string";
|
|
3570
3570
|
number: "number";
|
|
3571
3571
|
boolean: "boolean";
|
|
3572
|
-
duration: "duration";
|
|
3573
3572
|
json: "json";
|
|
3573
|
+
duration: "duration";
|
|
3574
3574
|
}>>;
|
|
3575
3575
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
3576
3576
|
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
@@ -5886,7 +5886,7 @@ type EvalRunner = {
|
|
|
5886
5886
|
clearCache(filter?: CacheClearFilter): Promise<void>;
|
|
5887
5887
|
/**
|
|
5888
5888
|
* Recompute persisted case and run statuses for terminal runs touching one
|
|
5889
|
-
* eval. Accepts the exact eval key
|
|
5889
|
+
* eval. Accepts the exact eval key.
|
|
5890
5890
|
*/
|
|
5891
5891
|
recomputeStatusesForEval(evalKey: string): Promise<{
|
|
5892
5892
|
updatedRuns: number;
|
|
@@ -5897,7 +5897,7 @@ type EvalRunner = {
|
|
|
5897
5897
|
}): Promise<RecalculateDerivedAttributesResult>;
|
|
5898
5898
|
/**
|
|
5899
5899
|
* Delete terminal persisted runs that touch one eval from memory and disk.
|
|
5900
|
-
* Accepts the exact eval key
|
|
5900
|
+
* Accepts the exact eval key.
|
|
5901
5901
|
*/
|
|
5902
5902
|
cleanRunsForEval(evalKey: string): Promise<{
|
|
5903
5903
|
deletedRuns: number;
|
|
@@ -5937,13 +5937,12 @@ type EvalRunner = {
|
|
|
5937
5937
|
validateManualInputs(request: CreateRunRequest$1): ManualInputValidationResult;
|
|
5938
5938
|
}; //#endregion
|
|
5939
5939
|
//#region src/runner.d.ts
|
|
5940
|
-
type CreateRunnerOptions = {
|
|
5941
|
-
watchForChanges?: boolean;
|
|
5942
|
-
};
|
|
5943
5940
|
/** Create an in-memory eval runner bound to the current workspace config. */
|
|
5944
5941
|
declare function createRunner({
|
|
5945
5942
|
watchForChanges
|
|
5946
|
-
}?:
|
|
5943
|
+
}?: {
|
|
5944
|
+
watchForChanges?: boolean;
|
|
5945
|
+
}): EvalRunner; //#endregion
|
|
5947
5946
|
//#region src/manualInput/files.d.ts
|
|
5948
5947
|
type StageManualInputFileParams = {
|
|
5949
5948
|
workspaceRoot: string;
|
package/dist/index.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { $ as getCurrentScope, B as deserializeCacheValue, F as evalSpan, G as readManualInputFile, H as serializeCacheValue, I as evalTracer, J as appendToEvalOutput, K as evalExpect, L as hashCacheKey, M as z, N as buildTraceTree, P as captureEvalSpanError, Q as evalTime, R as hashCacheKeySync,
|
|
2
|
-
import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-DbVfkr9T.mjs";
|
|
3
|
-
import { n as matchesEvalTags, t as defineEval } from "./src-CANi3gpd.mjs";
|
|
1
|
+
import { $ as getCurrentScope, B as deserializeCacheValue, F as evalSpan, G as readManualInputFile, H as serializeCacheValue, I as evalTracer, J as appendToEvalOutput, K as evalExpect, L as hashCacheKey, Lt as getEvalRegistry, M as z, N as buildTraceTree, P as captureEvalSpanError, Q as evalTime, R as hashCacheKeySync, U as repoFile, V as serializeCacheRecording, W as manualInputFileValueSchema, X as evalAssert, Z as evalLog, _t as extractLlmCalls, at as nextEvalId, ct as runInExistingEvalScope, dt as startEvalBackgroundJob, et as getEvalCaseInput, gt as extractApiCalls, ht as extractCacheHits, it as mergeEvalOutput, lt as setEvalOutput, mt as extractCacheEntries, nt as isInEvalScope, ot as runInEvalRuntimeScope, q as EvalAssertionError, st as runInEvalScope, tt as incrementEvalOutput, ut as setScopeCacheContext, vt as simulateLlmCallCost, xt as getNestedAttribute, yt as simulateTokenAllocation, z as deserializeCacheRecording } from "./runOrchestration-OVUFw1fL.mjs";
|
|
2
|
+
import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-BeJCJMQo.mjs";
|
|
3
|
+
import { n as matchesEvalTags, t as defineEval } from "./src-D7_xKo7h.mjs";
|
|
4
4
|
export { EvalAssertionError, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, z };
|
package/dist/runChild.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { At as buildEvalKey, D as loadConfig, Dt as runSummarySchema, Et as runManifestSchema, Ft as
|
|
1
|
+
import { At as buildEvalKey, D as loadConfig, Dt as runSummarySchema, Et as runManifestSchema, Ft as columnDefSchema, Mt as evalStatsConfigSchema, Nt as manualInputDescriptorSchema, Pt as evalChartsConfigSchema, T as parseEvalDiscovery, Y as configureEvalRunLogs, ft as createRunRequestSchema, h as persistRunState, j as createFsCacheStore, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-OVUFw1fL.mjs";
|
|
2
2
|
import { z } from "zod/v4";
|
|
3
3
|
import { readFile } from "node:fs/promises";
|
|
4
4
|
import { relative } from "node:path";
|
|
@@ -866,10 +866,6 @@ function buildCaseKey(params) {
|
|
|
866
866
|
encodeURIComponent(params.caseId)
|
|
867
867
|
].join("#");
|
|
868
868
|
}
|
|
869
|
-
/** Return the collision-safe eval key stored on a row, falling back for legacy data. */
|
|
870
|
-
function getCaseRowEvalKey(row) {
|
|
871
|
-
return row.evalKey ?? row.evalId;
|
|
872
|
-
}
|
|
873
869
|
/** Return the collision-safe case key stored on a row, falling back for legacy data. */
|
|
874
870
|
function getCaseRowCaseKey(row) {
|
|
875
871
|
return row.caseKey ?? row.caseId;
|
|
@@ -6586,9 +6582,9 @@ function recomputePersistedCaseStatus(caseRow, caseDetail, scoreThresholds) {
|
|
|
6586
6582
|
return caseRow.status === "error" ? "error" : "pass";
|
|
6587
6583
|
}
|
|
6588
6584
|
function runTouchesEval(params) {
|
|
6589
|
-
if (params.caseRows.some((caseRow) =>
|
|
6585
|
+
if (params.caseRows.some((caseRow) => caseRow.evalKey === params.evalKey)) return true;
|
|
6590
6586
|
if (params.target.mode === "all") return params.evalExists;
|
|
6591
|
-
if (params.target.mode === "evalIds") return params.target.evalKeys?.includes(params.evalKey) ??
|
|
6587
|
+
if (params.target.mode === "evalIds") return params.target.evalKeys?.includes(params.evalKey) ?? false;
|
|
6592
6588
|
return false;
|
|
6593
6589
|
}
|
|
6594
6590
|
async function deleteTemporaryRuns(params) {
|
|
@@ -6619,13 +6615,12 @@ async function recomputeEvalStatusesInRuns(params) {
|
|
|
6619
6615
|
target: run.manifest.target,
|
|
6620
6616
|
caseRows: run.cases,
|
|
6621
6617
|
evalKey: params.evalKey,
|
|
6622
|
-
evalId: params.evalId,
|
|
6623
6618
|
evalExists: params.evalExists
|
|
6624
6619
|
})) continue;
|
|
6625
6620
|
if (run.manifest.status === "running") continue;
|
|
6626
6621
|
let changed = false;
|
|
6627
6622
|
for (const caseRow of run.cases) {
|
|
6628
|
-
if (
|
|
6623
|
+
if (caseRow.evalKey !== params.evalKey) continue;
|
|
6629
6624
|
const caseDetail = run.caseDetails.get(getCaseRowCaseKey(caseRow));
|
|
6630
6625
|
const nextStatus = recomputePersistedCaseStatus(caseRow, caseDetail, params.scoreThresholds);
|
|
6631
6626
|
if (caseRow.status === nextStatus) continue;
|
|
@@ -6708,15 +6703,14 @@ function getLastRunStatuses(params) {
|
|
|
6708
6703
|
function getLatestRunInfos(params) {
|
|
6709
6704
|
const { runs, knownEvals } = params;
|
|
6710
6705
|
const knownEvalMetas = [...knownEvals];
|
|
6711
|
-
const evalIdByKey = new Map(knownEvalMetas.map((evalMeta) => [evalMeta.key, evalMeta.id]));
|
|
6712
6706
|
const manualScoreKeysByEval = new Map(knownEvalMetas.map((evalMeta) => [evalMeta.key, evalMeta.columnDefs.filter((columnDef) => columnDef.isManualScore === true).map((columnDef) => columnDef.key)]));
|
|
6713
6707
|
const orderedRuns = [...runs].toSorted((a, b) => new Date(getRunFreshnessTimestamp(a.manifest)).getTime() - new Date(getRunFreshnessTimestamp(b.manifest)).getTime());
|
|
6714
6708
|
const latestRunInfos = /* @__PURE__ */ new Map();
|
|
6715
6709
|
for (const run of orderedRuns) for (const evalKey of getRunEvalKeys(run, knownEvalMetas)) latestRunInfos.set(evalKey, {
|
|
6716
|
-
status: getEvalStatusForRun(run, evalKey,
|
|
6710
|
+
status: getEvalStatusForRun(run, evalKey, manualScoreKeysByEval.get(evalKey) ?? []),
|
|
6717
6711
|
startedAt: getRunFreshnessTimestamp(run.manifest),
|
|
6718
6712
|
commitSha: run.manifest.commitSha ?? null,
|
|
6719
|
-
evalSourceFingerprint: run.manifest.evalSourceFingerprints[evalKey] ??
|
|
6713
|
+
evalSourceFingerprint: run.manifest.evalSourceFingerprints[evalKey] ?? null
|
|
6720
6714
|
});
|
|
6721
6715
|
return latestRunInfos;
|
|
6722
6716
|
}
|
|
@@ -6775,19 +6769,14 @@ async function readCaseDetails(runDir) {
|
|
|
6775
6769
|
}
|
|
6776
6770
|
function getRunEvalKeys(run, knownEvals) {
|
|
6777
6771
|
const knownEvalMetas = [...knownEvals];
|
|
6778
|
-
const evalKeys = new Set(
|
|
6779
|
-
for (const caseRow of run.cases)
|
|
6780
|
-
|
|
6781
|
-
|
|
6782
|
-
}
|
|
6783
|
-
if (run.manifest.target.mode === "evalIds") {
|
|
6784
|
-
for (const evalKey of run.manifest.target.evalKeys ?? []) evalKeys.add(evalKey);
|
|
6785
|
-
for (const evalId of run.manifest.target.evalIds ?? []) for (const evalMeta of knownEvalMetas) if (evalMeta.id === evalId) evalKeys.add(evalMeta.key);
|
|
6786
|
-
} else if (run.manifest.target.mode === "all" && evalKeys.size === 0) for (const evalMeta of knownEvalMetas) evalKeys.add(evalMeta.key);
|
|
6772
|
+
const evalKeys = /* @__PURE__ */ new Set();
|
|
6773
|
+
for (const caseRow of run.cases) if (caseRow.evalKey !== void 0) evalKeys.add(caseRow.evalKey);
|
|
6774
|
+
if (run.manifest.target.mode === "evalIds") for (const evalKey of run.manifest.target.evalKeys ?? []) evalKeys.add(evalKey);
|
|
6775
|
+
else if (run.manifest.target.mode === "all" && evalKeys.size === 0) for (const evalMeta of knownEvalMetas) evalKeys.add(evalMeta.key);
|
|
6787
6776
|
return [...evalKeys];
|
|
6788
6777
|
}
|
|
6789
|
-
function getEvalStatusForRun(run, evalKey,
|
|
6790
|
-
const evalCases = run.cases.filter((caseRow) =>
|
|
6778
|
+
function getEvalStatusForRun(run, evalKey, manualScoreKeys) {
|
|
6779
|
+
const evalCases = run.cases.filter((caseRow) => caseRow.evalKey === evalKey);
|
|
6791
6780
|
if (evalCases.length > 0) {
|
|
6792
6781
|
if (hasPendingManualScores(evalCases, manualScoreKeys)) return "unscored";
|
|
6793
6782
|
return toLastRunStatus$1(deriveStatusFromCaseRows({ caseRows: evalCases }));
|
|
@@ -7809,4 +7798,4 @@ function toLastRunStatus(status) {
|
|
|
7809
7798
|
return status === "pending" ? null : status;
|
|
7810
7799
|
}
|
|
7811
7800
|
//#endregion
|
|
7812
|
-
export { getCurrentScope as $, validateCharts as A, buildEvalKey as At, deserializeCacheValue as B, deriveEvalFreshness as C, getEvalDisplayStatus as Ct, loadConfig as D, runSummarySchema as Dt, resolveEvalDefaultConfig as E, runManifestSchema as Et, evalSpan as F,
|
|
7801
|
+
export { getCurrentScope as $, validateCharts as A, buildEvalKey as At, deserializeCacheValue as B, deriveEvalFreshness as C, getEvalDisplayStatus as Ct, loadConfig as D, runSummarySchema as Dt, resolveEvalDefaultConfig as E, runManifestSchema as Et, evalSpan as F, columnDefSchema as Ft, readManualInputFile as G, serializeCacheValue as H, evalTracer as I, defineEval as It, appendToEvalOutput as J, evalExpect as K, hashCacheKey as L, getEvalRegistry as Lt, z$1 as M, evalStatsConfigSchema as Mt, buildTraceTree as N, manualInputDescriptorSchema as Nt, buildDeclaredColumnDefs as O, resolveApiCallsConfig as Ot, captureEvalSpanError as P, evalChartsConfigSchema as Pt, evalTime as Q, hashCacheKeySync as R, parseManualInputValues as S, getEvalTitle as St, parseEvalDiscovery as T, matchesTagsFilter as Tt, repoFile as U, serializeCacheRecording as V, manualInputFileValueSchema as W, evalAssert as X, configureEvalRunLogs as Y, evalLog as Z, recomputePersistedCaseStatus as _, extractLlmCalls as _t, validateTagsFilters as a, nextEvalId as at, resolveArtifactPath as b, applyDerivedCallAttributes as bt, getLastRunStatuses as c, runInExistingEvalScope as ct, loadPersistedRunSnapshots as d, startEvalBackgroundJob as dt, getEvalCaseInput as et, nextShortIdFromSnapshots as f, createRunRequestSchema as ft, recomputeEvalStatusesInRuns as g, extractApiCalls as gt, persistRunState as h, extractCacheHits as ht, resolveEvalTags as i, mergeEvalOutput as it, createFsCacheStore as j, getCaseRowCaseKey as jt, normalizeScoreDef as k, resolveLlmCallsConfig as kt, getLatestRunInfos as l, setEvalOutput as lt, deleteTemporaryRuns as m, extractCacheEntries as mt, getTargetEvalKeys as n, isInEvalScope as nt, stripTerminalControlCodes as o, runInEvalRuntimeScope as ot, persistCaseDetail as p, updateManualScoreRequestSchema as pt, EvalAssertionError as q, getTargetEvals as r, matchesEvalTags as rt, generateRunId as s, runInEvalScope as st, executeRun as t, incrementEvalOutput as tt, 
loadPersistedRunSnapshot as u, setScopeCacheContext as ut, runTouchesEval as v, simulateLlmCallCost as vt, loadEvalModule as w, deriveScopedSummaryFromCases as wt, buildManualInputDescriptor as x, getNestedAttribute as xt, resolveTracePresentation as y, simulateTokenAllocation as yt, deserializeCacheRecording as z };
|