@ls-stack/agent-eval 0.42.1 → 0.42.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,8 +25,8 @@
25
25
  href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600&family=JetBrains+Mono:wght@400;500&display=swap"
26
26
  rel="stylesheet"
27
27
  />
28
- <script type="module" crossorigin src="/assets/index-CANDLTsq.js"></script>
29
- <link rel="stylesheet" crossorigin href="/assets/index-eFM9VIsz.css">
28
+ <script type="module" crossorigin src="/assets/index-XLJByNnS.js"></script>
29
+ <link rel="stylesheet" crossorigin href="/assets/index-S3J5Nm0o.css">
30
30
  </head>
31
31
  <body>
32
32
  <div id="root"></div>
package/dist/bin.mjs CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env node
2
- import { t as runCli } from "./cli-DbVfkr9T.mjs";
2
+ import { t as runCli } from "./cli-BeJCJMQo.mjs";
3
3
  import { spawn } from "node:child_process";
4
4
  //#region src/bin.ts
5
5
  const moduleMocksFlag = "--experimental-test-module-mocks";
@@ -1,4 +1,4 @@
1
- import { A as validateCharts, At as buildEvalKey, C as deriveEvalFreshness, Ct as getEvalDisplayStatus, D as loadConfig, Dt as runSummarySchema, E as resolveEvalDefaultConfig, Mt as getCaseRowEvalKey, O as buildDeclaredColumnDefs, Ot as resolveApiCallsConfig, Rt as getEvalRegistry, S as parseManualInputValues, St as getEvalTitle, T as parseEvalDiscovery, Tt as matchesTagsFilter, _ as recomputePersistedCaseStatus, a as validateTagsFilters, b as resolveArtifactPath, bt as applyDerivedCallAttributes, c as getLastRunStatuses, d as loadPersistedRunSnapshots, f as nextShortIdFromSnapshots, g as recomputeEvalStatusesInRuns, h as persistRunState, i as resolveEvalTags, j as createFsCacheStore, jt as getCaseRowCaseKey, k as normalizeScoreDef, kt as resolveLlmCallsConfig, l as getLatestRunInfos, m as deleteTemporaryRuns, n as getTargetEvalKeys, o as stripTerminalControlCodes, p as persistCaseDetail, s as generateRunId, u as loadPersistedRunSnapshot, v as runTouchesEval, w as loadEvalModule, wt as deriveScopedSummaryFromCases, x as buildManualInputDescriptor, y as resolveTracePresentation } from "./runOrchestration-SPaHx-SC.mjs";
1
+ import { A as validateCharts, At as buildEvalKey, C as deriveEvalFreshness, Ct as getEvalDisplayStatus, D as loadConfig, Dt as runSummarySchema, E as resolveEvalDefaultConfig, Lt as getEvalRegistry, O as buildDeclaredColumnDefs, Ot as resolveApiCallsConfig, S as parseManualInputValues, St as getEvalTitle, T as parseEvalDiscovery, Tt as matchesTagsFilter, _ as recomputePersistedCaseStatus, a as validateTagsFilters, b as resolveArtifactPath, bt as applyDerivedCallAttributes, c as getLastRunStatuses, d as loadPersistedRunSnapshots, f as nextShortIdFromSnapshots, g as recomputeEvalStatusesInRuns, h as persistRunState, i as resolveEvalTags, j as createFsCacheStore, jt as getCaseRowCaseKey, k as normalizeScoreDef, kt as resolveLlmCallsConfig, l as getLatestRunInfos, m as deleteTemporaryRuns, n as getTargetEvalKeys, o as stripTerminalControlCodes, p as persistCaseDetail, s as generateRunId, u as loadPersistedRunSnapshot, v as runTouchesEval, w as loadEvalModule, wt as deriveScopedSummaryFromCases, x as buildManualInputDescriptor, y as resolveTracePresentation } from "./runOrchestration-OVUFw1fL.mjs";
2
2
  import { copyFile, mkdir, readFile, rm, writeFile } from "node:fs/promises";
3
3
  import { basename, dirname, extname, isAbsolute, join, relative, resolve, sep } from "node:path";
4
4
  import { createHash, randomUUID } from "node:crypto";
@@ -476,6 +476,11 @@ function validateManualInputsForRequest(params) {
476
476
  };
477
477
  }
478
478
  //#endregion
479
+ //#region ../runner/src/objectUtils.ts
480
+ function isRecord(value) {
481
+ return typeof value === "object" && value !== null && !Array.isArray(value);
482
+ }
483
+ //#endregion
479
484
  //#region ../runner/src/recalculateDerivedAttributes.ts
480
485
  function getCaseArtifactFileIdForExistingRun(runState, caseRow) {
481
486
  const caseKey = getCaseRowCaseKey(caseRow);
@@ -505,7 +510,7 @@ async function recalculateDerivedAttributesForCase(params) {
505
510
  });
506
511
  let nextTrace = spansWithDerivedAttributes;
507
512
  let nextTraceDisplay = caseDetail.traceDisplay;
508
- const evalMeta = params.evals.get(getCaseRowEvalKey(caseRow));
513
+ const evalMeta = caseRow.evalKey === void 0 ? void 0 : params.evals.get(caseRow.evalKey);
509
514
  const entry = evalMeta === void 0 ? void 0 : getEvalRegistry().get(evalMeta.id);
510
515
  if (entry !== void 0) entry.use((evalDef) => {
511
516
  const resolved = resolveTracePresentation(spansWithDerivedAttributes, params.traceDisplayConfig, evalDef.traceDisplay);
@@ -787,6 +792,36 @@ async function markRunTerminalFromChild(runState, event, managerContext) {
787
792
  managerContext.emitDiscoveryEvent();
788
793
  }
789
794
  //#endregion
795
+ //#region ../runner/src/runnerStateHydration.ts
796
+ /** Rehydrate a persisted run while preserving live listeners/process handles. */
797
+ function toRunnerRunState(snapshot, existing) {
798
+ return {
799
+ ...snapshot,
800
+ listeners: existing?.listeners ?? /* @__PURE__ */ new Set(),
801
+ childProcess: existing?.childProcess,
802
+ childTerminalReceived: existing?.childTerminalReceived ?? false
803
+ };
804
+ }
805
+ //#endregion
806
+ //#region ../runner/src/runTargetPersistence.ts
807
+ /** Build the exact-key run target persisted in run history. */
808
+ function buildPersistedRunTarget(params) {
809
+ const { target, evalKeys } = params;
810
+ if (target.mode === "all") return { mode: "all" };
811
+ const persistEvalKeys = (target.evalKeys?.length ?? 0) > 0 || (target.evalIds?.length ?? 0) > 0 || (target.files?.length ?? 0) > 0;
812
+ const keyedTarget = {
813
+ mode: target.mode,
814
+ evalKeys: persistEvalKeys && evalKeys.length > 0 ? evalKeys : void 0,
815
+ files: target.files,
816
+ tagsFilter: target.tagsFilter
817
+ };
818
+ if (target.mode === "caseIds") return {
819
+ ...keyedTarget,
820
+ caseIds: target.caseIds
821
+ };
822
+ return keyedTarget;
823
+ }
824
+ //#endregion
790
825
  //#region ../runner/src/watchRoots.ts
791
826
  const globMagicCharacters = new Set([
792
827
  "*",
@@ -823,9 +858,6 @@ function getWatchRootsForIncludePatterns(params) {
823
858
  }
824
859
  //#endregion
825
860
  //#region ../runner/src/runner.ts
826
- function isRecord(value) {
827
- return typeof value === "object" && value !== null && !Array.isArray(value);
828
- }
829
861
  /** Create an in-memory eval runner bound to the current workspace config. */
830
862
  function createRunner({ watchForChanges = true } = {}) {
831
863
  let config;
@@ -898,7 +930,6 @@ function createRunner({ watchForChanges = true } = {}) {
898
930
  const updatedRuns = await recomputeEvalStatusesInRuns({
899
931
  runs: runs.values(),
900
932
  evalKey: evalMeta.key,
901
- evalId: evalMeta.id,
902
933
  evalExists: evals.has(evalMeta.key),
903
934
  scoreThresholds,
904
935
  persistCaseDetail
@@ -930,7 +961,6 @@ function createRunner({ watchForChanges = true } = {}) {
930
961
  target: run.manifest.target,
931
962
  caseRows: run.cases,
932
963
  evalKey: evalMeta?.key ?? evalKey,
933
- evalId: evalMeta?.id,
934
964
  evalExists: evalMeta !== void 0
935
965
  })) continue;
936
966
  if (run.manifest.status === "running") continue;
@@ -959,7 +989,7 @@ function createRunner({ watchForChanges = true } = {}) {
959
989
  updated: false,
960
990
  reason: "Case not found"
961
991
  };
962
- const evalMeta = evals.get(getCaseRowEvalKey(caseRow));
992
+ const evalMeta = caseRow.evalKey === void 0 ? void 0 : evals.get(caseRow.evalKey);
963
993
  if (!evalMeta) return {
964
994
  updated: false,
965
995
  reason: "Eval not found"
@@ -1170,6 +1200,10 @@ function createRunner({ watchForChanges = true } = {}) {
1170
1200
  const cacheMode = request.cache?.mode ?? "use";
1171
1201
  const runDir = join(localStateDir, "runs", runId);
1172
1202
  const gitState = readGitWorktreeState(workspaceRoot);
1203
+ const targetEvalKeys = getTargetEvalKeys({
1204
+ request,
1205
+ sortedEvals: getSortedEvalMetas()
1206
+ });
1173
1207
  const manifest = {
1174
1208
  id: runId,
1175
1209
  shortId,
@@ -1179,7 +1213,10 @@ function createRunner({ watchForChanges = true } = {}) {
1179
1213
  endedAt: null,
1180
1214
  commitSha: gitState.commitSha,
1181
1215
  evalSourceFingerprints: {},
1182
- target: request.target,
1216
+ target: buildPersistedRunTarget({
1217
+ target: request.target,
1218
+ evalKeys: targetEvalKeys
1219
+ }),
1183
1220
  trials: request.trials,
1184
1221
  trialSelection: config.trialSelection ?? "lowestScore",
1185
1222
  cacheMode
@@ -1224,10 +1261,7 @@ function createRunner({ watchForChanges = true } = {}) {
1224
1261
  runs.set(runId, runState);
1225
1262
  setLatestRunInfoMap({
1226
1263
  latestRunInfoMap,
1227
- evalIds: getTargetEvalKeys({
1228
- request: materializedRequest,
1229
- sortedEvals: getSortedEvalMetas()
1230
- }),
1264
+ evalIds: targetEvalKeys,
1231
1265
  info: {
1232
1266
  status: "running",
1233
1267
  startedAt: now,
@@ -1478,14 +1512,6 @@ function createRunner({ watchForChanges = true } = {}) {
1478
1512
  nextShortIdNum = Math.max(nextShortIdNum, nextShortIdFromSnapshots(persistedRuns));
1479
1513
  if (changed) emitDiscoveryEvent();
1480
1514
  }
1481
- function toRunnerRunState(snapshot, existing) {
1482
- return {
1483
- ...snapshot,
1484
- listeners: existing?.listeners ?? /* @__PURE__ */ new Set(),
1485
- childProcess: existing?.childProcess,
1486
- childTerminalReceived: existing?.childTerminalReceived ?? false
1487
- };
1488
- }
1489
1515
  return runner;
1490
1516
  }
1491
1517
  //#endregion
@@ -2046,8 +2072,8 @@ async function commandApp(args) {
2046
2072
  const { serve } = await import("@hono/node-server");
2047
2073
  const bundledWebDist = resolve(currentDir, "apps/web/dist");
2048
2074
  if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
2049
- const appModule = await import("./app-mOYjX9zq.mjs");
2050
- const runnerModule = await import("./runner-BYOdLBle.mjs");
2075
+ const appModule = await import("./app-DPamBr5R.mjs");
2076
+ const runnerModule = await import("./runner-BJQq7cpd.mjs");
2051
2077
  if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
2052
2078
  if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
2053
2079
  await runnerModule.initRunner();
package/dist/index.d.mts CHANGED
@@ -1717,13 +1717,13 @@ type ColumnKind = z$1.infer<typeof columnKindSchema>;
1717
1717
  declare const columnFormatSchema: z$1.ZodEnum<{
1718
1718
  number: "number";
1719
1719
  boolean: "boolean";
1720
- duration: "duration";
1721
- json: "json";
1722
1720
  file: "file";
1723
1721
  markdown: "markdown";
1722
+ json: "json";
1724
1723
  image: "image";
1725
1724
  audio: "audio";
1726
1725
  video: "video";
1726
+ duration: "duration";
1727
1727
  percent: "percent";
1728
1728
  passFail: "passFail";
1729
1729
  stars: "stars";
@@ -1742,13 +1742,13 @@ declare const columnDefSchema: z$1.ZodObject<{
1742
1742
  format: z$1.ZodOptional<z$1.ZodEnum<{
1743
1743
  number: "number";
1744
1744
  boolean: "boolean";
1745
- duration: "duration";
1746
- json: "json";
1747
1745
  file: "file";
1748
1746
  markdown: "markdown";
1747
+ json: "json";
1749
1748
  image: "image";
1750
1749
  audio: "audio";
1751
1750
  video: "video";
1751
+ duration: "duration";
1752
1752
  percent: "percent";
1753
1753
  passFail: "passFail";
1754
1754
  stars: "stars";
@@ -1762,8 +1762,8 @@ declare const columnDefSchema: z$1.ZodObject<{
1762
1762
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
1763
1763
  align: z$1.ZodOptional<z$1.ZodEnum<{
1764
1764
  left: "left";
1765
- right: "right";
1766
1765
  center: "center";
1766
+ right: "right";
1767
1767
  }>>;
1768
1768
  }, z$1.core.$strip>;
1769
1769
  /** Column definition exposed to the UI for eval and case tables. */
@@ -1792,8 +1792,8 @@ type CellValue = z$1.infer<typeof cellValueSchema>; //#endregion
1792
1792
  declare const traceAttributeDisplayFormatSchema: z$1.ZodEnum<{
1793
1793
  string: "string";
1794
1794
  number: "number";
1795
- duration: "duration";
1796
1795
  json: "json";
1796
+ duration: "duration";
1797
1797
  }>;
1798
1798
  /**
1799
1799
  * Formatting hint for trace attribute values rendered by the UI.
@@ -1817,8 +1817,8 @@ declare const traceAttributeDisplaySchema: z$1.ZodObject<{
1817
1817
  format: z$1.ZodOptional<z$1.ZodEnum<{
1818
1818
  string: "string";
1819
1819
  number: "number";
1820
- duration: "duration";
1821
1820
  json: "json";
1821
+ duration: "duration";
1822
1822
  }>>;
1823
1823
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
1824
1824
  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
@@ -1853,8 +1853,8 @@ declare const traceDisplayConfigSchema: z$1.ZodObject<{
1853
1853
  format: z$1.ZodOptional<z$1.ZodEnum<{
1854
1854
  string: "string";
1855
1855
  number: "number";
1856
- duration: "duration";
1857
1856
  json: "json";
1857
+ duration: "duration";
1858
1858
  }>>;
1859
1859
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
1860
1860
  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
@@ -1893,8 +1893,8 @@ declare const traceAttributeDisplayInputSchema: z$1.ZodObject<{
1893
1893
  format: z$1.ZodOptional<z$1.ZodEnum<{
1894
1894
  string: "string";
1895
1895
  number: "number";
1896
- duration: "duration";
1897
1896
  json: "json";
1897
+ duration: "duration";
1898
1898
  }>>;
1899
1899
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
1900
1900
  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
@@ -1931,8 +1931,8 @@ declare const traceDisplayInputConfigSchema: z$1.ZodObject<{
1931
1931
  format: z$1.ZodOptional<z$1.ZodEnum<{
1932
1932
  string: "string";
1933
1933
  number: "number";
1934
- duration: "duration";
1935
1934
  json: "json";
1935
+ duration: "duration";
1936
1936
  }>>;
1937
1937
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
1938
1938
  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
@@ -2063,13 +2063,13 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
2063
2063
  format: z$1.ZodOptional<z$1.ZodEnum<{
2064
2064
  number: "number";
2065
2065
  boolean: "boolean";
2066
- duration: "duration";
2067
- json: "json";
2068
2066
  file: "file";
2069
2067
  markdown: "markdown";
2068
+ json: "json";
2070
2069
  image: "image";
2071
2070
  audio: "audio";
2072
2071
  video: "video";
2072
+ duration: "duration";
2073
2073
  percent: "percent";
2074
2074
  passFail: "passFail";
2075
2075
  stars: "stars";
@@ -2105,13 +2105,13 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
2105
2105
  format: z$1.ZodOptional<z$1.ZodEnum<{
2106
2106
  number: "number";
2107
2107
  boolean: "boolean";
2108
- duration: "duration";
2109
- json: "json";
2110
2108
  file: "file";
2111
2109
  markdown: "markdown";
2110
+ json: "json";
2112
2111
  image: "image";
2113
2112
  audio: "audio";
2114
2113
  video: "video";
2114
+ duration: "duration";
2115
2115
  percent: "percent";
2116
2116
  passFail: "passFail";
2117
2117
  stars: "stars";
@@ -2149,13 +2149,13 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2149
2149
  format: z$1.ZodOptional<z$1.ZodEnum<{
2150
2150
  number: "number";
2151
2151
  boolean: "boolean";
2152
- duration: "duration";
2153
- json: "json";
2154
2152
  file: "file";
2155
2153
  markdown: "markdown";
2154
+ json: "json";
2156
2155
  image: "image";
2157
2156
  audio: "audio";
2158
2157
  video: "video";
2158
+ duration: "duration";
2159
2159
  percent: "percent";
2160
2160
  passFail: "passFail";
2161
2161
  stars: "stars";
@@ -2169,8 +2169,8 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2169
2169
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
2170
2170
  align: z$1.ZodOptional<z$1.ZodEnum<{
2171
2171
  left: "left";
2172
- right: "right";
2173
2172
  center: "center";
2173
+ right: "right";
2174
2174
  }>>;
2175
2175
  }, z$1.core.$strip>>;
2176
2176
  caseCount: z$1.ZodNullable<z$1.ZodNumber>;
@@ -2208,13 +2208,13 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2208
2208
  format: z$1.ZodOptional<z$1.ZodEnum<{
2209
2209
  number: "number";
2210
2210
  boolean: "boolean";
2211
- duration: "duration";
2212
- json: "json";
2213
2211
  file: "file";
2214
2212
  markdown: "markdown";
2213
+ json: "json";
2215
2214
  image: "image";
2216
2215
  audio: "audio";
2217
2216
  video: "video";
2217
+ duration: "duration";
2218
2218
  percent: "percent";
2219
2219
  passFail: "passFail";
2220
2220
  stars: "stars";
@@ -2239,8 +2239,8 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2239
2239
  }>;
2240
2240
  label: z$1.ZodOptional<z$1.ZodString>;
2241
2241
  color: z$1.ZodOptional<z$1.ZodEnum<{
2242
- error: "error";
2243
2242
  success: "success";
2243
+ error: "error";
2244
2244
  warning: "warning";
2245
2245
  accent: "accent";
2246
2246
  accentDim: "accentDim";
@@ -2263,8 +2263,8 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2263
2263
  }>;
2264
2264
  label: z$1.ZodOptional<z$1.ZodString>;
2265
2265
  color: z$1.ZodOptional<z$1.ZodEnum<{
2266
- error: "error";
2267
2266
  success: "success";
2267
+ error: "error";
2268
2268
  warning: "warning";
2269
2269
  accent: "accent";
2270
2270
  accentDim: "accentDim";
@@ -2529,8 +2529,8 @@ declare const scoreTraceSchema: z$1.ZodObject<{
2529
2529
  format: z$1.ZodOptional<z$1.ZodEnum<{
2530
2530
  string: "string";
2531
2531
  number: "number";
2532
- duration: "duration";
2533
2532
  json: "json";
2533
+ duration: "duration";
2534
2534
  }>>;
2535
2535
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
2536
2536
  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
@@ -2616,8 +2616,8 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
2616
2616
  format: z$1.ZodOptional<z$1.ZodEnum<{
2617
2617
  string: "string";
2618
2618
  number: "number";
2619
- duration: "duration";
2620
2619
  json: "json";
2620
+ duration: "duration";
2621
2621
  }>>;
2622
2622
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
2623
2623
  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
@@ -2685,8 +2685,8 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
2685
2685
  format: z$1.ZodOptional<z$1.ZodEnum<{
2686
2686
  string: "string";
2687
2687
  number: "number";
2688
- duration: "duration";
2689
2688
  json: "json";
2689
+ duration: "duration";
2690
2690
  }>>;
2691
2691
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
2692
2692
  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
@@ -2831,8 +2831,8 @@ type EvalChartAggregate = z$1.infer<typeof evalChartAggregateSchema>;
2831
2831
  * not emit raw hex so authored evals stay decoupled from the web theme.
2832
2832
  */
2833
2833
  declare const evalChartColorSchema: z$1.ZodEnum<{
2834
- error: "error";
2835
2834
  success: "success";
2835
+ error: "error";
2836
2836
  warning: "warning";
2837
2837
  accent: "accent";
2838
2838
  accentDim: "accentDim";
@@ -2860,8 +2860,8 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
2860
2860
  }>;
2861
2861
  label: z$1.ZodOptional<z$1.ZodString>;
2862
2862
  color: z$1.ZodOptional<z$1.ZodEnum<{
2863
- error: "error";
2864
2863
  success: "success";
2864
+ error: "error";
2865
2865
  warning: "warning";
2866
2866
  accent: "accent";
2867
2867
  accentDim: "accentDim";
@@ -2884,8 +2884,8 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
2884
2884
  }>;
2885
2885
  label: z$1.ZodOptional<z$1.ZodString>;
2886
2886
  color: z$1.ZodOptional<z$1.ZodEnum<{
2887
- error: "error";
2888
2887
  success: "success";
2888
+ error: "error";
2889
2889
  warning: "warning";
2890
2890
  accent: "accent";
2891
2891
  accentDim: "accentDim";
@@ -2943,8 +2943,8 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
2943
2943
  }>;
2944
2944
  label: z$1.ZodOptional<z$1.ZodString>;
2945
2945
  color: z$1.ZodOptional<z$1.ZodEnum<{
2946
- error: "error";
2947
2946
  success: "success";
2947
+ error: "error";
2948
2948
  warning: "warning";
2949
2949
  accent: "accent";
2950
2950
  accentDim: "accentDim";
@@ -2967,8 +2967,8 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
2967
2967
  }>;
2968
2968
  label: z$1.ZodOptional<z$1.ZodString>;
2969
2969
  color: z$1.ZodOptional<z$1.ZodEnum<{
2970
- error: "error";
2971
2970
  success: "success";
2971
+ error: "error";
2972
2972
  warning: "warning";
2973
2973
  accent: "accent";
2974
2974
  accentDim: "accentDim";
@@ -3033,8 +3033,8 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
3033
3033
  }>;
3034
3034
  label: z$1.ZodOptional<z$1.ZodString>;
3035
3035
  color: z$1.ZodOptional<z$1.ZodEnum<{
3036
- error: "error";
3037
3036
  success: "success";
3037
+ error: "error";
3038
3038
  warning: "warning";
3039
3039
  accent: "accent";
3040
3040
  accentDim: "accentDim";
@@ -3057,8 +3057,8 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
3057
3057
  }>;
3058
3058
  label: z$1.ZodOptional<z$1.ZodString>;
3059
3059
  color: z$1.ZodOptional<z$1.ZodEnum<{
3060
- error: "error";
3061
3060
  success: "success";
3061
+ error: "error";
3062
3062
  warning: "warning";
3063
3063
  accent: "accent";
3064
3064
  accentDim: "accentDim";
@@ -3310,8 +3310,8 @@ declare const llmCallMetricFormatSchema$1: z$1.ZodEnum<{
3310
3310
  string: "string";
3311
3311
  number: "number";
3312
3312
  boolean: "boolean";
3313
- duration: "duration";
3314
3313
  json: "json";
3314
+ duration: "duration";
3315
3315
  }>;
3316
3316
  /** Render format applied to an LLM-call metric value. */
3317
3317
  type LlmCallMetricFormat = z$1.infer<typeof llmCallMetricFormatSchema$1>;
@@ -3320,8 +3320,8 @@ declare const apiCallMetricFormatSchema$1: z$1.ZodEnum<{
3320
3320
  string: "string";
3321
3321
  number: "number";
3322
3322
  boolean: "boolean";
3323
- duration: "duration";
3324
3323
  json: "json";
3324
+ duration: "duration";
3325
3325
  }>;
3326
3326
  /** Render format applied to an API-call metric value. */
3327
3327
  type ApiCallMetricFormat = z$1.infer<typeof apiCallMetricFormatSchema$1>;
@@ -3390,8 +3390,8 @@ declare const llmCallMetricSchema: z$1.ZodObject<{
3390
3390
  string: "string";
3391
3391
  number: "number";
3392
3392
  boolean: "boolean";
3393
- duration: "duration";
3394
3393
  json: "json";
3394
+ duration: "duration";
3395
3395
  }>>;
3396
3396
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
3397
3397
  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
@@ -3419,8 +3419,8 @@ declare const apiCallMetricSchema: z$1.ZodObject<{
3419
3419
  string: "string";
3420
3420
  number: "number";
3421
3421
  boolean: "boolean";
3422
- duration: "duration";
3423
3422
  json: "json";
3423
+ duration: "duration";
3424
3424
  }>>;
3425
3425
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
3426
3426
  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
@@ -3533,8 +3533,8 @@ declare const llmCallsConfigSchema: z$1.ZodObject<{
3533
3533
  string: "string";
3534
3534
  number: "number";
3535
3535
  boolean: "boolean";
3536
- duration: "duration";
3537
3536
  json: "json";
3537
+ duration: "duration";
3538
3538
  }>>;
3539
3539
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
3540
3540
  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
@@ -3569,8 +3569,8 @@ declare const apiCallsConfigSchema: z$1.ZodObject<{
3569
3569
  string: "string";
3570
3570
  number: "number";
3571
3571
  boolean: "boolean";
3572
- duration: "duration";
3573
3572
  json: "json";
3573
+ duration: "duration";
3574
3574
  }>>;
3575
3575
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
3576
3576
  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
@@ -5886,7 +5886,7 @@ type EvalRunner = {
5886
5886
  clearCache(filter?: CacheClearFilter): Promise<void>;
5887
5887
  /**
5888
5888
  * Recompute persisted case and run statuses for terminal runs touching one
5889
- * eval. Accepts the exact eval key, with a legacy fallback for unique eval ids.
5889
+ * eval. Accepts the exact eval key.
5890
5890
  */
5891
5891
  recomputeStatusesForEval(evalKey: string): Promise<{
5892
5892
  updatedRuns: number;
@@ -5897,7 +5897,7 @@ type EvalRunner = {
5897
5897
  }): Promise<RecalculateDerivedAttributesResult>;
5898
5898
  /**
5899
5899
  * Delete terminal persisted runs that touch one eval from memory and disk.
5900
- * Accepts the exact eval key, with a legacy fallback for unique eval ids.
5900
+ * Accepts the exact eval key.
5901
5901
  */
5902
5902
  cleanRunsForEval(evalKey: string): Promise<{
5903
5903
  deletedRuns: number;
@@ -5937,13 +5937,12 @@ type EvalRunner = {
5937
5937
  validateManualInputs(request: CreateRunRequest$1): ManualInputValidationResult;
5938
5938
  }; //#endregion
5939
5939
  //#region src/runner.d.ts
5940
- type CreateRunnerOptions = {
5941
- watchForChanges?: boolean;
5942
- };
5943
5940
  /** Create an in-memory eval runner bound to the current workspace config. */
5944
5941
  declare function createRunner({
5945
5942
  watchForChanges
5946
- }?: CreateRunnerOptions): EvalRunner; //#endregion
5943
+ }?: {
5944
+ watchForChanges?: boolean;
5945
+ }): EvalRunner; //#endregion
5947
5946
  //#region src/manualInput/files.d.ts
5948
5947
  type StageManualInputFileParams = {
5949
5948
  workspaceRoot: string;
package/dist/index.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { $ as getCurrentScope, B as deserializeCacheValue, F as evalSpan, G as readManualInputFile, H as serializeCacheValue, I as evalTracer, J as appendToEvalOutput, K as evalExpect, L as hashCacheKey, M as z, N as buildTraceTree, P as captureEvalSpanError, Q as evalTime, R as hashCacheKeySync, Rt as getEvalRegistry, U as repoFile, V as serializeCacheRecording, W as manualInputFileValueSchema, X as evalAssert, Z as evalLog, _t as extractLlmCalls, at as nextEvalId, ct as runInExistingEvalScope, dt as startEvalBackgroundJob, et as getEvalCaseInput, gt as extractApiCalls, ht as extractCacheHits, it as mergeEvalOutput, lt as setEvalOutput, mt as extractCacheEntries, nt as isInEvalScope, ot as runInEvalRuntimeScope, q as EvalAssertionError, st as runInEvalScope, tt as incrementEvalOutput, ut as setScopeCacheContext, vt as simulateLlmCallCost, xt as getNestedAttribute, yt as simulateTokenAllocation, z as deserializeCacheRecording } from "./runOrchestration-SPaHx-SC.mjs";
2
- import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-DbVfkr9T.mjs";
3
- import { n as matchesEvalTags, t as defineEval } from "./src-CANi3gpd.mjs";
1
+ import { $ as getCurrentScope, B as deserializeCacheValue, F as evalSpan, G as readManualInputFile, H as serializeCacheValue, I as evalTracer, J as appendToEvalOutput, K as evalExpect, L as hashCacheKey, Lt as getEvalRegistry, M as z, N as buildTraceTree, P as captureEvalSpanError, Q as evalTime, R as hashCacheKeySync, U as repoFile, V as serializeCacheRecording, W as manualInputFileValueSchema, X as evalAssert, Z as evalLog, _t as extractLlmCalls, at as nextEvalId, ct as runInExistingEvalScope, dt as startEvalBackgroundJob, et as getEvalCaseInput, gt as extractApiCalls, ht as extractCacheHits, it as mergeEvalOutput, lt as setEvalOutput, mt as extractCacheEntries, nt as isInEvalScope, ot as runInEvalRuntimeScope, q as EvalAssertionError, st as runInEvalScope, tt as incrementEvalOutput, ut as setScopeCacheContext, vt as simulateLlmCallCost, xt as getNestedAttribute, yt as simulateTokenAllocation, z as deserializeCacheRecording } from "./runOrchestration-OVUFw1fL.mjs";
2
+ import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-BeJCJMQo.mjs";
3
+ import { n as matchesEvalTags, t as defineEval } from "./src-D7_xKo7h.mjs";
4
4
  export { EvalAssertionError, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, z };
package/dist/runChild.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { At as buildEvalKey, D as loadConfig, Dt as runSummarySchema, Et as runManifestSchema, Ft as evalChartsConfigSchema, It as columnDefSchema, Nt as evalStatsConfigSchema, Pt as manualInputDescriptorSchema, T as parseEvalDiscovery, Y as configureEvalRunLogs, ft as createRunRequestSchema, h as persistRunState, j as createFsCacheStore, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-SPaHx-SC.mjs";
1
+ import { At as buildEvalKey, D as loadConfig, Dt as runSummarySchema, Et as runManifestSchema, Ft as columnDefSchema, Mt as evalStatsConfigSchema, Nt as manualInputDescriptorSchema, Pt as evalChartsConfigSchema, T as parseEvalDiscovery, Y as configureEvalRunLogs, ft as createRunRequestSchema, h as persistRunState, j as createFsCacheStore, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-OVUFw1fL.mjs";
2
2
  import { z } from "zod/v4";
3
3
  import { readFile } from "node:fs/promises";
4
4
  import { relative } from "node:path";
@@ -866,10 +866,6 @@ function buildCaseKey(params) {
866
866
  encodeURIComponent(params.caseId)
867
867
  ].join("#");
868
868
  }
869
- /** Return the collision-safe eval key stored on a row, falling back for legacy data. */
870
- function getCaseRowEvalKey(row) {
871
- return row.evalKey ?? row.evalId;
872
- }
873
869
  /** Return the collision-safe case key stored on a row, falling back for legacy data. */
874
870
  function getCaseRowCaseKey(row) {
875
871
  return row.caseKey ?? row.caseId;
@@ -6586,9 +6582,9 @@ function recomputePersistedCaseStatus(caseRow, caseDetail, scoreThresholds) {
6586
6582
  return caseRow.status === "error" ? "error" : "pass";
6587
6583
  }
6588
6584
  function runTouchesEval(params) {
6589
- if (params.caseRows.some((caseRow) => getCaseRowEvalKey(caseRow) === params.evalKey || caseRow.evalKey === void 0 && caseRow.evalId === params.evalId)) return true;
6585
+ if (params.caseRows.some((caseRow) => caseRow.evalKey === params.evalKey)) return true;
6590
6586
  if (params.target.mode === "all") return params.evalExists;
6591
- if (params.target.mode === "evalIds") return params.target.evalKeys?.includes(params.evalKey) ?? params.target.evalIds?.includes(params.evalId ?? params.evalKey) ?? false;
6587
+ if (params.target.mode === "evalIds") return params.target.evalKeys?.includes(params.evalKey) ?? false;
6592
6588
  return false;
6593
6589
  }
6594
6590
  async function deleteTemporaryRuns(params) {
@@ -6619,13 +6615,12 @@ async function recomputeEvalStatusesInRuns(params) {
6619
6615
  target: run.manifest.target,
6620
6616
  caseRows: run.cases,
6621
6617
  evalKey: params.evalKey,
6622
- evalId: params.evalId,
6623
6618
  evalExists: params.evalExists
6624
6619
  })) continue;
6625
6620
  if (run.manifest.status === "running") continue;
6626
6621
  let changed = false;
6627
6622
  for (const caseRow of run.cases) {
6628
- if (getCaseRowEvalKey(caseRow) !== params.evalKey && !(caseRow.evalKey === void 0 && caseRow.evalId === params.evalId)) continue;
6623
+ if (caseRow.evalKey !== params.evalKey) continue;
6629
6624
  const caseDetail = run.caseDetails.get(getCaseRowCaseKey(caseRow));
6630
6625
  const nextStatus = recomputePersistedCaseStatus(caseRow, caseDetail, params.scoreThresholds);
6631
6626
  if (caseRow.status === nextStatus) continue;
@@ -6708,15 +6703,14 @@ function getLastRunStatuses(params) {
6708
6703
  function getLatestRunInfos(params) {
6709
6704
  const { runs, knownEvals } = params;
6710
6705
  const knownEvalMetas = [...knownEvals];
6711
- const evalIdByKey = new Map(knownEvalMetas.map((evalMeta) => [evalMeta.key, evalMeta.id]));
6712
6706
  const manualScoreKeysByEval = new Map(knownEvalMetas.map((evalMeta) => [evalMeta.key, evalMeta.columnDefs.filter((columnDef) => columnDef.isManualScore === true).map((columnDef) => columnDef.key)]));
6713
6707
  const orderedRuns = [...runs].toSorted((a, b) => new Date(getRunFreshnessTimestamp(a.manifest)).getTime() - new Date(getRunFreshnessTimestamp(b.manifest)).getTime());
6714
6708
  const latestRunInfos = /* @__PURE__ */ new Map();
6715
6709
  for (const run of orderedRuns) for (const evalKey of getRunEvalKeys(run, knownEvalMetas)) latestRunInfos.set(evalKey, {
6716
- status: getEvalStatusForRun(run, evalKey, evalIdByKey.get(evalKey), manualScoreKeysByEval.get(evalKey) ?? []),
6710
+ status: getEvalStatusForRun(run, evalKey, manualScoreKeysByEval.get(evalKey) ?? []),
6717
6711
  startedAt: getRunFreshnessTimestamp(run.manifest),
6718
6712
  commitSha: run.manifest.commitSha ?? null,
6719
- evalSourceFingerprint: run.manifest.evalSourceFingerprints[evalKey] ?? run.manifest.evalSourceFingerprints[evalIdByKey.get(evalKey) ?? ""] ?? null
6713
+ evalSourceFingerprint: run.manifest.evalSourceFingerprints[evalKey] ?? null
6720
6714
  });
6721
6715
  return latestRunInfos;
6722
6716
  }
@@ -6775,19 +6769,14 @@ async function readCaseDetails(runDir) {
6775
6769
  }
6776
6770
  function getRunEvalKeys(run, knownEvals) {
6777
6771
  const knownEvalMetas = [...knownEvals];
6778
- const evalKeys = new Set(run.cases.map(getCaseRowEvalKey));
6779
- for (const caseRow of run.cases) {
6780
- if (caseRow.evalKey !== void 0) continue;
6781
- for (const evalMeta of knownEvalMetas) if (evalMeta.id === caseRow.evalId) evalKeys.add(evalMeta.key);
6782
- }
6783
- if (run.manifest.target.mode === "evalIds") {
6784
- for (const evalKey of run.manifest.target.evalKeys ?? []) evalKeys.add(evalKey);
6785
- for (const evalId of run.manifest.target.evalIds ?? []) for (const evalMeta of knownEvalMetas) if (evalMeta.id === evalId) evalKeys.add(evalMeta.key);
6786
- } else if (run.manifest.target.mode === "all" && evalKeys.size === 0) for (const evalMeta of knownEvalMetas) evalKeys.add(evalMeta.key);
6772
+ const evalKeys = /* @__PURE__ */ new Set();
6773
+ for (const caseRow of run.cases) if (caseRow.evalKey !== void 0) evalKeys.add(caseRow.evalKey);
6774
+ if (run.manifest.target.mode === "evalIds") for (const evalKey of run.manifest.target.evalKeys ?? []) evalKeys.add(evalKey);
6775
+ else if (run.manifest.target.mode === "all" && evalKeys.size === 0) for (const evalMeta of knownEvalMetas) evalKeys.add(evalMeta.key);
6787
6776
  return [...evalKeys];
6788
6777
  }
6789
- function getEvalStatusForRun(run, evalKey, evalId, manualScoreKeys) {
6790
- const evalCases = run.cases.filter((caseRow) => getCaseRowEvalKey(caseRow) === evalKey || caseRow.evalKey === void 0 && caseRow.evalId === evalId);
6778
+ function getEvalStatusForRun(run, evalKey, manualScoreKeys) {
6779
+ const evalCases = run.cases.filter((caseRow) => caseRow.evalKey === evalKey);
6791
6780
  if (evalCases.length > 0) {
6792
6781
  if (hasPendingManualScores(evalCases, manualScoreKeys)) return "unscored";
6793
6782
  return toLastRunStatus$1(deriveStatusFromCaseRows({ caseRows: evalCases }));
@@ -7809,4 +7798,4 @@ function toLastRunStatus(status) {
7809
7798
  return status === "pending" ? null : status;
7810
7799
  }
7811
7800
  //#endregion
7812
- export { getCurrentScope as $, validateCharts as A, buildEvalKey as At, deserializeCacheValue as B, deriveEvalFreshness as C, getEvalDisplayStatus as Ct, loadConfig as D, runSummarySchema as Dt, resolveEvalDefaultConfig as E, runManifestSchema as Et, evalSpan as F, evalChartsConfigSchema as Ft, readManualInputFile as G, serializeCacheValue as H, evalTracer as I, columnDefSchema as It, appendToEvalOutput as J, evalExpect as K, hashCacheKey as L, defineEval as Lt, z$1 as M, getCaseRowEvalKey as Mt, buildTraceTree as N, evalStatsConfigSchema as Nt, buildDeclaredColumnDefs as O, resolveApiCallsConfig as Ot, captureEvalSpanError as P, manualInputDescriptorSchema as Pt, evalTime as Q, hashCacheKeySync as R, getEvalRegistry as Rt, parseManualInputValues as S, getEvalTitle as St, parseEvalDiscovery as T, matchesTagsFilter as Tt, repoFile as U, serializeCacheRecording as V, manualInputFileValueSchema as W, evalAssert as X, configureEvalRunLogs as Y, evalLog as Z, recomputePersistedCaseStatus as _, extractLlmCalls as _t, validateTagsFilters as a, nextEvalId as at, resolveArtifactPath as b, applyDerivedCallAttributes as bt, getLastRunStatuses as c, runInExistingEvalScope as ct, loadPersistedRunSnapshots as d, startEvalBackgroundJob as dt, getEvalCaseInput as et, nextShortIdFromSnapshots as f, createRunRequestSchema as ft, recomputeEvalStatusesInRuns as g, extractApiCalls as gt, persistRunState as h, extractCacheHits as ht, resolveEvalTags as i, mergeEvalOutput as it, createFsCacheStore as j, getCaseRowCaseKey as jt, normalizeScoreDef as k, resolveLlmCallsConfig as kt, getLatestRunInfos as l, setEvalOutput as lt, deleteTemporaryRuns as m, extractCacheEntries as mt, getTargetEvalKeys as n, isInEvalScope as nt, stripTerminalControlCodes as o, runInEvalRuntimeScope as ot, persistCaseDetail as p, updateManualScoreRequestSchema as pt, EvalAssertionError as q, getTargetEvals as r, matchesEvalTags as rt, generateRunId as s, runInEvalScope as st, executeRun as t, incrementEvalOutput as tt, loadPersistedRunSnapshot as u, setScopeCacheContext as ut, runTouchesEval as v, simulateLlmCallCost as vt, loadEvalModule as w, deriveScopedSummaryFromCases as wt, buildManualInputDescriptor as x, getNestedAttribute as xt, resolveTracePresentation as y, simulateTokenAllocation as yt, deserializeCacheRecording as z };
7801
+ export { getCurrentScope as $, validateCharts as A, buildEvalKey as At, deserializeCacheValue as B, deriveEvalFreshness as C, getEvalDisplayStatus as Ct, loadConfig as D, runSummarySchema as Dt, resolveEvalDefaultConfig as E, runManifestSchema as Et, evalSpan as F, columnDefSchema as Ft, readManualInputFile as G, serializeCacheValue as H, evalTracer as I, defineEval as It, appendToEvalOutput as J, evalExpect as K, hashCacheKey as L, getEvalRegistry as Lt, z$1 as M, evalStatsConfigSchema as Mt, buildTraceTree as N, manualInputDescriptorSchema as Nt, buildDeclaredColumnDefs as O, resolveApiCallsConfig as Ot, captureEvalSpanError as P, evalChartsConfigSchema as Pt, evalTime as Q, hashCacheKeySync as R, parseManualInputValues as S, getEvalTitle as St, parseEvalDiscovery as T, matchesTagsFilter as Tt, repoFile as U, serializeCacheRecording as V, manualInputFileValueSchema as W, evalAssert as X, configureEvalRunLogs as Y, evalLog as Z, recomputePersistedCaseStatus as _, extractLlmCalls as _t, validateTagsFilters as a, nextEvalId as at, resolveArtifactPath as b, applyDerivedCallAttributes as bt, getLastRunStatuses as c, runInExistingEvalScope as ct, loadPersistedRunSnapshots as d, startEvalBackgroundJob as dt, getEvalCaseInput as et, nextShortIdFromSnapshots as f, createRunRequestSchema as ft, recomputeEvalStatusesInRuns as g, extractApiCalls as gt, persistRunState as h, extractCacheHits as ht, resolveEvalTags as i, mergeEvalOutput as it, createFsCacheStore as j, getCaseRowCaseKey as jt, normalizeScoreDef as k, resolveLlmCallsConfig as kt, getLatestRunInfos as l, setEvalOutput as lt, deleteTemporaryRuns as m, extractCacheEntries as mt, getTargetEvalKeys as n, isInEvalScope as nt, stripTerminalControlCodes as o, runInEvalRuntimeScope as ot, persistCaseDetail as p, updateManualScoreRequestSchema as pt, EvalAssertionError as q, getTargetEvals as r, matchesEvalTags as rt, generateRunId as s, runInEvalScope as st, executeRun as t, incrementEvalOutput as tt, loadPersistedRunSnapshot as u, setScopeCacheContext as ut, runTouchesEval as v, simulateLlmCallCost as vt, loadEvalModule as w, deriveScopedSummaryFromCases as wt, buildManualInputDescriptor as x, getNestedAttribute as xt, resolveTracePresentation as y, simulateTokenAllocation as yt, deserializeCacheRecording as z };