@ls-stack/agent-eval 0.60.2 → 0.60.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  import { Et as getCaseRowCaseKey, Ot as caseRowSchema, dt as getEvalTitle, nt as updateManualScoreRequestSchema, rt as extractCacheEntries, tt as createRunRequestSchema } from "./runExecution-Bq0Y3y_1.mjs";
2
- import { o as stageManualInputFile } from "./cli-CbePEEua.mjs";
3
- import "./src-CVM_FqPx.mjs";
4
- import { t as getRunnerInstance } from "./runner-Kp0JqxrU.mjs";
2
+ import { o as stageManualInputFile } from "./cli-BSVUCUxr.mjs";
3
+ import "./src-D5vGo2iv.mjs";
4
+ import { t as getRunnerInstance } from "./runner-DmkSq-QG.mjs";
5
5
  import { z } from "zod";
6
6
  import { resultify } from "t-result";
7
7
  import { readFile } from "node:fs/promises";
package/dist/bin.mjs CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env node
2
- import { t as runCli } from "./cli-CbePEEua.mjs";
2
+ import { t as runCli } from "./cli-BSVUCUxr.mjs";
3
3
  import { spawn } from "node:child_process";
4
4
  //#region src/bin.ts
5
5
  const moduleMocksFlag = "--experimental-test-module-mocks";
@@ -1,5 +1,5 @@
1
1
  import { Ct as resolveLlmCallsConfig, Et as getCaseRowCaseKey, Ft as getEvalRegistry, Ot as caseRowSchema, St as resolveApiCallsConfig, Tt as buildEvalKey, _t as matchesTagsFilter, c as resolveArtifactPath, dt as getEvalTitle, f as resolveEvalDefaultConfig, ft as getEvalDisplayStatus, h as normalizeScoreDef, lt as applyDerivedCallAttributes, m as buildDeclaredColumnDefs, o as stripTerminalControlCodes, p as loadConfig, pt as deriveScopedSummaryFromCases, s as resolveTracePresentation, v as createFsCacheStore, xt as runSummarySchema, y as getCacheRetentionOptions } from "./runExecution-Bq0Y3y_1.mjs";
2
- import { C as validateCharts, S as parseEvalDiscovery, _ as runTouchesEval, a as validateTagsFilters, b as deriveEvalFreshness, c as getLatestRunInfos, d as nextShortIdFromSnapshots, f as persistCaseDetail, g as recomputePersistedCaseStatus, h as recomputeEvalStatusesInRuns, i as resolveEvalTags, l as loadPersistedRunSnapshot, m as persistRunState, n as getTargetEvalKeys, o as generateRunId, p as deleteTemporaryRuns, s as getLastRunStatuses, u as loadPersistedRunSnapshots, v as buildManualInputDescriptor, x as loadIsolatedEvalRegistry, y as parseManualInputValues } from "./runOrchestration-BpwW0AmB.mjs";
2
+ import { C as parseEvalDiscovery, S as loadIsolatedEvalRegistry, _ as recomputePersistedCaseStatus, a as validateTagsFilters, b as parseManualInputValues, c as getLatestRunInfos, d as loadPersistedRunSnapshots, f as nextShortIdFromSnapshots, g as recomputeEvalStatusesInRuns, h as persistRunState, i as resolveEvalTags, l as loadPersistedCaseDetail, m as deleteTemporaryRuns, n as getTargetEvalKeys, o as generateRunId, p as persistCaseDetail, s as getLastRunStatuses, u as loadPersistedRunSnapshot, v as runTouchesEval, w as validateCharts, x as deriveEvalFreshness, y as buildManualInputDescriptor } from "./runOrchestration-C7qQISz2.mjs";
3
3
  import { parseEnv } from "node:util";
4
4
  import { resultify } from "t-result";
5
5
  import { copyFile, mkdir, readFile, rm, writeFile } from "node:fs/promises";
@@ -28,10 +28,14 @@ function resolveCaseDetailLookup(run, caseId) {
28
28
  const caseDetail = run.caseDetails.get(lookupId);
29
29
  if (caseDetail) return caseDetail;
30
30
  }
31
- const matchingCaseRow = run.cases.find((caseRow) => lookupIds.has(getCaseRowCaseKey(caseRow)) || lookupIds.has(caseRow.caseId));
31
+ const matchingCaseRow = resolveCaseRowForCaseDetailLookup(run, caseId);
32
32
  if (matchingCaseRow === void 0) return void 0;
33
33
  return run.caseDetails.get(getCaseRowCaseKey(matchingCaseRow));
34
34
  }
35
+ function resolveCaseRowForCaseDetailLookup(run, caseId) {
36
+ const lookupIds = new Set(getCaseLookupIds(caseId));
37
+ return run.cases.find((caseRow) => lookupIds.has(getCaseRowCaseKey(caseRow)) || lookupIds.has(caseRow.caseId));
38
+ }
35
39
  //#endregion
36
40
  //#region ../runner/src/configReload.ts
37
41
  /** Coordinates idle-only reloads for workspace config and `.env` in app mode. */
@@ -823,7 +827,7 @@ async function markRunTerminalFromChild(runState, event, managerContext) {
823
827
  runState.manifest = snapshot.manifest;
824
828
  runState.summary = snapshot.summary;
825
829
  runState.cases = snapshot.cases;
826
- runState.caseDetails = snapshot.caseDetails;
830
+ runState.caseDetails = /* @__PURE__ */ new Map();
827
831
  } else if (event.type === "run.finished") {
828
832
  runState.manifest.status = "completed";
829
833
  runState.manifest.endedAt = (/* @__PURE__ */ new Date()).toISOString();
@@ -995,6 +999,26 @@ function createRunner({ watchForChanges = true, loadEnv = true } = {}) {
995
999
  if (typeof config.concurrency !== "number" || !Number.isFinite(config.concurrency)) return 1;
996
1000
  return Math.max(1, Math.floor(config.concurrency));
997
1001
  }
1002
+ function getCaseDetailFileId(run, caseRow) {
1003
+ const caseKey = getCaseRowCaseKey(caseRow);
1004
+ return run.cases.some((existing) => existing.caseId === caseRow.caseId && getCaseRowCaseKey(existing) !== caseKey) ? caseKey : caseRow.caseId;
1005
+ }
1006
+ function hydrateCaseDetailForRow(run, caseRow) {
1007
+ const caseKey = getCaseRowCaseKey(caseRow);
1008
+ const cached = run.caseDetails.get(caseKey);
1009
+ if (cached !== void 0) return cached;
1010
+ const detail = loadPersistedCaseDetail(run.runDir, getCaseDetailFileId(run, caseRow));
1011
+ if (detail === null) return void 0;
1012
+ run.caseDetails.set(detail.caseKey ?? detail.caseId, detail);
1013
+ return detail;
1014
+ }
1015
+ function hydrateCaseDetailForLookup(run, caseId) {
1016
+ const cached = resolveCaseDetailLookup(run, caseId);
1017
+ if (cached !== void 0) return cached;
1018
+ const caseRow = resolveCaseRowForCaseDetailLookup(run, caseId);
1019
+ if (caseRow === void 0) return void 0;
1020
+ return hydrateCaseDetailForRow(run, caseRow);
1021
+ }
998
1022
  function nextRegistryLoadIsolationKey(prefix, filePath) {
999
1023
  registryLoadCounter++;
1000
1024
  return `${prefix}:${String(registryLoadCounter)}:${filePath}`;
@@ -1041,6 +1065,7 @@ function createRunner({ watchForChanges = true, loadEnv = true } = {}) {
1041
1065
  evalKey: evalMeta.key,
1042
1066
  evalExists: evals.has(evalMeta.key),
1043
1067
  scoreThresholds,
1068
+ getCaseDetail: hydrateCaseDetailForRow,
1044
1069
  persistCaseDetail
1045
1070
  });
1046
1071
  emitDiscoveryEvent();
@@ -1052,6 +1077,7 @@ function createRunner({ watchForChanges = true, loadEnv = true } = {}) {
1052
1077
  updated: false,
1053
1078
  reason: "Run not found"
1054
1079
  };
1080
+ hydrateCaseDetailForLookup(run, caseId);
1055
1081
  return recalculateDerivedAttributesForCase({
1056
1082
  run,
1057
1083
  caseId,
@@ -1107,7 +1133,7 @@ function createRunner({ watchForChanges = true, loadEnv = true } = {}) {
1107
1133
  updated: false,
1108
1134
  reason: "Manual score not found"
1109
1135
  };
1110
- const caseDetail = run.caseDetails.get(getCaseRowCaseKey(caseRow));
1136
+ const caseDetail = hydrateCaseDetailForRow(run, caseRow);
1111
1137
  if (!caseDetail) return {
1112
1138
  updated: false,
1113
1139
  reason: "Case detail not found"
@@ -1480,7 +1506,7 @@ function createRunner({ watchForChanges = true, loadEnv = true } = {}) {
1480
1506
  getCaseDetail(runId, caseId) {
1481
1507
  const run = runs.get(runId);
1482
1508
  if (!run) return void 0;
1483
- return resolveCaseDetailLookup(run, caseId);
1509
+ return hydrateCaseDetailForLookup(run, caseId);
1484
1510
  },
1485
1511
  subscribe(runId, listener) {
1486
1512
  const run = runs.get(runId);
@@ -2232,8 +2258,8 @@ async function commandApp(args) {
2232
2258
  const { serve } = await import("@hono/node-server");
2233
2259
  const bundledWebDist = resolve(currentDir, "apps/web/dist");
2234
2260
  if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
2235
- const appModule = await import("./app-DPCFFkyQ.mjs");
2236
- const runnerModule = await import("./runner-XEP21_u9.mjs");
2261
+ const appModule = await import("./app-l3ynaNsb.mjs");
2262
+ const runnerModule = await import("./runner-C9xNJHt3.mjs");
2237
2263
  if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
2238
2264
  if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
2239
2265
  await runnerModule.initRunner({ loadEnv: args.loadEnv });
package/dist/index.d.mts CHANGED
@@ -1942,7 +1942,6 @@ declare const columnFormatSchema: z.ZodEnum<{
1942
1942
  number: "number";
1943
1943
  boolean: "boolean";
1944
1944
  file: "file";
1945
- duration: "duration";
1946
1945
  markdown: "markdown";
1947
1946
  json: "json";
1948
1947
  image: "image";
@@ -1950,6 +1949,7 @@ declare const columnFormatSchema: z.ZodEnum<{
1950
1949
  pdf: "pdf";
1951
1950
  audio: "audio";
1952
1951
  video: "video";
1952
+ duration: "duration";
1953
1953
  percent: "percent";
1954
1954
  passFail: "passFail";
1955
1955
  stars: "stars";
@@ -1969,7 +1969,6 @@ declare const columnDefSchema: z.ZodObject<{
1969
1969
  number: "number";
1970
1970
  boolean: "boolean";
1971
1971
  file: "file";
1972
- duration: "duration";
1973
1972
  markdown: "markdown";
1974
1973
  json: "json";
1975
1974
  image: "image";
@@ -1977,6 +1976,7 @@ declare const columnDefSchema: z.ZodObject<{
1977
1976
  pdf: "pdf";
1978
1977
  audio: "audio";
1979
1978
  video: "video";
1979
+ duration: "duration";
1980
1980
  percent: "percent";
1981
1981
  passFail: "passFail";
1982
1982
  stars: "stars";
@@ -2022,8 +2022,8 @@ type CellValue = z.infer<typeof cellValueSchema>; //#endregion
2022
2022
  declare const traceAttributeDisplayFormatSchema: z.ZodEnum<{
2023
2023
  string: "string";
2024
2024
  number: "number";
2025
- duration: "duration";
2026
2025
  json: "json";
2026
+ duration: "duration";
2027
2027
  }>;
2028
2028
  /**
2029
2029
  * Formatting hint for trace attribute values rendered by the UI.
@@ -2047,8 +2047,8 @@ declare const traceAttributeDisplaySchema: z.ZodObject<{
2047
2047
  format: z.ZodOptional<z.ZodEnum<{
2048
2048
  string: "string";
2049
2049
  number: "number";
2050
- duration: "duration";
2051
2050
  json: "json";
2051
+ duration: "duration";
2052
2052
  }>>;
2053
2053
  numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
2054
2054
  placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
@@ -2083,8 +2083,8 @@ declare const traceDisplayConfigSchema: z.ZodObject<{
2083
2083
  format: z.ZodOptional<z.ZodEnum<{
2084
2084
  string: "string";
2085
2085
  number: "number";
2086
- duration: "duration";
2087
2086
  json: "json";
2087
+ duration: "duration";
2088
2088
  }>>;
2089
2089
  numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
2090
2090
  placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
@@ -2123,8 +2123,8 @@ declare const traceAttributeDisplayInputSchema: z.ZodObject<{
2123
2123
  format: z.ZodOptional<z.ZodEnum<{
2124
2124
  string: "string";
2125
2125
  number: "number";
2126
- duration: "duration";
2127
2126
  json: "json";
2127
+ duration: "duration";
2128
2128
  }>>;
2129
2129
  numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
2130
2130
  placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
@@ -2161,8 +2161,8 @@ declare const traceDisplayInputConfigSchema: z.ZodObject<{
2161
2161
  format: z.ZodOptional<z.ZodEnum<{
2162
2162
  string: "string";
2163
2163
  number: "number";
2164
- duration: "duration";
2165
2164
  json: "json";
2165
+ duration: "duration";
2166
2166
  }>>;
2167
2167
  numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
2168
2168
  placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
@@ -2327,7 +2327,6 @@ declare const evalStatItemSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
2327
2327
  number: "number";
2328
2328
  boolean: "boolean";
2329
2329
  file: "file";
2330
- duration: "duration";
2331
2330
  markdown: "markdown";
2332
2331
  json: "json";
2333
2332
  image: "image";
@@ -2335,6 +2334,7 @@ declare const evalStatItemSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
2335
2334
  pdf: "pdf";
2336
2335
  audio: "audio";
2337
2336
  video: "video";
2337
+ duration: "duration";
2338
2338
  percent: "percent";
2339
2339
  passFail: "passFail";
2340
2340
  stars: "stars";
@@ -2391,7 +2391,6 @@ declare const evalStatsConfigSchema: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodOb
2391
2391
  number: "number";
2392
2392
  boolean: "boolean";
2393
2393
  file: "file";
2394
- duration: "duration";
2395
2394
  markdown: "markdown";
2396
2395
  json: "json";
2397
2396
  image: "image";
@@ -2399,6 +2398,7 @@ declare const evalStatsConfigSchema: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodOb
2399
2398
  pdf: "pdf";
2400
2399
  audio: "audio";
2401
2400
  video: "video";
2401
+ duration: "duration";
2402
2402
  percent: "percent";
2403
2403
  passFail: "passFail";
2404
2404
  stars: "stars";
@@ -2437,7 +2437,6 @@ declare const evalSummarySchema$1: z.ZodObject<{
2437
2437
  number: "number";
2438
2438
  boolean: "boolean";
2439
2439
  file: "file";
2440
- duration: "duration";
2441
2440
  markdown: "markdown";
2442
2441
  json: "json";
2443
2442
  image: "image";
@@ -2445,6 +2444,7 @@ declare const evalSummarySchema$1: z.ZodObject<{
2445
2444
  pdf: "pdf";
2446
2445
  audio: "audio";
2447
2446
  video: "video";
2447
+ duration: "duration";
2448
2448
  percent: "percent";
2449
2449
  passFail: "passFail";
2450
2450
  stars: "stars";
@@ -2518,7 +2518,6 @@ declare const evalSummarySchema$1: z.ZodObject<{
2518
2518
  number: "number";
2519
2519
  boolean: "boolean";
2520
2520
  file: "file";
2521
- duration: "duration";
2522
2521
  markdown: "markdown";
2523
2522
  json: "json";
2524
2523
  image: "image";
@@ -2526,6 +2525,7 @@ declare const evalSummarySchema$1: z.ZodObject<{
2526
2525
  pdf: "pdf";
2527
2526
  audio: "audio";
2528
2527
  video: "video";
2528
+ duration: "duration";
2529
2529
  percent: "percent";
2530
2530
  passFail: "passFail";
2531
2531
  stars: "stars";
@@ -2559,8 +2559,8 @@ declare const evalSummarySchema$1: z.ZodObject<{
2559
2559
  label: z.ZodOptional<z.ZodString>;
2560
2560
  color: z.ZodOptional<z.ZodEnum<{
2561
2561
  success: "success";
2562
- accent: "accent";
2563
2562
  error: "error";
2563
+ accent: "accent";
2564
2564
  accentDim: "accentDim";
2565
2565
  warning: "warning";
2566
2566
  textMuted: "textMuted";
@@ -2583,8 +2583,8 @@ declare const evalSummarySchema$1: z.ZodObject<{
2583
2583
  label: z.ZodOptional<z.ZodString>;
2584
2584
  color: z.ZodOptional<z.ZodEnum<{
2585
2585
  success: "success";
2586
- accent: "accent";
2587
2586
  error: "error";
2587
+ accent: "accent";
2588
2588
  accentDim: "accentDim";
2589
2589
  warning: "warning";
2590
2590
  textMuted: "textMuted";
@@ -2749,7 +2749,6 @@ declare const caseRowSchema$1: z.ZodObject<{
2749
2749
  number: "number";
2750
2750
  boolean: "boolean";
2751
2751
  file: "file";
2752
- duration: "duration";
2753
2752
  markdown: "markdown";
2754
2753
  json: "json";
2755
2754
  image: "image";
@@ -2757,6 +2756,7 @@ declare const caseRowSchema$1: z.ZodObject<{
2757
2756
  pdf: "pdf";
2758
2757
  audio: "audio";
2759
2758
  video: "video";
2759
+ duration: "duration";
2760
2760
  percent: "percent";
2761
2761
  passFail: "passFail";
2762
2762
  stars: "stars";
@@ -2894,8 +2894,8 @@ declare const scoreTraceSchema: z.ZodObject<{
2894
2894
  format: z.ZodOptional<z.ZodEnum<{
2895
2895
  string: "string";
2896
2896
  number: "number";
2897
- duration: "duration";
2898
2897
  json: "json";
2898
+ duration: "duration";
2899
2899
  }>>;
2900
2900
  numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
2901
2901
  placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
@@ -2997,8 +2997,8 @@ declare const caseDetailSchema$1: z.ZodObject<{
2997
2997
  format: z.ZodOptional<z.ZodEnum<{
2998
2998
  string: "string";
2999
2999
  number: "number";
3000
- duration: "duration";
3001
3000
  json: "json";
3001
+ duration: "duration";
3002
3002
  }>>;
3003
3003
  numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
3004
3004
  placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
@@ -3066,8 +3066,8 @@ declare const caseDetailSchema$1: z.ZodObject<{
3066
3066
  format: z.ZodOptional<z.ZodEnum<{
3067
3067
  string: "string";
3068
3068
  number: "number";
3069
- duration: "duration";
3070
3069
  json: "json";
3070
+ duration: "duration";
3071
3071
  }>>;
3072
3072
  numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
3073
3073
  placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
@@ -3127,7 +3127,6 @@ declare const caseDetailSchema$1: z.ZodObject<{
3127
3127
  number: "number";
3128
3128
  boolean: "boolean";
3129
3129
  file: "file";
3130
- duration: "duration";
3131
3130
  markdown: "markdown";
3132
3131
  json: "json";
3133
3132
  image: "image";
@@ -3135,6 +3134,7 @@ declare const caseDetailSchema$1: z.ZodObject<{
3135
3134
  pdf: "pdf";
3136
3135
  audio: "audio";
3137
3136
  video: "video";
3137
+ duration: "duration";
3138
3138
  percent: "percent";
3139
3139
  passFail: "passFail";
3140
3140
  stars: "stars";
@@ -3284,8 +3284,8 @@ type EvalChartAggregate = z.infer<typeof evalChartAggregateSchema>;
3284
3284
  */
3285
3285
  declare const evalChartColorSchema: z.ZodEnum<{
3286
3286
  success: "success";
3287
- accent: "accent";
3288
3287
  error: "error";
3288
+ accent: "accent";
3289
3289
  accentDim: "accentDim";
3290
3290
  warning: "warning";
3291
3291
  textMuted: "textMuted";
@@ -3313,8 +3313,8 @@ declare const evalChartMetricSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
3313
3313
  label: z.ZodOptional<z.ZodString>;
3314
3314
  color: z.ZodOptional<z.ZodEnum<{
3315
3315
  success: "success";
3316
- accent: "accent";
3317
3316
  error: "error";
3317
+ accent: "accent";
3318
3318
  accentDim: "accentDim";
3319
3319
  warning: "warning";
3320
3320
  textMuted: "textMuted";
@@ -3337,8 +3337,8 @@ declare const evalChartMetricSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
3337
3337
  label: z.ZodOptional<z.ZodString>;
3338
3338
  color: z.ZodOptional<z.ZodEnum<{
3339
3339
  success: "success";
3340
- accent: "accent";
3341
3340
  error: "error";
3341
+ accent: "accent";
3342
3342
  accentDim: "accentDim";
3343
3343
  warning: "warning";
3344
3344
  textMuted: "textMuted";
@@ -3396,8 +3396,8 @@ declare const evalChartConfigSchema: z.ZodObject<{
3396
3396
  label: z.ZodOptional<z.ZodString>;
3397
3397
  color: z.ZodOptional<z.ZodEnum<{
3398
3398
  success: "success";
3399
- accent: "accent";
3400
3399
  error: "error";
3400
+ accent: "accent";
3401
3401
  accentDim: "accentDim";
3402
3402
  warning: "warning";
3403
3403
  textMuted: "textMuted";
@@ -3420,8 +3420,8 @@ declare const evalChartConfigSchema: z.ZodObject<{
3420
3420
  label: z.ZodOptional<z.ZodString>;
3421
3421
  color: z.ZodOptional<z.ZodEnum<{
3422
3422
  success: "success";
3423
- accent: "accent";
3424
3423
  error: "error";
3424
+ accent: "accent";
3425
3425
  accentDim: "accentDim";
3426
3426
  warning: "warning";
3427
3427
  textMuted: "textMuted";
@@ -3486,8 +3486,8 @@ declare const evalChartsConfigSchema: z.ZodArray<z.ZodObject<{
3486
3486
  label: z.ZodOptional<z.ZodString>;
3487
3487
  color: z.ZodOptional<z.ZodEnum<{
3488
3488
  success: "success";
3489
- accent: "accent";
3490
3489
  error: "error";
3490
+ accent: "accent";
3491
3491
  accentDim: "accentDim";
3492
3492
  warning: "warning";
3493
3493
  textMuted: "textMuted";
@@ -3510,8 +3510,8 @@ declare const evalChartsConfigSchema: z.ZodArray<z.ZodObject<{
3510
3510
  label: z.ZodOptional<z.ZodString>;
3511
3511
  color: z.ZodOptional<z.ZodEnum<{
3512
3512
  success: "success";
3513
- accent: "accent";
3514
3513
  error: "error";
3514
+ accent: "accent";
3515
3515
  accentDim: "accentDim";
3516
3516
  warning: "warning";
3517
3517
  textMuted: "textMuted";
@@ -3808,8 +3808,8 @@ declare const llmCallMetricFormatSchema$1: z.ZodEnum<{
3808
3808
  string: "string";
3809
3809
  number: "number";
3810
3810
  boolean: "boolean";
3811
- duration: "duration";
3812
3811
  json: "json";
3812
+ duration: "duration";
3813
3813
  }>;
3814
3814
  /** Render format applied to an LLM-call metric value. */
3815
3815
  type LlmCallMetricFormat = z.infer<typeof llmCallMetricFormatSchema$1>;
@@ -3818,8 +3818,8 @@ declare const apiCallMetricFormatSchema$1: z.ZodEnum<{
3818
3818
  string: "string";
3819
3819
  number: "number";
3820
3820
  boolean: "boolean";
3821
- duration: "duration";
3822
3821
  json: "json";
3822
+ duration: "duration";
3823
3823
  }>;
3824
3824
  /** Render format applied to an API-call metric value. */
3825
3825
  type ApiCallMetricFormat = z.infer<typeof apiCallMetricFormatSchema$1>;
@@ -3888,8 +3888,8 @@ declare const llmCallMetricSchema: z.ZodObject<{
3888
3888
  string: "string";
3889
3889
  number: "number";
3890
3890
  boolean: "boolean";
3891
- duration: "duration";
3892
3891
  json: "json";
3892
+ duration: "duration";
3893
3893
  }>>;
3894
3894
  numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
3895
3895
  placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
@@ -3917,8 +3917,8 @@ declare const apiCallMetricSchema: z.ZodObject<{
3917
3917
  string: "string";
3918
3918
  number: "number";
3919
3919
  boolean: "boolean";
3920
- duration: "duration";
3921
3920
  json: "json";
3921
+ duration: "duration";
3922
3922
  }>>;
3923
3923
  numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
3924
3924
  placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
@@ -4031,8 +4031,8 @@ declare const llmCallsConfigSchema: z.ZodObject<{
4031
4031
  string: "string";
4032
4032
  number: "number";
4033
4033
  boolean: "boolean";
4034
- duration: "duration";
4035
4034
  json: "json";
4035
+ duration: "duration";
4036
4036
  }>>;
4037
4037
  numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
4038
4038
  placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
@@ -4067,8 +4067,8 @@ declare const apiCallsConfigSchema: z.ZodObject<{
4067
4067
  string: "string";
4068
4068
  number: "number";
4069
4069
  boolean: "boolean";
4070
- duration: "duration";
4071
4070
  json: "json";
4071
+ duration: "duration";
4072
4072
  }>>;
4073
4073
  numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
4074
4074
  placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
@@ -4670,7 +4670,6 @@ declare const cacheRecordingOpSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
4670
4670
  number: "number";
4671
4671
  boolean: "boolean";
4672
4672
  file: "file";
4673
- duration: "duration";
4674
4673
  markdown: "markdown";
4675
4674
  json: "json";
4676
4675
  image: "image";
@@ -4678,6 +4677,7 @@ declare const cacheRecordingOpSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
4678
4677
  pdf: "pdf";
4679
4678
  audio: "audio";
4680
4679
  video: "video";
4680
+ duration: "duration";
4681
4681
  percent: "percent";
4682
4682
  passFail: "passFail";
4683
4683
  stars: "stars";
@@ -4758,7 +4758,6 @@ declare const cacheRecordingSchema: z.ZodObject<{
4758
4758
  number: "number";
4759
4759
  boolean: "boolean";
4760
4760
  file: "file";
4761
- duration: "duration";
4762
4761
  markdown: "markdown";
4763
4762
  json: "json";
4764
4763
  image: "image";
@@ -4766,6 +4765,7 @@ declare const cacheRecordingSchema: z.ZodObject<{
4766
4765
  pdf: "pdf";
4767
4766
  audio: "audio";
4768
4767
  video: "video";
4768
+ duration: "duration";
4769
4769
  percent: "percent";
4770
4770
  passFail: "passFail";
4771
4771
  stars: "stars";
@@ -4859,7 +4859,6 @@ declare const cacheEntrySchema: z.ZodObject<{
4859
4859
  number: "number";
4860
4860
  boolean: "boolean";
4861
4861
  file: "file";
4862
- duration: "duration";
4863
4862
  markdown: "markdown";
4864
4863
  json: "json";
4865
4864
  image: "image";
@@ -4867,6 +4866,7 @@ declare const cacheEntrySchema: z.ZodObject<{
4867
4866
  pdf: "pdf";
4868
4867
  audio: "audio";
4869
4868
  video: "video";
4869
+ duration: "duration";
4870
4870
  percent: "percent";
4871
4871
  passFail: "passFail";
4872
4872
  stars: "stars";
@@ -4977,7 +4977,6 @@ declare const cacheDebugKeyEntrySchema: z.ZodObject<{
4977
4977
  number: "number";
4978
4978
  boolean: "boolean";
4979
4979
  file: "file";
4980
- duration: "duration";
4981
4980
  markdown: "markdown";
4982
4981
  json: "json";
4983
4982
  image: "image";
@@ -4985,6 +4984,7 @@ declare const cacheDebugKeyEntrySchema: z.ZodObject<{
4985
4984
  pdf: "pdf";
4986
4985
  audio: "audio";
4987
4986
  video: "video";
4987
+ duration: "duration";
4988
4988
  percent: "percent";
4989
4989
  passFail: "passFail";
4990
4990
  stars: "stars";
@@ -5084,7 +5084,6 @@ declare const cacheEntryWithDebugKeySchema$1: z.ZodObject<{
5084
5084
  number: "number";
5085
5085
  boolean: "boolean";
5086
5086
  file: "file";
5087
- duration: "duration";
5088
5087
  markdown: "markdown";
5089
5088
  json: "json";
5090
5089
  image: "image";
@@ -5092,6 +5091,7 @@ declare const cacheEntryWithDebugKeySchema$1: z.ZodObject<{
5092
5091
  pdf: "pdf";
5093
5092
  audio: "audio";
5094
5093
  video: "video";
5094
+ duration: "duration";
5095
5095
  percent: "percent";
5096
5096
  passFail: "passFail";
5097
5097
  stars: "stars";
@@ -5193,7 +5193,6 @@ declare const cacheEntryWithDebugKeySchema$1: z.ZodObject<{
5193
5193
  number: "number";
5194
5194
  boolean: "boolean";
5195
5195
  file: "file";
5196
- duration: "duration";
5197
5196
  markdown: "markdown";
5198
5197
  json: "json";
5199
5198
  image: "image";
@@ -5201,6 +5200,7 @@ declare const cacheEntryWithDebugKeySchema$1: z.ZodObject<{
5201
5200
  pdf: "pdf";
5202
5201
  audio: "audio";
5203
5202
  video: "video";
5203
+ duration: "duration";
5204
5204
  percent: "percent";
5205
5205
  passFail: "passFail";
5206
5206
  stars: "stars";
@@ -5300,7 +5300,6 @@ declare const cacheFileSchema: z.ZodObject<{
5300
5300
  number: "number";
5301
5301
  boolean: "boolean";
5302
5302
  file: "file";
5303
- duration: "duration";
5304
5303
  markdown: "markdown";
5305
5304
  json: "json";
5306
5305
  image: "image";
@@ -5308,6 +5307,7 @@ declare const cacheFileSchema: z.ZodObject<{
5308
5307
  pdf: "pdf";
5309
5308
  audio: "audio";
5310
5309
  video: "video";
5310
+ duration: "duration";
5311
5311
  percent: "percent";
5312
5312
  passFail: "passFail";
5313
5313
  stars: "stars";
@@ -5417,7 +5417,6 @@ declare const cacheDebugKeyFileSchema: z.ZodObject<{
5417
5417
  number: "number";
5418
5418
  boolean: "boolean";
5419
5419
  file: "file";
5420
- duration: "duration";
5421
5420
  markdown: "markdown";
5422
5421
  json: "json";
5423
5422
  image: "image";
@@ -5425,6 +5424,7 @@ declare const cacheDebugKeyFileSchema: z.ZodObject<{
5425
5424
  pdf: "pdf";
5426
5425
  audio: "audio";
5427
5426
  video: "video";
5427
+ duration: "duration";
5428
5428
  percent: "percent";
5429
5429
  passFail: "passFail";
5430
5430
  stars: "stars";
package/dist/index.mjs CHANGED
@@ -1,4 +1,4 @@
1
1
  import { $ as setScopeCacheContext, A as repoFile, B as evalTime, C as evalTracer, D as deserializeCacheValue, E as deserializeCacheRecording, F as EvalRuntimeUsageError, Ft as getEvalRegistry, H as getEvalCaseInput, I as appendToEvalOutput, J as runInEvalRuntimeScope, K as mergeEvalOutput, M as readManualInputFile, N as evalExpect, O as serializeCacheRecording, P as EvalAssertionError, Q as setEvalOutput, R as evalAssert, S as evalSpan, T as hashCacheKeySync, U as incrementEvalOutput, V as getCurrentScope, W as isInEvalScope, X as runInExistingEvalScope, Y as runInEvalScope, at as extractApiCalls, b as buildTraceTree, ct as simulateTokenAllocation, et as startEvalBackgroundJob, it as extractCacheHits, j as manualInputFileValueSchema, k as serializeCacheValue, ot as extractLlmCalls, q as nextEvalId, rt as extractCacheEntries, st as simulateLlmCallCost, ut as getNestedAttribute, w as hashCacheKey, x as captureEvalSpanError, z as evalLog } from "./runExecution-Bq0Y3y_1.mjs";
2
- import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-CbePEEua.mjs";
3
- import { n as matchesEvalTags, t as defineEval } from "./src-CVM_FqPx.mjs";
2
+ import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-BSVUCUxr.mjs";
3
+ import { n as matchesEvalTags, t as defineEval } from "./src-D5vGo2iv.mjs";
4
4
  export { EvalAssertionError, EvalRuntimeUsageError, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob };
package/dist/runChild.mjs CHANGED
@@ -1,5 +1,5 @@
1
1
  import { At as evalStatsConfigSchema, L as configureEvalRunLogs, Mt as evalChartsConfigSchema, Nt as columnDefSchema, Tt as buildEvalKey, bt as runManifestSchema, jt as manualInputDescriptorSchema, kt as evalStatAggregateSchema, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, tt as createRunRequestSchema, v as createFsCacheStore, xt as runSummarySchema, y as getCacheRetentionOptions } from "./runExecution-Bq0Y3y_1.mjs";
2
- import { S as parseEvalDiscovery, m as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-BpwW0AmB.mjs";
2
+ import { C as parseEvalDiscovery, h as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-C7qQISz2.mjs";
3
3
  import { z } from "zod";
4
4
  import { readFile } from "node:fs/promises";
5
5
  import { relative } from "node:path";
@@ -2,7 +2,7 @@ import { Dt as caseDetailSchema, Et as getCaseRowCaseKey, It as runWithEvalRegis
2
2
  import { Result, resultify } from "t-result";
3
3
  import { readFile, readdir, rm, writeFile } from "node:fs/promises";
4
4
  import { dirname, join } from "node:path";
5
- import { existsSync } from "node:fs";
5
+ import { existsSync, readFileSync } from "node:fs";
6
6
  import { fileURLToPath } from "node:url";
7
7
  import { spawn } from "node:child_process";
8
8
  //#region ../runner/src/chartValidation.ts
@@ -670,7 +670,7 @@ async function recomputeEvalStatusesInRuns(params) {
670
670
  let changed = false;
671
671
  for (const caseRow of run.cases) {
672
672
  if (caseRow.evalKey !== params.evalKey) continue;
673
- const caseDetail = run.caseDetails.get(getCaseRowCaseKey(caseRow));
673
+ const caseDetail = params.getCaseDetail?.(run, caseRow) ?? run.caseDetails.get(getCaseRowCaseKey(caseRow));
674
674
  const nextStatus = recomputePersistedCaseStatus(caseRow, caseDetail, params.scoreThresholds);
675
675
  if (caseRow.status === nextStatus) continue;
676
676
  caseRow.status = nextStatus;
@@ -725,14 +725,22 @@ function nextShortIdFromSnapshots(snapshots) {
725
725
  }
726
726
  return maxNum + 1;
727
727
  }
728
- async function loadPersistedRunSnapshots(localStateDir) {
728
+ /**
729
+ * Load persisted run metadata from the local state directory.
730
+ *
731
+ * Case details are skipped by default so long-running app processes can keep
732
+ * run history in memory without retaining every trace payload. Pass
733
+ * `includeCaseDetails` only for narrow maintenance flows that need full
734
+ * details for every case.
735
+ */
736
+ async function loadPersistedRunSnapshots(localStateDir, options = {}) {
729
737
  const runsDir = join(localStateDir, "runs");
730
738
  const entriesResult = await resultify(() => readdir(runsDir, { withFileTypes: true }));
731
739
  if (entriesResult.error) return [];
732
740
  const snapshots = [];
733
741
  const runDirs = entriesResult.value.filter((entry) => entry.isDirectory()).map((entry) => join(runsDir, entry.name)).toSorted();
734
742
  for (const runDir of runDirs) {
735
- const snapshot = await loadPersistedRunSnapshot(runDir);
743
+ const snapshot = await loadPersistedRunSnapshot(runDir, options);
736
744
  if (!snapshot) continue;
737
745
  snapshots.push(snapshot);
738
746
  }
@@ -766,7 +774,14 @@ function getLatestRunInfos(params) {
766
774
  function toLastRunStatus$1(status) {
767
775
  return status === "pending" ? null : status;
768
776
  }
769
- async function loadPersistedRunSnapshot(runDir) {
777
+ /**
778
+ * Load one persisted run snapshot from disk.
779
+ *
780
+ * The returned snapshot includes manifest, summary, and case rows. Case
781
+ * details are loaded only when `includeCaseDetails` is true; otherwise callers
782
+ * should use `loadPersistedCaseDetail` for the specific case being inspected.
783
+ */
784
+ async function loadPersistedRunSnapshot(runDir, options = {}) {
770
785
  const manifest = await readParsedJsonFile(join(runDir, "run.json"), { safeParse: runManifestSchema.safeParse.bind(runManifestSchema) });
771
786
  if (!manifest) return null;
772
787
  const summary = await readParsedJsonFile(join(runDir, "summary.json"), { safeParse: runSummarySchema.safeParse.bind(runSummarySchema) });
@@ -776,9 +791,18 @@ async function loadPersistedRunSnapshot(runDir) {
776
791
  manifest,
777
792
  summary,
778
793
  cases: await readCaseRows(runDir),
779
- caseDetails: await readCaseDetails(runDir)
794
+ caseDetails: options.includeCaseDetails === true ? await readCaseDetails(runDir) : /* @__PURE__ */ new Map()
780
795
  };
781
796
  }
797
+ /**
798
+ * Load one persisted case detail by its artifact file id.
799
+ *
800
+ * Returns `null` when the file is missing, invalid JSON, or no longer matches
801
+ * the current case-detail schema.
802
+ */
803
+ function loadPersistedCaseDetail(runDir, fileId) {
804
+ return readParsedJsonFileSync(join(runDir, "case-details", `${encodeCaseDetailFileName(fileId)}.json`), { safeParse: caseDetailSchema.safeParse.bind(caseDetailSchema) });
805
+ }
782
806
  async function readParsedJsonFile(filePath, schema) {
783
807
  const fileResult = await resultify(() => readFile(filePath, "utf-8"));
784
808
  if (fileResult.error) return null;
@@ -788,6 +812,15 @@ async function readParsedJsonFile(filePath, schema) {
788
812
  if (!parsed.success) return null;
789
813
  return parsed.data;
790
814
  }
815
+ function readParsedJsonFileSync(filePath, schema) {
816
+ const fileResult = resultify(() => readFileSync(filePath, "utf-8"));
817
+ if (fileResult.error) return null;
818
+ const jsonResult = resultify(() => JSON.parse(fileResult.value));
819
+ if (jsonResult.error) return null;
820
+ const parsed = schema.safeParse(jsonResult.value);
821
+ if (!parsed.success) return null;
822
+ return parsed.data;
823
+ }
791
824
  async function readCaseRows(runDir) {
792
825
  const fileResult = await resultify(() => readFile(join(runDir, "cases.jsonl"), "utf-8"));
793
826
  if (fileResult.error) return [];
@@ -1660,4 +1693,4 @@ function toLastRunStatus(status) {
1660
1693
  return status === "pending" ? null : status;
1661
1694
  }
1662
1695
  //#endregion
1663
- export { validateCharts as C, parseEvalDiscovery as S, runTouchesEval as _, validateTagsFilters as a, deriveEvalFreshness as b, getLatestRunInfos as c, nextShortIdFromSnapshots as d, persistCaseDetail as f, recomputePersistedCaseStatus as g, recomputeEvalStatusesInRuns as h, resolveEvalTags as i, loadPersistedRunSnapshot as l, persistRunState as m, getTargetEvalKeys as n, generateRunId as o, deleteTemporaryRuns as p, getTargetEvals as r, getLastRunStatuses as s, executeRun as t, loadPersistedRunSnapshots as u, buildManualInputDescriptor as v, loadIsolatedEvalRegistry as x, parseManualInputValues as y };
1696
+ export { parseEvalDiscovery as C, loadIsolatedEvalRegistry as S, recomputePersistedCaseStatus as _, validateTagsFilters as a, parseManualInputValues as b, getLatestRunInfos as c, loadPersistedRunSnapshots as d, nextShortIdFromSnapshots as f, recomputeEvalStatusesInRuns as g, persistRunState as h, resolveEvalTags as i, loadPersistedCaseDetail as l, deleteTemporaryRuns as m, getTargetEvalKeys as n, generateRunId as o, persistCaseDetail as p, getTargetEvals as r, getLastRunStatuses as s, executeRun as t, loadPersistedRunSnapshot as u, runTouchesEval as v, validateCharts as w, deriveEvalFreshness as x, buildManualInputDescriptor as y };
@@ -1,2 +1,2 @@
1
- import { n as initRunner, t as getRunnerInstance } from "./runner-Kp0JqxrU.mjs";
1
+ import { n as initRunner, t as getRunnerInstance } from "./runner-DmkSq-QG.mjs";
2
2
  export { getRunnerInstance, initRunner };
@@ -1,5 +1,5 @@
1
- import { n as createRunner } from "./cli-CbePEEua.mjs";
2
- import "./src-CVM_FqPx.mjs";
1
+ import { n as createRunner } from "./cli-BSVUCUxr.mjs";
2
+ import "./src-D5vGo2iv.mjs";
3
3
  //#region ../../apps/server/src/runner.ts
4
4
  let runnerInstance = null;
5
5
  function getRunnerInstance({ loadEnv = true } = {}) {
@@ -1,5 +1,5 @@
1
1
  import { G as matchesEvalTags$1, Pt as defineEval$1 } from "./runExecution-Bq0Y3y_1.mjs";
2
- import "./cli-CbePEEua.mjs";
2
+ import "./cli-BSVUCUxr.mjs";
3
3
  //#region src/index.ts
4
4
  /** Register an eval definition with typed tag support. */
5
5
  function defineEval(definition) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ls-stack/agent-eval",
3
- "version": "0.60.2",
3
+ "version": "0.60.3",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "agent-evals": "./dist/bin.mjs"