@ls-stack/agent-eval 0.60.1 → 0.60.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
- import { Et as getCaseRowCaseKey, Ot as caseRowSchema, dt as getEvalTitle, nt as updateManualScoreRequestSchema, rt as extractCacheEntries, tt as createRunRequestSchema } from "./runExecution-CjWJUUZ5.mjs";
2
- import { o as stageManualInputFile } from "./cli-CVBSlTD8.mjs";
3
- import "./src-DjOTPnDz.mjs";
4
- import { t as getRunnerInstance } from "./runner-DzDRasWV.mjs";
1
+ import { Et as getCaseRowCaseKey, Ot as caseRowSchema, dt as getEvalTitle, nt as updateManualScoreRequestSchema, rt as extractCacheEntries, tt as createRunRequestSchema } from "./runExecution-Bq0Y3y_1.mjs";
2
+ import { o as stageManualInputFile } from "./cli-BSVUCUxr.mjs";
3
+ import "./src-D5vGo2iv.mjs";
4
+ import { t as getRunnerInstance } from "./runner-DmkSq-QG.mjs";
5
5
  import { z } from "zod";
6
6
  import { resultify } from "t-result";
7
7
  import { readFile } from "node:fs/promises";
package/dist/bin.mjs CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env node
2
- import { t as runCli } from "./cli-CVBSlTD8.mjs";
2
+ import { t as runCli } from "./cli-BSVUCUxr.mjs";
3
3
  import { spawn } from "node:child_process";
4
4
  //#region src/bin.ts
5
5
  const moduleMocksFlag = "--experimental-test-module-mocks";
@@ -1,4 +1,4 @@
1
- import { Ct as resolveLlmCallsConfig, It as runWithEvalRegistry, J as runInEvalRuntimeScope, L as configureEvalRunLogs, St as resolveApiCallsConfig, _ as createBufferedCacheStore, a as isCaseChildParentMessage, d as loadEvalModule, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, r as runCase, v as createFsCacheStore, y as getCacheRetentionOptions } from "./runExecution-CjWJUUZ5.mjs";
1
+ import { Ct as resolveLlmCallsConfig, It as runWithEvalRegistry, J as runInEvalRuntimeScope, L as configureEvalRunLogs, St as resolveApiCallsConfig, _ as createBufferedCacheStore, a as isCaseChildParentMessage, d as loadEvalModule, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, r as runCase, v as createFsCacheStore, y as getCacheRetentionOptions } from "./runExecution-Bq0Y3y_1.mjs";
2
2
  //#region ../runner/src/caseChild.ts
3
3
  let fatalErrorReported = false;
4
4
  let disconnectExpected = false;
@@ -1,5 +1,5 @@
1
- import { Ct as resolveLlmCallsConfig, Et as getCaseRowCaseKey, Ft as getEvalRegistry, Ot as caseRowSchema, St as resolveApiCallsConfig, Tt as buildEvalKey, _t as matchesTagsFilter, c as resolveArtifactPath, dt as getEvalTitle, f as resolveEvalDefaultConfig, ft as getEvalDisplayStatus, h as normalizeScoreDef, lt as applyDerivedCallAttributes, m as buildDeclaredColumnDefs, o as stripTerminalControlCodes, p as loadConfig, pt as deriveScopedSummaryFromCases, s as resolveTracePresentation, v as createFsCacheStore, xt as runSummarySchema, y as getCacheRetentionOptions } from "./runExecution-CjWJUUZ5.mjs";
2
- import { C as validateCharts, S as parseEvalDiscovery, _ as runTouchesEval, a as validateTagsFilters, b as deriveEvalFreshness, c as getLatestRunInfos, d as nextShortIdFromSnapshots, f as persistCaseDetail, g as recomputePersistedCaseStatus, h as recomputeEvalStatusesInRuns, i as resolveEvalTags, l as loadPersistedRunSnapshot, m as persistRunState, n as getTargetEvalKeys, o as generateRunId, p as deleteTemporaryRuns, s as getLastRunStatuses, u as loadPersistedRunSnapshots, v as buildManualInputDescriptor, x as loadIsolatedEvalRegistry, y as parseManualInputValues } from "./runOrchestration-DE2TFAS6.mjs";
1
+ import { Ct as resolveLlmCallsConfig, Et as getCaseRowCaseKey, Ft as getEvalRegistry, Ot as caseRowSchema, St as resolveApiCallsConfig, Tt as buildEvalKey, _t as matchesTagsFilter, c as resolveArtifactPath, dt as getEvalTitle, f as resolveEvalDefaultConfig, ft as getEvalDisplayStatus, h as normalizeScoreDef, lt as applyDerivedCallAttributes, m as buildDeclaredColumnDefs, o as stripTerminalControlCodes, p as loadConfig, pt as deriveScopedSummaryFromCases, s as resolveTracePresentation, v as createFsCacheStore, xt as runSummarySchema, y as getCacheRetentionOptions } from "./runExecution-Bq0Y3y_1.mjs";
2
+ import { C as parseEvalDiscovery, S as loadIsolatedEvalRegistry, _ as recomputePersistedCaseStatus, a as validateTagsFilters, b as parseManualInputValues, c as getLatestRunInfos, d as loadPersistedRunSnapshots, f as nextShortIdFromSnapshots, g as recomputeEvalStatusesInRuns, h as persistRunState, i as resolveEvalTags, l as loadPersistedCaseDetail, m as deleteTemporaryRuns, n as getTargetEvalKeys, o as generateRunId, p as persistCaseDetail, s as getLastRunStatuses, u as loadPersistedRunSnapshot, v as runTouchesEval, w as validateCharts, x as deriveEvalFreshness, y as buildManualInputDescriptor } from "./runOrchestration-C7qQISz2.mjs";
3
3
  import { parseEnv } from "node:util";
4
4
  import { resultify } from "t-result";
5
5
  import { copyFile, mkdir, readFile, rm, writeFile } from "node:fs/promises";
@@ -28,10 +28,14 @@ function resolveCaseDetailLookup(run, caseId) {
28
28
  const caseDetail = run.caseDetails.get(lookupId);
29
29
  if (caseDetail) return caseDetail;
30
30
  }
31
- const matchingCaseRow = run.cases.find((caseRow) => lookupIds.has(getCaseRowCaseKey(caseRow)) || lookupIds.has(caseRow.caseId));
31
+ const matchingCaseRow = resolveCaseRowForCaseDetailLookup(run, caseId);
32
32
  if (matchingCaseRow === void 0) return void 0;
33
33
  return run.caseDetails.get(getCaseRowCaseKey(matchingCaseRow));
34
34
  }
35
/**
 * Locates the case row in `run.cases` that a requested case id refers to.
 *
 * The requested id is first expanded with `getCaseLookupIds`; a row matches
 * when either its case key (`getCaseRowCaseKey(row)`) or its `caseId` is one
 * of the expanded ids.
 *
 * @param run - Run state exposing a `cases` array.
 * @param caseId - Identifier to resolve.
 * @returns The first matching row, or `undefined` when no row matches.
 */
function resolveCaseRowForCaseDetailLookup(run, caseId) {
  const candidateIds = new Set(getCaseLookupIds(caseId));
  const isRequestedRow = (row) => {
    if (candidateIds.has(getCaseRowCaseKey(row))) return true;
    return candidateIds.has(row.caseId);
  };
  return run.cases.find(isRequestedRow);
}
35
39
  //#endregion
36
40
  //#region ../runner/src/configReload.ts
37
41
  /** Coordinates idle-only reloads for workspace config and `.env` in app mode. */
@@ -823,7 +827,7 @@ async function markRunTerminalFromChild(runState, event, managerContext) {
823
827
  runState.manifest = snapshot.manifest;
824
828
  runState.summary = snapshot.summary;
825
829
  runState.cases = snapshot.cases;
826
- runState.caseDetails = snapshot.caseDetails;
830
+ runState.caseDetails = /* @__PURE__ */ new Map();
827
831
  } else if (event.type === "run.finished") {
828
832
  runState.manifest.status = "completed";
829
833
  runState.manifest.endedAt = (/* @__PURE__ */ new Date()).toISOString();
@@ -995,6 +999,26 @@ function createRunner({ watchForChanges = true, loadEnv = true } = {}) {
995
999
  if (typeof config.concurrency !== "number" || !Number.isFinite(config.concurrency)) return 1;
996
1000
  return Math.max(1, Math.floor(config.concurrency));
997
1001
  }
1002
/**
 * Chooses the identifier passed to `loadPersistedCaseDetail` for a case row.
 *
 * The row's `caseId` is used unless another row in `run.cases` has the same
 * `caseId` with a different case key; in that situation the row's case key is
 * returned instead so the two rows do not resolve to the same identifier.
 *
 * @param run - Run state exposing a `cases` array.
 * @param caseRow - Row whose detail identifier is needed.
 * @returns Either `caseRow.caseId` or the row's case key.
 */
function getCaseDetailFileId(run, caseRow) {
  const ownKey = getCaseRowCaseKey(caseRow);
  const caseIdIsShared = run.cases.some((other) => {
    if (other.caseId !== caseRow.caseId) return false;
    return getCaseRowCaseKey(other) !== ownKey;
  });
  if (caseIdIsShared) return ownKey;
  return caseRow.caseId;
}
1006
/**
 * Returns the case detail for a row, loading it through
 * `loadPersistedCaseDetail` when it is not already in `run.caseDetails`.
 *
 * The loaded detail is stored in `run.caseDetails` under the row's case key,
 * i.e. the same key this function reads with. Writing under the detail's own
 * `caseKey`/`caseId` could differ from the row key (for example when the
 * persisted detail carries no `caseKey`), which would make every later call
 * miss the map and load a fresh object again.
 *
 * @param run - Run state exposing `runDir`, `cases` and a `caseDetails` Map.
 * @param caseRow - Row whose detail is requested.
 * @returns The cached or freshly loaded detail, or `undefined` when nothing
 *   could be loaded.
 */
function hydrateCaseDetailForRow(run, caseRow) {
  const caseKey = getCaseRowCaseKey(caseRow);
  const cached = run.caseDetails.get(caseKey);
  if (cached !== void 0) return cached;
  const detail = loadPersistedCaseDetail(run.runDir, getCaseDetailFileId(run, caseRow));
  // Treat both null and undefined as "nothing persisted" instead of
  // dereferencing a missing value below.
  if (detail == null) return void 0;
  run.caseDetails.set(caseKey, detail);
  return detail;
}
1015
/**
 * Resolves a case detail by case id, falling back to loading it for the
 * matching row.
 *
 * `resolveCaseDetailLookup` is consulted first; only when it yields
 * `undefined` is the row looked up with `resolveCaseRowForCaseDetailLookup`
 * and handed to `hydrateCaseDetailForRow`.
 *
 * @param run - Run state passed through to the lookup helpers.
 * @param caseId - Identifier of the requested case.
 * @returns The resolved detail, or `undefined` when no detail or row is found.
 */
function hydrateCaseDetailForLookup(run, caseId) {
  const inMemory = resolveCaseDetailLookup(run, caseId);
  if (inMemory !== void 0) {
    return inMemory;
  }
  const row = resolveCaseRowForCaseDetailLookup(run, caseId);
  return row === void 0 ? void 0 : hydrateCaseDetailForRow(run, row);
}
998
1022
  function nextRegistryLoadIsolationKey(prefix, filePath) {
999
1023
  registryLoadCounter++;
1000
1024
  return `${prefix}:${String(registryLoadCounter)}:${filePath}`;
@@ -1041,6 +1065,7 @@ function createRunner({ watchForChanges = true, loadEnv = true } = {}) {
1041
1065
  evalKey: evalMeta.key,
1042
1066
  evalExists: evals.has(evalMeta.key),
1043
1067
  scoreThresholds,
1068
+ getCaseDetail: hydrateCaseDetailForRow,
1044
1069
  persistCaseDetail
1045
1070
  });
1046
1071
  emitDiscoveryEvent();
@@ -1052,6 +1077,7 @@ function createRunner({ watchForChanges = true, loadEnv = true } = {}) {
1052
1077
  updated: false,
1053
1078
  reason: "Run not found"
1054
1079
  };
1080
+ hydrateCaseDetailForLookup(run, caseId);
1055
1081
  return recalculateDerivedAttributesForCase({
1056
1082
  run,
1057
1083
  caseId,
@@ -1107,7 +1133,7 @@ function createRunner({ watchForChanges = true, loadEnv = true } = {}) {
1107
1133
  updated: false,
1108
1134
  reason: "Manual score not found"
1109
1135
  };
1110
- const caseDetail = run.caseDetails.get(getCaseRowCaseKey(caseRow));
1136
+ const caseDetail = hydrateCaseDetailForRow(run, caseRow);
1111
1137
  if (!caseDetail) return {
1112
1138
  updated: false,
1113
1139
  reason: "Case detail not found"
@@ -1480,7 +1506,7 @@ function createRunner({ watchForChanges = true, loadEnv = true } = {}) {
1480
1506
  getCaseDetail(runId, caseId) {
1481
1507
  const run = runs.get(runId);
1482
1508
  if (!run) return void 0;
1483
- return resolveCaseDetailLookup(run, caseId);
1509
+ return hydrateCaseDetailForLookup(run, caseId);
1484
1510
  },
1485
1511
  subscribe(runId, listener) {
1486
1512
  const run = runs.get(runId);
@@ -2232,8 +2258,8 @@ async function commandApp(args) {
2232
2258
  const { serve } = await import("@hono/node-server");
2233
2259
  const bundledWebDist = resolve(currentDir, "apps/web/dist");
2234
2260
  if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
2235
- const appModule = await import("./app-CxKEVlng.mjs");
2236
- const runnerModule = await import("./runner-Cu1CQPTB.mjs");
2261
+ const appModule = await import("./app-l3ynaNsb.mjs");
2262
+ const runnerModule = await import("./runner-C9xNJHt3.mjs");
2237
2263
  if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
2238
2264
  if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
2239
2265
  await runnerModule.initRunner({ loadEnv: args.loadEnv });