@ls-stack/agent-eval 0.42.1 → 0.42.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,7 +25,7 @@
25
25
  href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600&family=JetBrains+Mono:wght@400;500&display=swap"
26
26
  rel="stylesheet"
27
27
  />
28
- <script type="module" crossorigin src="/assets/index-CANDLTsq.js"></script>
28
+ <script type="module" crossorigin src="/assets/index-DH0qYD76.js"></script>
29
29
  <link rel="stylesheet" crossorigin href="/assets/index-eFM9VIsz.css">
30
30
  </head>
31
31
  <body>
package/dist/bin.mjs CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env node
2
- import { t as runCli } from "./cli-DbVfkr9T.mjs";
2
+ import { t as runCli } from "./cli-BeJCJMQo.mjs";
3
3
  import { spawn } from "node:child_process";
4
4
  //#region src/bin.ts
5
5
  const moduleMocksFlag = "--experimental-test-module-mocks";
@@ -1,4 +1,4 @@
1
- import { A as validateCharts, At as buildEvalKey, C as deriveEvalFreshness, Ct as getEvalDisplayStatus, D as loadConfig, Dt as runSummarySchema, E as resolveEvalDefaultConfig, Mt as getCaseRowEvalKey, O as buildDeclaredColumnDefs, Ot as resolveApiCallsConfig, Rt as getEvalRegistry, S as parseManualInputValues, St as getEvalTitle, T as parseEvalDiscovery, Tt as matchesTagsFilter, _ as recomputePersistedCaseStatus, a as validateTagsFilters, b as resolveArtifactPath, bt as applyDerivedCallAttributes, c as getLastRunStatuses, d as loadPersistedRunSnapshots, f as nextShortIdFromSnapshots, g as recomputeEvalStatusesInRuns, h as persistRunState, i as resolveEvalTags, j as createFsCacheStore, jt as getCaseRowCaseKey, k as normalizeScoreDef, kt as resolveLlmCallsConfig, l as getLatestRunInfos, m as deleteTemporaryRuns, n as getTargetEvalKeys, o as stripTerminalControlCodes, p as persistCaseDetail, s as generateRunId, u as loadPersistedRunSnapshot, v as runTouchesEval, w as loadEvalModule, wt as deriveScopedSummaryFromCases, x as buildManualInputDescriptor, y as resolveTracePresentation } from "./runOrchestration-SPaHx-SC.mjs";
1
+ import { A as validateCharts, At as buildEvalKey, C as deriveEvalFreshness, Ct as getEvalDisplayStatus, D as loadConfig, Dt as runSummarySchema, E as resolveEvalDefaultConfig, Lt as getEvalRegistry, O as buildDeclaredColumnDefs, Ot as resolveApiCallsConfig, S as parseManualInputValues, St as getEvalTitle, T as parseEvalDiscovery, Tt as matchesTagsFilter, _ as recomputePersistedCaseStatus, a as validateTagsFilters, b as resolveArtifactPath, bt as applyDerivedCallAttributes, c as getLastRunStatuses, d as loadPersistedRunSnapshots, f as nextShortIdFromSnapshots, g as recomputeEvalStatusesInRuns, h as persistRunState, i as resolveEvalTags, j as createFsCacheStore, jt as getCaseRowCaseKey, k as normalizeScoreDef, kt as resolveLlmCallsConfig, l as getLatestRunInfos, m as deleteTemporaryRuns, n as getTargetEvalKeys, o as stripTerminalControlCodes, p as persistCaseDetail, s as generateRunId, u as loadPersistedRunSnapshot, v as runTouchesEval, w as loadEvalModule, wt as deriveScopedSummaryFromCases, x as buildManualInputDescriptor, y as resolveTracePresentation } from "./runOrchestration-OVUFw1fL.mjs";
2
2
  import { copyFile, mkdir, readFile, rm, writeFile } from "node:fs/promises";
3
3
  import { basename, dirname, extname, isAbsolute, join, relative, resolve, sep } from "node:path";
4
4
  import { createHash, randomUUID } from "node:crypto";
@@ -476,6 +476,11 @@ function validateManualInputsForRequest(params) {
476
476
  };
477
477
  }
478
478
  //#endregion
479
+ //#region ../runner/src/objectUtils.ts
480
+ function isRecord(value) {
481
+ return typeof value === "object" && value !== null && !Array.isArray(value);
482
+ }
483
+ //#endregion
479
484
  //#region ../runner/src/recalculateDerivedAttributes.ts
480
485
  function getCaseArtifactFileIdForExistingRun(runState, caseRow) {
481
486
  const caseKey = getCaseRowCaseKey(caseRow);
@@ -505,7 +510,7 @@ async function recalculateDerivedAttributesForCase(params) {
505
510
  });
506
511
  let nextTrace = spansWithDerivedAttributes;
507
512
  let nextTraceDisplay = caseDetail.traceDisplay;
508
- const evalMeta = params.evals.get(getCaseRowEvalKey(caseRow));
513
+ const evalMeta = caseRow.evalKey === void 0 ? void 0 : params.evals.get(caseRow.evalKey);
509
514
  const entry = evalMeta === void 0 ? void 0 : getEvalRegistry().get(evalMeta.id);
510
515
  if (entry !== void 0) entry.use((evalDef) => {
511
516
  const resolved = resolveTracePresentation(spansWithDerivedAttributes, params.traceDisplayConfig, evalDef.traceDisplay);
@@ -787,6 +792,36 @@ async function markRunTerminalFromChild(runState, event, managerContext) {
787
792
  managerContext.emitDiscoveryEvent();
788
793
  }
789
794
  //#endregion
795
+ //#region ../runner/src/runnerStateHydration.ts
796
+ /** Rehydrate a persisted run while preserving live listeners/process handles. */
797
+ function toRunnerRunState(snapshot, existing) {
798
+ return {
799
+ ...snapshot,
800
+ listeners: existing?.listeners ?? /* @__PURE__ */ new Set(),
801
+ childProcess: existing?.childProcess,
802
+ childTerminalReceived: existing?.childTerminalReceived ?? false
803
+ };
804
+ }
805
+ //#endregion
806
+ //#region ../runner/src/runTargetPersistence.ts
807
+ /** Build the exact-key run target persisted in run history. */
808
+ function buildPersistedRunTarget(params) {
809
+ const { target, evalKeys } = params;
810
+ if (target.mode === "all") return { mode: "all" };
811
+ const persistEvalKeys = (target.evalKeys?.length ?? 0) > 0 || (target.evalIds?.length ?? 0) > 0 || (target.files?.length ?? 0) > 0;
812
+ const keyedTarget = {
813
+ mode: target.mode,
814
+ evalKeys: persistEvalKeys && evalKeys.length > 0 ? evalKeys : void 0,
815
+ files: target.files,
816
+ tagsFilter: target.tagsFilter
817
+ };
818
+ if (target.mode === "caseIds") return {
819
+ ...keyedTarget,
820
+ caseIds: target.caseIds
821
+ };
822
+ return keyedTarget;
823
+ }
824
+ //#endregion
790
825
  //#region ../runner/src/watchRoots.ts
791
826
  const globMagicCharacters = new Set([
792
827
  "*",
@@ -823,9 +858,6 @@ function getWatchRootsForIncludePatterns(params) {
823
858
  }
824
859
  //#endregion
825
860
  //#region ../runner/src/runner.ts
826
- function isRecord(value) {
827
- return typeof value === "object" && value !== null && !Array.isArray(value);
828
- }
829
861
  /** Create an in-memory eval runner bound to the current workspace config. */
830
862
  function createRunner({ watchForChanges = true } = {}) {
831
863
  let config;
@@ -898,7 +930,6 @@ function createRunner({ watchForChanges = true } = {}) {
898
930
  const updatedRuns = await recomputeEvalStatusesInRuns({
899
931
  runs: runs.values(),
900
932
  evalKey: evalMeta.key,
901
- evalId: evalMeta.id,
902
933
  evalExists: evals.has(evalMeta.key),
903
934
  scoreThresholds,
904
935
  persistCaseDetail
@@ -930,7 +961,6 @@ function createRunner({ watchForChanges = true } = {}) {
930
961
  target: run.manifest.target,
931
962
  caseRows: run.cases,
932
963
  evalKey: evalMeta?.key ?? evalKey,
933
- evalId: evalMeta?.id,
934
964
  evalExists: evalMeta !== void 0
935
965
  })) continue;
936
966
  if (run.manifest.status === "running") continue;
@@ -959,7 +989,7 @@ function createRunner({ watchForChanges = true } = {}) {
959
989
  updated: false,
960
990
  reason: "Case not found"
961
991
  };
962
- const evalMeta = evals.get(getCaseRowEvalKey(caseRow));
992
+ const evalMeta = caseRow.evalKey === void 0 ? void 0 : evals.get(caseRow.evalKey);
963
993
  if (!evalMeta) return {
964
994
  updated: false,
965
995
  reason: "Eval not found"
@@ -1170,6 +1200,10 @@ function createRunner({ watchForChanges = true } = {}) {
1170
1200
  const cacheMode = request.cache?.mode ?? "use";
1171
1201
  const runDir = join(localStateDir, "runs", runId);
1172
1202
  const gitState = readGitWorktreeState(workspaceRoot);
1203
+ const targetEvalKeys = getTargetEvalKeys({
1204
+ request,
1205
+ sortedEvals: getSortedEvalMetas()
1206
+ });
1173
1207
  const manifest = {
1174
1208
  id: runId,
1175
1209
  shortId,
@@ -1179,7 +1213,10 @@ function createRunner({ watchForChanges = true } = {}) {
1179
1213
  endedAt: null,
1180
1214
  commitSha: gitState.commitSha,
1181
1215
  evalSourceFingerprints: {},
1182
- target: request.target,
1216
+ target: buildPersistedRunTarget({
1217
+ target: request.target,
1218
+ evalKeys: targetEvalKeys
1219
+ }),
1183
1220
  trials: request.trials,
1184
1221
  trialSelection: config.trialSelection ?? "lowestScore",
1185
1222
  cacheMode
@@ -1224,10 +1261,7 @@ function createRunner({ watchForChanges = true } = {}) {
1224
1261
  runs.set(runId, runState);
1225
1262
  setLatestRunInfoMap({
1226
1263
  latestRunInfoMap,
1227
- evalIds: getTargetEvalKeys({
1228
- request: materializedRequest,
1229
- sortedEvals: getSortedEvalMetas()
1230
- }),
1264
+ evalIds: targetEvalKeys,
1231
1265
  info: {
1232
1266
  status: "running",
1233
1267
  startedAt: now,
@@ -1478,14 +1512,6 @@ function createRunner({ watchForChanges = true } = {}) {
1478
1512
  nextShortIdNum = Math.max(nextShortIdNum, nextShortIdFromSnapshots(persistedRuns));
1479
1513
  if (changed) emitDiscoveryEvent();
1480
1514
  }
1481
- function toRunnerRunState(snapshot, existing) {
1482
- return {
1483
- ...snapshot,
1484
- listeners: existing?.listeners ?? /* @__PURE__ */ new Set(),
1485
- childProcess: existing?.childProcess,
1486
- childTerminalReceived: existing?.childTerminalReceived ?? false
1487
- };
1488
- }
1489
1515
  return runner;
1490
1516
  }
1491
1517
  //#endregion
@@ -2046,8 +2072,8 @@ async function commandApp(args) {
2046
2072
  const { serve } = await import("@hono/node-server");
2047
2073
  const bundledWebDist = resolve(currentDir, "apps/web/dist");
2048
2074
  if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
2049
- const appModule = await import("./app-mOYjX9zq.mjs");
2050
- const runnerModule = await import("./runner-BYOdLBle.mjs");
2075
+ const appModule = await import("./app-DPamBr5R.mjs");
2076
+ const runnerModule = await import("./runner-BJQq7cpd.mjs");
2051
2077
  if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
2052
2078
  if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
2053
2079
  await runnerModule.initRunner();