@ls-stack/agent-eval 0.14.0 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- import { A as getEvalDisplayStatus, F as runSummarySchema, J as resolveLlmCallsConfig, _ as loadEvalModule, a as loadPersistedRunSnapshot, b as normalizeScoreDef, c as persistCaseDetail, d as recomputePersistedCaseStatus, f as runTouchesEval, fn as getEvalRegistry, g as setLatestRunInfoMap, h as getTargetEvalIds, i as getLatestRunInfos, j as deriveScopedSummaryFromCases, k as getEvalTitle, l as persistRunState, m as buildEvalSummary, n as generateRunId, o as loadPersistedRunSnapshots, p as resolveArtifactPath, q as resolveApiCallsConfig, r as getLastRunStatuses, s as nextShortIdFromSnapshots, u as recomputeEvalStatusesInRuns, v as loadConfig, x as createFsCacheStore, y as buildDeclaredColumnDefs } from "./runOrchestration-H0pSUl3I.mjs";
1
+ import { A as getEvalTitle, I as runSummarySchema, J as resolveApiCallsConfig, M as deriveScopedSummaryFromCases, S as createFsCacheStore, Y as resolveLlmCallsConfig, _ as loadEvalModule, a as loadPersistedRunSnapshot, b as buildDeclaredColumnDefs, c as persistCaseDetail, d as recomputePersistedCaseStatus, f as runTouchesEval, g as setLatestRunInfoMap, h as getTargetEvalIds, hn as getEvalRegistry, i as getLatestRunInfos, j as getEvalDisplayStatus, l as persistRunState, m as buildEvalSummary, n as generateRunId, o as loadPersistedRunSnapshots, p as resolveArtifactPath, r as getLastRunStatuses, s as nextShortIdFromSnapshots, u as recomputeEvalStatusesInRuns, v as parseEvalMetas, x as normalizeScoreDef, y as loadConfig } from "./runOrchestration-BDyNrRQT.mjs";
2
2
  import { createHash } from "node:crypto";
3
3
  import { mkdir, readFile, rm, writeFile } from "node:fs/promises";
4
4
  import { dirname, join, relative, resolve } from "node:path";
@@ -82,98 +82,6 @@ function validateCharts(params) {
82
82
  };
83
83
  }
84
84
  //#endregion
85
- //#region ../runner/src/discovery.ts
86
- const evalIdMatchRegex = /\bid\s*:\s*['"]([^'"]+)['"]/;
87
- const evalTitleMatchRegex = /\btitle\s*:\s*['"]([^'"]+)['"]/;
88
- function parseEvalMetas(filePath, content) {
89
- const metas = [];
90
- let searchIndex = 0;
91
- while (searchIndex < content.length) {
92
- const defineEvalIndex = content.indexOf("defineEval", searchIndex);
93
- if (defineEvalIndex === -1) break;
94
- const extracted = extractDefineEvalObject(content, defineEvalIndex);
95
- if (!extracted) {
96
- searchIndex = defineEvalIndex + 10;
97
- continue;
98
- }
99
- const id = evalIdMatchRegex.exec(extracted.objectText)?.[1];
100
- if (id !== void 0) {
101
- const result = {
102
- filePath,
103
- id
104
- };
105
- const title = evalTitleMatchRegex.exec(extracted.objectText)?.[1];
106
- if (title !== void 0) result.title = title;
107
- metas.push(result);
108
- }
109
- searchIndex = extracted.nextIndex;
110
- }
111
- return metas;
112
- }
113
- function extractDefineEvalObject(content, defineEvalIndex) {
114
- const openParenIndex = content.indexOf("(", defineEvalIndex);
115
- if (openParenIndex === -1) return void 0;
116
- const objectStartIndex = content.indexOf("{", openParenIndex);
117
- if (objectStartIndex === -1) return void 0;
118
- let depth = 0;
119
- let quote;
120
- let inBlockComment = false;
121
- let inLineComment = false;
122
- let isEscaped = false;
123
- for (let index = objectStartIndex; index < content.length; index++) {
124
- const currentChar = content[index];
125
- const nextChar = content[index + 1];
126
- if (inLineComment) {
127
- if (currentChar === "\n") inLineComment = false;
128
- continue;
129
- }
130
- if (inBlockComment) {
131
- if (currentChar === "*" && nextChar === "/") {
132
- inBlockComment = false;
133
- index++;
134
- }
135
- continue;
136
- }
137
- if (quote) {
138
- if (isEscaped) {
139
- isEscaped = false;
140
- continue;
141
- }
142
- if (currentChar === "\\") {
143
- isEscaped = true;
144
- continue;
145
- }
146
- if (currentChar === quote) quote = void 0;
147
- continue;
148
- }
149
- if (currentChar === "/" && nextChar === "/") {
150
- inLineComment = true;
151
- index++;
152
- continue;
153
- }
154
- if (currentChar === "/" && nextChar === "*") {
155
- inBlockComment = true;
156
- index++;
157
- continue;
158
- }
159
- if (currentChar === "\"" || currentChar === "'" || currentChar === "`") {
160
- quote = currentChar;
161
- continue;
162
- }
163
- if (currentChar === "{") {
164
- depth++;
165
- continue;
166
- }
167
- if (currentChar === "}") {
168
- depth--;
169
- if (depth === 0) return {
170
- nextIndex: index + 1,
171
- objectText: content.slice(objectStartIndex, index + 1)
172
- };
173
- }
174
- }
175
- }
176
- //#endregion
177
85
  //#region ../runner/src/gitState.ts
178
86
  function runGitCommand(workspaceRoot, args) {
179
87
  const result = spawnSync("git", args, {
@@ -316,7 +224,10 @@ function upsertFinishedCase(runState, caseDetail, caseRow) {
316
224
  function applyChildEvalMetas(evals, childMetas) {
317
225
  for (const childMeta of childMetas) {
318
226
  const evalMeta = evals.get(childMeta.id);
319
- if (evalMeta === void 0) continue;
227
+ if (evalMeta === void 0) {
228
+ evals.set(childMeta.id, childMeta);
229
+ continue;
230
+ }
320
231
  evalMeta.columnDefs = childMeta.columnDefs;
321
232
  evalMeta.caseCount = childMeta.caseCount;
322
233
  evalMeta.stats = childMeta.stats;
@@ -728,8 +639,7 @@ function createRunner({ watchForChanges = true } = {}) {
728
639
  workspaceRoot,
729
640
  runDir,
730
641
  manifest,
731
- summary,
732
- evals: getSortedEvalMetas()
642
+ summary
733
643
  };
734
644
  await writeFile(join(runDir, "run-child-context.json"), JSON.stringify(childContext, null, 2));
735
645
  startRunChild({
@@ -820,6 +730,9 @@ function createRunner({ watchForChanges = true } = {}) {
820
730
  getWorkspaceRoot() {
821
731
  return workspaceRoot;
822
732
  },
733
+ getAllowCliRunAll() {
734
+ return config.allowCliRunAll === true;
735
+ },
823
736
  getLlmCallsConfig() {
824
737
  return llmCallsConfig;
825
738
  },
@@ -900,6 +813,7 @@ function parseArgs(argv) {
900
813
  const args = {
901
814
  command: "help",
902
815
  subcommand: void 0,
816
+ positionals: [],
903
817
  showHelp: false,
904
818
  helpTopic: "global",
905
819
  unknownHelpTarget: void 0,
@@ -933,6 +847,7 @@ function parseArgs(argv) {
933
847
  }
934
848
  for (let i = cursor; i < normalizedArgv.length; i++) {
935
849
  const arg = normalizedArgv[i];
850
+ if (arg === void 0) continue;
936
851
  const next = normalizedArgv[i + 1];
937
852
  if (arg === "--help" || arg === "-h") args.showHelp = true;
938
853
  else if (arg === "--eval" && next) {
@@ -955,6 +870,7 @@ function parseArgs(argv) {
955
870
  else if (arg === "--refresh-cache") args.cacheMode = "refresh";
956
871
  else if (arg === "--clear-cache") args.clearCache = true;
957
872
  else if (arg === "--all") args.all = true;
873
+ else if (!arg.startsWith("-")) args.positionals.push(arg);
958
874
  }
959
875
  return args;
960
876
  }
@@ -988,6 +904,9 @@ async function runCli(argv) {
988
904
  case "run":
989
905
  await commandRun(args);
990
906
  break;
907
+ case "show-runs":
908
+ await commandShowRuns(args);
909
+ break;
991
910
  case "cache":
992
911
  await commandCache(args);
993
912
  break;
@@ -997,7 +916,7 @@ async function runCli(argv) {
997
916
  }
998
917
  }
999
918
  function isCliCommand(command) {
1000
- return command === "app" || command === "list" || command === "run" || command === "cache" || command === "help";
919
+ return command === "app" || command === "list" || command === "run" || command === "show-runs" || command === "cache" || command === "help";
1001
920
  }
1002
921
  function loadWorkspaceEnv() {
1003
922
  const envPath = resolve(process.cwd(), ".env");
@@ -1059,8 +978,8 @@ async function commandApp(args) {
1059
978
  const { serve } = await import("@hono/node-server");
1060
979
  const bundledWebDist = resolve(currentDir, "apps/web/dist");
1061
980
  if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
1062
- const appModule = await import("./app-DKWm1oxc.mjs");
1063
- const runnerModule = await import("./runner-Dx1sMCbh.mjs");
981
+ const appModule = await import("./app-B8e-oWYc.mjs");
982
+ const runnerModule = await import("./runner-DABFPXkx.mjs");
1064
983
  if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
1065
984
  if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
1066
985
  await runnerModule.initRunner();
@@ -1098,6 +1017,11 @@ async function commandList(args_) {
1098
1017
  async function commandRun(args) {
1099
1018
  const runner = createRunner({ watchForChanges: false });
1100
1019
  await runner.init();
1020
+ if (args.evalIds.length === 0 && args.caseIds.length === 0 && !runner.getAllowCliRunAll()) {
1021
+ console.error("This workspace disables running all evals from the CLI. Pass --eval <id> or --case <id> to run a targeted subset.");
1022
+ process.exit(1);
1023
+ return;
1024
+ }
1101
1025
  if (args.clearCache) {
1102
1026
  await runner.clearCache();
1103
1027
  if (!args.json) {
@@ -1144,6 +1068,32 @@ async function commandRun(args) {
1144
1068
  }
1145
1069
  if (summary.failedCases > 0 || summary.errorCases > 0) process.exit(1);
1146
1070
  }
1071
+ async function commandShowRuns(args) {
1072
+ const runner = createRunner({ watchForChanges: false });
1073
+ await runner.init();
1074
+ const runRef = args.positionals[0];
1075
+ if (runRef !== void 0) {
1076
+ const run = resolveRunSnapshot(runner, runRef);
1077
+ if (!run) {
1078
+ printMissingRun(runRef);
1079
+ process.exit(1);
1080
+ return;
1081
+ }
1082
+ const index = buildRunFileIndex(runner.getWorkspaceRoot(), run);
1083
+ if (args.json) {
1084
+ printJson(index);
1085
+ return;
1086
+ }
1087
+ printRunFileIndex(index);
1088
+ return;
1089
+ }
1090
+ const indexes = getSortedRunSnapshots(runner).map((run) => buildRunFileIndex(runner.getWorkspaceRoot(), run));
1091
+ if (args.json) {
1092
+ printJson(indexes);
1093
+ return;
1094
+ }
1095
+ printRunFileIndexes(indexes);
1096
+ }
1147
1097
  async function commandCache(args) {
1148
1098
  const runner = createRunner({ watchForChanges: false });
1149
1099
  await runner.init();
@@ -1194,6 +1144,93 @@ async function commandCache(args) {
1194
1144
  }
1195
1145
  printHelp(args.helpTopic);
1196
1146
  }
1147
+ function getSortedRunSnapshots(runner) {
1148
+ return runner.getRuns().toSorted((a, b) => getRunStartTime(a) - getRunStartTime(b)).map((manifest) => runner.getRun(manifest.id)).filter((run) => run !== void 0);
1149
+ }
1150
+ function buildRunFileIndex(workspaceRoot, run) {
1151
+ const runDir = join(workspaceRoot, ".agent-evals", "runs", run.manifest.id);
1152
+ return {
1153
+ id: run.manifest.id,
1154
+ shortId: run.manifest.shortId,
1155
+ status: run.manifest.status,
1156
+ startedAt: run.manifest.startedAt,
1157
+ endedAt: run.manifest.endedAt,
1158
+ target: run.manifest.target,
1159
+ summary: run.summary,
1160
+ files: {
1161
+ dir: runDir,
1162
+ run: join(runDir, "run.json"),
1163
+ summary: join(runDir, "summary.json"),
1164
+ cases: join(runDir, "cases.jsonl"),
1165
+ caseDetailsDir: join(runDir, "case-details"),
1166
+ tracesDir: join(runDir, "traces")
1167
+ },
1168
+ cases: run.cases.map((caseRow) => {
1169
+ const fileName = `${encodeURIComponent(caseRow.caseId)}.json`;
1170
+ return {
1171
+ caseId: caseRow.caseId,
1172
+ evalId: caseRow.evalId,
1173
+ status: caseRow.status,
1174
+ files: {
1175
+ caseDetail: join(runDir, "case-details", fileName),
1176
+ trace: join(runDir, "traces", fileName)
1177
+ }
1178
+ };
1179
+ })
1180
+ };
1181
+ }
1182
+ function resolveRunSnapshot(runner, runRef) {
1183
+ const runs = getSortedRunSnapshots(runner);
1184
+ if (runs.length === 0) return void 0;
1185
+ if (runRef === void 0 || runRef === "latest") return runs[runs.length - 1];
1186
+ return runs.find((run) => run.manifest.id === runRef || run.manifest.shortId === runRef);
1187
+ }
1188
+ function printMissingRun(runRef) {
1189
+ console.error(runRef === void 0 ? "No saved runs found." : `No saved run found for "${runRef}".`);
1190
+ }
1191
+ function getRunStartTime(manifest) {
1192
+ const parsed = new Date(manifest.startedAt).getTime();
1193
+ return Number.isFinite(parsed) ? parsed : 0;
1194
+ }
1195
+ function printJson(value) {
1196
+ console.info(JSON.stringify(value, null, 2));
1197
+ }
1198
+ function printRunFileIndexes(indexes) {
1199
+ if (indexes.length === 0) {
1200
+ console.info("No saved runs.");
1201
+ return;
1202
+ }
1203
+ console.info(`Saved runs (${String(indexes.length)}):\n`);
1204
+ for (const index of indexes) {
1205
+ printRunFileIndex(index);
1206
+ console.info("");
1207
+ }
1208
+ }
1209
+ function printRunFileIndex(index) {
1210
+ console.info(`${index.shortId} (${index.id}) ${index.status} ${formatCaseCounts(index.summary)}`);
1211
+ console.info(` dir: ${index.files.dir}`);
1212
+ console.info(` run: ${index.files.run}`);
1213
+ console.info(` summary: ${index.files.summary}`);
1214
+ console.info(` cases: ${index.files.cases}`);
1215
+ console.info(` case details: ${index.files.caseDetailsDir}`);
1216
+ console.info(` traces: ${index.files.tracesDir}`);
1217
+ if (index.cases.length === 0) return;
1218
+ console.info(" case files:");
1219
+ for (const caseEntry of index.cases) {
1220
+ console.info(` ${caseEntry.caseId} [${caseEntry.evalId}] ${caseEntry.status}`);
1221
+ console.info(` detail: ${caseEntry.files.caseDetail}`);
1222
+ console.info(` trace: ${caseEntry.files.trace}`);
1223
+ }
1224
+ }
1225
+ function formatCaseCounts(summary) {
1226
+ return [
1227
+ `${String(summary.totalCases)} total`,
1228
+ `${String(summary.passedCases)} passed`,
1229
+ `${String(summary.failedCases)} failed`,
1230
+ `${String(summary.errorCases)} errors`,
1231
+ `${String(summary.cancelledCases)} cancelled`
1232
+ ].join(", ");
1233
+ }
1197
1234
  async function waitForRunCompletion(runner, runId) {
1198
1235
  return new Promise((resolvePromise) => {
1199
1236
  const check = () => {
@@ -1255,6 +1292,24 @@ Flags:
1255
1292
  --clear-cache Clear the cache before starting the run
1256
1293
  --no-env Disable automatic .env loading
1257
1294
  --help, -h Show this help
1295
+ `);
1296
+ return;
1297
+ }
1298
+ if (topic === "show-runs") {
1299
+ console.info(`
1300
+ agent-evals show-runs - Show saved run artifact file paths
1301
+
1302
+ Usage:
1303
+ agent-evals show-runs [<run-id>|latest] [--json]
1304
+
1305
+ Prints the run directory and stable artifact paths for run.json, summary.json,
1306
+ cases.jsonl, case detail JSON, and trace JSON files. Run ids can be full
1307
+ timestamp ids, short ids such as r0, or latest.
1308
+
1309
+ Flags:
1310
+ --json Output the file index as JSON
1311
+ --no-env Disable automatic .env loading
1312
+ --help, -h Show this help
1258
1313
  `);
1259
1314
  return;
1260
1315
  }
@@ -1283,6 +1338,7 @@ Commands:
1283
1338
  app Start server with UI
1284
1339
  list List discovered evals
1285
1340
  run Run evals
1341
+ show-runs [id|latest] Show saved run artifact file paths
1286
1342
  cache list List cached operation entries
1287
1343
  cache clear --eval <id> Clear cache entries for one eval
1288
1344
  cache clear --all Clear every cached entry
package/dist/index.d.mts CHANGED
@@ -1674,6 +1674,12 @@ type AgentEvalsConfig = {
1674
1674
  * considered outdated. Defaults to `14`.
1675
1675
  */
1676
1676
  staleAfterDays?: number;
1677
+ /**
1678
+ * Whether `agent-evals run` may run every discovered eval when no `--eval`
1679
+ * or `--case` filter is provided. Defaults to `false`; set to `true` to
1680
+ * opt into unfiltered CLI runs. Grouped runs in the UI are still allowed.
1681
+ */
1682
+ allowCliRunAll?: boolean;
1677
1683
  /**
1678
1684
  * Global trace attribute display config for the UI.
1679
1685
  *
@@ -1762,6 +1768,7 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
1762
1768
  }>>;
1763
1769
  concurrency: z$1.ZodOptional<z$1.ZodNumber>;
1764
1770
  staleAfterDays: z$1.ZodOptional<z$1.ZodNumber>;
1771
+ allowCliRunAll: z$1.ZodOptional<z$1.ZodBoolean>;
1765
1772
  traceDisplay: z$1.ZodOptional<z$1.ZodObject<{
1766
1773
  attributes: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
1767
1774
  key: z$1.ZodOptional<z$1.ZodString>;
@@ -2720,6 +2727,14 @@ type EvalCaseScope = {
2720
2727
  caseCacheRefs: TraceCacheRef[]; /** Background promises that should settle before the case scope finalizes. */
2721
2728
  pendingBackgroundJobs: Set<Promise<unknown>>;
2722
2729
  };
2730
+ /**
2731
+ * Runtime phase currently owned by the eval runner.
2732
+ *
2733
+ * `null` means the current async execution is outside an eval run. `env`
2734
+ * covers run-time module/environment loading, including top-level code in
2735
+ * modules imported while a run is being prepared.
2736
+ */
2737
+ type EvalRuntimeScope = 'env' | 'cases' | 'eval' | 'derive' | 'outputsSchema' | 'scorer';
2723
2738
  /** Error thrown when an eval assertion fails during case execution. */
2724
2739
  declare class EvalAssertionError extends Error {
2725
2740
  constructor(message: string);
@@ -2727,12 +2742,14 @@ declare class EvalAssertionError extends Error {
2727
2742
  /** Return the current eval scope for the active async context, if any. */
2728
2743
  declare function getCurrentScope(): EvalCaseScope | undefined;
2729
2744
  /**
2730
- * Return whether the current async execution is inside an active eval case.
2745
+ * Return the current eval runner phase for this async execution.
2731
2746
  *
2732
- * This is useful for shared workflow code that wants to branch on eval-only
2733
- * behavior without importing or inspecting the full eval scope.
2747
+ * Returns `null` outside eval-owned work, `env` while the runner is loading
2748
+ * eval modules for a run, `cases` while generating cases, `eval` while running
2749
+ * case `execute`, `derive` while deriving outputs from traces, `outputsSchema`
2750
+ * while validating outputs, and `scorer` while computing scores.
2734
2751
  */
2735
- declare function isInEvalScope(): boolean;
2752
+ declare function isInEvalScope(): EvalRuntimeScope | null;
2736
2753
  /**
2737
2754
  * Register background work that should settle before eval finalization.
2738
2755
  *
@@ -2762,8 +2779,18 @@ type RunInEvalScopeOptions = {
2762
2779
  /** Authored input for the active eval case. */input?: unknown; /** Stable prefix used when generating scoped IDs with `nextEvalId()`. */
2763
2780
  idPrefix?: string; /** Cache adapter + mode attached to the scope before `fn` runs. */
2764
2781
  cacheContext?: CacheScopeContext; /** Whether registered background jobs should settle before scope finalizes. */
2765
- waitForBackgroundJobs?: boolean;
2782
+ waitForBackgroundJobs?: boolean; /** Eval runner phase exposed through `isInEvalScope()`. Defaults to `eval`. */
2783
+ runtimeScope?: EvalRuntimeScope;
2766
2784
  };
2785
+ /** Execute a callback while `isInEvalScope()` reports a runner phase. */
2786
+ declare function runInEvalRuntimeScope<T>(runtimeScope: EvalRuntimeScope, fn: () => Promise<T> | T): Promise<T>;
2787
+ /**
2788
+ * Execute a callback with an existing case scope and a specific runner phase.
2789
+ *
2790
+ * Runner-internal helper for post-execute phases that still need access to the
2791
+ * completed case scope through output, trace, assertion, and input helpers.
2792
+ */
2793
+ declare function runInExistingEvalScope<T>(scope: EvalCaseScope, runtimeScope: EvalRuntimeScope, fn: () => Promise<T> | T): Promise<T>;
2767
2794
  /**
2768
2795
  * Execute a callback inside a fresh eval case scope and capture its outputs,
2769
2796
  * trace data, and terminal error state.
@@ -3058,6 +3085,13 @@ type EvalRunner = {
3058
3085
  subscribeDiscovery(listener: (event: SseEnvelope) => void): () => void; /** Stop background filesystem watchers owned by this runner instance. */
3059
3086
  close(): Promise<void>; /** Resolve the workspace root backing this runner instance. */
3060
3087
  getWorkspaceRoot(): string;
3088
+ /**
3089
+ * Return whether the current workspace allows an unfiltered CLI run.
3090
+ *
3091
+ * `false` means `agent-evals run` must include `--eval` or `--case`.
3092
+ * Programmatic/server runs are intentionally unaffected.
3093
+ */
3094
+ getAllowCliRunAll(): boolean;
3061
3095
  /**
3062
3096
  * Resolved LLM-calls config used by the UI to derive the LLM calls tab.
3063
3097
  *
@@ -3135,4 +3169,4 @@ declare function createRunner({
3135
3169
  */
3136
3170
  declare function runCli(argv: string[]): Promise<void>;
3137
3171
  //#endregion
3138
- export { type AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheAdapter, type CacheEntry, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CacheStatus, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type CreateRunRequest, DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, type DerivedStatus, EvalAssertionError, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveContext, type EvalDisplayStatus, type EvalExecuteContext, type EvalFreshnessStatus, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, type EvalTraceSpan, type EvalTraceSpanError, type EvalTraceSpanWarning, type EvalTraceTree, type FileRef, type JsonCell, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallsConfigInput, type NumberDisplayOptions, type RepoFileRef, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedLlmCallMetric, type ResolvedLlmCallsConfig, type RunArtifactRef, type RunInEvalScopeOptions, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheEntrySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalScope, runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
3172
+ export { type AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheAdapter, type CacheEntry, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CacheStatus, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type CreateRunRequest, DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, type DerivedStatus, EvalAssertionError, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveContext, type EvalDisplayStatus, type EvalExecuteContext, type EvalFreshnessStatus, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalRuntimeScope, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, type EvalTraceSpan, type EvalTraceSpanError, type EvalTraceSpanWarning, type EvalTraceTree, type FileRef, type JsonCell, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallsConfigInput, type NumberDisplayOptions, type RepoFileRef, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedLlmCallMetric, type ResolvedLlmCallsConfig, type RunArtifactRef, type RunInEvalScopeOptions, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheEntrySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
package/dist/index.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { $ as evalFreshnessStatusSchema, $t as evalAssert, A as getEvalDisplayStatus, At as traceDisplayInputConfigSchema, B as apiCallMetricPlacementSchema, Bt as jsonCellSchema, C as updateManualScoreRequestSchema, Ct as spanCacheOptionsSchema, D as extractLlmCalls, Dt as traceAttributeDisplayPlacementSchema, E as extractApiCalls, Et as traceAttributeDisplayInputSchema, F as runSummarySchema, Ft as cellValueSchema, G as llmCallMetricSchema, Gt as buildTraceTree, H as apiCallsConfigSchema, Ht as repoFileRefSchema, I as DEFAULT_API_CALLS_CONFIG, It as columnDefSchema, J as resolveLlmCallsConfig, Jt as evalTracer, K as llmCallsConfigSchema, Kt as captureEvalSpanError, L as DEFAULT_LLM_CALLS_CONFIG, Lt as columnFormatSchema, M as deriveStatusFromCaseRows, Mt as traceSpanKindSchema, N as deriveStatusFromChildStatuses, Nt as traceSpanSchema, O as getNestedAttribute, Ot as traceAttributeDisplaySchema, P as runManifestSchema, Pt as traceSpanWarningSchema, Q as caseRowSchema, Qt as appendToEvalOutput, R as agentEvalsConfigSchema, Rt as columnKindSchema, S as createRunRequestSchema, St as serializedCacheSpanSchema, T as extractCacheHits, Tt as traceAttributeDisplayFormatSchema, U as llmCallMetricFormatSchema, Ut as runArtifactRefSchema, V as apiCallMetricSchema, Vt as numberDisplayOptionsSchema, W as llmCallMetricPlacementSchema, Wt as z, X as assertionFailureSchema, Xt as hashCacheKeySync, Y as trialSelectionModeSchema, Yt as hashCacheKey, Z as caseDetailSchema, Zt as EvalAssertionError, _t as cacheModeSchema, an as nextEvalId, at as evalChartAggregateSchema, bt as cacheRecordingSchema, cn as setScopeCacheContext, ct as evalChartColorSchema, dn as defineEval, dt as evalChartTooltipExtraSchema, en as getCurrentScope, et as evalStatAggregateSchema, fn as getEvalRegistry, ft as evalChartTypeSchema, gt as cacheListItemSchema, ht as cacheFileSchema, in as mergeEvalOutput, it as scoreTraceSchema, j as deriveScopedSummaryFromCases, jt as traceSpanErrorSchema, k as getEvalTitle, kt as traceDisplayConfigSchema, ln as startEvalBackgroundJob, lt as evalChartConfigSchema, mt as cacheEntrySchema, nn as incrementEvalOutput, nt as evalStatsConfigSchema, on as runInEvalScope, ot as evalChartAxisSchema, pt as evalChartsConfigSchema, q as resolveApiCallsConfig, qt as evalSpan, rn as isInEvalScope, rt as evalSummarySchema, sn as setEvalOutput, st as evalChartBuiltinMetricSchema, tn as getEvalCaseInput, tt as evalStatItemSchema, un as repoFile, ut as evalChartMetricSchema, vt as cacheOperationTypeSchema, w as sseEnvelopeSchema, wt as traceCacheRefSchema, xt as cacheStatusSchema, yt as cacheRecordingOpSchema, z as apiCallMetricFormatSchema, zt as fileRefSchema } from "./runOrchestration-H0pSUl3I.mjs";
2
- import { n as createRunner, t as runCli } from "./cli-CMiCEQ-3.mjs";
3
- import "./src-BgGL7DDp.mjs";
4
- export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheEntrySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalScope, runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
1
+ import { $ as caseRowSchema, $t as appendToEvalOutput, A as getEvalTitle, At as traceDisplayConfigSchema, B as apiCallMetricFormatSchema, Bt as fileRefSchema, C as createRunRequestSchema, Ct as serializedCacheSpanSchema, D as extractApiCalls, Dt as traceAttributeDisplayInputSchema, E as extractCacheHits, Et as traceAttributeDisplayFormatSchema, F as runManifestSchema, Ft as traceSpanWarningSchema, G as llmCallMetricPlacementSchema, Gt as z, H as apiCallMetricSchema, Ht as numberDisplayOptionsSchema, I as runSummarySchema, It as cellValueSchema, J as resolveApiCallsConfig, Jt as evalSpan, K as llmCallMetricSchema, Kt as buildTraceTree, L as DEFAULT_API_CALLS_CONFIG, Lt as columnDefSchema, M as deriveScopedSummaryFromCases, Mt as traceSpanErrorSchema, N as deriveStatusFromCaseRows, Nt as traceSpanKindSchema, O as extractLlmCalls, Ot as traceAttributeDisplayPlacementSchema, P as deriveStatusFromChildStatuses, Pt as traceSpanSchema, Q as caseDetailSchema, Qt as EvalAssertionError, R as DEFAULT_LLM_CALLS_CONFIG, Rt as columnFormatSchema, St as cacheStatusSchema, T as sseEnvelopeSchema, Tt as traceCacheRefSchema, U as apiCallsConfigSchema, Ut as repoFileRefSchema, V as apiCallMetricPlacementSchema, Vt as jsonCellSchema, W as llmCallMetricFormatSchema, Wt as runArtifactRefSchema, X as trialSelectionModeSchema, Xt as hashCacheKey, Y as resolveLlmCallsConfig, Yt as evalTracer, Z as assertionFailureSchema, Zt as hashCacheKeySync, _t as cacheListItemSchema, an as mergeEvalOutput, at as scoreTraceSchema, bt as cacheRecordingOpSchema, cn as runInEvalScope, ct as evalChartBuiltinMetricSchema, dn as setScopeCacheContext, dt as evalChartMetricSchema, en as evalAssert, et as evalFreshnessStatusSchema, fn as startEvalBackgroundJob, ft as evalChartTooltipExtraSchema, gt as cacheFileSchema, hn as getEvalRegistry, ht as cacheEntrySchema, in as isInEvalScope, it as evalSummarySchema, j as getEvalDisplayStatus, jt as traceDisplayInputConfigSchema, k as getNestedAttribute, kt as traceAttributeDisplaySchema, ln as runInExistingEvalScope, lt as evalChartColorSchema, mn as defineEval, mt as evalChartsConfigSchema, nn as getEvalCaseInput, nt as evalStatItemSchema, on as nextEvalId, ot as evalChartAggregateSchema, pn as repoFile, pt as evalChartTypeSchema, q as llmCallsConfigSchema, qt as captureEvalSpanError, rn as incrementEvalOutput, rt as evalStatsConfigSchema, sn as runInEvalRuntimeScope, st as evalChartAxisSchema, tn as getCurrentScope, tt as evalStatAggregateSchema, un as setEvalOutput, ut as evalChartConfigSchema, vt as cacheModeSchema, w as updateManualScoreRequestSchema, wt as spanCacheOptionsSchema, xt as cacheRecordingSchema, yt as cacheOperationTypeSchema, z as agentEvalsConfigSchema, zt as columnKindSchema } from "./runOrchestration-BDyNrRQT.mjs";
2
+ import { n as createRunner, t as runCli } from "./cli-BmrtjQj_.mjs";
3
+ import "./src-CEAJYN_X.mjs";
4
+ export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheEntrySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
package/dist/runChild.mjs CHANGED
@@ -1,7 +1,9 @@
1
- import { F as runSummarySchema, It as columnDefSchema, P as runManifestSchema, S as createRunRequestSchema, nt as evalStatsConfigSchema, pt as evalChartsConfigSchema, t as executeRun, v as loadConfig, x as createFsCacheStore } from "./runOrchestration-H0pSUl3I.mjs";
1
+ import { C as createRunRequestSchema, F as runManifestSchema, I as runSummarySchema, Lt as columnDefSchema, S as createFsCacheStore, mt as evalChartsConfigSchema, rt as evalStatsConfigSchema, t as executeRun, v as parseEvalMetas, y as loadConfig } from "./runOrchestration-BDyNrRQT.mjs";
2
2
  import { createHash } from "node:crypto";
3
3
  import { readFile } from "node:fs/promises";
4
+ import { relative } from "node:path";
4
5
  import { z } from "zod/v4";
6
+ import { glob } from "glob";
5
7
  //#region ../runner/src/runChild.ts
6
8
  const evalMetaSchema = z.object({
7
9
  id: z.string(),
@@ -20,7 +22,7 @@ const runChildContextSchema = z.object({
20
22
  runDir: z.string(),
21
23
  manifest: runManifestSchema,
22
24
  summary: runSummarySchema,
23
- evals: z.array(evalMetaSchema)
25
+ evals: z.array(evalMetaSchema).optional()
24
26
  });
25
27
  function sendMessage(message) {
26
28
  if (process.send === void 0) return;
@@ -37,6 +39,38 @@ function getTargetEvals(params) {
37
39
  if (params.request.target.evalIds && params.request.target.evalIds.length > 0) return params.request.target.evalIds.map((id) => params.evals.get(id)).filter((entry) => entry !== void 0);
38
40
  return [...params.evals.values()].toSorted((a, b) => a.filePath.localeCompare(b.filePath));
39
41
  }
42
+ function toWorkspaceRelativePath(params) {
43
+ return relative(params.workspaceRoot, params.filePath).replaceAll("\\", "/");
44
+ }
45
+ async function discoverRunEvals(params) {
46
+ const discovered = [];
47
+ for (const pattern of params.config.include) {
48
+ const files = await glob(pattern, {
49
+ cwd: params.workspaceRoot,
50
+ absolute: true
51
+ });
52
+ discovered.push(...files);
53
+ }
54
+ const evals = /* @__PURE__ */ new Map();
55
+ for (const filePath of discovered) {
56
+ const source = await readFile(filePath, "utf-8");
57
+ const sourceFingerprint = getSourceFingerprint(source);
58
+ const metas = parseEvalMetas(filePath, source);
59
+ for (const meta of metas) evals.set(meta.id, {
60
+ id: meta.id,
61
+ title: meta.title,
62
+ filePath: toWorkspaceRelativePath({
63
+ filePath: meta.filePath,
64
+ workspaceRoot: params.workspaceRoot
65
+ }),
66
+ sourceFilePath: meta.filePath,
67
+ sourceFingerprint,
68
+ columnDefs: [],
69
+ caseCount: null
70
+ });
71
+ }
72
+ return [...evals.values()].toSorted((a, b) => a.filePath.localeCompare(b.filePath));
73
+ }
40
74
  async function readContext(contextPath) {
41
75
  if (contextPath === void 0) throw new Error("Missing run child context path");
42
76
  return runChildContextSchema.parse(JSON.parse(await readFile(contextPath, "utf-8")));
@@ -54,7 +88,11 @@ async function main() {
54
88
  maxEntriesPerNamespace: config.cache?.maxEntriesPerNamespace ?? config.cache?.maxEntriesPerEval,
55
89
  maxEntriesByNamespace: config.cache?.maxEntriesByNamespace
56
90
  });
57
- const evals = new Map(context.evals.map((evalMeta) => [evalMeta.id, evalMeta]));
91
+ const evalMetas = await discoverRunEvals({
92
+ config,
93
+ workspaceRoot: context.workspaceRoot
94
+ });
95
+ const evals = new Map(evalMetas.map((evalMeta) => [evalMeta.id, evalMeta]));
58
96
  const lastRunStatusMap = /* @__PURE__ */ new Map();
59
97
  const latestRunInfoMap = /* @__PURE__ */ new Map();
60
98
  await executeRun({