@ls-stack/agent-eval 0.58.1 → 0.58.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
- import { et as createRunRequestSchema, nt as extractCacheEntries, tt as updateManualScoreRequestSchema, ut as getEvalTitle } from "./runExecution-d42Lm0i5.mjs";
2
- import { o as stageManualInputFile } from "./cli-_g2qOMK6.mjs";
3
- import "./src-CdZsOn6y.mjs";
4
- import { t as getRunnerInstance } from "./runner-MSr8sAWm.mjs";
1
+ import { Et as getCaseRowCaseKey, Ot as caseRowSchema, dt as getEvalTitle, nt as updateManualScoreRequestSchema, rt as extractCacheEntries, tt as createRunRequestSchema } from "./runExecution-pHJ0_TzH.mjs";
2
+ import { o as stageManualInputFile } from "./cli-HBwXIJsg.mjs";
3
+ import "./src-AeXGBJ26.mjs";
4
+ import { t as getRunnerInstance } from "./runner-D_pz2NON.mjs";
5
5
  import { z } from "zod/v4";
6
6
  import { readFile } from "node:fs/promises";
7
7
  import { dirname, isAbsolute, join, relative, resolve, sep } from "node:path";
@@ -231,9 +231,24 @@ function logStartedAppRunEvals(params) {
231
231
  const targetEvals = getRunTargetEvalSummaries(params.evals, params.target);
232
232
  if (targetEvals.length === 0) return;
233
233
  const label = targetEvals.length === 1 ? "eval" : "evals";
234
- console.info(`[agent-evals] Starting app run ${params.shortId} (${params.runId}) with ${String(targetEvals.length)} ${label}:`);
234
+ console.info(`[agent-evals] Queued app run ${params.shortId} (${params.runId}) with ${String(targetEvals.length)} ${label}; concurrency ${String(params.concurrency)}:`);
235
235
  for (const ev of targetEvals) console.info(` - ${getEvalTitle(ev)} (${ev.filePath}#${ev.id})`);
236
236
  }
237
/**
 * Builds a human-readable label for the eval a case belongs to.
 *
 * The eval summary is looked up by `evalKey` first (when one is given) and
 * falls back to a lookup by `evalId`.
 *
 * @param {Map<any, any>} evalsByKey - Eval summaries indexed by their key.
 * @param {Map<any, any>} evalsById - Eval summaries indexed by their id.
 * @param {*} evalKey - Key of the eval; `undefined` skips the key lookup.
 * @param {*} evalId - Id of the eval; returned as-is when no summary is found.
 * @returns {*} `"<title> (<filePath>#<id>)"` when a summary is found,
 *   otherwise `evalId` unchanged.
 */
function getEvalSummaryLabel(evalsByKey, evalsById, evalKey, evalId) {
  const matchByKey = evalKey === undefined ? undefined : evalsByKey.get(evalKey);
  // A nullish key match falls through to the id index.
  const match = matchByKey ?? evalsById.get(evalId);
  if (match === undefined) {
    return evalId;
  }
  const title = getEvalTitle(match);
  return `${title} (${match.filePath}#${match.id})`;
}
242
/**
 * Builds the label used for a case in run logs.
 *
 * @param {*} caseId - Id of the case.
 * @param {*} caseKey - Key of the case; may be `undefined`.
 * @returns {*} `caseId` alone when `caseKey` is `undefined` or strictly equal
 *   to `caseId`; otherwise `"<caseId> [<caseKey>]"`.
 */
function getRunCaseLabel(caseId, caseKey) {
  // Only append the key when it adds information beyond the id.
  const keyAddsDetail = caseKey !== undefined && caseKey !== caseId;
  if (!keyAddsDetail) {
    return caseId;
  }
  return `${caseId} [${caseKey}]`;
}
245
/**
 * Formats the log line announcing that a case of an app run has started.
 *
 * @param {object} params
 * @param {*} params.shortId - Short id of the run.
 * @param {*} params.activeCount - Value shown before the slash.
 * @param {*} params.concurrency - Value shown after the slash.
 * @param {*} params.evalLabel - Label of the eval the case belongs to.
 * @param {*} params.caseLabel - Label of the case.
 * @returns {string} `"[agent-evals] Run <shortId> started <active>/<concurrency>: <evalLabel> / <caseLabel>"`.
 */
function formatCaseStartedLog(params) {
  const runPrefix = `[agent-evals] Run ${params.shortId} started `;
  // String() keeps the original explicit coercion of both values.
  const slots = `${String(params.activeCount)}/${String(params.concurrency)}: `;
  const target = `${params.evalLabel} / ${params.caseLabel}`;
  return `${runPrefix}${slots}${target}`;
}
237
252
  function formatDurationMs(durationMs) {
238
253
  if (durationMs === null) return "";
239
254
  if (durationMs < 1e3) return ` in ${String(durationMs)}ms`;
@@ -247,8 +262,34 @@ function isTerminalRunEvent(eventType) {
247
262
  return eventType === "run.finished" || eventType === "run.error" || eventType === "run.cancelled";
248
263
  }
249
264
  function subscribeToAppRunResultLog(params) {
265
+ const evalsByKey = new Map(params.evals.map((ev) => [ev.key, ev]));
266
+ const evalsById = new Map(params.evals.map((ev) => [ev.id, ev]));
267
+ const activeCases = /* @__PURE__ */ new Set();
268
+ const loggedStarts = /* @__PURE__ */ new Set();
250
269
  let unsubscribe;
251
270
  unsubscribe = params.runner.subscribe(params.runId, (event) => {
271
+ if (event.type === "case.started") {
272
+ const parsed = caseRowSchema.safeParse(event.payload);
273
+ if (!parsed.success) return;
274
+ const caseRow = parsed.data;
275
+ const caseKey = `${getCaseRowCaseKey(caseRow)}:${String(caseRow.trial)}`;
276
+ activeCases.add(caseKey);
277
+ if (loggedStarts.has(caseKey)) return;
278
+ loggedStarts.add(caseKey);
279
+ console.info(formatCaseStartedLog({
280
+ shortId: params.shortId,
281
+ activeCount: activeCases.size,
282
+ concurrency: params.concurrency,
283
+ evalLabel: getEvalSummaryLabel(evalsByKey, evalsById, caseRow.evalKey, caseRow.evalId),
284
+ caseLabel: getRunCaseLabel(caseRow.caseId, caseRow.caseKey)
285
+ }));
286
+ return;
287
+ }
288
+ if (event.type === "case.finished") {
289
+ const parsed = caseRowSchema.safeParse(event.payload);
290
+ if (parsed.success) activeCases.delete(`${getCaseRowCaseKey(parsed.data)}:${String(parsed.data.trial)}`);
291
+ return;
292
+ }
252
293
  if (!isTerminalRunEvent(event.type)) return;
253
294
  unsubscribe?.();
254
295
  unsubscribe = void 0;
@@ -301,6 +342,7 @@ const runsRoutes = new Hono().get("/", (c) => {
301
342
  failures: validation.failures
302
343
  }, 400);
303
344
  const evalsForTerminalLog = runner.getEvals();
345
+ const concurrency = runner.getConfiguredConcurrency();
304
346
  const runResult = await resultify(() => runner.startRun(body));
305
347
  if (runResult.error) return c.json({
306
348
  error: "Failed to start run",
@@ -310,12 +352,15 @@ const runsRoutes = new Hono().get("/", (c) => {
310
352
  runId: runResult.value.manifest.id,
311
353
  shortId: runResult.value.manifest.shortId,
312
354
  evals: evalsForTerminalLog,
313
- target: body.target
355
+ target: body.target,
356
+ concurrency
314
357
  });
315
358
  subscribeToAppRunResultLog({
316
359
  runner,
317
360
  runId: runResult.value.manifest.id,
318
- shortId: runResult.value.manifest.shortId
361
+ shortId: runResult.value.manifest.shortId,
362
+ evals: evalsForTerminalLog,
363
+ concurrency
319
364
  });
320
365
  return c.json(runResult.value, 201);
321
366
  }).post("/actions/open-location", zValidator("json", openRunLocationRequestSchema), (c) => {