@ls-stack/agent-eval 0.60.3 → 0.60.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
- import { Et as getCaseRowCaseKey, Ot as caseRowSchema, dt as getEvalTitle, nt as updateManualScoreRequestSchema, rt as extractCacheEntries, tt as createRunRequestSchema } from "./runExecution-Bq0Y3y_1.mjs";
2
- import { o as stageManualInputFile } from "./cli-BSVUCUxr.mjs";
3
- import "./src-D5vGo2iv.mjs";
4
- import { t as getRunnerInstance } from "./runner-DmkSq-QG.mjs";
1
+ import { Dt as caseRowSchema, Tt as getCaseRowCaseKey, et as createRunRequestSchema, nt as extractCacheEntries, tt as updateManualScoreRequestSchema, ut as getEvalTitle } from "./runExecution-Bu9yfdUS.mjs";
2
+ import { o as stageManualInputFile } from "./cli-OLZIjQpx.mjs";
3
+ import "./src-Cy3OxoZW.mjs";
4
+ import { t as getRunnerInstance } from "./runner-SxtKn-Xh.mjs";
5
5
  import { z } from "zod";
6
6
  import { resultify } from "t-result";
7
7
  import { readFile } from "node:fs/promises";
package/dist/bin.mjs CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env node
2
- import { t as runCli } from "./cli-BSVUCUxr.mjs";
2
+ import { t as runCli } from "./cli-OLZIjQpx.mjs";
3
3
  import { spawn } from "node:child_process";
4
4
  //#region src/bin.ts
5
5
  const moduleMocksFlag = "--experimental-test-module-mocks";
@@ -1,4 +1,4 @@
1
- import { Ct as resolveLlmCallsConfig, It as runWithEvalRegistry, J as runInEvalRuntimeScope, L as configureEvalRunLogs, St as resolveApiCallsConfig, _ as createBufferedCacheStore, a as isCaseChildParentMessage, d as loadEvalModule, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, r as runCase, v as createFsCacheStore, y as getCacheRetentionOptions } from "./runExecution-Bq0Y3y_1.mjs";
1
+ import { Ft as runWithEvalRegistry, I as configureEvalRunLogs, St as resolveLlmCallsConfig, _ as createFsCacheStore, a as isCaseChildParentMessage, d as loadEvalModule, g as createBufferedCacheStore, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, q as runInEvalRuntimeScope, r as runCase, v as getCacheRetentionOptions, xt as resolveApiCallsConfig } from "./runExecution-Bu9yfdUS.mjs";
2
2
  //#region ../runner/src/caseChild.ts
3
3
  let fatalErrorReported = false;
4
4
  let disconnectExpected = false;
@@ -1,5 +1,5 @@
1
- import { Ct as resolveLlmCallsConfig, Et as getCaseRowCaseKey, Ft as getEvalRegistry, Ot as caseRowSchema, St as resolveApiCallsConfig, Tt as buildEvalKey, _t as matchesTagsFilter, c as resolveArtifactPath, dt as getEvalTitle, f as resolveEvalDefaultConfig, ft as getEvalDisplayStatus, h as normalizeScoreDef, lt as applyDerivedCallAttributes, m as buildDeclaredColumnDefs, o as stripTerminalControlCodes, p as loadConfig, pt as deriveScopedSummaryFromCases, s as resolveTracePresentation, v as createFsCacheStore, xt as runSummarySchema, y as getCacheRetentionOptions } from "./runExecution-Bq0Y3y_1.mjs";
2
- import { C as parseEvalDiscovery, S as loadIsolatedEvalRegistry, _ as recomputePersistedCaseStatus, a as validateTagsFilters, b as parseManualInputValues, c as getLatestRunInfos, d as loadPersistedRunSnapshots, f as nextShortIdFromSnapshots, g as recomputeEvalStatusesInRuns, h as persistRunState, i as resolveEvalTags, l as loadPersistedCaseDetail, m as deleteTemporaryRuns, n as getTargetEvalKeys, o as generateRunId, p as persistCaseDetail, s as getLastRunStatuses, u as loadPersistedRunSnapshot, v as runTouchesEval, w as validateCharts, x as deriveEvalFreshness, y as buildManualInputDescriptor } from "./runOrchestration-C7qQISz2.mjs";
1
+ import { Dt as caseRowSchema, Pt as getEvalRegistry, St as resolveLlmCallsConfig, Tt as getCaseRowCaseKey, _ as createFsCacheStore, bt as runSummarySchema, c as resolveArtifactPath, ct as applyDerivedCallAttributes, dt as getEvalDisplayStatus, f as resolveEvalDefaultConfig, ft as deriveScopedSummaryFromCases, gt as matchesTagsFilter, m as buildDeclaredColumnDefs, o as stripTerminalControlCodes, p as loadConfig, s as resolveTracePresentation, ut as getEvalTitle, v as getCacheRetentionOptions, wt as buildEvalKey, xt as resolveApiCallsConfig } from "./runExecution-Bu9yfdUS.mjs";
2
+ import { C as parseEvalDiscovery, S as loadIsolatedEvalRegistry, _ as recomputePersistedCaseStatus, a as validateTagsFilters, b as parseManualInputValues, c as getLatestRunInfos, d as loadPersistedRunSnapshots, f as nextShortIdFromSnapshots, g as recomputeEvalStatusesInRuns, h as persistRunState, i as resolveEvalTags, l as loadPersistedCaseDetail, m as deleteTemporaryRuns, n as getTargetEvalKeys, o as generateRunId, p as persistCaseDetail, s as getLastRunStatuses, u as loadPersistedRunSnapshot, v as runTouchesEval, w as validateCharts, x as deriveEvalFreshness, y as buildManualInputDescriptor } from "./runOrchestration-mpgZmEZ6.mjs";
3
3
  import { parseEnv } from "node:util";
4
4
  import { resultify } from "t-result";
5
5
  import { copyFile, mkdir, readFile, rm, writeFile } from "node:fs/promises";
@@ -973,7 +973,6 @@ function createRunner({ watchForChanges = true, loadEnv = true } = {}) {
973
973
  let discoveryRefreshTimer;
974
974
  let runHistoryRefreshTimer;
975
975
  let cachePruneIdleTimer;
976
- let registryLoadCounter = 0;
977
976
  const configReload = createConfigReloadController({
978
977
  getActiveRunCount,
979
978
  closeRunnerWatchers: closeWatchers,
@@ -1019,9 +1018,8 @@ function createRunner({ watchForChanges = true, loadEnv = true } = {}) {
1019
1018
  if (caseRow === void 0) return void 0;
1020
1019
  return hydrateCaseDetailForRow(run, caseRow);
1021
1020
  }
1022
- function nextRegistryLoadIsolationKey(prefix, filePath) {
1023
- registryLoadCounter++;
1024
- return `${prefix}:${String(registryLoadCounter)}:${filePath}`;
1021
+ function getDiscoveryModuleIsolationKey(filePath) {
1022
+ return `discovery:${filePath}`;
1025
1023
  }
1026
1024
  const runner = {
1027
1025
  async init() {
@@ -1042,24 +1040,11 @@ function createRunner({ watchForChanges = true, loadEnv = true } = {}) {
1042
1040
  async recomputeStatusesForEval(evalKey) {
1043
1041
  const evalMeta = resolveEvalMeta(evalKey);
1044
1042
  if (!evalMeta) return { updatedRuns: 0 };
1045
- const entry = (await loadIsolatedEvalRegistry({
1046
- evalFilePath: evalMeta.sourceFilePath,
1047
- sourceFingerprint: evalMeta.sourceFingerprint ?? void 0,
1048
- moduleIsolation: {
1049
- key: nextRegistryLoadIsolationKey("recompute-status", evalMeta.sourceFilePath),
1050
- workspaceRoot
1051
- },
1052
- runtimeScope: "env"
1053
- })).get(evalMeta.id);
1054
- if (!entry) return { updatedRuns: 0 };
1055
1043
  const scoreThresholds = /* @__PURE__ */ new Map();
1056
- entry.use((evalDef) => {
1057
- for (const [key, def] of Object.entries(evalDef.scores ?? {})) {
1058
- const threshold = normalizeScoreDef(def).passThreshold;
1059
- if (threshold !== void 0) scoreThresholds.set(key, threshold);
1060
- }
1061
- for (const [key, def] of Object.entries(evalDef.manualScores ?? {})) if (def.passThreshold !== void 0) scoreThresholds.set(key, def.passThreshold);
1062
- });
1044
+ for (const columnDef of evalMeta.columnDefs) {
1045
+ if (columnDef.isScore !== true || columnDef.passThreshold === void 0) continue;
1046
+ scoreThresholds.set(columnDef.key, columnDef.passThreshold);
1047
+ }
1063
1048
  const updatedRuns = await recomputeEvalStatusesInRuns({
1064
1049
  runs: runs.values(),
1065
1050
  evalKey: evalMeta.key,
@@ -1264,7 +1249,7 @@ function createRunner({ watchForChanges = true, loadEnv = true } = {}) {
1264
1249
  evalFilePath: filePath,
1265
1250
  sourceFingerprint,
1266
1251
  moduleIsolation: {
1267
- key: nextRegistryLoadIsolationKey("discovery", filePath),
1252
+ key: getDiscoveryModuleIsolationKey(filePath),
1268
1253
  workspaceRoot
1269
1254
  },
1270
1255
  runtimeScope: "env"
@@ -2258,8 +2243,8 @@ async function commandApp(args) {
2258
2243
  const { serve } = await import("@hono/node-server");
2259
2244
  const bundledWebDist = resolve(currentDir, "apps/web/dist");
2260
2245
  if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
2261
- const appModule = await import("./app-l3ynaNsb.mjs");
2262
- const runnerModule = await import("./runner-C9xNJHt3.mjs");
2246
+ const appModule = await import("./app-gg10KvzS.mjs");
2247
+ const runnerModule = await import("./runner-C4Y0lWb1.mjs");
2263
2248
  if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
2264
2249
  if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
2265
2250
  await runnerModule.initRunner({ loadEnv: args.loadEnv });
package/dist/index.d.mts CHANGED
@@ -2214,8 +2214,8 @@ declare const traceSpanSchema$1: z.ZodObject<{
2214
2214
  status: z.ZodEnum<{
2215
2215
  error: "error";
2216
2216
  running: "running";
2217
- cancelled: "cancelled";
2218
2217
  ok: "ok";
2218
+ cancelled: "cancelled";
2219
2219
  }>;
2220
2220
  attributes: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
2221
2221
  error: z.ZodOptional<z.ZodObject<{
@@ -2466,10 +2466,10 @@ declare const evalSummarySchema$1: z.ZodObject<{
2466
2466
  caseIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
2467
2467
  lastRunStatus: z.ZodNullable<z.ZodEnum<{
2468
2468
  error: "error";
2469
- pass: "pass";
2470
- fail: "fail";
2471
2469
  running: "running";
2472
2470
  cancelled: "cancelled";
2471
+ pass: "pass";
2472
+ fail: "fail";
2473
2473
  unscored: "unscored";
2474
2474
  }>>;
2475
2475
  stats: z.ZodOptional<z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodObject<{
@@ -2558,8 +2558,8 @@ declare const evalSummarySchema$1: z.ZodObject<{
2558
2558
  }>;
2559
2559
  label: z.ZodOptional<z.ZodString>;
2560
2560
  color: z.ZodOptional<z.ZodEnum<{
2561
- success: "success";
2562
2561
  error: "error";
2562
+ success: "success";
2563
2563
  accent: "accent";
2564
2564
  accentDim: "accentDim";
2565
2565
  warning: "warning";
@@ -2582,8 +2582,8 @@ declare const evalSummarySchema$1: z.ZodObject<{
2582
2582
  }>;
2583
2583
  label: z.ZodOptional<z.ZodString>;
2584
2584
  color: z.ZodOptional<z.ZodEnum<{
2585
- success: "success";
2586
2585
  error: "error";
2586
+ success: "success";
2587
2587
  accent: "accent";
2588
2588
  accentDim: "accentDim";
2589
2589
  warning: "warning";
@@ -2715,10 +2715,10 @@ declare const caseRowSchema$1: z.ZodObject<{
2715
2715
  tags: z.ZodOptional<z.ZodArray<z.ZodString>>;
2716
2716
  status: z.ZodEnum<{
2717
2717
  error: "error";
2718
- pass: "pass";
2719
- fail: "fail";
2720
2718
  running: "running";
2721
2719
  cancelled: "cancelled";
2720
+ pass: "pass";
2721
+ fail: "fail";
2722
2722
  pending: "pending";
2723
2723
  }>;
2724
2724
  durationMs: z.ZodNullable<z.ZodNumber>;
@@ -2857,8 +2857,8 @@ declare const scoreTraceSchema: z.ZodObject<{
2857
2857
  status: z.ZodEnum<{
2858
2858
  error: "error";
2859
2859
  running: "running";
2860
- cancelled: "cancelled";
2861
2860
  ok: "ok";
2861
+ cancelled: "cancelled";
2862
2862
  }>;
2863
2863
  attributes: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
2864
2864
  error: z.ZodOptional<z.ZodObject<{
@@ -2920,10 +2920,10 @@ declare const scoreTraceSchema: z.ZodObject<{
2920
2920
  namespace: z.ZodString;
2921
2921
  key: z.ZodString;
2922
2922
  status: z.ZodEnum<{
2923
+ bypass: "bypass";
2924
+ refresh: "refresh";
2923
2925
  hit: "hit";
2924
2926
  miss: "miss";
2925
- refresh: "refresh";
2926
- bypass: "bypass";
2927
2927
  }>;
2928
2928
  read: z.ZodOptional<z.ZodBoolean>;
2929
2929
  stored: z.ZodOptional<z.ZodBoolean>;
@@ -2942,10 +2942,10 @@ declare const caseDetailSchema$1: z.ZodObject<{
2942
2942
  tags: z.ZodOptional<z.ZodArray<z.ZodString>>;
2943
2943
  status: z.ZodEnum<{
2944
2944
  error: "error";
2945
- pass: "pass";
2946
- fail: "fail";
2947
2945
  running: "running";
2948
2946
  cancelled: "cancelled";
2947
+ pass: "pass";
2948
+ fail: "fail";
2949
2949
  pending: "pending";
2950
2950
  }>;
2951
2951
  input: z.ZodUnknown;
@@ -2960,8 +2960,8 @@ declare const caseDetailSchema$1: z.ZodObject<{
2960
2960
  status: z.ZodEnum<{
2961
2961
  error: "error";
2962
2962
  running: "running";
2963
- cancelled: "cancelled";
2964
2963
  ok: "ok";
2964
+ cancelled: "cancelled";
2965
2965
  }>;
2966
2966
  attributes: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
2967
2967
  error: z.ZodOptional<z.ZodObject<{
@@ -3029,8 +3029,8 @@ declare const caseDetailSchema$1: z.ZodObject<{
3029
3029
  status: z.ZodEnum<{
3030
3030
  error: "error";
3031
3031
  running: "running";
3032
- cancelled: "cancelled";
3033
3032
  ok: "ok";
3033
+ cancelled: "cancelled";
3034
3034
  }>;
3035
3035
  attributes: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
3036
3036
  error: z.ZodOptional<z.ZodObject<{
@@ -3092,10 +3092,10 @@ declare const caseDetailSchema$1: z.ZodObject<{
3092
3092
  namespace: z.ZodString;
3093
3093
  key: z.ZodString;
3094
3094
  status: z.ZodEnum<{
3095
+ bypass: "bypass";
3096
+ refresh: "refresh";
3095
3097
  hit: "hit";
3096
3098
  miss: "miss";
3097
- refresh: "refresh";
3098
- bypass: "bypass";
3099
3099
  }>;
3100
3100
  read: z.ZodOptional<z.ZodBoolean>;
3101
3101
  stored: z.ZodOptional<z.ZodBoolean>;
@@ -3213,10 +3213,10 @@ declare const caseDetailSchema$1: z.ZodObject<{
3213
3213
  namespace: z.ZodString;
3214
3214
  key: z.ZodString;
3215
3215
  status: z.ZodEnum<{
3216
+ bypass: "bypass";
3217
+ refresh: "refresh";
3216
3218
  hit: "hit";
3217
3219
  miss: "miss";
3218
- refresh: "refresh";
3219
- bypass: "bypass";
3220
3220
  }>;
3221
3221
  read: z.ZodOptional<z.ZodBoolean>;
3222
3222
  stored: z.ZodOptional<z.ZodBoolean>;
@@ -3283,8 +3283,8 @@ type EvalChartAggregate = z.infer<typeof evalChartAggregateSchema>;
3283
3283
  * not emit raw hex so authored evals stay decoupled from the web theme.
3284
3284
  */
3285
3285
  declare const evalChartColorSchema: z.ZodEnum<{
3286
- success: "success";
3287
3286
  error: "error";
3287
+ success: "success";
3288
3288
  accent: "accent";
3289
3289
  accentDim: "accentDim";
3290
3290
  warning: "warning";
@@ -3312,8 +3312,8 @@ declare const evalChartMetricSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
3312
3312
  }>;
3313
3313
  label: z.ZodOptional<z.ZodString>;
3314
3314
  color: z.ZodOptional<z.ZodEnum<{
3315
- success: "success";
3316
3315
  error: "error";
3316
+ success: "success";
3317
3317
  accent: "accent";
3318
3318
  accentDim: "accentDim";
3319
3319
  warning: "warning";
@@ -3336,8 +3336,8 @@ declare const evalChartMetricSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
3336
3336
  }>;
3337
3337
  label: z.ZodOptional<z.ZodString>;
3338
3338
  color: z.ZodOptional<z.ZodEnum<{
3339
- success: "success";
3340
3339
  error: "error";
3340
+ success: "success";
3341
3341
  accent: "accent";
3342
3342
  accentDim: "accentDim";
3343
3343
  warning: "warning";
@@ -3395,8 +3395,8 @@ declare const evalChartConfigSchema: z.ZodObject<{
3395
3395
  }>;
3396
3396
  label: z.ZodOptional<z.ZodString>;
3397
3397
  color: z.ZodOptional<z.ZodEnum<{
3398
- success: "success";
3399
3398
  error: "error";
3399
+ success: "success";
3400
3400
  accent: "accent";
3401
3401
  accentDim: "accentDim";
3402
3402
  warning: "warning";
@@ -3419,8 +3419,8 @@ declare const evalChartConfigSchema: z.ZodObject<{
3419
3419
  }>;
3420
3420
  label: z.ZodOptional<z.ZodString>;
3421
3421
  color: z.ZodOptional<z.ZodEnum<{
3422
- success: "success";
3423
3422
  error: "error";
3423
+ success: "success";
3424
3424
  accent: "accent";
3425
3425
  accentDim: "accentDim";
3426
3426
  warning: "warning";
@@ -3485,8 +3485,8 @@ declare const evalChartsConfigSchema: z.ZodArray<z.ZodObject<{
3485
3485
  }>;
3486
3486
  label: z.ZodOptional<z.ZodString>;
3487
3487
  color: z.ZodOptional<z.ZodEnum<{
3488
- success: "success";
3489
3488
  error: "error";
3489
+ success: "success";
3490
3490
  accent: "accent";
3491
3491
  accentDim: "accentDim";
3492
3492
  warning: "warning";
@@ -3509,8 +3509,8 @@ declare const evalChartsConfigSchema: z.ZodArray<z.ZodObject<{
3509
3509
  }>;
3510
3510
  label: z.ZodOptional<z.ZodString>;
3511
3511
  color: z.ZodOptional<z.ZodEnum<{
3512
- success: "success";
3513
3512
  error: "error";
3513
+ success: "success";
3514
3514
  accent: "accent";
3515
3515
  accentDim: "accentDim";
3516
3516
  warning: "warning";
@@ -3589,9 +3589,9 @@ declare const runManifestSchema$1: z.ZodObject<{
3589
3589
  median: "median";
3590
3590
  }>>>;
3591
3591
  cacheMode: z.ZodOptional<z.ZodEnum<{
3592
- refresh: "refresh";
3593
- bypass: "bypass";
3594
3592
  use: "use";
3593
+ bypass: "bypass";
3594
+ refresh: "refresh";
3595
3595
  }>>;
3596
3596
  }, z.core.$strip>;
3597
3597
  /** Persisted lifecycle metadata for a single eval run. */
@@ -4567,9 +4567,9 @@ declare function extractApiCalls(spans: EvalTraceSpan$1[], config: ResolvedApiCa
4567
4567
  * - `refresh`: never read, always write (forces re-execution and overwrites).
4568
4568
  */
4569
4569
  declare const cacheModeSchema: z.ZodEnum<{
4570
- refresh: "refresh";
4571
- bypass: "bypass";
4572
4570
  use: "use";
4571
+ bypass: "bypass";
4572
+ refresh: "refresh";
4573
4573
  }>;
4574
4574
  /** Mode controlling how cached spans behave during a run. */
4575
4575
  type CacheMode = z.infer<typeof cacheModeSchema>;
@@ -4583,17 +4583,17 @@ declare const spanCacheOptionsSchema: z.ZodObject<{
4583
4583
  type SpanCacheOptions = z.infer<typeof spanCacheOptionsSchema>;
4584
4584
  /** Category of operation stored in the eval cache. */
4585
4585
  declare const cacheOperationTypeSchema: z.ZodEnum<{
4586
- value: "value";
4587
4586
  span: "span";
4587
+ value: "value";
4588
4588
  }>;
4589
4589
  /** Category of operation stored in the eval cache. */
4590
4590
  type CacheOperationType = z.infer<typeof cacheOperationTypeSchema>;
4591
4591
  /** Status of a cache lookup recorded on a span or case scope. */
4592
4592
  declare const cacheStatusSchema: z.ZodEnum<{
4593
+ bypass: "bypass";
4594
+ refresh: "refresh";
4593
4595
  hit: "hit";
4594
4596
  miss: "miss";
4595
- refresh: "refresh";
4596
- bypass: "bypass";
4597
4597
  }>;
4598
4598
  /** Status of a cache lookup recorded on a span or case scope. */
4599
4599
  type CacheStatus = z.infer<typeof cacheStatusSchema>;
@@ -4610,10 +4610,10 @@ declare const traceCacheRefSchema: z.ZodObject<{
4610
4610
  namespace: z.ZodString;
4611
4611
  key: z.ZodString;
4612
4612
  status: z.ZodEnum<{
4613
+ bypass: "bypass";
4614
+ refresh: "refresh";
4613
4615
  hit: "hit";
4614
4616
  miss: "miss";
4615
- refresh: "refresh";
4616
- bypass: "bypass";
4617
4617
  }>;
4618
4618
  read: z.ZodOptional<z.ZodBoolean>;
4619
4619
  stored: z.ZodOptional<z.ZodBoolean>;
@@ -4721,8 +4721,8 @@ declare const cacheRecordingSchema: z.ZodObject<{
4721
4721
  finalStatus: z.ZodOptional<z.ZodEnum<{
4722
4722
  error: "error";
4723
4723
  running: "running";
4724
- cancelled: "cancelled";
4725
4724
  ok: "ok";
4725
+ cancelled: "cancelled";
4726
4726
  }>>;
4727
4727
  finalError: z.ZodOptional<z.ZodObject<{
4728
4728
  name: z.ZodOptional<z.ZodString>;
@@ -4809,8 +4809,8 @@ declare const cacheEntrySchema: z.ZodObject<{
4809
4809
  key: z.ZodString;
4810
4810
  namespace: z.ZodString;
4811
4811
  operationType: z.ZodOptional<z.ZodEnum<{
4812
- value: "value";
4813
4812
  span: "span";
4813
+ value: "value";
4814
4814
  }>>;
4815
4815
  operationName: z.ZodOptional<z.ZodString>;
4816
4816
  spanName: z.ZodOptional<z.ZodString>;
@@ -4822,8 +4822,8 @@ declare const cacheEntrySchema: z.ZodObject<{
4822
4822
  finalStatus: z.ZodOptional<z.ZodEnum<{
4823
4823
  error: "error";
4824
4824
  running: "running";
4825
- cancelled: "cancelled";
4826
4825
  ok: "ok";
4826
+ cancelled: "cancelled";
4827
4827
  }>>;
4828
4828
  finalError: z.ZodOptional<z.ZodObject<{
4829
4829
  name: z.ZodOptional<z.ZodString>;
@@ -4916,8 +4916,8 @@ declare const cacheDebugKeyEntrySchema: z.ZodObject<{
4916
4916
  key: z.ZodString;
4917
4917
  namespace: z.ZodString;
4918
4918
  operationType: z.ZodEnum<{
4919
- value: "value";
4920
4919
  span: "span";
4920
+ value: "value";
4921
4921
  }>;
4922
4922
  operationName: z.ZodString;
4923
4923
  storedAt: z.ZodString;
@@ -4927,8 +4927,8 @@ declare const cacheDebugKeyEntrySchema: z.ZodObject<{
4927
4927
  key: z.ZodString;
4928
4928
  namespace: z.ZodString;
4929
4929
  operationType: z.ZodOptional<z.ZodEnum<{
4930
- value: "value";
4931
4930
  span: "span";
4931
+ value: "value";
4932
4932
  }>>;
4933
4933
  operationName: z.ZodOptional<z.ZodString>;
4934
4934
  spanName: z.ZodOptional<z.ZodString>;
@@ -4940,8 +4940,8 @@ declare const cacheDebugKeyEntrySchema: z.ZodObject<{
4940
4940
  finalStatus: z.ZodOptional<z.ZodEnum<{
4941
4941
  error: "error";
4942
4942
  running: "running";
4943
- cancelled: "cancelled";
4944
4943
  ok: "ok";
4944
+ cancelled: "cancelled";
4945
4945
  }>>;
4946
4946
  finalError: z.ZodOptional<z.ZodObject<{
4947
4947
  name: z.ZodOptional<z.ZodString>;
@@ -5034,8 +5034,8 @@ declare const cacheEntryWithDebugKeySchema$1: z.ZodObject<{
5034
5034
  key: z.ZodString;
5035
5035
  namespace: z.ZodString;
5036
5036
  operationType: z.ZodOptional<z.ZodEnum<{
5037
- value: "value";
5038
5037
  span: "span";
5038
+ value: "value";
5039
5039
  }>>;
5040
5040
  operationName: z.ZodOptional<z.ZodString>;
5041
5041
  spanName: z.ZodOptional<z.ZodString>;
@@ -5047,8 +5047,8 @@ declare const cacheEntryWithDebugKeySchema$1: z.ZodObject<{
5047
5047
  finalStatus: z.ZodOptional<z.ZodEnum<{
5048
5048
  error: "error";
5049
5049
  running: "running";
5050
- cancelled: "cancelled";
5051
5050
  ok: "ok";
5051
+ cancelled: "cancelled";
5052
5052
  }>>;
5053
5053
  finalError: z.ZodOptional<z.ZodObject<{
5054
5054
  name: z.ZodOptional<z.ZodString>;
@@ -5132,8 +5132,8 @@ declare const cacheEntryWithDebugKeySchema$1: z.ZodObject<{
5132
5132
  key: z.ZodString;
5133
5133
  namespace: z.ZodString;
5134
5134
  operationType: z.ZodEnum<{
5135
- value: "value";
5136
5135
  span: "span";
5136
+ value: "value";
5137
5137
  }>;
5138
5138
  operationName: z.ZodString;
5139
5139
  storedAt: z.ZodString;
@@ -5143,8 +5143,8 @@ declare const cacheEntryWithDebugKeySchema$1: z.ZodObject<{
5143
5143
  key: z.ZodString;
5144
5144
  namespace: z.ZodString;
5145
5145
  operationType: z.ZodOptional<z.ZodEnum<{
5146
- value: "value";
5147
5146
  span: "span";
5147
+ value: "value";
5148
5148
  }>>;
5149
5149
  operationName: z.ZodOptional<z.ZodString>;
5150
5150
  spanName: z.ZodOptional<z.ZodString>;
@@ -5156,8 +5156,8 @@ declare const cacheEntryWithDebugKeySchema$1: z.ZodObject<{
5156
5156
  finalStatus: z.ZodOptional<z.ZodEnum<{
5157
5157
  error: "error";
5158
5158
  running: "running";
5159
- cancelled: "cancelled";
5160
5159
  ok: "ok";
5160
+ cancelled: "cancelled";
5161
5161
  }>>;
5162
5162
  finalError: z.ZodOptional<z.ZodObject<{
5163
5163
  name: z.ZodOptional<z.ZodString>;
@@ -5250,8 +5250,8 @@ declare const cacheFileSchema: z.ZodObject<{
5250
5250
  key: z.ZodString;
5251
5251
  namespace: z.ZodString;
5252
5252
  operationType: z.ZodOptional<z.ZodEnum<{
5253
- value: "value";
5254
5253
  span: "span";
5254
+ value: "value";
5255
5255
  }>>;
5256
5256
  operationName: z.ZodOptional<z.ZodString>;
5257
5257
  spanName: z.ZodOptional<z.ZodString>;
@@ -5263,8 +5263,8 @@ declare const cacheFileSchema: z.ZodObject<{
5263
5263
  finalStatus: z.ZodOptional<z.ZodEnum<{
5264
5264
  error: "error";
5265
5265
  running: "running";
5266
- cancelled: "cancelled";
5267
5266
  ok: "ok";
5267
+ cancelled: "cancelled";
5268
5268
  }>>;
5269
5269
  finalError: z.ZodOptional<z.ZodObject<{
5270
5270
  name: z.ZodOptional<z.ZodString>;
@@ -5356,8 +5356,8 @@ declare const cacheDebugKeyFileSchema: z.ZodObject<{
5356
5356
  key: z.ZodString;
5357
5357
  namespace: z.ZodString;
5358
5358
  operationType: z.ZodEnum<{
5359
- value: "value";
5360
5359
  span: "span";
5360
+ value: "value";
5361
5361
  }>;
5362
5362
  operationName: z.ZodString;
5363
5363
  storedAt: z.ZodString;
@@ -5367,8 +5367,8 @@ declare const cacheDebugKeyFileSchema: z.ZodObject<{
5367
5367
  key: z.ZodString;
5368
5368
  namespace: z.ZodString;
5369
5369
  operationType: z.ZodOptional<z.ZodEnum<{
5370
- value: "value";
5371
5370
  span: "span";
5371
+ value: "value";
5372
5372
  }>>;
5373
5373
  operationName: z.ZodOptional<z.ZodString>;
5374
5374
  spanName: z.ZodOptional<z.ZodString>;
@@ -5380,8 +5380,8 @@ declare const cacheDebugKeyFileSchema: z.ZodObject<{
5380
5380
  finalStatus: z.ZodOptional<z.ZodEnum<{
5381
5381
  error: "error";
5382
5382
  running: "running";
5383
- cancelled: "cancelled";
5384
5383
  ok: "ok";
5384
+ cancelled: "cancelled";
5385
5385
  }>>;
5386
5386
  finalError: z.ZodOptional<z.ZodObject<{
5387
5387
  name: z.ZodOptional<z.ZodString>;
@@ -5579,9 +5579,9 @@ declare const createRunRequestSchema$1: z.ZodObject<{
5579
5579
  temporary: z.ZodOptional<z.ZodBoolean>;
5580
5580
  cache: z.ZodOptional<z.ZodObject<{
5581
5581
  mode: z.ZodDefault<z.ZodEnum<{
5582
- refresh: "refresh";
5583
- bypass: "bypass";
5584
5582
  use: "use";
5583
+ bypass: "bypass";
5584
+ refresh: "refresh";
5585
5585
  }>>;
5586
5586
  }, z.core.$strip>>;
5587
5587
  manualInputs: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
package/dist/index.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { $ as setScopeCacheContext, A as repoFile, B as evalTime, C as evalTracer, D as deserializeCacheValue, E as deserializeCacheRecording, F as EvalRuntimeUsageError, Ft as getEvalRegistry, H as getEvalCaseInput, I as appendToEvalOutput, J as runInEvalRuntimeScope, K as mergeEvalOutput, M as readManualInputFile, N as evalExpect, O as serializeCacheRecording, P as EvalAssertionError, Q as setEvalOutput, R as evalAssert, S as evalSpan, T as hashCacheKeySync, U as incrementEvalOutput, V as getCurrentScope, W as isInEvalScope, X as runInExistingEvalScope, Y as runInEvalScope, at as extractApiCalls, b as buildTraceTree, ct as simulateTokenAllocation, et as startEvalBackgroundJob, it as extractCacheHits, j as manualInputFileValueSchema, k as serializeCacheValue, ot as extractLlmCalls, q as nextEvalId, rt as extractCacheEntries, st as simulateLlmCallCost, ut as getNestedAttribute, w as hashCacheKey, x as captureEvalSpanError, z as evalLog } from "./runExecution-Bq0Y3y_1.mjs";
2
- import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-BSVUCUxr.mjs";
3
- import { n as matchesEvalTags, t as defineEval } from "./src-D5vGo2iv.mjs";
1
+ import { $ as startEvalBackgroundJob, A as manualInputFileValueSchema, B as getCurrentScope, C as hashCacheKey, D as serializeCacheRecording, E as deserializeCacheValue, F as appendToEvalOutput, G as mergeEvalOutput, H as incrementEvalOutput, J as runInEvalScope, K as nextEvalId, L as evalAssert, M as evalExpect, N as EvalAssertionError, O as serializeCacheValue, P as EvalRuntimeUsageError, Pt as getEvalRegistry, Q as setScopeCacheContext, R as evalLog, S as evalTracer, T as deserializeCacheRecording, U as isInEvalScope, V as getEvalCaseInput, Y as runInExistingEvalScope, Z as setEvalOutput, at as extractLlmCalls, b as captureEvalSpanError, it as extractApiCalls, j as readManualInputFile, k as repoFile, lt as getNestedAttribute, nt as extractCacheEntries, ot as simulateLlmCallCost, q as runInEvalRuntimeScope, rt as extractCacheHits, st as simulateTokenAllocation, w as hashCacheKeySync, x as evalSpan, y as buildTraceTree, z as evalTime } from "./runExecution-Bu9yfdUS.mjs";
2
+ import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-OLZIjQpx.mjs";
3
+ import { n as matchesEvalTags, t as defineEval } from "./src-Cy3OxoZW.mjs";
4
4
  export { EvalAssertionError, EvalRuntimeUsageError, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob };
package/dist/runChild.mjs CHANGED
@@ -1,5 +1,5 @@
1
- import { At as evalStatsConfigSchema, L as configureEvalRunLogs, Mt as evalChartsConfigSchema, Nt as columnDefSchema, Tt as buildEvalKey, bt as runManifestSchema, jt as manualInputDescriptorSchema, kt as evalStatAggregateSchema, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, tt as createRunRequestSchema, v as createFsCacheStore, xt as runSummarySchema, y as getCacheRetentionOptions } from "./runExecution-Bq0Y3y_1.mjs";
2
- import { C as parseEvalDiscovery, h as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-C7qQISz2.mjs";
1
+ import { At as manualInputDescriptorSchema, I as configureEvalRunLogs, Mt as columnDefSchema, Ot as evalStatAggregateSchema, _ as createFsCacheStore, bt as runSummarySchema, et as createRunRequestSchema, jt as evalChartsConfigSchema, kt as evalStatsConfigSchema, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, v as getCacheRetentionOptions, wt as buildEvalKey, yt as runManifestSchema } from "./runExecution-Bu9yfdUS.mjs";
2
+ import { C as parseEvalDiscovery, h as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-mpgZmEZ6.mjs";
3
3
  import { z } from "zod";
4
4
  import { readFile } from "node:fs/promises";
5
5
  import { relative } from "node:path";
@@ -7239,4 +7239,4 @@ function recordAssertionFailure(scope, failure) {
7239
7239
  });
7240
7240
  }
7241
7241
  //#endregion
7242
- export { setScopeCacheContext as $, repoFile as A, evalStatsConfigSchema as At, evalTime as B, evalTracer as C, resolveLlmCallsConfig as Ct, deserializeCacheValue as D, caseDetailSchema as Dt, deserializeCacheRecording as E, getCaseRowCaseKey as Et, EvalRuntimeUsageError as F, getEvalRegistry as Ft, matchesEvalTags as G, getEvalCaseInput as H, appendToEvalOutput as I, runWithEvalRegistry as It, runInEvalRuntimeScope as J, mergeEvalOutput as K, configureEvalRunLogs as L, readManualInputFile as M, evalChartsConfigSchema as Mt, evalExpect as N, columnDefSchema as Nt, serializeCacheRecording as O, caseRowSchema as Ot, EvalAssertionError as P, defineEval as Pt, setEvalOutput as Q, evalAssert as R, evalSpan as S, resolveApiCallsConfig as St, hashCacheKeySync as T, buildEvalKey as Tt, incrementEvalOutput as U, getCurrentScope as V, isInEvalScope as W, runInExistingEvalScope as X, runInEvalScope as Y, runWithEvalClock as Z, createBufferedCacheStore as _, matchesTagsFilter as _t, isCaseChildParentMessage as a, extractApiCalls as at, buildTraceTree as b, runManifestSchema as bt, resolveArtifactPath as c, simulateTokenAllocation as ct, loadEvalModule as d, getEvalTitle as dt, startEvalBackgroundJob as et, resolveEvalDefaultConfig as f, getEvalDisplayStatus as ft, commitPendingCacheWrites as g, dedupeEvalTags as gt, normalizeScoreDef as h, deriveStatusFromChildStatuses as ht, isCaseChildMessage as i, extractCacheHits as it, manualInputFileValueSchema as j, manualInputDescriptorSchema as jt, serializeCacheValue as k, evalStatAggregateSchema as kt, registerAgentEvalsPackageResolutionHooks as l, applyDerivedCallAttributes as lt, buildDeclaredColumnDefs as m, deriveStatusFromCaseRows as mt, resolveRunnableEvalCases as n, updateManualScoreRequestSchema as nt, stripTerminalControlCodes as o, extractLlmCalls as ot, loadConfig as p, deriveScopedSummaryFromCases as pt, nextEvalId as q, runCase as r, extractCacheEntries as rt, resolveTracePresentation as s, simulateLlmCallCost as st, filterEvalCases as t, createRunRequestSchema as tt, runWithModuleIsolation as u, getNestedAttribute as ut, createFsCacheStore as v, validateEvalTagName as vt, hashCacheKey as w, buildCaseKey as wt, captureEvalSpanError as x, runSummarySchema as xt, getCacheRetentionOptions as y, validateTagsFilterExpression as yt, evalLog as z };
7242
+ export { startEvalBackgroundJob as $, manualInputFileValueSchema as A, manualInputDescriptorSchema as At, getCurrentScope as B, hashCacheKey as C, buildCaseKey as Ct, serializeCacheRecording as D, caseRowSchema as Dt, deserializeCacheValue as E, caseDetailSchema as Et, appendToEvalOutput as F, runWithEvalRegistry as Ft, mergeEvalOutput as G, incrementEvalOutput as H, configureEvalRunLogs as I, runInEvalScope as J, nextEvalId as K, evalAssert as L, evalExpect as M, columnDefSchema as Mt, EvalAssertionError as N, defineEval as Nt, serializeCacheValue as O, evalStatAggregateSchema as Ot, EvalRuntimeUsageError as P, getEvalRegistry as Pt, setScopeCacheContext as Q, evalLog as R, evalTracer as S, resolveLlmCallsConfig as St, deserializeCacheRecording as T, getCaseRowCaseKey as Tt, isInEvalScope as U, getEvalCaseInput as V, matchesEvalTags as W, runWithEvalClock as X, runInExistingEvalScope as Y, setEvalOutput as Z, createFsCacheStore as _, validateEvalTagName as _t, isCaseChildParentMessage as a, extractLlmCalls as at, captureEvalSpanError as b, runSummarySchema as bt, resolveArtifactPath as c, applyDerivedCallAttributes as ct, loadEvalModule as d, getEvalDisplayStatus as dt, createRunRequestSchema as et, resolveEvalDefaultConfig as f, deriveScopedSummaryFromCases as ft, createBufferedCacheStore as g, matchesTagsFilter as gt, commitPendingCacheWrites as h, dedupeEvalTags as ht, isCaseChildMessage as i, extractApiCalls as it, readManualInputFile as j, evalChartsConfigSchema as jt, repoFile as k, evalStatsConfigSchema as kt, registerAgentEvalsPackageResolutionHooks as l, getNestedAttribute as lt, buildDeclaredColumnDefs as m, deriveStatusFromChildStatuses as mt, resolveRunnableEvalCases as n, extractCacheEntries as nt, stripTerminalControlCodes as o, simulateLlmCallCost as ot, loadConfig as p, deriveStatusFromCaseRows as pt, runInEvalRuntimeScope as q, runCase as r, extractCacheHits as rt, resolveTracePresentation as s, simulateTokenAllocation as st, filterEvalCases as t, updateManualScoreRequestSchema as tt, runWithModuleIsolation as u, getEvalTitle as ut, getCacheRetentionOptions as v, validateTagsFilterExpression as vt, hashCacheKeySync as w, buildEvalKey as wt, evalSpan as x, resolveApiCallsConfig as xt, buildTraceTree as y, runManifestSchema as yt, evalTime as z };
@@ -1,4 +1,4 @@
1
- import { Dt as caseDetailSchema, Et as getCaseRowCaseKey, It as runWithEvalRegistry, J as runInEvalRuntimeScope, Ot as caseRowSchema, Z as runWithEvalClock, _t as matchesTagsFilter, bt as runManifestSchema, d as loadEvalModule, f as resolveEvalDefaultConfig, g as commitPendingCacheWrites, gt as dedupeEvalTags, ht as deriveStatusFromChildStatuses, i as isCaseChildMessage, m as buildDeclaredColumnDefs, mt as deriveStatusFromCaseRows, n as resolveRunnableEvalCases, o as stripTerminalControlCodes, pt as deriveScopedSummaryFromCases, t as filterEvalCases, u as runWithModuleIsolation, vt as validateEvalTagName, wt as buildCaseKey, xt as runSummarySchema, yt as validateTagsFilterExpression } from "./runExecution-Bq0Y3y_1.mjs";
1
+ import { Ct as buildCaseKey, Dt as caseRowSchema, Et as caseDetailSchema, Ft as runWithEvalRegistry, Tt as getCaseRowCaseKey, X as runWithEvalClock, _t as validateEvalTagName, bt as runSummarySchema, d as loadEvalModule, f as resolveEvalDefaultConfig, ft as deriveScopedSummaryFromCases, gt as matchesTagsFilter, h as commitPendingCacheWrites, ht as dedupeEvalTags, i as isCaseChildMessage, m as buildDeclaredColumnDefs, mt as deriveStatusFromChildStatuses, n as resolveRunnableEvalCases, o as stripTerminalControlCodes, pt as deriveStatusFromCaseRows, q as runInEvalRuntimeScope, t as filterEvalCases, u as runWithModuleIsolation, vt as validateTagsFilterExpression, yt as runManifestSchema } from "./runExecution-Bu9yfdUS.mjs";
2
2
  import { Result, resultify } from "t-result";
3
3
  import { readFile, readdir, rm, writeFile } from "node:fs/promises";
4
4
  import { dirname, join } from "node:path";
@@ -1,2 +1,2 @@
1
- import { n as initRunner, t as getRunnerInstance } from "./runner-DmkSq-QG.mjs";
1
+ import { n as initRunner, t as getRunnerInstance } from "./runner-SxtKn-Xh.mjs";
2
2
  export { getRunnerInstance, initRunner };
@@ -1,5 +1,5 @@
1
- import { n as createRunner } from "./cli-BSVUCUxr.mjs";
2
- import "./src-D5vGo2iv.mjs";
1
+ import { n as createRunner } from "./cli-OLZIjQpx.mjs";
2
+ import "./src-Cy3OxoZW.mjs";
3
3
  //#region ../../apps/server/src/runner.ts
4
4
  let runnerInstance = null;
5
5
  function getRunnerInstance({ loadEnv = true } = {}) {
@@ -1,5 +1,5 @@
1
- import { G as matchesEvalTags$1, Pt as defineEval$1 } from "./runExecution-Bq0Y3y_1.mjs";
2
- import "./cli-BSVUCUxr.mjs";
1
+ import { Nt as defineEval$1, W as matchesEvalTags$1 } from "./runExecution-Bu9yfdUS.mjs";
2
+ import "./cli-OLZIjQpx.mjs";
3
3
  //#region src/index.ts
4
4
  /** Register an eval definition with typed tag support. */
5
5
  function defineEval(definition) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ls-stack/agent-eval",
3
- "version": "0.60.3",
3
+ "version": "0.60.4",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "agent-evals": "./dist/bin.mjs"
@@ -32,9 +32,9 @@
32
32
  "devDependencies": {
33
33
  "@types/node": "^24.7.2",
34
34
  "typescript": "^5.9.2",
35
- "@agent-evals/runner": "0.0.1",
36
35
  "@agent-evals/shared": "0.0.1",
37
- "@agent-evals/sdk": "0.0.1"
36
+ "@agent-evals/sdk": "0.0.1",
37
+ "@agent-evals/runner": "0.0.1"
38
38
  },
39
39
  "scripts": {
40
40
  "build": "pnpm --filter @agent-evals/web build && pnpm --filter @agent-evals/shared build && pnpm --filter @agent-evals/sdk build && pnpm --filter @agent-evals/runner build && tsdown --filter cli-js && tsdown --filter cli-types",