@ls-stack/agent-eval 0.58.3 → 0.58.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,7 +25,7 @@
25
25
  href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600&family=JetBrains+Mono:wght@400;500&display=swap"
26
26
  rel="stylesheet"
27
27
  />
28
- <script type="module" crossorigin src="/assets/index-PTikBbhf.js"></script>
28
+ <script type="module" crossorigin src="/assets/index-BXFsxHVc.js"></script>
29
29
  <link rel="stylesheet" crossorigin href="/assets/index-CHH7m5Cv.css">
30
30
  </head>
31
31
  <body>
package/dist/bin.mjs CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env node
2
- import { t as runCli } from "./cli-SP4kEtYL.mjs";
2
+ import { t as runCli } from "./cli-Bf5RzM8O.mjs";
3
3
  import { spawn } from "node:child_process";
4
4
  //#region src/bin.ts
5
5
  const moduleMocksFlag = "--experimental-test-module-mocks";
@@ -1,4 +1,4 @@
1
- import { Ct as resolveLlmCallsConfig, It as runWithEvalRegistry, J as runInEvalRuntimeScope, L as configureEvalRunLogs, St as resolveApiCallsConfig, _ as createBufferedCacheStore, a as isCaseChildParentMessage, d as loadEvalModule, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, r as runCase, v as createFsCacheStore } from "./runExecution-CFw0MQFs.mjs";
1
+ import { Ct as resolveApiCallsConfig, Lt as runWithEvalRegistry, R as configureEvalRunLogs, Y as runInEvalRuntimeScope, _ as createBufferedCacheStore, a as isCaseChildParentMessage, d as loadEvalModule, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, r as runCase, v as createFsCacheStore, wt as resolveLlmCallsConfig, y as getCacheRetentionOptions } from "./runExecution-CLkC-4Z1.mjs";
2
2
  //#region ../runner/src/caseChild.ts
3
3
  let fatalErrorReported = false;
4
4
  let disconnectExpected = false;
@@ -52,11 +52,12 @@ async function executeCaseChild(context) {
52
52
  registerAgentEvalsPackageResolutionHooks();
53
53
  const config = await loadConfig();
54
54
  configureEvalRunLogs({ captureConsole: config.runLogs?.captureConsole !== false });
55
+ const cacheRetentionOptions = getCacheRetentionOptions(config.cache);
55
56
  const cacheStore = createFsCacheStore({
56
57
  workspaceRoot: context.workspaceRoot,
57
58
  dir: config.cache?.dir,
58
- maxEntriesPerNamespace: config.cache?.maxEntriesPerNamespace ?? config.cache?.maxEntriesPerEval,
59
- maxEntriesByNamespace: config.cache?.maxEntriesByNamespace,
59
+ maxEntriesPerNamespace: cacheRetentionOptions.maxEntriesPerNamespace,
60
+ maxEntriesByNamespace: cacheRetentionOptions.maxEntriesByNamespace,
60
61
  lastAccessedAtUpdateIntervalMs: config.cache?.lastAccessedAtUpdateIntervalMs
61
62
  });
62
63
  const bufferedCacheStore = context.cacheEnabled && context.cacheMode !== "bypass" ? createBufferedCacheStore(cacheStore) : null;
@@ -1,5 +1,5 @@
1
- import { Ct as resolveLlmCallsConfig, Et as getCaseRowCaseKey, Ft as getEvalRegistry, Ot as caseRowSchema, St as resolveApiCallsConfig, Tt as buildEvalKey, _t as matchesTagsFilter, c as resolveArtifactPath, dt as getEvalTitle, f as resolveEvalDefaultConfig, ft as getEvalDisplayStatus, h as normalizeScoreDef, lt as applyDerivedCallAttributes, m as buildDeclaredColumnDefs, o as stripTerminalControlCodes, p as loadConfig, pt as deriveScopedSummaryFromCases, s as resolveTracePresentation, v as createFsCacheStore, xt as runSummarySchema } from "./runExecution-CFw0MQFs.mjs";
2
- import { C as validateCharts, S as parseEvalDiscovery, _ as runTouchesEval, a as validateTagsFilters, b as deriveEvalFreshness, c as getLatestRunInfos, d as nextShortIdFromSnapshots, f as persistCaseDetail, g as recomputePersistedCaseStatus, h as recomputeEvalStatusesInRuns, i as resolveEvalTags, l as loadPersistedRunSnapshot, m as persistRunState, n as getTargetEvalKeys, o as generateRunId, p as deleteTemporaryRuns, s as getLastRunStatuses, u as loadPersistedRunSnapshots, v as buildManualInputDescriptor, x as loadIsolatedEvalRegistry, y as parseManualInputValues } from "./runOrchestration-CxjiQmof.mjs";
1
+ import { Ct as resolveApiCallsConfig, Dt as getCaseRowCaseKey, Et as buildEvalKey, It as getEvalRegistry, St as runSummarySchema, c as resolveArtifactPath, f as resolveEvalDefaultConfig, ft as getEvalTitle, h as normalizeScoreDef, kt as caseRowSchema, m as buildDeclaredColumnDefs, mt as deriveScopedSummaryFromCases, o as stripTerminalControlCodes, p as loadConfig, pt as getEvalDisplayStatus, s as resolveTracePresentation, ut as applyDerivedCallAttributes, v as createFsCacheStore, vt as matchesTagsFilter, wt as resolveLlmCallsConfig, y as getCacheRetentionOptions } from "./runExecution-CLkC-4Z1.mjs";
2
+ import { C as validateCharts, S as parseEvalDiscovery, _ as runTouchesEval, a as validateTagsFilters, b as deriveEvalFreshness, c as getLatestRunInfos, d as nextShortIdFromSnapshots, f as persistCaseDetail, g as recomputePersistedCaseStatus, h as recomputeEvalStatusesInRuns, i as resolveEvalTags, l as loadPersistedRunSnapshot, m as persistRunState, n as getTargetEvalKeys, o as generateRunId, p as deleteTemporaryRuns, s as getLastRunStatuses, u as loadPersistedRunSnapshots, v as buildManualInputDescriptor, x as loadIsolatedEvalRegistry, y as parseManualInputValues } from "./runOrchestration-BS-WxTee.mjs";
3
3
  import { copyFile, mkdir, readFile, rm, writeFile } from "node:fs/promises";
4
4
  import { basename, dirname, extname, isAbsolute, join, relative, resolve, sep } from "node:path";
5
5
  import { createHash, randomUUID } from "node:crypto";
@@ -1483,11 +1483,12 @@ function createRunner({ watchForChanges = true } = {}) {
1483
1483
  await mkdir(localStateDir, { recursive: true });
1484
1484
  await mkdir(join(localStateDir, "runs"), { recursive: true });
1485
1485
  await cleanupStagedManualInputFiles(workspaceRoot);
1486
+ const cacheRetentionOptions = getCacheRetentionOptions(config.cache);
1486
1487
  cacheStore = createFsCacheStore({
1487
1488
  workspaceRoot,
1488
1489
  dir: config.cache?.dir,
1489
- maxEntriesPerNamespace: config.cache?.maxEntriesPerNamespace ?? config.cache?.maxEntriesPerEval,
1490
- maxEntriesByNamespace: config.cache?.maxEntriesByNamespace,
1490
+ maxEntriesPerNamespace: cacheRetentionOptions.maxEntriesPerNamespace,
1491
+ maxEntriesByNamespace: cacheRetentionOptions.maxEntriesByNamespace,
1491
1492
  lastAccessedAtUpdateIntervalMs: config.cache?.lastAccessedAtUpdateIntervalMs
1492
1493
  });
1493
1494
  await loadPersistedRuns();
@@ -2198,8 +2199,8 @@ async function commandApp(args) {
2198
2199
  const { serve } = await import("@hono/node-server");
2199
2200
  const bundledWebDist = resolve(currentDir, "apps/web/dist");
2200
2201
  if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
2201
- const appModule = await import("./app-ROCEce9X.mjs");
2202
- const runnerModule = await import("./runner-BlFQyvN2.mjs");
2202
+ const appModule = await import("./app-sGeXC4AT.mjs");
2203
+ const runnerModule = await import("./runner-Bz5ZPqmm.mjs");
2203
2204
  if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
2204
2205
  if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
2205
2206
  await runnerModule.initRunner();
package/dist/index.d.mts CHANGED
@@ -2241,9 +2241,9 @@ type EvalFreshnessStatus = z$1.infer<typeof evalFreshnessStatusSchema>;
2241
2241
  * `best` selects the highest finite value and `worst` selects the lowest.
2242
2242
  */
2243
2243
  declare const evalStatAggregateSchema: z$1.ZodEnum<{
2244
- sum: "sum";
2245
2244
  min: "min";
2246
2245
  max: "max";
2246
+ sum: "sum";
2247
2247
  avg: "avg";
2248
2248
  best: "best";
2249
2249
  worst: "worst";
@@ -2273,9 +2273,9 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
2273
2273
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
2274
2274
  kind: z$1.ZodLiteral<"duration">;
2275
2275
  aggregate: z$1.ZodOptional<z$1.ZodEnum<{
2276
- sum: "sum";
2277
2276
  min: "min";
2278
2277
  max: "max";
2278
+ sum: "sum";
2279
2279
  avg: "avg";
2280
2280
  best: "best";
2281
2281
  worst: "worst";
@@ -2284,9 +2284,9 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
2284
2284
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
2285
2285
  kind: z$1.ZodLiteral<"cacheHits">;
2286
2286
  aggregate: z$1.ZodOptional<z$1.ZodEnum<{
2287
- sum: "sum";
2288
2287
  min: "min";
2289
2288
  max: "max";
2289
+ sum: "sum";
2290
2290
  avg: "avg";
2291
2291
  best: "best";
2292
2292
  worst: "worst";
@@ -2297,9 +2297,9 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
2297
2297
  key: z$1.ZodString;
2298
2298
  label: z$1.ZodOptional<z$1.ZodString>;
2299
2299
  aggregate: z$1.ZodEnum<{
2300
- sum: "sum";
2301
2300
  min: "min";
2302
2301
  max: "max";
2302
+ sum: "sum";
2303
2303
  avg: "avg";
2304
2304
  best: "best";
2305
2305
  worst: "worst";
@@ -2337,9 +2337,9 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
2337
2337
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
2338
2338
  kind: z$1.ZodLiteral<"duration">;
2339
2339
  aggregate: z$1.ZodOptional<z$1.ZodEnum<{
2340
- sum: "sum";
2341
2340
  min: "min";
2342
2341
  max: "max";
2342
+ sum: "sum";
2343
2343
  avg: "avg";
2344
2344
  best: "best";
2345
2345
  worst: "worst";
@@ -2348,9 +2348,9 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
2348
2348
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
2349
2349
  kind: z$1.ZodLiteral<"cacheHits">;
2350
2350
  aggregate: z$1.ZodOptional<z$1.ZodEnum<{
2351
- sum: "sum";
2352
2351
  min: "min";
2353
2352
  max: "max";
2353
+ sum: "sum";
2354
2354
  avg: "avg";
2355
2355
  best: "best";
2356
2356
  worst: "worst";
@@ -2361,9 +2361,9 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
2361
2361
  key: z$1.ZodString;
2362
2362
  label: z$1.ZodOptional<z$1.ZodString>;
2363
2363
  aggregate: z$1.ZodEnum<{
2364
- sum: "sum";
2365
2364
  min: "min";
2366
2365
  max: "max";
2366
+ sum: "sum";
2367
2367
  avg: "avg";
2368
2368
  best: "best";
2369
2369
  worst: "worst";
@@ -2464,9 +2464,9 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2464
2464
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
2465
2465
  kind: z$1.ZodLiteral<"duration">;
2466
2466
  aggregate: z$1.ZodOptional<z$1.ZodEnum<{
2467
- sum: "sum";
2468
2467
  min: "min";
2469
2468
  max: "max";
2469
+ sum: "sum";
2470
2470
  avg: "avg";
2471
2471
  best: "best";
2472
2472
  worst: "worst";
@@ -2475,9 +2475,9 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2475
2475
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
2476
2476
  kind: z$1.ZodLiteral<"cacheHits">;
2477
2477
  aggregate: z$1.ZodOptional<z$1.ZodEnum<{
2478
- sum: "sum";
2479
2478
  min: "min";
2480
2479
  max: "max";
2480
+ sum: "sum";
2481
2481
  avg: "avg";
2482
2482
  best: "best";
2483
2483
  worst: "worst";
@@ -2488,9 +2488,9 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2488
2488
  key: z$1.ZodString;
2489
2489
  label: z$1.ZodOptional<z$1.ZodString>;
2490
2490
  aggregate: z$1.ZodEnum<{
2491
- sum: "sum";
2492
2491
  min: "min";
2493
2492
  max: "max";
2493
+ sum: "sum";
2494
2494
  avg: "avg";
2495
2495
  best: "best";
2496
2496
  worst: "worst";
@@ -2515,9 +2515,9 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2515
2515
  accent: z$1.ZodOptional<z$1.ZodBoolean>;
2516
2516
  }, z$1.core.$strip>], "kind">>>;
2517
2517
  defaultStatAggregate: z$1.ZodOptional<z$1.ZodEnum<{
2518
- sum: "sum";
2519
2518
  min: "min";
2520
2519
  max: "max";
2520
+ sum: "sum";
2521
2521
  avg: "avg";
2522
2522
  best: "best";
2523
2523
  worst: "worst";
@@ -2534,15 +2534,15 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2534
2534
  metrics: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
2535
2535
  source: z$1.ZodLiteral<"builtin">;
2536
2536
  metric: z$1.ZodEnum<{
2537
- passRate: "passRate";
2538
2537
  durationMs: "durationMs";
2538
+ passRate: "passRate";
2539
2539
  }>;
2540
2540
  label: z$1.ZodOptional<z$1.ZodString>;
2541
2541
  color: z$1.ZodOptional<z$1.ZodEnum<{
2542
2542
  error: "error";
2543
2543
  success: "success";
2544
- warning: "warning";
2545
2544
  accent: "accent";
2545
+ warning: "warning";
2546
2546
  accentDim: "accentDim";
2547
2547
  textMuted: "textMuted";
2548
2548
  }>>;
@@ -2554,9 +2554,9 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2554
2554
  source: z$1.ZodLiteral<"column">;
2555
2555
  key: z$1.ZodString;
2556
2556
  aggregate: z$1.ZodEnum<{
2557
- sum: "sum";
2558
2557
  min: "min";
2559
2558
  max: "max";
2559
+ sum: "sum";
2560
2560
  avg: "avg";
2561
2561
  latest: "latest";
2562
2562
  passThresholdRate: "passThresholdRate";
@@ -2565,8 +2565,8 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2565
2565
  color: z$1.ZodOptional<z$1.ZodEnum<{
2566
2566
  error: "error";
2567
2567
  success: "success";
2568
- warning: "warning";
2569
2568
  accent: "accent";
2569
+ warning: "warning";
2570
2570
  accentDim: "accentDim";
2571
2571
  textMuted: "textMuted";
2572
2572
  }>>;
@@ -2588,17 +2588,17 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2588
2588
  tooltipExtras: z$1.ZodOptional<z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
2589
2589
  source: z$1.ZodLiteral<"builtin">;
2590
2590
  metric: z$1.ZodEnum<{
2591
- passRate: "passRate";
2592
2591
  durationMs: "durationMs";
2592
+ passRate: "passRate";
2593
2593
  }>;
2594
2594
  label: z$1.ZodOptional<z$1.ZodString>;
2595
2595
  }, z$1.core.$strip>, z$1.ZodObject<{
2596
2596
  source: z$1.ZodLiteral<"column">;
2597
2597
  key: z$1.ZodString;
2598
2598
  aggregate: z$1.ZodEnum<{
2599
- sum: "sum";
2600
2599
  min: "min";
2601
2600
  max: "max";
2601
+ sum: "sum";
2602
2602
  avg: "avg";
2603
2603
  latest: "latest";
2604
2604
  passThresholdRate: "passThresholdRate";
@@ -2779,9 +2779,9 @@ declare const runLogLevelSchema: z$1.ZodEnum<{
2779
2779
  type RunLogLevel = z$1.infer<typeof runLogLevelSchema>;
2780
2780
  /** Eval runner phase that emitted a captured case log. */
2781
2781
  declare const runLogPhaseSchema: z$1.ZodEnum<{
2782
+ tracingAssertions: "tracingAssertions";
2782
2783
  eval: "eval";
2783
2784
  derive: "derive";
2784
- tracingAssertions: "tracingAssertions";
2785
2785
  outputsSchema: "outputsSchema";
2786
2786
  scorer: "scorer";
2787
2787
  }>;
@@ -2806,9 +2806,9 @@ declare const runLogEntrySchema: z$1.ZodObject<{
2806
2806
  warn: "warn";
2807
2807
  }>;
2808
2808
  phase: z$1.ZodEnum<{
2809
+ tracingAssertions: "tracingAssertions";
2809
2810
  eval: "eval";
2810
2811
  derive: "derive";
2811
- tracingAssertions: "tracingAssertions";
2812
2812
  outputsSchema: "outputsSchema";
2813
2813
  scorer: "scorer";
2814
2814
  }>;
@@ -3165,9 +3165,9 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
3165
3165
  warn: "warn";
3166
3166
  }>;
3167
3167
  phase: z$1.ZodEnum<{
3168
+ tracingAssertions: "tracingAssertions";
3168
3169
  eval: "eval";
3169
3170
  derive: "derive";
3170
- tracingAssertions: "tracingAssertions";
3171
3171
  outputsSchema: "outputsSchema";
3172
3172
  scorer: "scorer";
3173
3173
  }>;
@@ -3240,8 +3240,8 @@ type EvalChartType = z$1.infer<typeof evalChartTypeSchema>;
3240
3240
  * than from a per-case column.
3241
3241
  */
3242
3242
  declare const evalChartBuiltinMetricSchema: z$1.ZodEnum<{
3243
- passRate: "passRate";
3244
3243
  durationMs: "durationMs";
3244
+ passRate: "passRate";
3245
3245
  }>;
3246
3246
  /**
3247
3247
  * Run-level metric sourced from the aggregated `RunSummary` for a run, rather
@@ -3250,9 +3250,9 @@ declare const evalChartBuiltinMetricSchema: z$1.ZodEnum<{
3250
3250
  type EvalChartBuiltinMetric = z$1.infer<typeof evalChartBuiltinMetricSchema>;
3251
3251
  /** Reducer applied to a numeric column across all cases of a single run. */
3252
3252
  declare const evalChartAggregateSchema: z$1.ZodEnum<{
3253
- sum: "sum";
3254
3253
  min: "min";
3255
3254
  max: "max";
3255
+ sum: "sum";
3256
3256
  avg: "avg";
3257
3257
  latest: "latest";
3258
3258
  passThresholdRate: "passThresholdRate";
@@ -3266,8 +3266,8 @@ type EvalChartAggregate = z$1.infer<typeof evalChartAggregateSchema>;
3266
3266
  declare const evalChartColorSchema: z$1.ZodEnum<{
3267
3267
  error: "error";
3268
3268
  success: "success";
3269
- warning: "warning";
3270
3269
  accent: "accent";
3270
+ warning: "warning";
3271
3271
  accentDim: "accentDim";
3272
3272
  textMuted: "textMuted";
3273
3273
  }>;
@@ -3288,15 +3288,15 @@ type EvalChartAxis = z$1.infer<typeof evalChartAxisSchema>;
3288
3288
  declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
3289
3289
  source: z$1.ZodLiteral<"builtin">;
3290
3290
  metric: z$1.ZodEnum<{
3291
- passRate: "passRate";
3292
3291
  durationMs: "durationMs";
3292
+ passRate: "passRate";
3293
3293
  }>;
3294
3294
  label: z$1.ZodOptional<z$1.ZodString>;
3295
3295
  color: z$1.ZodOptional<z$1.ZodEnum<{
3296
3296
  error: "error";
3297
3297
  success: "success";
3298
- warning: "warning";
3299
3298
  accent: "accent";
3299
+ warning: "warning";
3300
3300
  accentDim: "accentDim";
3301
3301
  textMuted: "textMuted";
3302
3302
  }>>;
@@ -3308,9 +3308,9 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
3308
3308
  source: z$1.ZodLiteral<"column">;
3309
3309
  key: z$1.ZodString;
3310
3310
  aggregate: z$1.ZodEnum<{
3311
- sum: "sum";
3312
3311
  min: "min";
3313
3312
  max: "max";
3313
+ sum: "sum";
3314
3314
  avg: "avg";
3315
3315
  latest: "latest";
3316
3316
  passThresholdRate: "passThresholdRate";
@@ -3319,8 +3319,8 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
3319
3319
  color: z$1.ZodOptional<z$1.ZodEnum<{
3320
3320
  error: "error";
3321
3321
  success: "success";
3322
- warning: "warning";
3323
3322
  accent: "accent";
3323
+ warning: "warning";
3324
3324
  accentDim: "accentDim";
3325
3325
  textMuted: "textMuted";
3326
3326
  }>>;
@@ -3335,17 +3335,17 @@ type EvalChartMetric = z$1.infer<typeof evalChartMetricSchema>;
3335
3335
  declare const evalChartTooltipExtraSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
3336
3336
  source: z$1.ZodLiteral<"builtin">;
3337
3337
  metric: z$1.ZodEnum<{
3338
- passRate: "passRate";
3339
3338
  durationMs: "durationMs";
3339
+ passRate: "passRate";
3340
3340
  }>;
3341
3341
  label: z$1.ZodOptional<z$1.ZodString>;
3342
3342
  }, z$1.core.$strip>, z$1.ZodObject<{
3343
3343
  source: z$1.ZodLiteral<"column">;
3344
3344
  key: z$1.ZodString;
3345
3345
  aggregate: z$1.ZodEnum<{
3346
- sum: "sum";
3347
3346
  min: "min";
3348
3347
  max: "max";
3348
+ sum: "sum";
3349
3349
  avg: "avg";
3350
3350
  latest: "latest";
3351
3351
  passThresholdRate: "passThresholdRate";
@@ -3371,15 +3371,15 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
3371
3371
  metrics: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
3372
3372
  source: z$1.ZodLiteral<"builtin">;
3373
3373
  metric: z$1.ZodEnum<{
3374
- passRate: "passRate";
3375
3374
  durationMs: "durationMs";
3375
+ passRate: "passRate";
3376
3376
  }>;
3377
3377
  label: z$1.ZodOptional<z$1.ZodString>;
3378
3378
  color: z$1.ZodOptional<z$1.ZodEnum<{
3379
3379
  error: "error";
3380
3380
  success: "success";
3381
- warning: "warning";
3382
3381
  accent: "accent";
3382
+ warning: "warning";
3383
3383
  accentDim: "accentDim";
3384
3384
  textMuted: "textMuted";
3385
3385
  }>>;
@@ -3391,9 +3391,9 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
3391
3391
  source: z$1.ZodLiteral<"column">;
3392
3392
  key: z$1.ZodString;
3393
3393
  aggregate: z$1.ZodEnum<{
3394
- sum: "sum";
3395
3394
  min: "min";
3396
3395
  max: "max";
3396
+ sum: "sum";
3397
3397
  avg: "avg";
3398
3398
  latest: "latest";
3399
3399
  passThresholdRate: "passThresholdRate";
@@ -3402,8 +3402,8 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
3402
3402
  color: z$1.ZodOptional<z$1.ZodEnum<{
3403
3403
  error: "error";
3404
3404
  success: "success";
3405
- warning: "warning";
3406
3405
  accent: "accent";
3406
+ warning: "warning";
3407
3407
  accentDim: "accentDim";
3408
3408
  textMuted: "textMuted";
3409
3409
  }>>;
@@ -3425,17 +3425,17 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
3425
3425
  tooltipExtras: z$1.ZodOptional<z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
3426
3426
  source: z$1.ZodLiteral<"builtin">;
3427
3427
  metric: z$1.ZodEnum<{
3428
- passRate: "passRate";
3429
3428
  durationMs: "durationMs";
3429
+ passRate: "passRate";
3430
3430
  }>;
3431
3431
  label: z$1.ZodOptional<z$1.ZodString>;
3432
3432
  }, z$1.core.$strip>, z$1.ZodObject<{
3433
3433
  source: z$1.ZodLiteral<"column">;
3434
3434
  key: z$1.ZodString;
3435
3435
  aggregate: z$1.ZodEnum<{
3436
- sum: "sum";
3437
3436
  min: "min";
3438
3437
  max: "max";
3438
+ sum: "sum";
3439
3439
  avg: "avg";
3440
3440
  latest: "latest";
3441
3441
  passThresholdRate: "passThresholdRate";
@@ -3461,15 +3461,15 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
3461
3461
  metrics: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
3462
3462
  source: z$1.ZodLiteral<"builtin">;
3463
3463
  metric: z$1.ZodEnum<{
3464
- passRate: "passRate";
3465
3464
  durationMs: "durationMs";
3465
+ passRate: "passRate";
3466
3466
  }>;
3467
3467
  label: z$1.ZodOptional<z$1.ZodString>;
3468
3468
  color: z$1.ZodOptional<z$1.ZodEnum<{
3469
3469
  error: "error";
3470
3470
  success: "success";
3471
- warning: "warning";
3472
3471
  accent: "accent";
3472
+ warning: "warning";
3473
3473
  accentDim: "accentDim";
3474
3474
  textMuted: "textMuted";
3475
3475
  }>>;
@@ -3481,9 +3481,9 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
3481
3481
  source: z$1.ZodLiteral<"column">;
3482
3482
  key: z$1.ZodString;
3483
3483
  aggregate: z$1.ZodEnum<{
3484
- sum: "sum";
3485
3484
  min: "min";
3486
3485
  max: "max";
3486
+ sum: "sum";
3487
3487
  avg: "avg";
3488
3488
  latest: "latest";
3489
3489
  passThresholdRate: "passThresholdRate";
@@ -3492,8 +3492,8 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
3492
3492
  color: z$1.ZodOptional<z$1.ZodEnum<{
3493
3493
  error: "error";
3494
3494
  success: "success";
3495
- warning: "warning";
3496
3495
  accent: "accent";
3496
+ warning: "warning";
3497
3497
  accentDim: "accentDim";
3498
3498
  textMuted: "textMuted";
3499
3499
  }>>;
@@ -3515,17 +3515,17 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
3515
3515
  tooltipExtras: z$1.ZodOptional<z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
3516
3516
  source: z$1.ZodLiteral<"builtin">;
3517
3517
  metric: z$1.ZodEnum<{
3518
- passRate: "passRate";
3519
3518
  durationMs: "durationMs";
3519
+ passRate: "passRate";
3520
3520
  }>;
3521
3521
  label: z$1.ZodOptional<z$1.ZodString>;
3522
3522
  }, z$1.core.$strip>, z$1.ZodObject<{
3523
3523
  source: z$1.ZodLiteral<"column">;
3524
3524
  key: z$1.ZodString;
3525
3525
  aggregate: z$1.ZodEnum<{
3526
- sum: "sum";
3527
3526
  min: "min";
3528
3527
  max: "max";
3528
+ sum: "sum";
3529
3529
  avg: "avg";
3530
3530
  latest: "latest";
3531
3531
  passThresholdRate: "passThresholdRate";
@@ -3668,8 +3668,8 @@ type TrialSelectionMode = z$1.infer<typeof trialSelectionModeSchema>;
3668
3668
  /** Built-in eval-level output/column keys. */
3669
3669
  /** Removal config for built-in eval-level outputs and UI metadata. */
3670
3670
  declare const removeDefaultConfigSchema: z$1.ZodUnion<readonly [z$1.ZodLiteral<true>, z$1.ZodArray<z$1.ZodEnum<{
3671
- costUsd: "costUsd";
3672
3671
  apiCalls: "apiCalls";
3672
+ costUsd: "costUsd";
3673
3673
  llmTurns: "llmTurns";
3674
3674
  inputTokens: "inputTokens";
3675
3675
  outputTokens: "outputTokens";
@@ -4290,15 +4290,26 @@ type AgentEvalsConfig$1 = {
4290
4290
  /** Disable the cache entirely; spans with `cache` options execute as if uncached. */enabled?: boolean; /** Override the directory used to persist cache entries. */
4291
4291
  dir?: string;
4292
4292
  /**
4293
- * Default maximum entries retained for each cache namespace. Defaults to
4294
- * `100`; non-positive or non-finite values fall back to the default.
4293
+ * Maximum entries retained per cache namespace.
4294
+ *
4295
+ * Pass a number to set the default cap for every namespace. Pass an object
4296
+ * to set a default cap plus exact namespace-specific caps. Non-positive or
4297
+ * non-finite values fall back to the default.
4298
+ *
4299
+ * @example
4300
+ * ```ts
4301
+ * cache: {
4302
+ * maxEntries: {
4303
+ * default: 50,
4304
+ * namespaces: { 'receipt-audit.receipt-audit-context': 200 },
4305
+ * },
4306
+ * }
4307
+ * ```
4295
4308
  */
4296
- maxEntriesPerNamespace?: number;
4297
- /**
4298
- * Exact namespace-specific retention caps. Values override
4299
- * `maxEntriesPerNamespace` for matching namespaces.
4300
- */
4301
- maxEntriesByNamespace?: Record<string, number>;
4309
+ maxEntries?: number | {
4310
+ default?: number;
4311
+ namespaces?: Record<string, number>;
4312
+ };
4302
4313
  /**
4303
4314
  * Milliseconds the runner waits after becoming idle before pruning indexed
4304
4315
  * cache entries. Defaults to `5000`; non-positive or non-finite values use
@@ -4309,8 +4320,7 @@ type AgentEvalsConfig$1 = {
4309
4320
  * Minimum milliseconds between `lastAccessedAt` index rewrites for repeated
4310
4321
  * cache hits. Defaults to four hours. Set to `0` to record every hit.
4311
4322
  */
4312
- lastAccessedAtUpdateIntervalMs?: number; /** Legacy alias for `maxEntriesPerNamespace`, retained so older config files keep working. */
4313
- maxEntriesPerEval?: number;
4323
+ lastAccessedAtUpdateIntervalMs?: number;
4314
4324
  };
4315
4325
  };
4316
4326
  /** Zod schema for validating `agent-evals.config.ts` input. */
package/dist/index.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { $ as setScopeCacheContext, A as repoFile, B as evalTime, C as evalTracer, D as deserializeCacheValue, E as deserializeCacheRecording, F as EvalRuntimeUsageError, Ft as getEvalRegistry, H as getEvalCaseInput, I as appendToEvalOutput, J as runInEvalRuntimeScope, K as mergeEvalOutput, M as readManualInputFile, N as evalExpect, O as serializeCacheRecording, P as EvalAssertionError, Q as setEvalOutput, R as evalAssert, S as evalSpan, T as hashCacheKeySync, U as incrementEvalOutput, V as getCurrentScope, W as isInEvalScope, X as runInExistingEvalScope, Y as runInEvalScope, at as extractApiCalls, b as buildTraceTree, ct as simulateTokenAllocation, et as startEvalBackgroundJob, it as extractCacheHits, j as manualInputFileValueSchema, k as serializeCacheValue, ot as extractLlmCalls, q as nextEvalId, rt as extractCacheEntries, st as simulateLlmCallCost, ut as getNestedAttribute, w as hashCacheKey, x as captureEvalSpanError, y as z, z as evalLog } from "./runExecution-CFw0MQFs.mjs";
2
- import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-SP4kEtYL.mjs";
3
- import { n as matchesEvalTags, t as defineEval } from "./src-7GbQj1sb.mjs";
1
+ import { $ as setEvalOutput, A as serializeCacheValue, B as evalLog, C as evalSpan, D as deserializeCacheRecording, E as hashCacheKeySync, F as EvalAssertionError, G as isInEvalScope, H as getCurrentScope, I as EvalRuntimeUsageError, It as getEvalRegistry, J as nextEvalId, L as appendToEvalOutput, M as manualInputFileValueSchema, N as readManualInputFile, O as deserializeCacheValue, P as evalExpect, S as captureEvalSpanError, T as hashCacheKey, U as getEvalCaseInput, V as evalTime, W as incrementEvalOutput, X as runInEvalScope, Y as runInEvalRuntimeScope, Z as runInExistingEvalScope, at as extractCacheHits, b as z, ct as simulateLlmCallCost, dt as getNestedAttribute, et as setScopeCacheContext, it as extractCacheEntries, j as repoFile, k as serializeCacheRecording, lt as simulateTokenAllocation, ot as extractApiCalls, q as mergeEvalOutput, st as extractLlmCalls, tt as startEvalBackgroundJob, w as evalTracer, x as buildTraceTree, z as evalAssert } from "./runExecution-CLkC-4Z1.mjs";
2
+ import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-Bf5RzM8O.mjs";
3
+ import { n as matchesEvalTags, t as defineEval } from "./src-BjMMDm_O.mjs";
4
4
  export { EvalAssertionError, EvalRuntimeUsageError, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, z };
package/dist/runChild.mjs CHANGED
@@ -1,5 +1,5 @@
1
- import { At as evalStatsConfigSchema, L as configureEvalRunLogs, Mt as evalChartsConfigSchema, Nt as columnDefSchema, Tt as buildEvalKey, bt as runManifestSchema, jt as manualInputDescriptorSchema, kt as evalStatAggregateSchema, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, tt as createRunRequestSchema, v as createFsCacheStore, xt as runSummarySchema } from "./runExecution-CFw0MQFs.mjs";
2
- import { S as parseEvalDiscovery, m as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-CxjiQmof.mjs";
1
+ import { At as evalStatAggregateSchema, Et as buildEvalKey, Mt as manualInputDescriptorSchema, Nt as evalChartsConfigSchema, Pt as columnDefSchema, R as configureEvalRunLogs, St as runSummarySchema, jt as evalStatsConfigSchema, l as registerAgentEvalsPackageResolutionHooks, nt as createRunRequestSchema, p as loadConfig, v as createFsCacheStore, xt as runManifestSchema, y as getCacheRetentionOptions } from "./runExecution-CLkC-4Z1.mjs";
2
+ import { S as parseEvalDiscovery, m as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-BS-WxTee.mjs";
3
3
  import { z } from "zod/v4";
4
4
  import { readFile } from "node:fs/promises";
5
5
  import { relative } from "node:path";
@@ -137,11 +137,12 @@ async function main() {
137
137
  registerAgentEvalsPackageResolutionHooks();
138
138
  const config = await loadConfig();
139
139
  configureEvalRunLogs({ captureConsole: config.runLogs?.captureConsole !== false });
140
+ const cacheRetentionOptions = getCacheRetentionOptions(config.cache);
140
141
  const cacheStore = createFsCacheStore({
141
142
  workspaceRoot: context.workspaceRoot,
142
143
  dir: config.cache?.dir,
143
- maxEntriesPerNamespace: config.cache?.maxEntriesPerNamespace ?? config.cache?.maxEntriesPerEval,
144
- maxEntriesByNamespace: config.cache?.maxEntriesByNamespace,
144
+ maxEntriesPerNamespace: cacheRetentionOptions.maxEntriesPerNamespace,
145
+ maxEntriesByNamespace: cacheRetentionOptions.maxEntriesByNamespace,
145
146
  lastAccessedAtUpdateIntervalMs: config.cache?.lastAccessedAtUpdateIntervalMs
146
147
  });
147
148
  const evalMetas = await discoverRunEvals({
@@ -1401,6 +1401,10 @@ function resolveApiCallsConfig(input) {
1401
1401
  metrics: (input?.metrics ?? []).map(resolveApiCallMetric)
1402
1402
  };
1403
1403
  }
1404
+ const cacheMaxEntriesSchema = z.union([z.number(), z.object({
1405
+ default: z.number().optional(),
1406
+ namespaces: z.record(z.string(), z.number()).optional()
1407
+ })]).optional();
1404
1408
  /** Zod schema for validating `agent-evals.config.ts` input. */
1405
1409
  const agentEvalsConfigSchema = z.object({
1406
1410
  workspaceRoot: z.string().optional(),
@@ -1424,11 +1428,26 @@ const agentEvalsConfigSchema = z.object({
1424
1428
  cache: z.object({
1425
1429
  enabled: z.boolean().optional(),
1426
1430
  dir: z.string().optional(),
1427
- maxEntriesPerNamespace: z.preprocess((value) => typeof value === "number" && Number.isFinite(value) ? value : void 0, z.number().optional()),
1431
+ maxEntries: cacheMaxEntriesSchema,
1432
+ maxEntriesPerNamespace: z.number().optional(),
1428
1433
  maxEntriesByNamespace: z.record(z.string(), z.number()).optional(),
1429
1434
  pruneIdleDelayMs: z.preprocess((value) => typeof value === "number" && Number.isFinite(value) ? value : void 0, z.number().optional()),
1430
1435
  lastAccessedAtUpdateIntervalMs: z.preprocess((value) => typeof value === "number" && Number.isFinite(value) ? value : void 0, z.number().optional()),
1431
- maxEntriesPerEval: z.preprocess((value) => typeof value === "number" && Number.isFinite(value) ? value : void 0, z.number().optional())
1436
+ maxEntriesPerEval: z.number().optional()
1437
+ }).transform(({ maxEntries, maxEntriesByNamespace, maxEntriesPerEval, maxEntriesPerNamespace, ...cache }) => {
1438
+ const defaultMaxEntries = maxEntriesPerNamespace ?? maxEntriesPerEval;
1439
+ if (maxEntries !== void 0) return {
1440
+ ...cache,
1441
+ maxEntries
1442
+ };
1443
+ if (defaultMaxEntries !== void 0 || maxEntriesByNamespace !== void 0) return {
1444
+ ...cache,
1445
+ maxEntries: {
1446
+ default: defaultMaxEntries,
1447
+ namespaces: maxEntriesByNamespace
1448
+ }
1449
+ };
1450
+ return cache;
1432
1451
  }).optional()
1433
1452
  });
1434
1453
  //#endregion
@@ -5078,6 +5097,19 @@ function buildTraceTree(spans, checkpoints) {
5078
5097
  };
5079
5098
  }
5080
5099
  //#endregion
5100
+ //#region ../runner/src/cacheConfig.ts
5101
+ function getCacheRetentionOptions(cacheConfig) {
5102
+ const maxEntries = cacheConfig?.maxEntries;
5103
+ if (typeof maxEntries === "number") return {
5104
+ maxEntriesPerNamespace: maxEntries,
5105
+ maxEntriesByNamespace: void 0
5106
+ };
5107
+ return {
5108
+ maxEntriesPerNamespace: maxEntries?.default,
5109
+ maxEntriesByNamespace: maxEntries?.namespaces
5110
+ };
5111
+ }
5112
+ //#endregion
5081
5113
  //#region ../runner/src/cacheAccessTime.ts
5082
5114
  const defaultLastAccessedAtUpdateIntervalMs = 14400 * 1e3;
5083
5115
  function normalizeLastAccessedAtUpdateIntervalMs(value) {
@@ -7026,4 +7058,4 @@ function recordAssertionFailure(scope, failure) {
7026
7058
  });
7027
7059
  }
7028
7060
  //#endregion
7029
- export { setScopeCacheContext as $, repoFile as A, evalStatsConfigSchema as At, evalTime as B, evalTracer as C, resolveLlmCallsConfig as Ct, deserializeCacheValue as D, caseDetailSchema as Dt, deserializeCacheRecording as E, getCaseRowCaseKey as Et, EvalRuntimeUsageError as F, getEvalRegistry as Ft, matchesEvalTags as G, getEvalCaseInput as H, appendToEvalOutput as I, runWithEvalRegistry as It, runInEvalRuntimeScope as J, mergeEvalOutput as K, configureEvalRunLogs as L, readManualInputFile as M, evalChartsConfigSchema as Mt, evalExpect as N, columnDefSchema as Nt, serializeCacheRecording as O, caseRowSchema as Ot, EvalAssertionError as P, defineEval as Pt, setEvalOutput as Q, evalAssert as R, evalSpan as S, resolveApiCallsConfig as St, hashCacheKeySync as T, buildEvalKey as Tt, incrementEvalOutput as U, getCurrentScope as V, isInEvalScope as W, runInExistingEvalScope as X, runInEvalScope as Y, runWithEvalClock as Z, createBufferedCacheStore as _, matchesTagsFilter as _t, isCaseChildParentMessage as a, extractApiCalls as at, buildTraceTree as b, runManifestSchema as bt, resolveArtifactPath as c, simulateTokenAllocation as ct, loadEvalModule as d, getEvalTitle as dt, startEvalBackgroundJob as et, resolveEvalDefaultConfig as f, getEvalDisplayStatus as ft, commitPendingCacheWrites as g, dedupeEvalTags as gt, normalizeScoreDef as h, deriveStatusFromChildStatuses as ht, isCaseChildMessage as i, extractCacheHits as it, manualInputFileValueSchema as j, manualInputDescriptorSchema as jt, serializeCacheValue as k, evalStatAggregateSchema as kt, registerAgentEvalsPackageResolutionHooks as l, applyDerivedCallAttributes as lt, buildDeclaredColumnDefs as m, deriveStatusFromCaseRows as mt, resolveRunnableEvalCases as n, updateManualScoreRequestSchema as nt, stripTerminalControlCodes as o, extractLlmCalls as ot, loadConfig as p, deriveScopedSummaryFromCases as pt, nextEvalId as q, runCase as r, extractCacheEntries as rt, resolveTracePresentation as s, simulateLlmCallCost as st, filterEvalCases as t, createRunRequestSchema as tt, runWithModuleIsolation as u, getNestedAttribute as ut, createFsCacheStore as v, validateEvalTagName as vt, hashCacheKey as w, buildCaseKey as wt, captureEvalSpanError as x, runSummarySchema as xt, z$1 as y, validateTagsFilterExpression as yt, evalLog as z };
7061
+ export { setEvalOutput as $, serializeCacheValue as A, evalStatAggregateSchema as At, evalLog as B, evalSpan as C, resolveApiCallsConfig as Ct, deserializeCacheRecording as D, getCaseRowCaseKey as Dt, hashCacheKeySync as E, buildEvalKey as Et, EvalAssertionError as F, defineEval as Ft, isInEvalScope as G, getCurrentScope as H, EvalRuntimeUsageError as I, getEvalRegistry as It, nextEvalId as J, matchesEvalTags as K, appendToEvalOutput as L, runWithEvalRegistry as Lt, manualInputFileValueSchema as M, manualInputDescriptorSchema as Mt, readManualInputFile as N, evalChartsConfigSchema as Nt, deserializeCacheValue as O, caseDetailSchema as Ot, evalExpect as P, columnDefSchema as Pt, runWithEvalClock as Q, configureEvalRunLogs as R, captureEvalSpanError as S, runSummarySchema as St, hashCacheKey as T, buildCaseKey as Tt, getEvalCaseInput as U, evalTime as V, incrementEvalOutput as W, runInEvalScope as X, runInEvalRuntimeScope as Y, runInExistingEvalScope as Z, createBufferedCacheStore as _, dedupeEvalTags as _t, isCaseChildParentMessage as a, extractCacheHits as at, z$1 as b, validateTagsFilterExpression as bt, resolveArtifactPath as c, simulateLlmCallCost as ct, loadEvalModule as d, getNestedAttribute as dt, setScopeCacheContext as et, resolveEvalDefaultConfig as f, getEvalTitle as ft, commitPendingCacheWrites as g, deriveStatusFromChildStatuses as gt, normalizeScoreDef as h, deriveStatusFromCaseRows as ht, isCaseChildMessage as i, extractCacheEntries as it, repoFile as j, evalStatsConfigSchema as jt, serializeCacheRecording as k, caseRowSchema as kt, registerAgentEvalsPackageResolutionHooks as l, simulateTokenAllocation as lt, buildDeclaredColumnDefs as m, deriveScopedSummaryFromCases as mt, resolveRunnableEvalCases as n, createRunRequestSchema as nt, stripTerminalControlCodes as o, extractApiCalls as ot, loadConfig as p, getEvalDisplayStatus as pt, mergeEvalOutput as q, runCase as r, updateManualScoreRequestSchema as rt, resolveTracePresentation as s, extractLlmCalls as st, filterEvalCases as t, startEvalBackgroundJob as tt, runWithModuleIsolation as u, applyDerivedCallAttributes as ut, createFsCacheStore as v, matchesTagsFilter as vt, evalTracer as w, resolveLlmCallsConfig as wt, buildTraceTree as x, runManifestSchema as xt, getCacheRetentionOptions as y, validateEvalTagName as yt, evalAssert as z };