@ls-stack/agent-eval 0.55.1 → 0.55.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,7 +25,7 @@
25
25
  href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600&family=JetBrains+Mono:wght@400;500&display=swap"
26
26
  rel="stylesheet"
27
27
  />
28
- <script type="module" crossorigin src="/assets/index-CvsPmlHl.js"></script>
28
+ <script type="module" crossorigin src="/assets/index-C7QjETk8.js"></script>
29
29
  <link rel="stylesheet" crossorigin href="/assets/index-2I-eWzVL.css">
30
30
  </head>
31
31
  <body>
package/dist/bin.mjs CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env node
2
- import { t as runCli } from "./cli-BR3wMZMx.mjs";
2
+ import { t as runCli } from "./cli-Bu9347r1.mjs";
3
3
  import { spawn } from "node:child_process";
4
4
  //#region src/bin.ts
5
5
  const moduleMocksFlag = "--experimental-test-module-mocks";
@@ -1,4 +1,4 @@
1
- import { I as configureEvalRunLogs, Pt as runWithEvalRegistry, St as resolveLlmCallsConfig, _ as createBufferedCacheStore, a as isCaseChildParentMessage, d as loadEvalModule, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, q as runInEvalRuntimeScope, r as runCase, v as createFsCacheStore, xt as resolveApiCallsConfig } from "./runExecution-Sw38bCaq.mjs";
1
+ import { I as configureEvalRunLogs, Pt as runWithEvalRegistry, St as resolveLlmCallsConfig, _ as createBufferedCacheStore, a as isCaseChildParentMessage, d as loadEvalModule, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, q as runInEvalRuntimeScope, r as runCase, v as createFsCacheStore, xt as resolveApiCallsConfig } from "./runExecution-C31dpemR.mjs";
2
2
  //#region ../runner/src/caseChild.ts
3
3
  let fatalErrorReported = false;
4
4
  let disconnectExpected = false;
@@ -1,5 +1,5 @@
1
- import { Ct as buildEvalKey, Nt as getEvalRegistry, St as resolveLlmCallsConfig, bt as runSummarySchema, c as resolveArtifactPath, ct as applyDerivedCallAttributes, dt as getEvalDisplayStatus, f as resolveEvalDefaultConfig, ft as deriveScopedSummaryFromCases, gt as matchesTagsFilter, h as normalizeScoreDef, m as buildDeclaredColumnDefs, o as stripTerminalControlCodes, p as loadConfig, s as resolveTracePresentation, ut as getEvalTitle, v as createFsCacheStore, wt as getCaseRowCaseKey, xt as resolveApiCallsConfig } from "./runExecution-Sw38bCaq.mjs";
2
- import { C as validateCharts, S as parseEvalDiscovery, _ as runTouchesEval, a as validateTagsFilters, b as deriveEvalFreshness, c as getLatestRunInfos, d as nextShortIdFromSnapshots, f as persistCaseDetail, g as recomputePersistedCaseStatus, h as recomputeEvalStatusesInRuns, i as resolveEvalTags, l as loadPersistedRunSnapshot, m as persistRunState, n as getTargetEvalKeys, o as generateRunId, p as deleteTemporaryRuns, s as getLastRunStatuses, u as loadPersistedRunSnapshots, v as buildManualInputDescriptor, x as loadIsolatedEvalRegistry, y as parseManualInputValues } from "./runOrchestration-DJsdLYeZ.mjs";
1
+ import { Ct as buildEvalKey, Nt as getEvalRegistry, St as resolveLlmCallsConfig, bt as runSummarySchema, c as resolveArtifactPath, ct as applyDerivedCallAttributes, dt as getEvalDisplayStatus, f as resolveEvalDefaultConfig, ft as deriveScopedSummaryFromCases, gt as matchesTagsFilter, h as normalizeScoreDef, m as buildDeclaredColumnDefs, o as stripTerminalControlCodes, p as loadConfig, s as resolveTracePresentation, ut as getEvalTitle, v as createFsCacheStore, wt as getCaseRowCaseKey, xt as resolveApiCallsConfig } from "./runExecution-C31dpemR.mjs";
2
+ import { C as validateCharts, S as parseEvalDiscovery, _ as runTouchesEval, a as validateTagsFilters, b as deriveEvalFreshness, c as getLatestRunInfos, d as nextShortIdFromSnapshots, f as persistCaseDetail, g as recomputePersistedCaseStatus, h as recomputeEvalStatusesInRuns, i as resolveEvalTags, l as loadPersistedRunSnapshot, m as persistRunState, n as getTargetEvalKeys, o as generateRunId, p as deleteTemporaryRuns, s as getLastRunStatuses, u as loadPersistedRunSnapshots, v as buildManualInputDescriptor, x as loadIsolatedEvalRegistry, y as parseManualInputValues } from "./runOrchestration-3RoHLW4U.mjs";
3
3
  import { copyFile, mkdir, readFile, rm, writeFile } from "node:fs/promises";
4
4
  import { basename, dirname, extname, isAbsolute, join, relative, resolve, sep } from "node:path";
5
5
  import { createHash, randomUUID } from "node:crypto";
@@ -794,6 +794,7 @@ async function markRunErrored(runState, message, managerContext) {
794
794
  payload: { message }
795
795
  });
796
796
  managerContext.emitDiscoveryEvent();
797
+ managerContext.onRunTerminal();
797
798
  }
798
799
  async function markRunTerminalFromChild(runState, event, managerContext) {
799
800
  const snapshot = await loadPersistedRunSnapshot(runState.runDir);
@@ -815,6 +816,7 @@ async function markRunTerminalFromChild(runState, event, managerContext) {
815
816
  }
816
817
  managerContext.emitEvent(runState, event);
817
818
  managerContext.emitDiscoveryEvent();
819
+ managerContext.onRunTerminal();
818
820
  }
819
821
  //#endregion
820
822
  //#region ../runner/src/runnerStateHydration.ts
@@ -883,6 +885,7 @@ function getWatchRootsForIncludePatterns(params) {
883
885
  }
884
886
  //#endregion
885
887
  //#region ../runner/src/runner.ts
888
+ const defaultCachePruneIdleDelayMs = 5e3;
886
889
  /** Create an in-memory eval runner bound to the current workspace config. */
887
890
  function createRunner({ watchForChanges = true } = {}) {
888
891
  let config;
@@ -903,6 +906,7 @@ function createRunner({ watchForChanges = true } = {}) {
903
906
  let runHistoryWatcher;
904
907
  let discoveryRefreshTimer;
905
908
  let runHistoryRefreshTimer;
909
+ let cachePruneIdleTimer;
906
910
  let registryLoadCounter = 0;
907
911
  const configReload = createConfigReloadController({
908
912
  getActiveRunCount,
@@ -942,6 +946,9 @@ function createRunner({ watchForChanges = true } = {}) {
942
946
  async clearCache(filter) {
943
947
  await cacheStore.clear(filter);
944
948
  },
949
+ async repairCache() {
950
+ return cacheStore.repair();
951
+ },
945
952
  async recomputeStatusesForEval(evalKey) {
946
953
  const evalMeta = resolveEvalMeta(evalKey);
947
954
  if (!evalMeta) return { updatedRuns: 0 };
@@ -1252,6 +1259,7 @@ function createRunner({ watchForChanges = true } = {}) {
1252
1259
  emitDiscoveryEvent();
1253
1260
  },
1254
1261
  async startRun(request) {
1262
+ cancelCacheRetentionPrune();
1255
1263
  const tagsFilterError = validateTagsFilters(request.target.tagsFilter);
1256
1264
  if (tagsFilterError !== null) throw new Error(tagsFilterError);
1257
1265
  const deletedTemporaryRuns = await deleteTemporaryRuns({
@@ -1349,7 +1357,8 @@ function createRunner({ watchForChanges = true } = {}) {
1349
1357
  workspaceRoot,
1350
1358
  evals,
1351
1359
  emitEvent,
1352
- emitDiscoveryEvent
1360
+ emitDiscoveryEvent,
1361
+ onRunTerminal: scheduleCacheRetentionPrune
1353
1362
  }
1354
1363
  });
1355
1364
  if (deletedTemporaryRuns > 0) emitDiscoveryEvent();
@@ -1398,6 +1407,7 @@ function createRunner({ watchForChanges = true } = {}) {
1398
1407
  payload: run.summary
1399
1408
  });
1400
1409
  emitDiscoveryEvent();
1410
+ scheduleCacheRetentionPrune();
1401
1411
  },
1402
1412
  getCaseDetail(runId, caseId) {
1403
1413
  const run = runs.get(runId);
@@ -1419,6 +1429,7 @@ function createRunner({ watchForChanges = true } = {}) {
1419
1429
  };
1420
1430
  },
1421
1431
  async close() {
1432
+ cancelCacheRetentionPrune();
1422
1433
  await Promise.all([closeWatchers(), configReload.close()]);
1423
1434
  },
1424
1435
  getWorkspaceRoot() {
@@ -1522,6 +1533,28 @@ function createRunner({ watchForChanges = true } = {}) {
1522
1533
  function getActiveRunCount() {
1523
1534
  return [...runs.values()].filter((run) => run.manifest.status === "running").length;
1524
1535
  }
1536
+ function getCachePruneIdleDelayMs() {
1537
+ const configured = config.cache?.pruneIdleDelayMs;
1538
+ if (configured === void 0 || !Number.isFinite(configured) || configured <= 0) return defaultCachePruneIdleDelayMs;
1539
+ return Math.floor(configured);
1540
+ }
1541
+ function cancelCacheRetentionPrune() {
1542
+ if (cachePruneIdleTimer === void 0) return;
1543
+ clearTimeout(cachePruneIdleTimer);
1544
+ cachePruneIdleTimer = void 0;
1545
+ }
1546
+ function scheduleCacheRetentionPrune() {
1547
+ cancelCacheRetentionPrune();
1548
+ cachePruneIdleTimer = setTimeout(() => {
1549
+ cachePruneIdleTimer = void 0;
1550
+ pruneCacheRetentionIfIdle();
1551
+ }, getCachePruneIdleDelayMs());
1552
+ cachePruneIdleTimer.unref();
1553
+ }
1554
+ async function pruneCacheRetentionIfIdle() {
1555
+ if (getActiveRunCount() > 0) return;
1556
+ await cacheStore.pruneRetention();
1557
+ }
1525
1558
  function emitDiscoveryEvent() {
1526
1559
  const lastRunStatuses = getLastRunStatuses({
1527
1560
  runs: runs.values(),
@@ -1657,19 +1690,20 @@ Flags:
1657
1690
  `);
1658
1691
  return;
1659
1692
  }
1660
- if (topic === "cache" || topic === "cache list" || topic === "cache clear") {
1693
+ if (topic === "cache" || topic === "cache list" || topic === "cache clear" || topic === "cache repair") {
1661
1694
  console.info(`
1662
- agent-evals cache - Manage cached operation entries
1695
+ agent-evals cache - Manage cached namespace/key entries
1663
1696
 
1664
1697
  Usage:
1665
1698
  agent-evals cache list [flags]
1666
1699
  agent-evals cache clear --eval <id>
1667
1700
  agent-evals cache clear --all
1701
+ agent-evals cache repair [flags]
1668
1702
 
1669
1703
  Flags:
1670
1704
  --eval <id> Clear entries for specific eval(s) (comma-separated)
1671
1705
  --all Confirm clearing every cached entry
1672
- --json Output cache listing as JSON
1706
+ --json Output cache listing or repair summary as JSON
1673
1707
  --no-env Disable automatic .env loading
1674
1708
  --help, -h Show this help
1675
1709
  `);
@@ -1683,9 +1717,10 @@ Commands:
1683
1717
  list List discovered evals
1684
1718
  run Run evals
1685
1719
  show-runs [id|latest] Show saved run artifact file paths
1686
- cache list List cached operation entries
1720
+ cache list List cached namespace/key entries
1687
1721
  cache clear --eval <id> Clear cache entries for one eval
1688
1722
  cache clear --all Clear every cached entry
1723
+ cache repair Remove unindexed/orphaned cache files
1689
1724
  help Show this help
1690
1725
 
1691
1726
  Options:
@@ -1961,7 +1996,7 @@ function parseArgs(argv) {
1961
1996
  let cursor = 1;
1962
1997
  if (args.command === "cache") {
1963
1998
  const sub = normalizedArgv[cursor];
1964
- if (sub === "list" || sub === "clear") {
1999
+ if (sub === "list" || sub === "clear" || sub === "repair") {
1965
2000
  args.subcommand = sub;
1966
2001
  args.helpTopic = `cache ${sub}`;
1967
2002
  cursor++;
@@ -2136,8 +2171,8 @@ async function commandApp(args) {
2136
2171
  const { serve } = await import("@hono/node-server");
2137
2172
  const bundledWebDist = resolve(currentDir, "apps/web/dist");
2138
2173
  if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
2139
- const appModule = await import("./app-BD0D9-7k.mjs");
2140
- const runnerModule = await import("./runner-dB69WsnM.mjs");
2174
+ const appModule = await import("./app-NI4to6lp.mjs");
2175
+ const runnerModule = await import("./runner-CTp9zHbM.mjs");
2141
2176
  if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
2142
2177
  if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
2143
2178
  await runnerModule.initRunner();
@@ -2311,10 +2346,8 @@ async function commandCache(args) {
2311
2346
  for (const entry of entries) {
2312
2347
  console.info(` ${entry.namespace}`);
2313
2348
  console.info(` key: ${entry.key}`);
2314
- const operationLabel = entry.operationType === "span" ? `${entry.operationName} (span ${entry.spanKind ?? "unknown"})` : `${entry.operationName} (value)`;
2315
- console.info(` operation: ${operationLabel}`);
2316
2349
  console.info(` stored: ${entry.storedAt}`);
2317
- console.info(` size: ${String(entry.sizeBytes)} bytes`);
2350
+ console.info(` last accessed: ${entry.lastAccessedAt}`);
2318
2351
  console.info("");
2319
2352
  }
2320
2353
  return;
@@ -2342,6 +2375,20 @@ async function commandCache(args) {
2342
2375
  console.info("Refusing to clear cache without --eval <id> or --all. Use one of these flags to confirm.");
2343
2376
  process.exit(1);
2344
2377
  }
2378
+ if (args.subcommand === "repair") {
2379
+ const summary = await runner.repairCache();
2380
+ if (args.json) {
2381
+ console.info(JSON.stringify(summary, null, 2));
2382
+ return;
2383
+ }
2384
+ console.info("Cache repair complete.");
2385
+ console.info(`Removed cache files: ${String(summary.removedCacheFiles)}`);
2386
+ console.info(`Removed debug files: ${String(summary.removedDebugFiles)}`);
2387
+ console.info(`Removed blob files: ${String(summary.removedBlobFiles)}`);
2388
+ console.info(`Removed index rows: ${String(summary.removedIndexRows)}`);
2389
+ console.info(`Rewritten indexes: ${String(summary.rewrittenIndexes)}`);
2390
+ return;
2391
+ }
2345
2392
  printHelp(args.helpTopic);
2346
2393
  }
2347
2394
  function getSortedRunSnapshots(runner) {
package/dist/index.d.mts CHANGED
@@ -1399,7 +1399,7 @@ type EvalLogLevelInput = RunLogLevel$1 | 'warning';
1399
1399
  declare class EvalAssertionError extends Error {
1400
1400
  constructor(message: string);
1401
1401
  }
1402
- /** Return the shifted wall-clock time for a stored eval clock state. */
1402
+ /** Return the host process clock, bypassing the eval Date shim. */
1403
1403
  /**
1404
1404
  * Eval time helpers for reading and moving the active eval clock.
1405
1405
  *
@@ -4042,7 +4042,13 @@ type AgentEvalsConfig$1 = {
4042
4042
  * Exact namespace-specific retention caps. Values override
4043
4043
  * `maxEntriesPerNamespace` for matching namespaces.
4044
4044
  */
4045
- maxEntriesByNamespace?: Record<string, number>; /** Legacy alias for `maxEntriesPerNamespace`, retained so older config files keep working. */
4045
+ maxEntriesByNamespace?: Record<string, number>;
4046
+ /**
4047
+ * Milliseconds the runner waits after becoming idle before pruning indexed
4048
+ * cache entries. Defaults to `5000`; non-positive or non-finite values use
4049
+ * the default.
4050
+ */
4051
+ pruneIdleDelayMs?: number; /** Legacy alias for `maxEntriesPerNamespace`, retained so older config files keep working. */
4046
4052
  maxEntriesPerEval?: number;
4047
4053
  };
4048
4054
  };
@@ -4304,22 +4310,25 @@ declare const traceCacheRefSchema: z$1.ZodObject<{
4304
4310
  }, z$1.core.$strip>;
4305
4311
  /** Reference to a value-cache lookup performed via `evalTracer.cache(...)`. */
4306
4312
  type TraceCacheRef$1 = z$1.infer<typeof traceCacheRefSchema>;
4307
- /** Summary of a single persisted cache entry, used by list/delete endpoints. */
4313
+ /** Minimal index-backed summary of a persisted cache entry. */
4308
4314
  declare const cacheListItemSchema$1: z$1.ZodObject<{
4309
4315
  key: z$1.ZodString;
4310
4316
  namespace: z$1.ZodString;
4311
- operationType: z$1.ZodEnum<{
4312
- value: "value";
4313
- span: "span";
4314
- }>;
4315
- operationName: z$1.ZodString;
4316
- spanName: z$1.ZodOptional<z$1.ZodString>;
4317
- spanKind: z$1.ZodOptional<z$1.ZodString>;
4318
4317
  storedAt: z$1.ZodString;
4319
- sizeBytes: z$1.ZodNumber;
4318
+ lastAccessedAt: z$1.ZodString;
4320
4319
  }, z$1.core.$strip>;
4321
- /** Summary row for a single cache entry. */
4320
+ /** Minimal summary row for a single cache entry. */
4322
4321
  type CacheListItem = z$1.infer<typeof cacheListItemSchema$1>;
4322
+ /** Summary of cleanup performed by manual cache repair. */
4323
+ declare const cacheRepairSummarySchema$1: z$1.ZodObject<{
4324
+ removedCacheFiles: z$1.ZodNumber;
4325
+ removedDebugFiles: z$1.ZodNumber;
4326
+ removedBlobFiles: z$1.ZodNumber;
4327
+ removedIndexRows: z$1.ZodNumber;
4328
+ rewrittenIndexes: z$1.ZodNumber;
4329
+ }, z$1.core.$strip>;
4330
+ /** Stable JSON summary returned by manual cache repair. */
4331
+ type CacheRepairSummary = z$1.infer<typeof cacheRepairSummarySchema$1>;
4323
4332
  /** Serialized nested span captured while recording a cached operation. */
4324
4333
  type SerializedCacheSpan = {
4325
4334
  kind: string;
@@ -6097,22 +6106,25 @@ type ResolvedLlmCallCostCurrency$1 = {
6097
6106
  numberFormat?: NumberDisplayOptions$1;
6098
6107
  }; //#endregion
6099
6108
  //#region ../shared/src/schemas/cache.d.ts
6100
- /** Summary of a single persisted cache entry, used by list/delete endpoints. */
6109
+ /** Minimal index-backed summary of a persisted cache entry. */
6101
6110
  declare const cacheListItemSchema: z$1.ZodObject<{
6102
6111
  key: z$1.ZodString;
6103
6112
  namespace: z$1.ZodString;
6104
- operationType: z$1.ZodEnum<{
6105
- span: "span";
6106
- value: "value";
6107
- }>;
6108
- operationName: z$1.ZodString;
6109
- spanName: z$1.ZodOptional<z$1.ZodString>;
6110
- spanKind: z$1.ZodOptional<z$1.ZodString>;
6111
6113
  storedAt: z$1.ZodString;
6112
- sizeBytes: z$1.ZodNumber;
6114
+ lastAccessedAt: z$1.ZodString;
6113
6115
  }, z$1.core.$strip>;
6114
- /** Summary row for a single cache entry. */
6116
+ /** Minimal summary row for a single cache entry. */
6115
6117
  type CacheListItem$1 = z$1.infer<typeof cacheListItemSchema>;
6118
+ /** Summary of cleanup performed by manual cache repair. */
6119
+ declare const cacheRepairSummarySchema: z$1.ZodObject<{
6120
+ removedCacheFiles: z$1.ZodNumber;
6121
+ removedDebugFiles: z$1.ZodNumber;
6122
+ removedBlobFiles: z$1.ZodNumber;
6123
+ removedIndexRows: z$1.ZodNumber;
6124
+ rewrittenIndexes: z$1.ZodNumber;
6125
+ }, z$1.core.$strip>;
6126
+ /** Stable JSON summary returned by manual cache repair. */
6127
+ type CacheRepairSummary$1 = z$1.infer<typeof cacheRepairSummarySchema>;
6116
6128
  /** Serialized nested span captured while recording a cached operation. */
6117
6129
  type SerializedCacheSpan$1 = {
6118
6130
  kind: string;
@@ -6454,7 +6466,8 @@ type EvalRunner = {
6454
6466
  * Remove cache entries matching `filter`, or all entries when no filter is
6455
6467
  * supplied.
6456
6468
  */
6457
- clearCache(filter?: CacheClearFilter): Promise<void>;
6469
+ clearCache(filter?: CacheClearFilter): Promise<void>; /** Remove cache/debug/blob files that are not referenced by cache indexes. */
6470
+ repairCache(): Promise<CacheRepairSummary$1>;
6458
6471
  /**
6459
6472
  * Recompute persisted case and run statuses for terminal runs touching one
6460
6473
  * eval. Accepts the exact eval key.
@@ -6635,4 +6648,4 @@ declare function defineEval<TInput = unknown, TOutputs extends EvalOutputs = Eva
6635
6648
  /** Return whether the active eval case has tags matching the typed input. */
6636
6649
  declare function matchesEvalTags(input: EvalTagMatchInput): boolean;
6637
6650
  //#endregion
6638
- export { AgentEvalTagRegistry, AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheActivityEntry, type CacheAdapter, type CacheDebugKeyEntry, type CacheDebugKeyFile, type CacheDebugKeyWrite, type CacheEntry, type CacheEntryWithDebugKey, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CacheSerializationOptions, type CacheStatus, type CallDerivedAttribute, type CallDerivedAttributeContext, type CallDerivedAttributesConfig, type CallDerivedAttributesFn, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type ConfigReloadState, type ConfigReloadStatus, type CreateRunRequest, type DefaultConfigKey, type DerivedStatus, type DiscoveryIssue, EvalAssertionError, type EvalCacheConfig, EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, EvalDefinition, type EvalDeriveConfig, type EvalDeriveContext, type EvalDeriveFn, type EvalDeriveMap, type EvalDeriveValueFn, type EvalDisplayStatus, type EvalExecuteContext, type EvalExpectation, type EvalFreshnessStatus, type EvalManualInputConfig, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalRuntimeScope, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStartTime, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, EvalTag, EvalTagMatchInput, type EvalTraceTree, type JsonCell, type LlmCallCostBreakdown, type LlmCallCostCurrency, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallPricing, type LlmCallPricingRate, type LlmCallPricingRegistry, type LlmCallSimulatedTokens, type LlmCallsConfigInput, type LlmCostScenario, type ManualInputDescriptor, type ManualInputFieldDescriptor, type ManualInputFieldKind, type ManualInputFieldOverride, type ManualInputFieldsConfig, type ManualInputFileValue, type ManualInputSelectOption, type MaterializeManualInputFilesResult, type NumberDisplayOptions, type ReadManualInputFileResult, type RemoveDefaultConfig, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedCallDerivedAttribute, type ResolvedLlmCallCostCurrency, type ResolvedLlmCallMetric, type ResolvedLlmCallPricing, type ResolvedLlmCallsConfig, type RunInEvalScopeOptions, type RunLogEntry, type RunLogLevel, type RunLogLocation, type RunLogPhase, type RunLogsConfigInput, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SerializedCacheValue, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, z };
6651
+ export { AgentEvalTagRegistry, AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheActivityEntry, type CacheAdapter, type CacheDebugKeyEntry, type CacheDebugKeyFile, type CacheDebugKeyWrite, type CacheEntry, type CacheEntryWithDebugKey, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheRepairSummary, type CacheScopeContext, type CacheSerializationOptions, type CacheStatus, type CallDerivedAttribute, type CallDerivedAttributeContext, type CallDerivedAttributesConfig, type CallDerivedAttributesFn, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type ConfigReloadState, type ConfigReloadStatus, type CreateRunRequest, type DefaultConfigKey, type DerivedStatus, type DiscoveryIssue, EvalAssertionError, type EvalCacheConfig, EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, EvalDefinition, type EvalDeriveConfig, type EvalDeriveContext, type EvalDeriveFn, type EvalDeriveMap, type EvalDeriveValueFn, type EvalDisplayStatus, type EvalExecuteContext, type EvalExpectation, type EvalFreshnessStatus, type EvalManualInputConfig, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalRuntimeScope, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStartTime, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, EvalTag, EvalTagMatchInput, type EvalTraceTree, type JsonCell, type LlmCallCostBreakdown, type LlmCallCostCurrency, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallPricing, type LlmCallPricingRate, type LlmCallPricingRegistry, type LlmCallSimulatedTokens, type LlmCallsConfigInput, type LlmCostScenario, type ManualInputDescriptor, type ManualInputFieldDescriptor, type ManualInputFieldKind, type ManualInputFieldOverride, type ManualInputFieldsConfig, type ManualInputFileValue, type ManualInputSelectOption, type MaterializeManualInputFilesResult, type NumberDisplayOptions, type ReadManualInputFileResult, type RemoveDefaultConfig, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedCallDerivedAttribute, type ResolvedLlmCallCostCurrency, type ResolvedLlmCallMetric, type ResolvedLlmCallPricing, type ResolvedLlmCallsConfig, type RunInEvalScopeOptions, type RunLogEntry, type RunLogLevel, type RunLogLocation, type RunLogPhase, type RunLogsConfigInput, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SerializedCacheValue, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, z };
package/dist/index.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { $ as startEvalBackgroundJob, A as repoFile, B as getCurrentScope, C as evalTracer, D as deserializeCacheValue, E as deserializeCacheRecording, F as appendToEvalOutput, G as mergeEvalOutput, H as incrementEvalOutput, J as runInEvalScope, K as nextEvalId, L as evalAssert, M as readManualInputFile, N as evalExpect, Nt as getEvalRegistry, O as serializeCacheRecording, P as EvalAssertionError, Q as setScopeCacheContext, R as evalLog, S as evalSpan, T as hashCacheKeySync, U as isInEvalScope, V as getEvalCaseInput, Y as runInExistingEvalScope, Z as setEvalOutput, at as extractLlmCalls, b as buildTraceTree, it as extractApiCalls, j as manualInputFileValueSchema, k as serializeCacheValue, lt as getNestedAttribute, nt as extractCacheEntries, ot as simulateLlmCallCost, q as runInEvalRuntimeScope, rt as extractCacheHits, st as simulateTokenAllocation, w as hashCacheKey, x as captureEvalSpanError, y as z, z as evalTime } from "./runExecution-Sw38bCaq.mjs";
2
- import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-BR3wMZMx.mjs";
3
- import { n as matchesEvalTags, t as defineEval } from "./src-hBGtzWuA.mjs";
1
+ import { $ as startEvalBackgroundJob, A as repoFile, B as getCurrentScope, C as evalTracer, D as deserializeCacheValue, E as deserializeCacheRecording, F as appendToEvalOutput, G as mergeEvalOutput, H as incrementEvalOutput, J as runInEvalScope, K as nextEvalId, L as evalAssert, M as readManualInputFile, N as evalExpect, Nt as getEvalRegistry, O as serializeCacheRecording, P as EvalAssertionError, Q as setScopeCacheContext, R as evalLog, S as evalSpan, T as hashCacheKeySync, U as isInEvalScope, V as getEvalCaseInput, Y as runInExistingEvalScope, Z as setEvalOutput, at as extractLlmCalls, b as buildTraceTree, it as extractApiCalls, j as manualInputFileValueSchema, k as serializeCacheValue, lt as getNestedAttribute, nt as extractCacheEntries, ot as simulateLlmCallCost, q as runInEvalRuntimeScope, rt as extractCacheHits, st as simulateTokenAllocation, w as hashCacheKey, x as captureEvalSpanError, y as z, z as evalTime } from "./runExecution-C31dpemR.mjs";
2
+ import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-Bu9347r1.mjs";
3
+ import { n as matchesEvalTags, t as defineEval } from "./src-FR60ZR_4.mjs";
4
4
  export { EvalAssertionError, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, z };
package/dist/runChild.mjs CHANGED
@@ -1,5 +1,5 @@
1
- import { At as evalChartsConfigSchema, Ct as buildEvalKey, Dt as evalStatAggregateSchema, I as configureEvalRunLogs, Ot as evalStatsConfigSchema, bt as runSummarySchema, et as createRunRequestSchema, jt as columnDefSchema, kt as manualInputDescriptorSchema, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, v as createFsCacheStore, yt as runManifestSchema } from "./runExecution-Sw38bCaq.mjs";
2
- import { S as parseEvalDiscovery, m as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-DJsdLYeZ.mjs";
1
+ import { At as evalChartsConfigSchema, Ct as buildEvalKey, Dt as evalStatAggregateSchema, I as configureEvalRunLogs, Ot as evalStatsConfigSchema, bt as runSummarySchema, et as createRunRequestSchema, jt as columnDefSchema, kt as manualInputDescriptorSchema, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, v as createFsCacheStore, yt as runManifestSchema } from "./runExecution-C31dpemR.mjs";
2
+ import { S as parseEvalDiscovery, m as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-3RoHLW4U.mjs";
3
3
  import { z } from "zod/v4";
4
4
  import { readFile } from "node:fs/promises";
5
5
  import { relative } from "node:path";