@ls-stack/agent-eval 0.58.0 → 0.58.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
- import { et as createRunRequestSchema, nt as extractCacheEntries, tt as updateManualScoreRequestSchema, ut as getEvalTitle } from "./runExecution-C4kAOhC1.mjs";
2
- import { o as stageManualInputFile } from "./cli-Cf37PZKi.mjs";
3
- import "./src-303BocMW.mjs";
4
- import { t as getRunnerInstance } from "./runner-bjd_UB9i.mjs";
1
+ import { et as createRunRequestSchema, nt as extractCacheEntries, tt as updateManualScoreRequestSchema, ut as getEvalTitle } from "./runExecution-d42Lm0i5.mjs";
2
+ import { o as stageManualInputFile } from "./cli-_g2qOMK6.mjs";
3
+ import "./src-CdZsOn6y.mjs";
4
+ import { t as getRunnerInstance } from "./runner-MSr8sAWm.mjs";
5
5
  import { z } from "zod/v4";
6
6
  import { readFile } from "node:fs/promises";
7
7
  import { dirname, isAbsolute, join, relative, resolve, sep } from "node:path";
package/dist/bin.mjs CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env node
2
- import { t as runCli } from "./cli-Cf37PZKi.mjs";
2
+ import { t as runCli } from "./cli-_g2qOMK6.mjs";
3
3
  import { spawn } from "node:child_process";
4
4
  //#region src/bin.ts
5
5
  const moduleMocksFlag = "--experimental-test-module-mocks";
@@ -1,4 +1,4 @@
1
- import { I as configureEvalRunLogs, Pt as runWithEvalRegistry, St as resolveLlmCallsConfig, _ as createBufferedCacheStore, a as isCaseChildParentMessage, d as loadEvalModule, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, q as runInEvalRuntimeScope, r as runCase, v as createFsCacheStore, xt as resolveApiCallsConfig } from "./runExecution-C4kAOhC1.mjs";
1
+ import { I as configureEvalRunLogs, Pt as runWithEvalRegistry, St as resolveLlmCallsConfig, _ as createBufferedCacheStore, a as isCaseChildParentMessage, d as loadEvalModule, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, q as runInEvalRuntimeScope, r as runCase, v as createFsCacheStore, xt as resolveApiCallsConfig } from "./runExecution-d42Lm0i5.mjs";
2
2
  //#region ../runner/src/caseChild.ts
3
3
  let fatalErrorReported = false;
4
4
  let disconnectExpected = false;
@@ -1,5 +1,5 @@
1
- import { Ct as buildEvalKey, Nt as getEvalRegistry, St as resolveLlmCallsConfig, bt as runSummarySchema, c as resolveArtifactPath, ct as applyDerivedCallAttributes, dt as getEvalDisplayStatus, f as resolveEvalDefaultConfig, ft as deriveScopedSummaryFromCases, gt as matchesTagsFilter, h as normalizeScoreDef, m as buildDeclaredColumnDefs, o as stripTerminalControlCodes, p as loadConfig, s as resolveTracePresentation, ut as getEvalTitle, v as createFsCacheStore, wt as getCaseRowCaseKey, xt as resolveApiCallsConfig } from "./runExecution-C4kAOhC1.mjs";
2
- import { C as validateCharts, S as parseEvalDiscovery, _ as runTouchesEval, a as validateTagsFilters, b as deriveEvalFreshness, c as getLatestRunInfos, d as nextShortIdFromSnapshots, f as persistCaseDetail, g as recomputePersistedCaseStatus, h as recomputeEvalStatusesInRuns, i as resolveEvalTags, l as loadPersistedRunSnapshot, m as persistRunState, n as getTargetEvalKeys, o as generateRunId, p as deleteTemporaryRuns, s as getLastRunStatuses, u as loadPersistedRunSnapshots, v as buildManualInputDescriptor, x as loadIsolatedEvalRegistry, y as parseManualInputValues } from "./runOrchestration-5xEiQxiS.mjs";
1
+ import { Ct as buildEvalKey, Nt as getEvalRegistry, St as resolveLlmCallsConfig, bt as runSummarySchema, c as resolveArtifactPath, ct as applyDerivedCallAttributes, dt as getEvalDisplayStatus, f as resolveEvalDefaultConfig, ft as deriveScopedSummaryFromCases, gt as matchesTagsFilter, h as normalizeScoreDef, m as buildDeclaredColumnDefs, o as stripTerminalControlCodes, p as loadConfig, s as resolveTracePresentation, ut as getEvalTitle, v as createFsCacheStore, wt as getCaseRowCaseKey, xt as resolveApiCallsConfig } from "./runExecution-d42Lm0i5.mjs";
2
+ import { C as validateCharts, S as parseEvalDiscovery, _ as runTouchesEval, a as validateTagsFilters, b as deriveEvalFreshness, c as getLatestRunInfos, d as nextShortIdFromSnapshots, f as persistCaseDetail, g as recomputePersistedCaseStatus, h as recomputeEvalStatusesInRuns, i as resolveEvalTags, l as loadPersistedRunSnapshot, m as persistRunState, n as getTargetEvalKeys, o as generateRunId, p as deleteTemporaryRuns, s as getLastRunStatuses, u as loadPersistedRunSnapshots, v as buildManualInputDescriptor, x as loadIsolatedEvalRegistry, y as parseManualInputValues } from "./runOrchestration-CvmFeOmT.mjs";
3
3
  import { copyFile, mkdir, readFile, rm, writeFile } from "node:fs/promises";
4
4
  import { basename, dirname, extname, isAbsolute, join, relative, resolve, sep } from "node:path";
5
5
  import { createHash, randomUUID } from "node:crypto";
@@ -2172,8 +2172,8 @@ async function commandApp(args) {
2172
2172
  const { serve } = await import("@hono/node-server");
2173
2173
  const bundledWebDist = resolve(currentDir, "apps/web/dist");
2174
2174
  if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
2175
- const appModule = await import("./app-L9GdY28I.mjs");
2176
- const runnerModule = await import("./runner-JIykMlve.mjs");
2175
+ const appModule = await import("./app-DhMIbjlE.mjs");
2176
+ const runnerModule = await import("./runner-BKogjiYd.mjs");
2177
2177
  if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
2178
2178
  if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
2179
2179
  await runnerModule.initRunner();
package/dist/index.d.mts CHANGED
@@ -1455,7 +1455,9 @@ type CacheScopeContext = {
1455
1455
  /** Active recording frame captured while a cached operation body executes. */
1456
1456
  type CacheRecordingFrame = {
1457
1457
  /** Length of `scope.spans` immediately before the cached body started. */baseSpanIndex: number; /** Parent id used when recording and replaying direct child spans. */
1458
- replayParentSpanId: string | null; /** Ordered observable effects recorded during the cached body. */
1458
+ replayParentSpanId: string | null; /** Spans created by this cache body's async execution branch. */
1459
+ spanIds: Set<string>; /** Non-cache attributes written to the replay parent by this async branch. */
1460
+ finalAttributes: Record<string, unknown>; /** Ordered observable effects recorded during the cached body. */
1459
1461
  ops: CacheRecordingOp$1[];
1460
1462
  };
1461
1463
  /** Mutable per-case runtime state stored in async local storage. */
@@ -1480,11 +1482,6 @@ type EvalCaseScope = {
1480
1482
  logs: RunLogEntry$1[];
1481
1483
  spans: EvalTraceSpan$2[];
1482
1484
  checkpoints: Map<string, unknown>;
1483
- /**
1484
- * Stack of active cache recorders. Ops are written to the top-most frame
1485
- * when it exists and `replayingDepth === 0`.
1486
- */
1487
- recordingStack: CacheRecordingFrame[];
1488
1485
  /**
1489
1486
  * Incremented while replaying a cached operation, so nested SDK calls do not
1490
1487
  * accidentally double-record ops into outer recorders.
@@ -2017,8 +2014,8 @@ declare const traceAttributeDisplaySchema: z$1.ZodObject<{
2017
2014
  subtree: "subtree";
2018
2015
  }>>;
2019
2016
  mode: z$1.ZodOptional<z$1.ZodEnum<{
2020
- all: "all";
2021
2017
  sum: "sum";
2018
+ all: "all";
2022
2019
  last: "last";
2023
2020
  }>>;
2024
2021
  }, z$1.core.$strip>;
@@ -2053,8 +2050,8 @@ declare const traceDisplayConfigSchema: z$1.ZodObject<{
2053
2050
  subtree: "subtree";
2054
2051
  }>>;
2055
2052
  mode: z$1.ZodOptional<z$1.ZodEnum<{
2056
- all: "all";
2057
2053
  sum: "sum";
2054
+ all: "all";
2058
2055
  last: "last";
2059
2056
  }>>;
2060
2057
  }, z$1.core.$strip>>>;
@@ -2093,8 +2090,8 @@ declare const traceAttributeDisplayInputSchema: z$1.ZodObject<{
2093
2090
  subtree: "subtree";
2094
2091
  }>>;
2095
2092
  mode: z$1.ZodOptional<z$1.ZodEnum<{
2096
- all: "all";
2097
2093
  sum: "sum";
2094
+ all: "all";
2098
2095
  last: "last";
2099
2096
  }>>;
2100
2097
  transform: z$1.ZodOptional<z$1.ZodCustom<TraceAttributeTransform, TraceAttributeTransform>>;
@@ -2131,8 +2128,8 @@ declare const traceDisplayInputConfigSchema: z$1.ZodObject<{
2131
2128
  subtree: "subtree";
2132
2129
  }>>;
2133
2130
  mode: z$1.ZodOptional<z$1.ZodEnum<{
2134
- all: "all";
2135
2131
  sum: "sum";
2132
+ all: "all";
2136
2133
  last: "last";
2137
2134
  }>>;
2138
2135
  transform: z$1.ZodOptional<z$1.ZodCustom<TraceAttributeTransform, TraceAttributeTransform>>;
@@ -2217,9 +2214,9 @@ type EvalFreshnessStatus = z$1.infer<typeof evalFreshnessStatusSchema>;
2217
2214
  */
2218
2215
  declare const evalStatAggregateSchema: z$1.ZodEnum<{
2219
2216
  avg: "avg";
2220
- sum: "sum";
2221
2217
  min: "min";
2222
2218
  max: "max";
2219
+ sum: "sum";
2223
2220
  best: "best";
2224
2221
  worst: "worst";
2225
2222
  }>;
@@ -2249,9 +2246,9 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
2249
2246
  kind: z$1.ZodLiteral<"duration">;
2250
2247
  aggregate: z$1.ZodOptional<z$1.ZodEnum<{
2251
2248
  avg: "avg";
2252
- sum: "sum";
2253
2249
  min: "min";
2254
2250
  max: "max";
2251
+ sum: "sum";
2255
2252
  best: "best";
2256
2253
  worst: "worst";
2257
2254
  }>>;
@@ -2260,9 +2257,9 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
2260
2257
  kind: z$1.ZodLiteral<"cacheHits">;
2261
2258
  aggregate: z$1.ZodOptional<z$1.ZodEnum<{
2262
2259
  avg: "avg";
2263
- sum: "sum";
2264
2260
  min: "min";
2265
2261
  max: "max";
2262
+ sum: "sum";
2266
2263
  best: "best";
2267
2264
  worst: "worst";
2268
2265
  }>>;
@@ -2273,9 +2270,9 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
2273
2270
  label: z$1.ZodOptional<z$1.ZodString>;
2274
2271
  aggregate: z$1.ZodEnum<{
2275
2272
  avg: "avg";
2276
- sum: "sum";
2277
2273
  min: "min";
2278
2274
  max: "max";
2275
+ sum: "sum";
2279
2276
  best: "best";
2280
2277
  worst: "worst";
2281
2278
  }>;
@@ -2313,9 +2310,9 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
2313
2310
  kind: z$1.ZodLiteral<"duration">;
2314
2311
  aggregate: z$1.ZodOptional<z$1.ZodEnum<{
2315
2312
  avg: "avg";
2316
- sum: "sum";
2317
2313
  min: "min";
2318
2314
  max: "max";
2315
+ sum: "sum";
2319
2316
  best: "best";
2320
2317
  worst: "worst";
2321
2318
  }>>;
@@ -2324,9 +2321,9 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
2324
2321
  kind: z$1.ZodLiteral<"cacheHits">;
2325
2322
  aggregate: z$1.ZodOptional<z$1.ZodEnum<{
2326
2323
  avg: "avg";
2327
- sum: "sum";
2328
2324
  min: "min";
2329
2325
  max: "max";
2326
+ sum: "sum";
2330
2327
  best: "best";
2331
2328
  worst: "worst";
2332
2329
  }>>;
@@ -2337,9 +2334,9 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
2337
2334
  label: z$1.ZodOptional<z$1.ZodString>;
2338
2335
  aggregate: z$1.ZodEnum<{
2339
2336
  avg: "avg";
2340
- sum: "sum";
2341
2337
  min: "min";
2342
2338
  max: "max";
2339
+ sum: "sum";
2343
2340
  best: "best";
2344
2341
  worst: "worst";
2345
2342
  }>;
@@ -2422,10 +2419,10 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2422
2419
  caseIds: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
2423
2420
  lastRunStatus: z$1.ZodNullable<z$1.ZodEnum<{
2424
2421
  error: "error";
2425
- running: "running";
2426
- cancelled: "cancelled";
2427
2422
  pass: "pass";
2428
2423
  fail: "fail";
2424
+ running: "running";
2425
+ cancelled: "cancelled";
2429
2426
  unscored: "unscored";
2430
2427
  }>>;
2431
2428
  stats: z$1.ZodOptional<z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
@@ -2440,9 +2437,9 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2440
2437
  kind: z$1.ZodLiteral<"duration">;
2441
2438
  aggregate: z$1.ZodOptional<z$1.ZodEnum<{
2442
2439
  avg: "avg";
2443
- sum: "sum";
2444
2440
  min: "min";
2445
2441
  max: "max";
2442
+ sum: "sum";
2446
2443
  best: "best";
2447
2444
  worst: "worst";
2448
2445
  }>>;
@@ -2451,9 +2448,9 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2451
2448
  kind: z$1.ZodLiteral<"cacheHits">;
2452
2449
  aggregate: z$1.ZodOptional<z$1.ZodEnum<{
2453
2450
  avg: "avg";
2454
- sum: "sum";
2455
2451
  min: "min";
2456
2452
  max: "max";
2453
+ sum: "sum";
2457
2454
  best: "best";
2458
2455
  worst: "worst";
2459
2456
  }>>;
@@ -2464,9 +2461,9 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2464
2461
  label: z$1.ZodOptional<z$1.ZodString>;
2465
2462
  aggregate: z$1.ZodEnum<{
2466
2463
  avg: "avg";
2467
- sum: "sum";
2468
2464
  min: "min";
2469
2465
  max: "max";
2466
+ sum: "sum";
2470
2467
  best: "best";
2471
2468
  worst: "worst";
2472
2469
  }>;
@@ -2491,9 +2488,9 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2491
2488
  }, z$1.core.$strip>], "kind">>>;
2492
2489
  defaultStatAggregate: z$1.ZodOptional<z$1.ZodEnum<{
2493
2490
  avg: "avg";
2494
- sum: "sum";
2495
2491
  min: "min";
2496
2492
  max: "max";
2493
+ sum: "sum";
2497
2494
  best: "best";
2498
2495
  worst: "worst";
2499
2496
  }>>;
@@ -2530,9 +2527,9 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2530
2527
  key: z$1.ZodString;
2531
2528
  aggregate: z$1.ZodEnum<{
2532
2529
  avg: "avg";
2533
- sum: "sum";
2534
2530
  min: "min";
2535
2531
  max: "max";
2532
+ sum: "sum";
2536
2533
  latest: "latest";
2537
2534
  passThresholdRate: "passThresholdRate";
2538
2535
  }>;
@@ -2572,9 +2569,9 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2572
2569
  key: z$1.ZodString;
2573
2570
  aggregate: z$1.ZodEnum<{
2574
2571
  avg: "avg";
2575
- sum: "sum";
2576
2572
  min: "min";
2577
2573
  max: "max";
2574
+ sum: "sum";
2578
2575
  latest: "latest";
2579
2576
  passThresholdRate: "passThresholdRate";
2580
2577
  }>;
@@ -2671,11 +2668,11 @@ declare const caseRowSchema$1: z$1.ZodObject<{
2671
2668
  tags: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
2672
2669
  status: z$1.ZodEnum<{
2673
2670
  error: "error";
2674
- pending: "pending";
2675
- running: "running";
2676
- cancelled: "cancelled";
2677
2671
  pass: "pass";
2678
2672
  fail: "fail";
2673
+ running: "running";
2674
+ cancelled: "cancelled";
2675
+ pending: "pending";
2679
2676
  }>;
2680
2677
  durationMs: z$1.ZodNullable<z$1.ZodNumber>;
2681
2678
  cacheHits: z$1.ZodOptional<z$1.ZodNumber>;
@@ -2862,8 +2859,8 @@ declare const scoreTraceSchema: z$1.ZodObject<{
2862
2859
  subtree: "subtree";
2863
2860
  }>>;
2864
2861
  mode: z$1.ZodOptional<z$1.ZodEnum<{
2865
- all: "all";
2866
2862
  sum: "sum";
2863
+ all: "all";
2867
2864
  last: "last";
2868
2865
  }>>;
2869
2866
  }, z$1.core.$strip>>>;
@@ -2874,10 +2871,10 @@ declare const scoreTraceSchema: z$1.ZodObject<{
2874
2871
  namespace: z$1.ZodString;
2875
2872
  key: z$1.ZodString;
2876
2873
  status: z$1.ZodEnum<{
2877
- bypass: "bypass";
2878
- refresh: "refresh";
2879
2874
  hit: "hit";
2880
2875
  miss: "miss";
2876
+ refresh: "refresh";
2877
+ bypass: "bypass";
2881
2878
  }>;
2882
2879
  read: z$1.ZodOptional<z$1.ZodBoolean>;
2883
2880
  stored: z$1.ZodOptional<z$1.ZodBoolean>;
@@ -2896,11 +2893,11 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
2896
2893
  tags: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
2897
2894
  status: z$1.ZodEnum<{
2898
2895
  error: "error";
2899
- pending: "pending";
2900
- running: "running";
2901
- cancelled: "cancelled";
2902
2896
  pass: "pass";
2903
2897
  fail: "fail";
2898
+ running: "running";
2899
+ cancelled: "cancelled";
2900
+ pending: "pending";
2904
2901
  }>;
2905
2902
  input: z$1.ZodUnknown;
2906
2903
  trace: z$1.ZodArray<z$1.ZodObject<{
@@ -2965,8 +2962,8 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
2965
2962
  subtree: "subtree";
2966
2963
  }>>;
2967
2964
  mode: z$1.ZodOptional<z$1.ZodEnum<{
2968
- all: "all";
2969
2965
  sum: "sum";
2966
+ all: "all";
2970
2967
  last: "last";
2971
2968
  }>>;
2972
2969
  }, z$1.core.$strip>>>;
@@ -3034,8 +3031,8 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
3034
3031
  subtree: "subtree";
3035
3032
  }>>;
3036
3033
  mode: z$1.ZodOptional<z$1.ZodEnum<{
3037
- all: "all";
3038
3034
  sum: "sum";
3035
+ all: "all";
3039
3036
  last: "last";
3040
3037
  }>>;
3041
3038
  }, z$1.core.$strip>>>;
@@ -3046,10 +3043,10 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
3046
3043
  namespace: z$1.ZodString;
3047
3044
  key: z$1.ZodString;
3048
3045
  status: z$1.ZodEnum<{
3049
- bypass: "bypass";
3050
- refresh: "refresh";
3051
3046
  hit: "hit";
3052
3047
  miss: "miss";
3048
+ refresh: "refresh";
3049
+ bypass: "bypass";
3053
3050
  }>;
3054
3051
  read: z$1.ZodOptional<z$1.ZodBoolean>;
3055
3052
  stored: z$1.ZodOptional<z$1.ZodBoolean>;
@@ -3166,10 +3163,10 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
3166
3163
  namespace: z$1.ZodString;
3167
3164
  key: z$1.ZodString;
3168
3165
  status: z$1.ZodEnum<{
3169
- bypass: "bypass";
3170
- refresh: "refresh";
3171
3166
  hit: "hit";
3172
3167
  miss: "miss";
3168
+ refresh: "refresh";
3169
+ bypass: "bypass";
3173
3170
  }>;
3174
3171
  read: z$1.ZodOptional<z$1.ZodBoolean>;
3175
3172
  stored: z$1.ZodOptional<z$1.ZodBoolean>;
@@ -3223,9 +3220,9 @@ type EvalChartBuiltinMetric = z$1.infer<typeof evalChartBuiltinMetricSchema>;
3223
3220
  /** Reducer applied to a numeric column across all cases of a single run. */
3224
3221
  declare const evalChartAggregateSchema: z$1.ZodEnum<{
3225
3222
  avg: "avg";
3226
- sum: "sum";
3227
3223
  min: "min";
3228
3224
  max: "max";
3225
+ sum: "sum";
3229
3226
  latest: "latest";
3230
3227
  passThresholdRate: "passThresholdRate";
3231
3228
  }>;
@@ -3281,9 +3278,9 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
3281
3278
  key: z$1.ZodString;
3282
3279
  aggregate: z$1.ZodEnum<{
3283
3280
  avg: "avg";
3284
- sum: "sum";
3285
3281
  min: "min";
3286
3282
  max: "max";
3283
+ sum: "sum";
3287
3284
  latest: "latest";
3288
3285
  passThresholdRate: "passThresholdRate";
3289
3286
  }>;
@@ -3316,9 +3313,9 @@ declare const evalChartTooltipExtraSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObj
3316
3313
  key: z$1.ZodString;
3317
3314
  aggregate: z$1.ZodEnum<{
3318
3315
  avg: "avg";
3319
- sum: "sum";
3320
3316
  min: "min";
3321
3317
  max: "max";
3318
+ sum: "sum";
3322
3319
  latest: "latest";
3323
3320
  passThresholdRate: "passThresholdRate";
3324
3321
  }>;
@@ -3364,9 +3361,9 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
3364
3361
  key: z$1.ZodString;
3365
3362
  aggregate: z$1.ZodEnum<{
3366
3363
  avg: "avg";
3367
- sum: "sum";
3368
3364
  min: "min";
3369
3365
  max: "max";
3366
+ sum: "sum";
3370
3367
  latest: "latest";
3371
3368
  passThresholdRate: "passThresholdRate";
3372
3369
  }>;
@@ -3406,9 +3403,9 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
3406
3403
  key: z$1.ZodString;
3407
3404
  aggregate: z$1.ZodEnum<{
3408
3405
  avg: "avg";
3409
- sum: "sum";
3410
3406
  min: "min";
3411
3407
  max: "max";
3408
+ sum: "sum";
3412
3409
  latest: "latest";
3413
3410
  passThresholdRate: "passThresholdRate";
3414
3411
  }>;
@@ -3454,9 +3451,9 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
3454
3451
  key: z$1.ZodString;
3455
3452
  aggregate: z$1.ZodEnum<{
3456
3453
  avg: "avg";
3457
- sum: "sum";
3458
3454
  min: "min";
3459
3455
  max: "max";
3456
+ sum: "sum";
3460
3457
  latest: "latest";
3461
3458
  passThresholdRate: "passThresholdRate";
3462
3459
  }>;
@@ -3496,9 +3493,9 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
3496
3493
  key: z$1.ZodString;
3497
3494
  aggregate: z$1.ZodEnum<{
3498
3495
  avg: "avg";
3499
- sum: "sum";
3500
3496
  min: "min";
3501
3497
  max: "max";
3498
+ sum: "sum";
3502
3499
  latest: "latest";
3503
3500
  passThresholdRate: "passThresholdRate";
3504
3501
  }>;
@@ -3514,10 +3511,10 @@ declare const runManifestSchema$1: z$1.ZodObject<{
3514
3511
  shortId: z$1.ZodString;
3515
3512
  status: z$1.ZodEnum<{
3516
3513
  error: "error";
3517
- pending: "pending";
3518
3514
  running: "running";
3519
- completed: "completed";
3520
3515
  cancelled: "cancelled";
3516
+ pending: "pending";
3517
+ completed: "completed";
3521
3518
  }>;
3522
3519
  temporary: z$1.ZodDefault<z$1.ZodOptional<z$1.ZodBoolean>>;
3523
3520
  startedAt: z$1.ZodString;
@@ -3526,9 +3523,9 @@ declare const runManifestSchema$1: z$1.ZodObject<{
3526
3523
  evalSourceFingerprints: z$1.ZodDefault<z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodString>>>;
3527
3524
  target: z$1.ZodObject<{
3528
3525
  mode: z$1.ZodEnum<{
3526
+ caseIds: "caseIds";
3529
3527
  all: "all";
3530
3528
  evalIds: "evalIds";
3531
- caseIds: "caseIds";
3532
3529
  }>;
3533
3530
  evalKeys: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
3534
3531
  files: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
@@ -3542,9 +3539,9 @@ declare const runManifestSchema$1: z$1.ZodObject<{
3542
3539
  median: "median";
3543
3540
  }>>>;
3544
3541
  cacheMode: z$1.ZodOptional<z$1.ZodEnum<{
3545
- use: "use";
3546
- bypass: "bypass";
3547
3542
  refresh: "refresh";
3543
+ bypass: "bypass";
3544
+ use: "use";
3548
3545
  }>>;
3549
3546
  }, z$1.core.$strip>;
3550
3547
  /** Persisted lifecycle metadata for a single eval run. */
@@ -3554,10 +3551,10 @@ declare const runSummarySchema$1: z$1.ZodObject<{
3554
3551
  runId: z$1.ZodString;
3555
3552
  status: z$1.ZodEnum<{
3556
3553
  error: "error";
3557
- pending: "pending";
3558
3554
  running: "running";
3559
- completed: "completed";
3560
3555
  cancelled: "cancelled";
3556
+ pending: "pending";
3557
+ completed: "completed";
3561
3558
  }>;
3562
3559
  totalCases: z$1.ZodNumber;
3563
3560
  passedCases: z$1.ZodNumber;
@@ -4469,9 +4466,9 @@ declare function extractApiCalls(spans: EvalTraceSpan$1[], config: ResolvedApiCa
4469
4466
  * - `refresh`: never read, always write (forces re-execution and overwrites).
4470
4467
  */
4471
4468
  declare const cacheModeSchema: z$1.ZodEnum<{
4472
- use: "use";
4473
- bypass: "bypass";
4474
4469
  refresh: "refresh";
4470
+ bypass: "bypass";
4471
+ use: "use";
4475
4472
  }>;
4476
4473
  /** Mode controlling how cached spans behave during a run. */
4477
4474
  type CacheMode = z$1.infer<typeof cacheModeSchema>;
@@ -4492,10 +4489,10 @@ declare const cacheOperationTypeSchema: z$1.ZodEnum<{
4492
4489
  type CacheOperationType = z$1.infer<typeof cacheOperationTypeSchema>;
4493
4490
  /** Status of a cache lookup recorded on a span or case scope. */
4494
4491
  declare const cacheStatusSchema: z$1.ZodEnum<{
4495
- bypass: "bypass";
4496
- refresh: "refresh";
4497
4492
  hit: "hit";
4498
4493
  miss: "miss";
4494
+ refresh: "refresh";
4495
+ bypass: "bypass";
4499
4496
  }>;
4500
4497
  /** Status of a cache lookup recorded on a span or case scope. */
4501
4498
  type CacheStatus = z$1.infer<typeof cacheStatusSchema>;
@@ -4512,10 +4509,10 @@ declare const traceCacheRefSchema: z$1.ZodObject<{
4512
4509
  namespace: z$1.ZodString;
4513
4510
  key: z$1.ZodString;
4514
4511
  status: z$1.ZodEnum<{
4515
- bypass: "bypass";
4516
- refresh: "refresh";
4517
4512
  hit: "hit";
4518
4513
  miss: "miss";
4514
+ refresh: "refresh";
4515
+ bypass: "bypass";
4519
4516
  }>;
4520
4517
  read: z$1.ZodOptional<z$1.ZodBoolean>;
4521
4518
  stored: z$1.ZodOptional<z$1.ZodBoolean>;
@@ -5467,9 +5464,9 @@ type ConfigReloadState = z$1.infer<typeof configReloadStateSchema$1>;
5467
5464
  declare const createRunRequestSchema$1: z$1.ZodObject<{
5468
5465
  target: z$1.ZodObject<{
5469
5466
  mode: z$1.ZodEnum<{
5467
+ caseIds: "caseIds";
5470
5468
  all: "all";
5471
5469
  evalIds: "evalIds";
5472
- caseIds: "caseIds";
5473
5470
  }>;
5474
5471
  evalKeys: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
5475
5472
  files: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
@@ -5481,9 +5478,9 @@ declare const createRunRequestSchema$1: z$1.ZodObject<{
5481
5478
  temporary: z$1.ZodOptional<z$1.ZodBoolean>;
5482
5479
  cache: z$1.ZodOptional<z$1.ZodObject<{
5483
5480
  mode: z$1.ZodDefault<z$1.ZodEnum<{
5484
- use: "use";
5485
- bypass: "bypass";
5486
5481
  refresh: "refresh";
5482
+ bypass: "bypass";
5483
+ use: "use";
5487
5484
  }>>;
5488
5485
  }, z$1.core.$strip>>;
5489
5486
  manualInputs: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
package/dist/index.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { $ as startEvalBackgroundJob, A as repoFile, B as getCurrentScope, C as evalTracer, D as deserializeCacheValue, E as deserializeCacheRecording, F as appendToEvalOutput, G as mergeEvalOutput, H as incrementEvalOutput, J as runInEvalScope, K as nextEvalId, L as evalAssert, M as readManualInputFile, N as evalExpect, Nt as getEvalRegistry, O as serializeCacheRecording, P as EvalAssertionError, Q as setScopeCacheContext, R as evalLog, S as evalSpan, T as hashCacheKeySync, U as isInEvalScope, V as getEvalCaseInput, Y as runInExistingEvalScope, Z as setEvalOutput, at as extractLlmCalls, b as buildTraceTree, it as extractApiCalls, j as manualInputFileValueSchema, k as serializeCacheValue, lt as getNestedAttribute, nt as extractCacheEntries, ot as simulateLlmCallCost, q as runInEvalRuntimeScope, rt as extractCacheHits, st as simulateTokenAllocation, w as hashCacheKey, x as captureEvalSpanError, y as z, z as evalTime } from "./runExecution-C4kAOhC1.mjs";
2
- import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-Cf37PZKi.mjs";
3
- import { n as matchesEvalTags, t as defineEval } from "./src-303BocMW.mjs";
1
+ import { $ as startEvalBackgroundJob, A as repoFile, B as getCurrentScope, C as evalTracer, D as deserializeCacheValue, E as deserializeCacheRecording, F as appendToEvalOutput, G as mergeEvalOutput, H as incrementEvalOutput, J as runInEvalScope, K as nextEvalId, L as evalAssert, M as readManualInputFile, N as evalExpect, Nt as getEvalRegistry, O as serializeCacheRecording, P as EvalAssertionError, Q as setScopeCacheContext, R as evalLog, S as evalSpan, T as hashCacheKeySync, U as isInEvalScope, V as getEvalCaseInput, Y as runInExistingEvalScope, Z as setEvalOutput, at as extractLlmCalls, b as buildTraceTree, it as extractApiCalls, j as manualInputFileValueSchema, k as serializeCacheValue, lt as getNestedAttribute, nt as extractCacheEntries, ot as simulateLlmCallCost, q as runInEvalRuntimeScope, rt as extractCacheHits, st as simulateTokenAllocation, w as hashCacheKey, x as captureEvalSpanError, y as z, z as evalTime } from "./runExecution-d42Lm0i5.mjs";
2
+ import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-_g2qOMK6.mjs";
3
+ import { n as matchesEvalTags, t as defineEval } from "./src-CdZsOn6y.mjs";
4
4
  export { EvalAssertionError, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, z };
package/dist/runChild.mjs CHANGED
@@ -1,5 +1,5 @@
1
- import { At as evalChartsConfigSchema, Ct as buildEvalKey, Dt as evalStatAggregateSchema, I as configureEvalRunLogs, Ot as evalStatsConfigSchema, bt as runSummarySchema, et as createRunRequestSchema, jt as columnDefSchema, kt as manualInputDescriptorSchema, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, v as createFsCacheStore, yt as runManifestSchema } from "./runExecution-C4kAOhC1.mjs";
2
- import { S as parseEvalDiscovery, m as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-5xEiQxiS.mjs";
1
+ import { At as evalChartsConfigSchema, Ct as buildEvalKey, Dt as evalStatAggregateSchema, I as configureEvalRunLogs, Ot as evalStatsConfigSchema, bt as runSummarySchema, et as createRunRequestSchema, jt as columnDefSchema, kt as manualInputDescriptorSchema, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, v as createFsCacheStore, yt as runManifestSchema } from "./runExecution-d42Lm0i5.mjs";
2
+ import { S as parseEvalDiscovery, m as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-CvmFeOmT.mjs";
3
3
  import { z } from "zod/v4";
4
4
  import { readFile } from "node:fs/promises";
5
5
  import { relative } from "node:path";
@@ -2651,6 +2651,7 @@ const scopeStorage = new AsyncLocalStorage();
2651
2651
  const runtimeScopeStorage = new AsyncLocalStorage();
2652
2652
  const evalClockStorage = new AsyncLocalStorage();
2653
2653
  const activeSpanStackStorage = new AsyncLocalStorage();
2654
+ const recordingStackStorage = new AsyncLocalStorage();
2654
2655
  let activeEvalScopeCount = 0;
2655
2656
  let activeEvalRuntimeScopeCount = 0;
2656
2657
  let consoleCaptureEnabled = true;
@@ -2803,6 +2804,20 @@ async function runWithActiveSpan(span, fn) {
2803
2804
  const currentStack = activeSpanStackStorage.getStore() ?? [];
2804
2805
  return await activeSpanStackStorage.run([...currentStack, span], fn);
2805
2806
  }
2807
+ /** Execute a callback with a cache recording frame scoped to this async branch. */
2808
+ async function runWithCacheRecordingFrame(frame, fn) {
2809
+ const currentStack = recordingStackStorage.getStore() ?? [];
2810
+ return await recordingStackStorage.run([...currentStack, frame], fn);
2811
+ }
2812
+ function getCurrentCacheRecordingFrame(scope) {
2813
+ if (scope.replayingDepth > 0) return void 0;
2814
+ return recordingStackStorage.getStore()?.at(-1);
2815
+ }
2816
+ /** Mark a span as created by the active cache recorder, when one exists. */
2817
+ function recordSpanForActiveCacheRecording(scope, spanId) {
2818
+ if (scope.replayingDepth > 0) return;
2819
+ for (const frame of recordingStackStorage.getStore() ?? []) frame.spanIds.add(spanId);
2820
+ }
2806
2821
  /**
2807
2822
  * Return the current eval runner phase for this async execution.
2808
2823
  *
@@ -3110,7 +3125,6 @@ async function runInEvalScope(caseId, fn, options = {}) {
3110
3125
  logs: [],
3111
3126
  spans: [],
3112
3127
  checkpoints: /* @__PURE__ */ new Map(),
3113
- recordingStack: [],
3114
3128
  replayingDepth: 0,
3115
3129
  cacheContext: options.cacheContext,
3116
3130
  caseCacheRefs: [],
@@ -3150,10 +3164,16 @@ function nextEvalId() {
3150
3164
  scope.nextEvalIdCounter++;
3151
3165
  return `${scope.idPrefix}-${scope.nextEvalIdCounter}`;
3152
3166
  }
3153
- function recordOpIfActive(scope, op) {
3154
- if (scope.replayingDepth > 0) return;
3155
- const top = scope.recordingStack.at(-1);
3156
- if (top) top.ops.push(op);
3167
+ function recordCacheRecordingOpIfActive(scope, op) {
3168
+ getCurrentCacheRecordingFrame(scope)?.ops.push(op);
3169
+ }
3170
+ function recordCacheRecordingAttributesIfActive(scope, span, attributes) {
3171
+ const frames = recordingStackStorage.getStore();
3172
+ if (scope.replayingDepth > 0 || frames === void 0) return;
3173
+ for (const [key, value] of Object.entries(attributes)) {
3174
+ if (key.startsWith("cache.")) continue;
3175
+ for (const frame of frames) if (span.id === frame.replayParentSpanId) frame.finalAttributes[key] = value;
3176
+ }
3157
3177
  }
3158
3178
  function normalizeEvalOutputOptions(options) {
3159
3179
  if (options === void 0) return void 0;
@@ -3185,7 +3205,7 @@ function setEvalOutput(key, value, options = void 0) {
3185
3205
  scope.outputs[key] = value;
3186
3206
  const column = normalizeEvalOutputOptions(options);
3187
3207
  if (column !== void 0) scope.outputColumnOverrides[key] = column;
3188
- recordOpIfActive(scope, {
3208
+ recordCacheRecordingOpIfActive(scope, {
3189
3209
  kind: "setOutput",
3190
3210
  key,
3191
3211
  value,
@@ -3205,7 +3225,7 @@ function appendToEvalOutput(key, value) {
3205
3225
  if (existing === void 0) scope.outputs[key] = [value];
3206
3226
  else if (Array.isArray(existing)) scope.outputs[key] = [...copyArray$1(existing), value];
3207
3227
  else scope.outputs[key] = [existing, value];
3208
- recordOpIfActive(scope, {
3228
+ recordCacheRecordingOpIfActive(scope, {
3209
3229
  kind: "appendOutput",
3210
3230
  key,
3211
3231
  value
@@ -3223,7 +3243,7 @@ function mergeEvalOutput(key, patch) {
3223
3243
  const existing = scope.outputs[key];
3224
3244
  if (existing === void 0) {
3225
3245
  scope.outputs[key] = { ...patch };
3226
- recordOpIfActive(scope, {
3246
+ recordCacheRecordingOpIfActive(scope, {
3227
3247
  kind: "mergeOutput",
3228
3248
  key,
3229
3249
  patch
@@ -3238,7 +3258,7 @@ function mergeEvalOutput(key, patch) {
3238
3258
  ...existing,
3239
3259
  ...patch
3240
3260
  };
3241
- recordOpIfActive(scope, {
3261
+ recordCacheRecordingOpIfActive(scope, {
3242
3262
  kind: "mergeOutput",
3243
3263
  key,
3244
3264
  patch
@@ -3256,7 +3276,7 @@ function incrementEvalOutput(key, delta) {
3256
3276
  const existing = scope.outputs[key];
3257
3277
  if (existing === void 0) {
3258
3278
  scope.outputs[key] = delta;
3259
- recordOpIfActive(scope, {
3279
+ recordCacheRecordingOpIfActive(scope, {
3260
3280
  kind: "incrementOutput",
3261
3281
  key,
3262
3282
  delta
@@ -3268,7 +3288,7 @@ function incrementEvalOutput(key, delta) {
3268
3288
  return;
3269
3289
  }
3270
3290
  scope.outputs[key] = existing + delta;
3271
- recordOpIfActive(scope, {
3291
+ recordCacheRecordingOpIfActive(scope, {
3272
3292
  kind: "incrementOutput",
3273
3293
  key,
3274
3294
  delta
@@ -3675,10 +3695,6 @@ async function materializeExternalJsonValues(value, store) {
3675
3695
  if (!isRecordLike$3(value)) return value;
3676
3696
  return Object.fromEntries(await Promise.all(Object.entries(value).map(async ([key, entryValue]) => [key, await materializeExternalJsonValues(entryValue, store)])));
3677
3697
  }
3678
- /** Clone one value through the same serialization path used for cache data. */
3679
- async function cloneCacheValue(value, options = void 0) {
3680
- return deserializeCacheValue(await serializeCacheValue(value, options));
3681
- }
3682
3698
  function normalizeCacheSerializationOptions(options) {
3683
3699
  return {
3684
3700
  compress: options?.compress !== false,
@@ -4109,29 +4125,6 @@ function valueKind$1(value) {
4109
4125
  function copyArray(value) {
4110
4126
  return value.map((item) => item);
4111
4127
  }
4112
- function stripCacheAttributes(attributes) {
4113
- if (!attributes) return {};
4114
- const result = {};
4115
- for (const [key, value] of Object.entries(attributes)) if (!key.startsWith("cache.")) result[key] = value;
4116
- return result;
4117
- }
4118
- async function snapshotNonCacheAttributes(span) {
4119
- const snapshot = await cloneCacheValue(stripCacheAttributes(span?.attributes));
4120
- return isRecordLike$2(snapshot) ? snapshot : {};
4121
- }
4122
- function diffNonCacheAttributes(before, after) {
4123
- const result = {};
4124
- for (const [key, value] of Object.entries(after)) if (!cacheAttributeValuesEqual(before[key], value)) result[key] = value;
4125
- return result;
4126
- }
4127
- function cacheAttributeValuesEqual(left, right) {
4128
- if (Object.is(left, right)) return true;
4129
- try {
4130
- return JSON.stringify(left) === JSON.stringify(right);
4131
- } catch {
4132
- return false;
4133
- }
4134
- }
4135
4128
  function appendCacheRef(span, ref) {
4136
4129
  if (span === void 0) return;
4137
4130
  const existing = span.attributes?.["cache.refs"];
@@ -4150,7 +4143,7 @@ function recordCacheRef(scope, span, ref) {
4150
4143
  }
4151
4144
  scope.caseCacheRefs.push(ref);
4152
4145
  }
4153
- function serializeSubSpanTree(scope, spanId) {
4146
+ function serializeSubSpanTree(scope, spanId, spanIds) {
4154
4147
  const original = scope.spans.find((s) => s.id === spanId);
4155
4148
  if (!original) return {
4156
4149
  kind: "custom",
@@ -4163,7 +4156,7 @@ function serializeSubSpanTree(scope, spanId) {
4163
4156
  warnings: void 0,
4164
4157
  children: []
4165
4158
  };
4166
- const children = scope.spans.filter((s) => s.parentId === spanId).map((child) => serializeSubSpanTree(scope, child.id));
4159
+ const children = scope.spans.filter((s) => s.parentId === spanId && spanIds.has(s.id)).map((child) => serializeSubSpanTree(scope, child.id, spanIds));
4167
4160
  return {
4168
4161
  kind: original.kind,
4169
4162
  name: original.name,
@@ -4179,9 +4172,9 @@ function serializeSubSpanTree(scope, spanId) {
4179
4172
  function appendSubSpanOps(scope, frame) {
4180
4173
  for (let i = frame.baseSpanIndex; i < scope.spans.length; i++) {
4181
4174
  const candidate = scope.spans[i];
4182
- if (candidate?.parentId === frame.replayParentSpanId) frame.ops.push({
4175
+ if (candidate?.parentId === frame.replayParentSpanId && frame.spanIds.has(candidate.id)) frame.ops.push({
4183
4176
  kind: "subSpan",
4184
- span: serializeSubSpanTree(scope, candidate.id)
4177
+ span: serializeSubSpanTree(scope, candidate.id, frame.spanIds)
4185
4178
  });
4186
4179
  }
4187
4180
  }
@@ -4437,25 +4430,21 @@ function createTraceCache(generateSpanId) {
4437
4430
  key: keyHash,
4438
4431
  status: "bypass"
4439
4432
  });
4440
- const beforeAttributes = await snapshotNonCacheAttributes(activeSpan);
4441
4433
  const frame = {
4442
4434
  baseSpanIndex: scope.spans.length,
4443
4435
  replayParentSpanId: activeSpan?.id ?? null,
4436
+ spanIds: /* @__PURE__ */ new Set(),
4437
+ finalAttributes: {},
4444
4438
  ops: []
4445
4439
  };
4446
- scope.recordingStack.push(frame);
4447
- let bodyResult;
4448
- try {
4449
- bodyResult = await fn();
4450
- } finally {
4451
- scope.recordingStack.pop();
4452
- }
4440
+ const bodyResult = await runWithCacheRecordingFrame(frame, async () => {
4441
+ return await fn();
4442
+ });
4453
4443
  appendSubSpanOps(scope, frame);
4454
4444
  if (canStore) {
4455
- const finalAttributes = diffNonCacheAttributes(beforeAttributes, await snapshotNonCacheAttributes(activeSpan));
4456
4445
  const recording = {
4457
4446
  returnValue: bodyResult,
4458
- finalAttributes,
4447
+ finalAttributes: frame.finalAttributes,
4459
4448
  ops: frame.ops
4460
4449
  };
4461
4450
  await cacheCtx.adapter.write({
@@ -4514,6 +4503,13 @@ function mergeSpanAttributes(span, attributes) {
4514
4503
  ...span.attributes,
4515
4504
  ...attributes
4516
4505
  };
4506
+ const scope = getCurrentScope();
4507
+ if (scope !== void 0) recordCacheRecordingAttributesIfActive(scope, span, attributes);
4508
+ }
4509
+ function copyNonCacheAttributes(attributes) {
4510
+ const result = {};
4511
+ for (const [key, value] of Object.entries(attributes ?? {})) if (!key.startsWith("cache.")) result[key] = value;
4512
+ return result;
4517
4513
  }
4518
4514
  function isRecordLike$1(value) {
4519
4515
  return typeof value === "object" && value !== null && !Array.isArray(value);
@@ -4688,6 +4684,7 @@ function startExternalSpan(info) {
4688
4684
  status: "running",
4689
4685
  attributes: info.attributes
4690
4686
  });
4687
+ recordSpanForActiveCacheRecording(scope, id);
4691
4688
  return createExternalSpanHandle(id);
4692
4689
  }
4693
4690
  function updateExternalSpan(info) {
@@ -4746,6 +4743,7 @@ function recordExternalSpan(info) {
4746
4743
  warning: info.warning,
4747
4744
  warnings: info.warnings
4748
4745
  });
4746
+ recordSpanForActiveCacheRecording(scope, id);
4749
4747
  return id;
4750
4748
  }
4751
4749
  /**
@@ -4831,6 +4829,7 @@ async function traceSpanInternal(info, fn) {
4831
4829
  attributes: info.attributes
4832
4830
  };
4833
4831
  scope.spans.push(spanRecord);
4832
+ recordSpanForActiveCacheRecording(scope, id);
4834
4833
  const activeSpan = createSpanHandle(spanRecord);
4835
4834
  return await runWithActiveSpan(spanRecord, async () => {
4836
4835
  try {
@@ -4880,21 +4879,19 @@ async function traceSpanInternal(info, fn) {
4880
4879
  const frame = {
4881
4880
  baseSpanIndex: scope.spans.length,
4882
4881
  replayParentSpanId: id,
4882
+ spanIds: /* @__PURE__ */ new Set(),
4883
+ finalAttributes: copyNonCacheAttributes(spanRecord.attributes),
4883
4884
  ops: []
4884
4885
  };
4885
- scope.recordingStack.push(frame);
4886
- let bodyResult;
4887
- try {
4888
- bodyResult = await fn(activeSpan);
4889
- } finally {
4890
- scope.recordingStack.pop();
4891
- }
4886
+ const bodyResult = await runWithCacheRecordingFrame(frame, async () => {
4887
+ return await fn(activeSpan);
4888
+ });
4892
4889
  appendSubSpanOps(scope, frame);
4893
4890
  finishSpanWithoutThrownError(spanRecord, realStartedAt);
4894
4891
  if (canStore) {
4895
4892
  const recording = {
4896
4893
  returnValue: bodyResult,
4897
- finalAttributes: stripCacheAttributes(spanRecord.attributes),
4894
+ finalAttributes: frame.finalAttributes,
4898
4895
  finalStatus: spanRecord.status,
4899
4896
  finalError: spanRecord.error,
4900
4897
  finalErrors: spanRecord.errors,
@@ -4998,14 +4995,12 @@ const evalTracer = {
4998
4995
  status: "ok",
4999
4996
  attributes: { value: data }
5000
4997
  });
5001
- if (scope.replayingDepth === 0) {
5002
- const top = scope.recordingStack.at(-1);
5003
- if (top) top.ops.push({
5004
- kind: "checkpoint",
5005
- name,
5006
- data
5007
- });
5008
- }
4998
+ recordSpanForActiveCacheRecording(scope, id);
4999
+ recordCacheRecordingOpIfActive(scope, {
5000
+ kind: "checkpoint",
5001
+ name,
5002
+ data
5003
+ });
5009
5004
  }
5010
5005
  };
5011
5006
  /** Build a queryable trace tree helper from a flat span list and checkpoints. */
@@ -1,4 +1,4 @@
1
- import { Et as caseRowSchema, Pt as runWithEvalRegistry, Tt as caseDetailSchema, X as runWithEvalClock, _t as validateEvalTagName, bt as runSummarySchema, d as loadEvalModule, f as resolveEvalDefaultConfig, ft as deriveScopedSummaryFromCases, g as commitPendingCacheWrites, gt as matchesTagsFilter, ht as dedupeEvalTags, i as isCaseChildMessage, m as buildDeclaredColumnDefs, mt as deriveStatusFromChildStatuses, n as resolveRunnableEvalCases, o as stripTerminalControlCodes, pt as deriveStatusFromCaseRows, q as runInEvalRuntimeScope, t as filterEvalCases, u as runWithModuleIsolation, vt as validateTagsFilterExpression, wt as getCaseRowCaseKey, yt as runManifestSchema } from "./runExecution-C4kAOhC1.mjs";
1
+ import { Et as caseRowSchema, Pt as runWithEvalRegistry, Tt as caseDetailSchema, X as runWithEvalClock, _t as validateEvalTagName, bt as runSummarySchema, d as loadEvalModule, f as resolveEvalDefaultConfig, ft as deriveScopedSummaryFromCases, g as commitPendingCacheWrites, gt as matchesTagsFilter, ht as dedupeEvalTags, i as isCaseChildMessage, m as buildDeclaredColumnDefs, mt as deriveStatusFromChildStatuses, n as resolveRunnableEvalCases, o as stripTerminalControlCodes, pt as deriveStatusFromCaseRows, q as runInEvalRuntimeScope, t as filterEvalCases, u as runWithModuleIsolation, vt as validateTagsFilterExpression, wt as getCaseRowCaseKey, yt as runManifestSchema } from "./runExecution-d42Lm0i5.mjs";
2
2
  import { readFile, readdir, rm, writeFile } from "node:fs/promises";
3
3
  import { dirname, join } from "node:path";
4
4
  import { existsSync } from "node:fs";
@@ -1,2 +1,2 @@
1
- import { n as initRunner, t as getRunnerInstance } from "./runner-bjd_UB9i.mjs";
1
+ import { n as initRunner, t as getRunnerInstance } from "./runner-MSr8sAWm.mjs";
2
2
  export { getRunnerInstance, initRunner };
@@ -1,5 +1,5 @@
1
- import { n as createRunner } from "./cli-Cf37PZKi.mjs";
2
- import "./src-303BocMW.mjs";
1
+ import { n as createRunner } from "./cli-_g2qOMK6.mjs";
2
+ import "./src-CdZsOn6y.mjs";
3
3
  //#region ../../apps/server/src/runner.ts
4
4
  let runnerInstance = null;
5
5
  function getRunnerInstance() {
@@ -1,5 +1,5 @@
1
- import { Mt as defineEval$1, W as matchesEvalTags$1 } from "./runExecution-C4kAOhC1.mjs";
2
- import "./cli-Cf37PZKi.mjs";
1
+ import { Mt as defineEval$1, W as matchesEvalTags$1 } from "./runExecution-d42Lm0i5.mjs";
2
+ import "./cli-_g2qOMK6.mjs";
3
3
  //#region src/index.ts
4
4
  /** Register an eval definition with typed tag support. */
5
5
  function defineEval(definition) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ls-stack/agent-eval",
3
- "version": "0.58.0",
3
+ "version": "0.58.1",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "agent-evals": "./dist/bin.mjs"
@@ -33,8 +33,8 @@
33
33
  "@types/node": "^24.7.2",
34
34
  "typescript": "^5.9.2",
35
35
  "@agent-evals/runner": "0.0.1",
36
- "@agent-evals/sdk": "0.0.1",
37
- "@agent-evals/shared": "0.0.1"
36
+ "@agent-evals/shared": "0.0.1",
37
+ "@agent-evals/sdk": "0.0.1"
38
38
  },
39
39
  "scripts": {
40
40
  "build": "pnpm --filter @agent-evals/web build && pnpm --filter @agent-evals/shared build && pnpm --filter @agent-evals/sdk build && pnpm --filter @agent-evals/runner build && tsdown --filter cli-js && tsdown --filter cli-types",