@ls-stack/agent-eval 0.52.1 → 0.52.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,8 +25,8 @@
25
25
  href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600&family=JetBrains+Mono:wght@400;500&display=swap"
26
26
  rel="stylesheet"
27
27
  />
28
- <script type="module" crossorigin src="/assets/index-dAgwnqH2.js"></script>
29
- <link rel="stylesheet" crossorigin href="/assets/index-CFASvC2z.css">
28
+ <script type="module" crossorigin src="/assets/index-D9HUAH8K.js"></script>
29
+ <link rel="stylesheet" crossorigin href="/assets/index-BHc4gfUO.css">
30
30
  </head>
31
31
  <body>
32
32
  <div id="root"></div>
package/dist/bin.mjs CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env node
2
- import { t as runCli } from "./cli-Cvs7tc2v.mjs";
2
+ import { t as runCli } from "./cli-DbVFgRO3.mjs";
3
3
  import { spawn } from "node:child_process";
4
4
  //#region src/bin.ts
5
5
  const moduleMocksFlag = "--experimental-test-module-mocks";
@@ -1,4 +1,4 @@
1
- import { A as validateCharts, At as buildEvalKey, C as deriveEvalFreshness, Ct as getEvalDisplayStatus, D as loadConfig, Dt as runSummarySchema, E as resolveEvalDefaultConfig, Lt as getEvalRegistry, O as buildDeclaredColumnDefs, Ot as resolveApiCallsConfig, S as parseManualInputValues, St as getEvalTitle, T as parseEvalDiscovery, Tt as matchesTagsFilter, _ as recomputePersistedCaseStatus, a as validateTagsFilters, b as resolveArtifactPath, bt as applyDerivedCallAttributes, c as getLastRunStatuses, d as loadPersistedRunSnapshots, f as nextShortIdFromSnapshots, g as recomputeEvalStatusesInRuns, h as persistRunState, i as resolveEvalTags, j as createFsCacheStore, jt as getCaseRowCaseKey, k as normalizeScoreDef, kt as resolveLlmCallsConfig, l as getLatestRunInfos, m as deleteTemporaryRuns, n as getTargetEvalKeys, o as stripTerminalControlCodes, p as persistCaseDetail, s as generateRunId, u as loadPersistedRunSnapshot, v as runTouchesEval, w as loadEvalModule, wt as deriveScopedSummaryFromCases, x as buildManualInputDescriptor, y as resolveTracePresentation } from "./runOrchestration-o38J7uZO.mjs";
1
+ import { A as validateCharts, At as buildEvalKey, C as deriveEvalFreshness, Ct as getEvalDisplayStatus, D as loadConfig, Dt as runSummarySchema, E as resolveEvalDefaultConfig, Lt as getEvalRegistry, O as buildDeclaredColumnDefs, Ot as resolveApiCallsConfig, S as parseManualInputValues, St as getEvalTitle, T as parseEvalDiscovery, Tt as matchesTagsFilter, _ as recomputePersistedCaseStatus, a as validateTagsFilters, b as resolveArtifactPath, bt as applyDerivedCallAttributes, c as getLastRunStatuses, d as loadPersistedRunSnapshots, f as nextShortIdFromSnapshots, g as recomputeEvalStatusesInRuns, h as persistRunState, i as resolveEvalTags, j as createFsCacheStore, jt as getCaseRowCaseKey, k as normalizeScoreDef, kt as resolveLlmCallsConfig, l as getLatestRunInfos, m as deleteTemporaryRuns, n as getTargetEvalKeys, o as stripTerminalControlCodes, p as persistCaseDetail, s as generateRunId, u as loadPersistedRunSnapshot, v as runTouchesEval, w as loadEvalModule, wt as deriveScopedSummaryFromCases, x as buildManualInputDescriptor, y as resolveTracePresentation } from "./runOrchestration-DT6cje9E.mjs";
2
2
  import { copyFile, mkdir, readFile, rm, writeFile } from "node:fs/promises";
3
3
  import { basename, dirname, extname, isAbsolute, join, relative, resolve, sep } from "node:path";
4
4
  import { createHash, randomUUID } from "node:crypto";
@@ -2095,8 +2095,8 @@ async function commandApp(args) {
2095
2095
  const { serve } = await import("@hono/node-server");
2096
2096
  const bundledWebDist = resolve(currentDir, "apps/web/dist");
2097
2097
  if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
2098
- const appModule = await import("./app-CzLj4ZX0.mjs");
2099
- const runnerModule = await import("./runner-iWtmKx9z.mjs");
2098
+ const appModule = await import("./app-BsFcUIQp.mjs");
2099
+ const runnerModule = await import("./runner-CyTUvbHE.mjs");
2100
2100
  if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
2101
2101
  if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
2102
2102
  await runnerModule.initRunner();
package/dist/index.d.mts CHANGED
@@ -2254,8 +2254,8 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2254
2254
  }>;
2255
2255
  label: z$1.ZodOptional<z$1.ZodString>;
2256
2256
  color: z$1.ZodOptional<z$1.ZodEnum<{
2257
- error: "error";
2258
2257
  success: "success";
2258
+ error: "error";
2259
2259
  warning: "warning";
2260
2260
  accent: "accent";
2261
2261
  accentDim: "accentDim";
@@ -2278,8 +2278,8 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2278
2278
  }>;
2279
2279
  label: z$1.ZodOptional<z$1.ZodString>;
2280
2280
  color: z$1.ZodOptional<z$1.ZodEnum<{
2281
- error: "error";
2282
2281
  success: "success";
2282
+ error: "error";
2283
2283
  warning: "warning";
2284
2284
  accent: "accent";
2285
2285
  accentDim: "accentDim";
@@ -2568,6 +2568,22 @@ declare const scoreTraceSchema: z$1.ZodObject<{
2568
2568
  }>>;
2569
2569
  }, z$1.core.$strip>>>;
2570
2570
  }, z$1.core.$strip>;
2571
+ cacheRefs: z$1.ZodDefault<z$1.ZodArray<z$1.ZodObject<{
2572
+ type: z$1.ZodLiteral<"value">;
2573
+ name: z$1.ZodString;
2574
+ namespace: z$1.ZodString;
2575
+ key: z$1.ZodString;
2576
+ status: z$1.ZodEnum<{
2577
+ hit: "hit";
2578
+ miss: "miss";
2579
+ refresh: "refresh";
2580
+ bypass: "bypass";
2581
+ }>;
2582
+ read: z$1.ZodOptional<z$1.ZodBoolean>;
2583
+ stored: z$1.ZodOptional<z$1.ZodBoolean>;
2584
+ storedAt: z$1.ZodOptional<z$1.ZodString>;
2585
+ age: z$1.ZodOptional<z$1.ZodNumber>;
2586
+ }, z$1.core.$strip>>>;
2571
2587
  }, z$1.core.$strip>;
2572
2588
  /** Trace payload captured while computing one score for a case. */
2573
2589
  type ScoreTrace = z$1.infer<typeof scoreTraceSchema>;
@@ -2724,6 +2740,22 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
2724
2740
  }>>;
2725
2741
  }, z$1.core.$strip>>>;
2726
2742
  }, z$1.core.$strip>;
2743
+ cacheRefs: z$1.ZodDefault<z$1.ZodArray<z$1.ZodObject<{
2744
+ type: z$1.ZodLiteral<"value">;
2745
+ name: z$1.ZodString;
2746
+ namespace: z$1.ZodString;
2747
+ key: z$1.ZodString;
2748
+ status: z$1.ZodEnum<{
2749
+ hit: "hit";
2750
+ miss: "miss";
2751
+ refresh: "refresh";
2752
+ bypass: "bypass";
2753
+ }>;
2754
+ read: z$1.ZodOptional<z$1.ZodBoolean>;
2755
+ stored: z$1.ZodOptional<z$1.ZodBoolean>;
2756
+ storedAt: z$1.ZodOptional<z$1.ZodString>;
2757
+ age: z$1.ZodOptional<z$1.ZodNumber>;
2758
+ }, z$1.core.$strip>>>;
2727
2759
  }, z$1.core.$strip>>>;
2728
2760
  columns: z$1.ZodRecord<z$1.ZodString, z$1.ZodUnion<readonly [z$1.ZodType<string | number | boolean | Record<string, unknown> | unknown[] | null, unknown, z$1.core.$ZodTypeInternals<string | number | boolean | Record<string, unknown> | unknown[] | null, unknown>>, z$1.ZodUnion<readonly [z$1.ZodObject<{
2729
2761
  source: z$1.ZodLiteral<"repo">;
@@ -2781,10 +2813,10 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
2781
2813
  namespace: z$1.ZodString;
2782
2814
  key: z$1.ZodString;
2783
2815
  status: z$1.ZodEnum<{
2784
- bypass: "bypass";
2785
- refresh: "refresh";
2786
2816
  hit: "hit";
2787
2817
  miss: "miss";
2818
+ refresh: "refresh";
2819
+ bypass: "bypass";
2788
2820
  }>;
2789
2821
  read: z$1.ZodOptional<z$1.ZodBoolean>;
2790
2822
  stored: z$1.ZodOptional<z$1.ZodBoolean>;
@@ -2851,8 +2883,8 @@ type EvalChartAggregate = z$1.infer<typeof evalChartAggregateSchema>;
2851
2883
  * not emit raw hex so authored evals stay decoupled from the web theme.
2852
2884
  */
2853
2885
  declare const evalChartColorSchema: z$1.ZodEnum<{
2854
- error: "error";
2855
2886
  success: "success";
2887
+ error: "error";
2856
2888
  warning: "warning";
2857
2889
  accent: "accent";
2858
2890
  accentDim: "accentDim";
@@ -2880,8 +2912,8 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
2880
2912
  }>;
2881
2913
  label: z$1.ZodOptional<z$1.ZodString>;
2882
2914
  color: z$1.ZodOptional<z$1.ZodEnum<{
2883
- error: "error";
2884
2915
  success: "success";
2916
+ error: "error";
2885
2917
  warning: "warning";
2886
2918
  accent: "accent";
2887
2919
  accentDim: "accentDim";
@@ -2904,8 +2936,8 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
2904
2936
  }>;
2905
2937
  label: z$1.ZodOptional<z$1.ZodString>;
2906
2938
  color: z$1.ZodOptional<z$1.ZodEnum<{
2907
- error: "error";
2908
2939
  success: "success";
2940
+ error: "error";
2909
2941
  warning: "warning";
2910
2942
  accent: "accent";
2911
2943
  accentDim: "accentDim";
@@ -2963,8 +2995,8 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
2963
2995
  }>;
2964
2996
  label: z$1.ZodOptional<z$1.ZodString>;
2965
2997
  color: z$1.ZodOptional<z$1.ZodEnum<{
2966
- error: "error";
2967
2998
  success: "success";
2999
+ error: "error";
2968
3000
  warning: "warning";
2969
3001
  accent: "accent";
2970
3002
  accentDim: "accentDim";
@@ -2987,8 +3019,8 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
2987
3019
  }>;
2988
3020
  label: z$1.ZodOptional<z$1.ZodString>;
2989
3021
  color: z$1.ZodOptional<z$1.ZodEnum<{
2990
- error: "error";
2991
3022
  success: "success";
3023
+ error: "error";
2992
3024
  warning: "warning";
2993
3025
  accent: "accent";
2994
3026
  accentDim: "accentDim";
@@ -3053,8 +3085,8 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
3053
3085
  }>;
3054
3086
  label: z$1.ZodOptional<z$1.ZodString>;
3055
3087
  color: z$1.ZodOptional<z$1.ZodEnum<{
3056
- error: "error";
3057
3088
  success: "success";
3089
+ error: "error";
3058
3090
  warning: "warning";
3059
3091
  accent: "accent";
3060
3092
  accentDim: "accentDim";
@@ -3077,8 +3109,8 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
3077
3109
  }>;
3078
3110
  label: z$1.ZodOptional<z$1.ZodString>;
3079
3111
  color: z$1.ZodOptional<z$1.ZodEnum<{
3080
- error: "error";
3081
3112
  success: "success";
3113
+ error: "error";
3082
3114
  warning: "warning";
3083
3115
  accent: "accent";
3084
3116
  accentDim: "accentDim";
@@ -3157,9 +3189,9 @@ declare const runManifestSchema$1: z$1.ZodObject<{
3157
3189
  median: "median";
3158
3190
  }>>>;
3159
3191
  cacheMode: z$1.ZodOptional<z$1.ZodEnum<{
3160
- use: "use";
3161
- bypass: "bypass";
3162
3192
  refresh: "refresh";
3193
+ bypass: "bypass";
3194
+ use: "use";
3163
3195
  }>>;
3164
3196
  }, z$1.core.$strip>;
3165
3197
  /** Persisted lifecycle metadata for a single eval run. */
@@ -4061,9 +4093,9 @@ declare function extractApiCalls(spans: EvalTraceSpan$1[], config: ResolvedApiCa
4061
4093
  * - `refresh`: never read, always write (forces re-execution and overwrites).
4062
4094
  */
4063
4095
  declare const cacheModeSchema: z$1.ZodEnum<{
4064
- use: "use";
4065
- bypass: "bypass";
4066
4096
  refresh: "refresh";
4097
+ bypass: "bypass";
4098
+ use: "use";
4067
4099
  }>;
4068
4100
  /** Mode controlling how cached spans behave during a run. */
4069
4101
  type CacheMode = z$1.infer<typeof cacheModeSchema>;
@@ -4077,17 +4109,17 @@ declare const spanCacheOptionsSchema: z$1.ZodObject<{
4077
4109
  type SpanCacheOptions = z$1.infer<typeof spanCacheOptionsSchema>;
4078
4110
  /** Category of operation stored in the eval cache. */
4079
4111
  declare const cacheOperationTypeSchema: z$1.ZodEnum<{
4080
- span: "span";
4081
4112
  value: "value";
4113
+ span: "span";
4082
4114
  }>;
4083
4115
  /** Category of operation stored in the eval cache. */
4084
4116
  type CacheOperationType = z$1.infer<typeof cacheOperationTypeSchema>;
4085
4117
  /** Status of a cache lookup recorded on a span or case scope. */
4086
4118
  declare const cacheStatusSchema: z$1.ZodEnum<{
4087
- bypass: "bypass";
4088
- refresh: "refresh";
4089
4119
  hit: "hit";
4090
4120
  miss: "miss";
4121
+ refresh: "refresh";
4122
+ bypass: "bypass";
4091
4123
  }>;
4092
4124
  /** Status of a cache lookup recorded on a span or case scope. */
4093
4125
  type CacheStatus = z$1.infer<typeof cacheStatusSchema>;
@@ -4104,10 +4136,10 @@ declare const traceCacheRefSchema: z$1.ZodObject<{
4104
4136
  namespace: z$1.ZodString;
4105
4137
  key: z$1.ZodString;
4106
4138
  status: z$1.ZodEnum<{
4107
- bypass: "bypass";
4108
- refresh: "refresh";
4109
4139
  hit: "hit";
4110
4140
  miss: "miss";
4141
+ refresh: "refresh";
4142
+ bypass: "bypass";
4111
4143
  }>;
4112
4144
  read: z$1.ZodOptional<z$1.ZodBoolean>;
4113
4145
  stored: z$1.ZodOptional<z$1.ZodBoolean>;
@@ -4121,8 +4153,8 @@ declare const cacheListItemSchema$1: z$1.ZodObject<{
4121
4153
  key: z$1.ZodString;
4122
4154
  namespace: z$1.ZodString;
4123
4155
  operationType: z$1.ZodEnum<{
4124
- span: "span";
4125
4156
  value: "value";
4157
+ span: "span";
4126
4158
  }>;
4127
4159
  operationName: z$1.ZodString;
4128
4160
  spanName: z$1.ZodOptional<z$1.ZodString>;
@@ -4244,8 +4276,8 @@ declare const cacheEntrySchema: z$1.ZodObject<{
4244
4276
  key: z$1.ZodString;
4245
4277
  namespace: z$1.ZodString;
4246
4278
  operationType: z$1.ZodOptional<z$1.ZodEnum<{
4247
- span: "span";
4248
4279
  value: "value";
4280
+ span: "span";
4249
4281
  }>>;
4250
4282
  operationName: z$1.ZodOptional<z$1.ZodString>;
4251
4283
  spanName: z$1.ZodOptional<z$1.ZodString>;
@@ -4323,8 +4355,8 @@ declare const cacheDebugKeyEntrySchema: z$1.ZodObject<{
4323
4355
  key: z$1.ZodString;
4324
4356
  namespace: z$1.ZodString;
4325
4357
  operationType: z$1.ZodEnum<{
4326
- span: "span";
4327
4358
  value: "value";
4359
+ span: "span";
4328
4360
  }>;
4329
4361
  operationName: z$1.ZodString;
4330
4362
  storedAt: z$1.ZodString;
@@ -4334,8 +4366,8 @@ declare const cacheDebugKeyEntrySchema: z$1.ZodObject<{
4334
4366
  key: z$1.ZodString;
4335
4367
  namespace: z$1.ZodString;
4336
4368
  operationType: z$1.ZodOptional<z$1.ZodEnum<{
4337
- span: "span";
4338
4369
  value: "value";
4370
+ span: "span";
4339
4371
  }>>;
4340
4372
  operationName: z$1.ZodOptional<z$1.ZodString>;
4341
4373
  spanName: z$1.ZodOptional<z$1.ZodString>;
@@ -4413,8 +4445,8 @@ declare const cacheEntryWithDebugKeySchema$1: z$1.ZodObject<{
4413
4445
  key: z$1.ZodString;
4414
4446
  namespace: z$1.ZodString;
4415
4447
  operationType: z$1.ZodOptional<z$1.ZodEnum<{
4416
- span: "span";
4417
4448
  value: "value";
4449
+ span: "span";
4418
4450
  }>>;
4419
4451
  operationName: z$1.ZodOptional<z$1.ZodString>;
4420
4452
  spanName: z$1.ZodOptional<z$1.ZodString>;
@@ -4483,8 +4515,8 @@ declare const cacheEntryWithDebugKeySchema$1: z$1.ZodObject<{
4483
4515
  key: z$1.ZodString;
4484
4516
  namespace: z$1.ZodString;
4485
4517
  operationType: z$1.ZodEnum<{
4486
- span: "span";
4487
4518
  value: "value";
4519
+ span: "span";
4488
4520
  }>;
4489
4521
  operationName: z$1.ZodString;
4490
4522
  storedAt: z$1.ZodString;
@@ -4494,8 +4526,8 @@ declare const cacheEntryWithDebugKeySchema$1: z$1.ZodObject<{
4494
4526
  key: z$1.ZodString;
4495
4527
  namespace: z$1.ZodString;
4496
4528
  operationType: z$1.ZodOptional<z$1.ZodEnum<{
4497
- span: "span";
4498
4529
  value: "value";
4530
+ span: "span";
4499
4531
  }>>;
4500
4532
  operationName: z$1.ZodOptional<z$1.ZodString>;
4501
4533
  spanName: z$1.ZodOptional<z$1.ZodString>;
@@ -4573,8 +4605,8 @@ declare const cacheFileSchema: z$1.ZodObject<{
4573
4605
  key: z$1.ZodString;
4574
4606
  namespace: z$1.ZodString;
4575
4607
  operationType: z$1.ZodOptional<z$1.ZodEnum<{
4576
- span: "span";
4577
4608
  value: "value";
4609
+ span: "span";
4578
4610
  }>>;
4579
4611
  operationName: z$1.ZodOptional<z$1.ZodString>;
4580
4612
  spanName: z$1.ZodOptional<z$1.ZodString>;
@@ -4651,8 +4683,8 @@ declare const cacheDebugKeyFileSchema: z$1.ZodObject<{
4651
4683
  key: z$1.ZodString;
4652
4684
  namespace: z$1.ZodString;
4653
4685
  operationType: z$1.ZodEnum<{
4654
- span: "span";
4655
4686
  value: "value";
4687
+ span: "span";
4656
4688
  }>;
4657
4689
  operationName: z$1.ZodString;
4658
4690
  storedAt: z$1.ZodString;
@@ -4662,8 +4694,8 @@ declare const cacheDebugKeyFileSchema: z$1.ZodObject<{
4662
4694
  key: z$1.ZodString;
4663
4695
  namespace: z$1.ZodString;
4664
4696
  operationType: z$1.ZodOptional<z$1.ZodEnum<{
4665
- span: "span";
4666
4697
  value: "value";
4698
+ span: "span";
4667
4699
  }>>;
4668
4700
  operationName: z$1.ZodOptional<z$1.ZodString>;
4669
4701
  spanName: z$1.ZodOptional<z$1.ZodString>;
@@ -4846,9 +4878,9 @@ declare const createRunRequestSchema$1: z$1.ZodObject<{
4846
4878
  temporary: z$1.ZodOptional<z$1.ZodBoolean>;
4847
4879
  cache: z$1.ZodOptional<z$1.ZodObject<{
4848
4880
  mode: z$1.ZodDefault<z$1.ZodEnum<{
4849
- use: "use";
4850
- bypass: "bypass";
4851
4881
  refresh: "refresh";
4882
+ bypass: "bypass";
4883
+ use: "use";
4852
4884
  }>>;
4853
4885
  }, z$1.core.$strip>>;
4854
4886
  manualInputs: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
@@ -5566,6 +5598,22 @@ declare const caseDetailSchema: z$1.ZodObject<{
5566
5598
  }>>;
5567
5599
  }, z$1.core.$strip>>>;
5568
5600
  }, z$1.core.$strip>;
5601
+ cacheRefs: z$1.ZodDefault<z$1.ZodArray<z$1.ZodObject<{
5602
+ type: z$1.ZodLiteral<"value">;
5603
+ name: z$1.ZodString;
5604
+ namespace: z$1.ZodString;
5605
+ key: z$1.ZodString;
5606
+ status: z$1.ZodEnum<{
5607
+ hit: "hit";
5608
+ miss: "miss";
5609
+ refresh: "refresh";
5610
+ bypass: "bypass";
5611
+ }>;
5612
+ read: z$1.ZodOptional<z$1.ZodBoolean>;
5613
+ stored: z$1.ZodOptional<z$1.ZodBoolean>;
5614
+ storedAt: z$1.ZodOptional<z$1.ZodString>;
5615
+ age: z$1.ZodOptional<z$1.ZodNumber>;
5616
+ }, z$1.core.$strip>>>;
5569
5617
  }, z$1.core.$strip>>>;
5570
5618
  columns: z$1.ZodRecord<z$1.ZodString, z$1.ZodUnion<readonly [z$1.ZodType<string | number | boolean | Record<string, unknown> | unknown[] | null, unknown, z$1.core.$ZodTypeInternals<string | number | boolean | Record<string, unknown> | unknown[] | null, unknown>>, z$1.ZodUnion<readonly [z$1.ZodObject<{
5571
5619
  source: z$1.ZodLiteral<"repo">;
package/dist/index.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { $ as getCurrentScope, B as deserializeCacheValue, F as evalSpan, G as readManualInputFile, H as serializeCacheValue, I as evalTracer, J as appendToEvalOutput, K as evalExpect, L as hashCacheKey, Lt as getEvalRegistry, M as z, N as buildTraceTree, P as captureEvalSpanError, Q as evalTime, R as hashCacheKeySync, U as repoFile, V as serializeCacheRecording, W as manualInputFileValueSchema, X as evalAssert, Z as evalLog, _t as extractLlmCalls, at as nextEvalId, ct as runInExistingEvalScope, dt as startEvalBackgroundJob, et as getEvalCaseInput, gt as extractApiCalls, ht as extractCacheHits, it as mergeEvalOutput, lt as setEvalOutput, mt as extractCacheEntries, nt as isInEvalScope, ot as runInEvalRuntimeScope, q as EvalAssertionError, st as runInEvalScope, tt as incrementEvalOutput, ut as setScopeCacheContext, vt as simulateLlmCallCost, xt as getNestedAttribute, yt as simulateTokenAllocation, z as deserializeCacheRecording } from "./runOrchestration-o38J7uZO.mjs";
2
- import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-Cvs7tc2v.mjs";
3
- import { n as matchesEvalTags, t as defineEval } from "./src-Jahivm6d.mjs";
1
+ import { $ as getCurrentScope, B as deserializeCacheValue, F as evalSpan, G as readManualInputFile, H as serializeCacheValue, I as evalTracer, J as appendToEvalOutput, K as evalExpect, L as hashCacheKey, Lt as getEvalRegistry, M as z, N as buildTraceTree, P as captureEvalSpanError, Q as evalTime, R as hashCacheKeySync, U as repoFile, V as serializeCacheRecording, W as manualInputFileValueSchema, X as evalAssert, Z as evalLog, _t as extractLlmCalls, at as nextEvalId, ct as runInExistingEvalScope, dt as startEvalBackgroundJob, et as getEvalCaseInput, gt as extractApiCalls, ht as extractCacheHits, it as mergeEvalOutput, lt as setEvalOutput, mt as extractCacheEntries, nt as isInEvalScope, ot as runInEvalRuntimeScope, q as EvalAssertionError, st as runInEvalScope, tt as incrementEvalOutput, ut as setScopeCacheContext, vt as simulateLlmCallCost, xt as getNestedAttribute, yt as simulateTokenAllocation, z as deserializeCacheRecording } from "./runOrchestration-DT6cje9E.mjs";
2
+ import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-DbVFgRO3.mjs";
3
+ import { n as matchesEvalTags, t as defineEval } from "./src-DlvYXPxG.mjs";
4
4
  export { EvalAssertionError, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, z };
package/dist/runChild.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { At as buildEvalKey, D as loadConfig, Dt as runSummarySchema, Et as runManifestSchema, Ft as columnDefSchema, Mt as evalStatsConfigSchema, Nt as manualInputDescriptorSchema, Pt as evalChartsConfigSchema, T as parseEvalDiscovery, Y as configureEvalRunLogs, ft as createRunRequestSchema, h as persistRunState, j as createFsCacheStore, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-o38J7uZO.mjs";
1
+ import { At as buildEvalKey, D as loadConfig, Dt as runSummarySchema, Et as runManifestSchema, Ft as columnDefSchema, Mt as evalStatsConfigSchema, Nt as manualInputDescriptorSchema, Pt as evalChartsConfigSchema, T as parseEvalDiscovery, Y as configureEvalRunLogs, ft as createRunRequestSchema, h as persistRunState, j as createFsCacheStore, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-DT6cje9E.mjs";
2
2
  import { z } from "zod/v4";
3
3
  import { readFile } from "node:fs/promises";
4
4
  import { relative } from "node:path";
@@ -827,7 +827,12 @@ const runLogEntrySchema = z.object({
827
827
  /** Trace payload captured while computing one score for a case. */
828
828
  const scoreTraceSchema = z.object({
829
829
  trace: z.array(traceSpanSchema),
830
- traceDisplay: traceDisplayConfigSchema
830
+ traceDisplay: traceDisplayConfigSchema,
831
+ /**
832
+ * Value-cache refs recorded by `evalTracer.cache(...)` calls made directly
833
+ * from the score compute body, with no surrounding scorer span.
834
+ */
835
+ cacheRefs: z.array(traceCacheRefSchema).default([])
831
836
  });
832
837
  /** Schema for the detailed payload shown when opening a specific case. */
833
838
  const caseDetailSchema = z.object({
@@ -7134,9 +7139,10 @@ async function runCase(params) {
7134
7139
  ...entry,
7135
7140
  source: key
7136
7141
  })));
7137
- if (trace.length > 0) scoringTraces[key] = {
7142
+ if (trace.length > 0 || scoreRun.scope.caseCacheRefs.length > 0) scoringTraces[key] = {
7138
7143
  trace,
7139
- traceDisplay
7144
+ traceDisplay,
7145
+ cacheRefs: scoreRun.scope.caseCacheRefs
7140
7146
  };
7141
7147
  const rawValue = scoreRun.result;
7142
7148
  if (scoreRun.error) {
@@ -1,2 +1,2 @@
1
- import { n as initRunner, t as getRunnerInstance } from "./runner-LdMiDmAN.mjs";
1
+ import { n as initRunner, t as getRunnerInstance } from "./runner-WRQdfG0r.mjs";
2
2
  export { getRunnerInstance, initRunner };
@@ -1,5 +1,5 @@
1
- import { n as createRunner } from "./cli-Cvs7tc2v.mjs";
2
- import "./src-Jahivm6d.mjs";
1
+ import { n as createRunner } from "./cli-DbVFgRO3.mjs";
2
+ import "./src-DlvYXPxG.mjs";
3
3
  //#region ../../apps/server/src/runner.ts
4
4
  let runnerInstance = null;
5
5
  function getRunnerInstance() {
@@ -1,5 +1,5 @@
1
- import { It as defineEval$1, rt as matchesEvalTags$1 } from "./runOrchestration-o38J7uZO.mjs";
2
- import "./cli-Cvs7tc2v.mjs";
1
+ import { It as defineEval$1, rt as matchesEvalTags$1 } from "./runOrchestration-DT6cje9E.mjs";
2
+ import "./cli-DbVFgRO3.mjs";
3
3
  //#region src/index.ts
4
4
  /** Register an eval definition with typed tag support. */
5
5
  function defineEval(definition) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ls-stack/agent-eval",
3
- "version": "0.52.1",
3
+ "version": "0.52.3",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "agent-evals": "./dist/bin.mjs"
@@ -303,7 +303,8 @@ or if the case errors. Scores without `passThreshold` are informational.
303
303
  Score functions run in their own trace scope, separate from the execution
304
304
  trace, so LLM-as-judge scorers can use `evalTracer.span(...)` and cached spans
305
305
  without polluting the agent trajectory. Outputs set inside a scorer stay
306
- private to that score.
306
+ private to that score. Spanless `evalTracer.cache(...)` calls made directly
307
+ inside a scorer are stored on that score trace's `cacheRefs` payload.
307
308
 
308
309
  `manualScores` declares score columns that reviewers fill in after a run.
309
310
  Pending values keep the eval in an `unscored` state instead of failing.
@@ -473,7 +474,8 @@ Mental model:
473
474
  span, that span gets a `cache.refs` entry with the value cache name, key,
474
475
  namespace, and hit/miss status. When called directly from the case body
475
476
  (no surrounding span), the ref is recorded on the case detail's `cacheRefs`
476
- array.
477
+ array. When called directly from a scorer, the ref is recorded on that
478
+ scoring trace's `cacheRefs` array.
477
479
  - Cache identity is the namespace plus the authored key. Source-file
478
480
  fingerprints are tracked for run freshness separately, but do not participate
479
481
  in cache-key hashing.