@ls-stack/agent-eval 0.50.0 → 0.51.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,8 +25,8 @@
25
25
  href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600&family=JetBrains+Mono:wght@400;500&display=swap"
26
26
  rel="stylesheet"
27
27
  />
28
- <script type="module" crossorigin src="/assets/index-BkXnL_y8.js"></script>
29
- <link rel="stylesheet" crossorigin href="/assets/index-BQY_snr3.css">
28
+ <script type="module" crossorigin src="/assets/index-DwgyYZgf.js"></script>
29
+ <link rel="stylesheet" crossorigin href="/assets/index-C5SveD-X.css">
30
30
  </head>
31
31
  <body>
32
32
  <div id="root"></div>
package/dist/bin.mjs CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env node
2
- import { t as runCli } from "./cli-R7_V6YWa.mjs";
2
+ import { t as runCli } from "./cli-Cvs7tc2v.mjs";
3
3
  import { spawn } from "node:child_process";
4
4
  //#region src/bin.ts
5
5
  const moduleMocksFlag = "--experimental-test-module-mocks";
@@ -1,4 +1,4 @@
1
- import { A as validateCharts, At as buildEvalKey, C as deriveEvalFreshness, Ct as getEvalDisplayStatus, D as loadConfig, Dt as runSummarySchema, E as resolveEvalDefaultConfig, Lt as getEvalRegistry, O as buildDeclaredColumnDefs, Ot as resolveApiCallsConfig, S as parseManualInputValues, St as getEvalTitle, T as parseEvalDiscovery, Tt as matchesTagsFilter, _ as recomputePersistedCaseStatus, a as validateTagsFilters, b as resolveArtifactPath, bt as applyDerivedCallAttributes, c as getLastRunStatuses, d as loadPersistedRunSnapshots, f as nextShortIdFromSnapshots, g as recomputeEvalStatusesInRuns, h as persistRunState, i as resolveEvalTags, j as createFsCacheStore, jt as getCaseRowCaseKey, k as normalizeScoreDef, kt as resolveLlmCallsConfig, l as getLatestRunInfos, m as deleteTemporaryRuns, n as getTargetEvalKeys, o as stripTerminalControlCodes, p as persistCaseDetail, s as generateRunId, u as loadPersistedRunSnapshot, v as runTouchesEval, w as loadEvalModule, wt as deriveScopedSummaryFromCases, x as buildManualInputDescriptor, y as resolveTracePresentation } from "./runOrchestration-CokPQet7.mjs";
1
+ import { A as validateCharts, At as buildEvalKey, C as deriveEvalFreshness, Ct as getEvalDisplayStatus, D as loadConfig, Dt as runSummarySchema, E as resolveEvalDefaultConfig, Lt as getEvalRegistry, O as buildDeclaredColumnDefs, Ot as resolveApiCallsConfig, S as parseManualInputValues, St as getEvalTitle, T as parseEvalDiscovery, Tt as matchesTagsFilter, _ as recomputePersistedCaseStatus, a as validateTagsFilters, b as resolveArtifactPath, bt as applyDerivedCallAttributes, c as getLastRunStatuses, d as loadPersistedRunSnapshots, f as nextShortIdFromSnapshots, g as recomputeEvalStatusesInRuns, h as persistRunState, i as resolveEvalTags, j as createFsCacheStore, jt as getCaseRowCaseKey, k as normalizeScoreDef, kt as resolveLlmCallsConfig, l as getLatestRunInfos, m as deleteTemporaryRuns, n as getTargetEvalKeys, o as stripTerminalControlCodes, p as persistCaseDetail, s as generateRunId, u as loadPersistedRunSnapshot, v as runTouchesEval, w as loadEvalModule, wt as deriveScopedSummaryFromCases, x as buildManualInputDescriptor, y as resolveTracePresentation } from "./runOrchestration-o38J7uZO.mjs";
2
2
  import { copyFile, mkdir, readFile, rm, writeFile } from "node:fs/promises";
3
3
  import { basename, dirname, extname, isAbsolute, join, relative, resolve, sep } from "node:path";
4
4
  import { createHash, randomUUID } from "node:crypto";
@@ -2095,8 +2095,8 @@ async function commandApp(args) {
2095
2095
  const { serve } = await import("@hono/node-server");
2096
2096
  const bundledWebDist = resolve(currentDir, "apps/web/dist");
2097
2097
  if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
2098
- const appModule = await import("./app-DR9WPMA4.mjs");
2099
- const runnerModule = await import("./runner-B8dLVAyM.mjs");
2098
+ const appModule = await import("./app-CzLj4ZX0.mjs");
2099
+ const runnerModule = await import("./runner-iWtmKx9z.mjs");
2100
2100
  if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
2101
2101
  if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
2102
2102
  await runnerModule.initRunner();
package/dist/index.d.mts CHANGED
@@ -1308,8 +1308,6 @@ type EvalCaseScope = {
1308
1308
  logs: RunLogEntry$1[];
1309
1309
  spans: EvalTraceSpan$2[];
1310
1310
  checkpoints: Map<string, unknown>;
1311
- spanStack: string[];
1312
- activeSpanStack: EvalTraceSpan$2[];
1313
1311
  /**
1314
1312
  * Stack of active cache recorders. Ops are written to the top-most frame
1315
1313
  * when it exists and `replayingDepth === 0`.
@@ -1837,9 +1835,9 @@ declare const traceAttributeDisplaySchema: z$1.ZodObject<{
1837
1835
  subtree: "subtree";
1838
1836
  }>>;
1839
1837
  mode: z$1.ZodOptional<z$1.ZodEnum<{
1840
- sum: "sum";
1841
- last: "last";
1842
1838
  all: "all";
1839
+ last: "last";
1840
+ sum: "sum";
1843
1841
  }>>;
1844
1842
  }, z$1.core.$strip>;
1845
1843
  /**
@@ -1873,9 +1871,9 @@ declare const traceDisplayConfigSchema: z$1.ZodObject<{
1873
1871
  subtree: "subtree";
1874
1872
  }>>;
1875
1873
  mode: z$1.ZodOptional<z$1.ZodEnum<{
1876
- sum: "sum";
1877
- last: "last";
1878
1874
  all: "all";
1875
+ last: "last";
1876
+ sum: "sum";
1879
1877
  }>>;
1880
1878
  }, z$1.core.$strip>>>;
1881
1879
  }, z$1.core.$strip>;
@@ -1913,9 +1911,9 @@ declare const traceAttributeDisplayInputSchema: z$1.ZodObject<{
1913
1911
  subtree: "subtree";
1914
1912
  }>>;
1915
1913
  mode: z$1.ZodOptional<z$1.ZodEnum<{
1916
- sum: "sum";
1917
- last: "last";
1918
1914
  all: "all";
1915
+ last: "last";
1916
+ sum: "sum";
1919
1917
  }>>;
1920
1918
  transform: z$1.ZodOptional<z$1.ZodCustom<TraceAttributeTransform, TraceAttributeTransform>>;
1921
1919
  }, z$1.core.$strip>;
@@ -1951,9 +1949,9 @@ declare const traceDisplayInputConfigSchema: z$1.ZodObject<{
1951
1949
  subtree: "subtree";
1952
1950
  }>>;
1953
1951
  mode: z$1.ZodOptional<z$1.ZodEnum<{
1954
- sum: "sum";
1955
- last: "last";
1956
1952
  all: "all";
1953
+ last: "last";
1954
+ sum: "sum";
1957
1955
  }>>;
1958
1956
  transform: z$1.ZodOptional<z$1.ZodCustom<TraceAttributeTransform, TraceAttributeTransform>>;
1959
1957
  }, z$1.core.$strip>>>;
@@ -1990,8 +1988,8 @@ declare const traceSpanSchema$1: z$1.ZodObject<{
1990
1988
  status: z$1.ZodEnum<{
1991
1989
  error: "error";
1992
1990
  running: "running";
1993
- ok: "ok";
1994
1991
  cancelled: "cancelled";
1992
+ ok: "ok";
1995
1993
  }>;
1996
1994
  attributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
1997
1995
  error: z$1.ZodOptional<z$1.ZodObject<{
@@ -2032,11 +2030,11 @@ declare const evalFreshnessStatusSchema: z$1.ZodEnum<{
2032
2030
  type EvalFreshnessStatus = z$1.infer<typeof evalFreshnessStatusSchema>;
2033
2031
  /** Reducer used to collapse a column's per-case values into a single stat. */
2034
2032
  declare const evalStatAggregateSchema: z$1.ZodEnum<{
2033
+ last: "last";
2034
+ sum: "sum";
2035
2035
  avg: "avg";
2036
2036
  min: "min";
2037
2037
  max: "max";
2038
- sum: "sum";
2039
- last: "last";
2040
2038
  }>;
2041
2039
  /** Reducer used to collapse a column's per-case values into a single stat. */
2042
2040
  type EvalStatAggregate = z$1.infer<typeof evalStatAggregateSchema>;
@@ -2065,11 +2063,11 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
2065
2063
  key: z$1.ZodString;
2066
2064
  label: z$1.ZodOptional<z$1.ZodString>;
2067
2065
  aggregate: z$1.ZodEnum<{
2066
+ last: "last";
2067
+ sum: "sum";
2068
2068
  avg: "avg";
2069
2069
  min: "min";
2070
2070
  max: "max";
2071
- sum: "sum";
2072
- last: "last";
2073
2071
  }>;
2074
2072
  format: z$1.ZodOptional<z$1.ZodEnum<{
2075
2073
  number: "number";
@@ -2110,11 +2108,11 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
2110
2108
  key: z$1.ZodString;
2111
2109
  label: z$1.ZodOptional<z$1.ZodString>;
2112
2110
  aggregate: z$1.ZodEnum<{
2111
+ last: "last";
2112
+ sum: "sum";
2113
2113
  avg: "avg";
2114
2114
  min: "min";
2115
2115
  max: "max";
2116
- sum: "sum";
2117
- last: "last";
2118
2116
  }>;
2119
2117
  format: z$1.ZodOptional<z$1.ZodEnum<{
2120
2118
  number: "number";
@@ -2216,11 +2214,11 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2216
2214
  key: z$1.ZodString;
2217
2215
  label: z$1.ZodOptional<z$1.ZodString>;
2218
2216
  aggregate: z$1.ZodEnum<{
2217
+ last: "last";
2218
+ sum: "sum";
2219
2219
  avg: "avg";
2220
2220
  min: "min";
2221
2221
  max: "max";
2222
- sum: "sum";
2223
- last: "last";
2224
2222
  }>;
2225
2223
  format: z$1.ZodOptional<z$1.ZodEnum<{
2226
2224
  number: "number";
@@ -2258,9 +2256,9 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2258
2256
  color: z$1.ZodOptional<z$1.ZodEnum<{
2259
2257
  success: "success";
2260
2258
  error: "error";
2259
+ warning: "warning";
2261
2260
  accent: "accent";
2262
2261
  accentDim: "accentDim";
2263
- warning: "warning";
2264
2262
  textMuted: "textMuted";
2265
2263
  }>>;
2266
2264
  axis: z$1.ZodOptional<z$1.ZodEnum<{
@@ -2271,10 +2269,10 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2271
2269
  source: z$1.ZodLiteral<"column">;
2272
2270
  key: z$1.ZodString;
2273
2271
  aggregate: z$1.ZodEnum<{
2272
+ sum: "sum";
2274
2273
  avg: "avg";
2275
2274
  min: "min";
2276
2275
  max: "max";
2277
- sum: "sum";
2278
2276
  latest: "latest";
2279
2277
  passThresholdRate: "passThresholdRate";
2280
2278
  }>;
@@ -2282,9 +2280,9 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2282
2280
  color: z$1.ZodOptional<z$1.ZodEnum<{
2283
2281
  success: "success";
2284
2282
  error: "error";
2283
+ warning: "warning";
2285
2284
  accent: "accent";
2286
2285
  accentDim: "accentDim";
2287
- warning: "warning";
2288
2286
  textMuted: "textMuted";
2289
2287
  }>>;
2290
2288
  axis: z$1.ZodOptional<z$1.ZodEnum<{
@@ -2313,10 +2311,10 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2313
2311
  source: z$1.ZodLiteral<"column">;
2314
2312
  key: z$1.ZodString;
2315
2313
  aggregate: z$1.ZodEnum<{
2314
+ sum: "sum";
2316
2315
  avg: "avg";
2317
2316
  min: "min";
2318
2317
  max: "max";
2319
- sum: "sum";
2320
2318
  latest: "latest";
2321
2319
  passThresholdRate: "passThresholdRate";
2322
2320
  }>;
@@ -2413,11 +2411,11 @@ declare const caseRowSchema$1: z$1.ZodObject<{
2413
2411
  tags: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
2414
2412
  status: z$1.ZodEnum<{
2415
2413
  error: "error";
2414
+ pending: "pending";
2416
2415
  running: "running";
2417
2416
  cancelled: "cancelled";
2418
2417
  pass: "pass";
2419
2418
  fail: "fail";
2420
- pending: "pending";
2421
2419
  }>;
2422
2420
  durationMs: z$1.ZodNullable<z$1.ZodNumber>;
2423
2421
  cacheHits: z$1.ZodOptional<z$1.ZodNumber>;
@@ -2513,8 +2511,8 @@ declare const scoreTraceSchema: z$1.ZodObject<{
2513
2511
  status: z$1.ZodEnum<{
2514
2512
  error: "error";
2515
2513
  running: "running";
2516
- ok: "ok";
2517
2514
  cancelled: "cancelled";
2515
+ ok: "ok";
2518
2516
  }>;
2519
2517
  attributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
2520
2518
  error: z$1.ZodOptional<z$1.ZodObject<{
@@ -2564,9 +2562,9 @@ declare const scoreTraceSchema: z$1.ZodObject<{
2564
2562
  subtree: "subtree";
2565
2563
  }>>;
2566
2564
  mode: z$1.ZodOptional<z$1.ZodEnum<{
2567
- sum: "sum";
2568
- last: "last";
2569
2565
  all: "all";
2566
+ last: "last";
2567
+ sum: "sum";
2570
2568
  }>>;
2571
2569
  }, z$1.core.$strip>>>;
2572
2570
  }, z$1.core.$strip>;
@@ -2582,11 +2580,11 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
2582
2580
  tags: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
2583
2581
  status: z$1.ZodEnum<{
2584
2582
  error: "error";
2583
+ pending: "pending";
2585
2584
  running: "running";
2586
2585
  cancelled: "cancelled";
2587
2586
  pass: "pass";
2588
2587
  fail: "fail";
2589
- pending: "pending";
2590
2588
  }>;
2591
2589
  input: z$1.ZodUnknown;
2592
2590
  trace: z$1.ZodArray<z$1.ZodObject<{
@@ -2600,8 +2598,8 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
2600
2598
  status: z$1.ZodEnum<{
2601
2599
  error: "error";
2602
2600
  running: "running";
2603
- ok: "ok";
2604
2601
  cancelled: "cancelled";
2602
+ ok: "ok";
2605
2603
  }>;
2606
2604
  attributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
2607
2605
  error: z$1.ZodOptional<z$1.ZodObject<{
@@ -2651,9 +2649,9 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
2651
2649
  subtree: "subtree";
2652
2650
  }>>;
2653
2651
  mode: z$1.ZodOptional<z$1.ZodEnum<{
2654
- sum: "sum";
2655
- last: "last";
2656
2652
  all: "all";
2653
+ last: "last";
2654
+ sum: "sum";
2657
2655
  }>>;
2658
2656
  }, z$1.core.$strip>>>;
2659
2657
  }, z$1.core.$strip>;
@@ -2669,8 +2667,8 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
2669
2667
  status: z$1.ZodEnum<{
2670
2668
  error: "error";
2671
2669
  running: "running";
2672
- ok: "ok";
2673
2670
  cancelled: "cancelled";
2671
+ ok: "ok";
2674
2672
  }>;
2675
2673
  attributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
2676
2674
  error: z$1.ZodOptional<z$1.ZodObject<{
@@ -2720,9 +2718,9 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
2720
2718
  subtree: "subtree";
2721
2719
  }>>;
2722
2720
  mode: z$1.ZodOptional<z$1.ZodEnum<{
2723
- sum: "sum";
2724
- last: "last";
2725
2721
  all: "all";
2722
+ last: "last";
2723
+ sum: "sum";
2726
2724
  }>>;
2727
2725
  }, z$1.core.$strip>>>;
2728
2726
  }, z$1.core.$strip>;
@@ -2839,10 +2837,10 @@ declare const evalChartBuiltinMetricSchema: z$1.ZodEnum<{
2839
2837
  type EvalChartBuiltinMetric = z$1.infer<typeof evalChartBuiltinMetricSchema>;
2840
2838
  /** Reducer applied to a numeric column across all cases of a single run. */
2841
2839
  declare const evalChartAggregateSchema: z$1.ZodEnum<{
2840
+ sum: "sum";
2842
2841
  avg: "avg";
2843
2842
  min: "min";
2844
2843
  max: "max";
2845
- sum: "sum";
2846
2844
  latest: "latest";
2847
2845
  passThresholdRate: "passThresholdRate";
2848
2846
  }>;
@@ -2855,9 +2853,9 @@ type EvalChartAggregate = z$1.infer<typeof evalChartAggregateSchema>;
2855
2853
  declare const evalChartColorSchema: z$1.ZodEnum<{
2856
2854
  success: "success";
2857
2855
  error: "error";
2856
+ warning: "warning";
2858
2857
  accent: "accent";
2859
2858
  accentDim: "accentDim";
2860
- warning: "warning";
2861
2859
  textMuted: "textMuted";
2862
2860
  }>;
2863
2861
  /** Semantic color token resolved to a theme color by the web UI. */
@@ -2884,9 +2882,9 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
2884
2882
  color: z$1.ZodOptional<z$1.ZodEnum<{
2885
2883
  success: "success";
2886
2884
  error: "error";
2885
+ warning: "warning";
2887
2886
  accent: "accent";
2888
2887
  accentDim: "accentDim";
2889
- warning: "warning";
2890
2888
  textMuted: "textMuted";
2891
2889
  }>>;
2892
2890
  axis: z$1.ZodOptional<z$1.ZodEnum<{
@@ -2897,10 +2895,10 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
2897
2895
  source: z$1.ZodLiteral<"column">;
2898
2896
  key: z$1.ZodString;
2899
2897
  aggregate: z$1.ZodEnum<{
2898
+ sum: "sum";
2900
2899
  avg: "avg";
2901
2900
  min: "min";
2902
2901
  max: "max";
2903
- sum: "sum";
2904
2902
  latest: "latest";
2905
2903
  passThresholdRate: "passThresholdRate";
2906
2904
  }>;
@@ -2908,9 +2906,9 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
2908
2906
  color: z$1.ZodOptional<z$1.ZodEnum<{
2909
2907
  success: "success";
2910
2908
  error: "error";
2909
+ warning: "warning";
2911
2910
  accent: "accent";
2912
2911
  accentDim: "accentDim";
2913
- warning: "warning";
2914
2912
  textMuted: "textMuted";
2915
2913
  }>>;
2916
2914
  axis: z$1.ZodOptional<z$1.ZodEnum<{
@@ -2932,10 +2930,10 @@ declare const evalChartTooltipExtraSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObj
2932
2930
  source: z$1.ZodLiteral<"column">;
2933
2931
  key: z$1.ZodString;
2934
2932
  aggregate: z$1.ZodEnum<{
2933
+ sum: "sum";
2935
2934
  avg: "avg";
2936
2935
  min: "min";
2937
2936
  max: "max";
2938
- sum: "sum";
2939
2937
  latest: "latest";
2940
2938
  passThresholdRate: "passThresholdRate";
2941
2939
  }>;
@@ -2967,9 +2965,9 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
2967
2965
  color: z$1.ZodOptional<z$1.ZodEnum<{
2968
2966
  success: "success";
2969
2967
  error: "error";
2968
+ warning: "warning";
2970
2969
  accent: "accent";
2971
2970
  accentDim: "accentDim";
2972
- warning: "warning";
2973
2971
  textMuted: "textMuted";
2974
2972
  }>>;
2975
2973
  axis: z$1.ZodOptional<z$1.ZodEnum<{
@@ -2980,10 +2978,10 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
2980
2978
  source: z$1.ZodLiteral<"column">;
2981
2979
  key: z$1.ZodString;
2982
2980
  aggregate: z$1.ZodEnum<{
2981
+ sum: "sum";
2983
2982
  avg: "avg";
2984
2983
  min: "min";
2985
2984
  max: "max";
2986
- sum: "sum";
2987
2985
  latest: "latest";
2988
2986
  passThresholdRate: "passThresholdRate";
2989
2987
  }>;
@@ -2991,9 +2989,9 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
2991
2989
  color: z$1.ZodOptional<z$1.ZodEnum<{
2992
2990
  success: "success";
2993
2991
  error: "error";
2992
+ warning: "warning";
2994
2993
  accent: "accent";
2995
2994
  accentDim: "accentDim";
2996
- warning: "warning";
2997
2995
  textMuted: "textMuted";
2998
2996
  }>>;
2999
2997
  axis: z$1.ZodOptional<z$1.ZodEnum<{
@@ -3022,10 +3020,10 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
3022
3020
  source: z$1.ZodLiteral<"column">;
3023
3021
  key: z$1.ZodString;
3024
3022
  aggregate: z$1.ZodEnum<{
3023
+ sum: "sum";
3025
3024
  avg: "avg";
3026
3025
  min: "min";
3027
3026
  max: "max";
3028
- sum: "sum";
3029
3027
  latest: "latest";
3030
3028
  passThresholdRate: "passThresholdRate";
3031
3029
  }>;
@@ -3057,9 +3055,9 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
3057
3055
  color: z$1.ZodOptional<z$1.ZodEnum<{
3058
3056
  success: "success";
3059
3057
  error: "error";
3058
+ warning: "warning";
3060
3059
  accent: "accent";
3061
3060
  accentDim: "accentDim";
3062
- warning: "warning";
3063
3061
  textMuted: "textMuted";
3064
3062
  }>>;
3065
3063
  axis: z$1.ZodOptional<z$1.ZodEnum<{
@@ -3070,10 +3068,10 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
3070
3068
  source: z$1.ZodLiteral<"column">;
3071
3069
  key: z$1.ZodString;
3072
3070
  aggregate: z$1.ZodEnum<{
3071
+ sum: "sum";
3073
3072
  avg: "avg";
3074
3073
  min: "min";
3075
3074
  max: "max";
3076
- sum: "sum";
3077
3075
  latest: "latest";
3078
3076
  passThresholdRate: "passThresholdRate";
3079
3077
  }>;
@@ -3081,9 +3079,9 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
3081
3079
  color: z$1.ZodOptional<z$1.ZodEnum<{
3082
3080
  success: "success";
3083
3081
  error: "error";
3082
+ warning: "warning";
3084
3083
  accent: "accent";
3085
3084
  accentDim: "accentDim";
3086
- warning: "warning";
3087
3085
  textMuted: "textMuted";
3088
3086
  }>>;
3089
3087
  axis: z$1.ZodOptional<z$1.ZodEnum<{
@@ -3112,10 +3110,10 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
3112
3110
  source: z$1.ZodLiteral<"column">;
3113
3111
  key: z$1.ZodString;
3114
3112
  aggregate: z$1.ZodEnum<{
3113
+ sum: "sum";
3115
3114
  avg: "avg";
3116
3115
  min: "min";
3117
3116
  max: "max";
3118
- sum: "sum";
3119
3117
  latest: "latest";
3120
3118
  passThresholdRate: "passThresholdRate";
3121
3119
  }>;
@@ -3131,10 +3129,10 @@ declare const runManifestSchema$1: z$1.ZodObject<{
3131
3129
  shortId: z$1.ZodString;
3132
3130
  status: z$1.ZodEnum<{
3133
3131
  error: "error";
3134
- running: "running";
3135
- cancelled: "cancelled";
3136
3132
  pending: "pending";
3133
+ running: "running";
3137
3134
  completed: "completed";
3135
+ cancelled: "cancelled";
3138
3136
  }>;
3139
3137
  temporary: z$1.ZodDefault<z$1.ZodOptional<z$1.ZodBoolean>>;
3140
3138
  startedAt: z$1.ZodString;
@@ -3143,9 +3141,9 @@ declare const runManifestSchema$1: z$1.ZodObject<{
3143
3141
  evalSourceFingerprints: z$1.ZodDefault<z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodString>>>;
3144
3142
  target: z$1.ZodObject<{
3145
3143
  mode: z$1.ZodEnum<{
3146
- caseIds: "caseIds";
3147
3144
  all: "all";
3148
3145
  evalIds: "evalIds";
3146
+ caseIds: "caseIds";
3149
3147
  }>;
3150
3148
  evalKeys: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
3151
3149
  files: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
@@ -3171,10 +3169,10 @@ declare const runSummarySchema$1: z$1.ZodObject<{
3171
3169
  runId: z$1.ZodString;
3172
3170
  status: z$1.ZodEnum<{
3173
3171
  error: "error";
3174
- running: "running";
3175
- cancelled: "cancelled";
3176
3172
  pending: "pending";
3173
+ running: "running";
3177
3174
  completed: "completed";
3175
+ cancelled: "cancelled";
3178
3176
  }>;
3179
3177
  totalCases: z$1.ZodNumber;
3180
3178
  passedCases: z$1.ZodNumber;
@@ -4186,8 +4184,8 @@ declare const cacheRecordingSchema: z$1.ZodObject<{
4186
4184
  finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
4187
4185
  error: "error";
4188
4186
  running: "running";
4189
- ok: "ok";
4190
4187
  cancelled: "cancelled";
4188
+ ok: "ok";
4191
4189
  }>>;
4192
4190
  finalError: z$1.ZodOptional<z$1.ZodObject<{
4193
4191
  name: z$1.ZodOptional<z$1.ZodString>;
@@ -4259,8 +4257,8 @@ declare const cacheEntrySchema: z$1.ZodObject<{
4259
4257
  finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
4260
4258
  error: "error";
4261
4259
  running: "running";
4262
- ok: "ok";
4263
4260
  cancelled: "cancelled";
4261
+ ok: "ok";
4264
4262
  }>>;
4265
4263
  finalError: z$1.ZodOptional<z$1.ZodObject<{
4266
4264
  name: z$1.ZodOptional<z$1.ZodString>;
@@ -4349,8 +4347,8 @@ declare const cacheDebugKeyEntrySchema: z$1.ZodObject<{
4349
4347
  finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
4350
4348
  error: "error";
4351
4349
  running: "running";
4352
- ok: "ok";
4353
4350
  cancelled: "cancelled";
4351
+ ok: "ok";
4354
4352
  }>>;
4355
4353
  finalError: z$1.ZodOptional<z$1.ZodObject<{
4356
4354
  name: z$1.ZodOptional<z$1.ZodString>;
@@ -4428,8 +4426,8 @@ declare const cacheEntryWithDebugKeySchema$1: z$1.ZodObject<{
4428
4426
  finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
4429
4427
  error: "error";
4430
4428
  running: "running";
4431
- ok: "ok";
4432
4429
  cancelled: "cancelled";
4430
+ ok: "ok";
4433
4431
  }>>;
4434
4432
  finalError: z$1.ZodOptional<z$1.ZodObject<{
4435
4433
  name: z$1.ZodOptional<z$1.ZodString>;
@@ -4509,8 +4507,8 @@ declare const cacheEntryWithDebugKeySchema$1: z$1.ZodObject<{
4509
4507
  finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
4510
4508
  error: "error";
4511
4509
  running: "running";
4512
- ok: "ok";
4513
4510
  cancelled: "cancelled";
4511
+ ok: "ok";
4514
4512
  }>>;
4515
4513
  finalError: z$1.ZodOptional<z$1.ZodObject<{
4516
4514
  name: z$1.ZodOptional<z$1.ZodString>;
@@ -4588,8 +4586,8 @@ declare const cacheFileSchema: z$1.ZodObject<{
4588
4586
  finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
4589
4587
  error: "error";
4590
4588
  running: "running";
4591
- ok: "ok";
4592
4589
  cancelled: "cancelled";
4590
+ ok: "ok";
4593
4591
  }>>;
4594
4592
  finalError: z$1.ZodOptional<z$1.ZodObject<{
4595
4593
  name: z$1.ZodOptional<z$1.ZodString>;
@@ -4677,8 +4675,8 @@ declare const cacheDebugKeyFileSchema: z$1.ZodObject<{
4677
4675
  finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
4678
4676
  error: "error";
4679
4677
  running: "running";
4680
- ok: "ok";
4681
4678
  cancelled: "cancelled";
4679
+ ok: "ok";
4682
4680
  }>>;
4683
4681
  finalError: z$1.ZodOptional<z$1.ZodObject<{
4684
4682
  name: z$1.ZodOptional<z$1.ZodString>;
@@ -4834,9 +4832,9 @@ type ConfigReloadState = z$1.infer<typeof configReloadStateSchema$1>;
4834
4832
  declare const createRunRequestSchema$1: z$1.ZodObject<{
4835
4833
  target: z$1.ZodObject<{
4836
4834
  mode: z$1.ZodEnum<{
4837
- caseIds: "caseIds";
4838
4835
  all: "all";
4839
4836
  evalIds: "evalIds";
4837
+ caseIds: "caseIds";
4840
4838
  }>;
4841
4839
  evalKeys: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
4842
4840
  files: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
package/dist/index.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { $ as getCurrentScope, B as deserializeCacheValue, F as evalSpan, G as readManualInputFile, H as serializeCacheValue, I as evalTracer, J as appendToEvalOutput, K as evalExpect, L as hashCacheKey, Lt as getEvalRegistry, M as z, N as buildTraceTree, P as captureEvalSpanError, Q as evalTime, R as hashCacheKeySync, U as repoFile, V as serializeCacheRecording, W as manualInputFileValueSchema, X as evalAssert, Z as evalLog, _t as extractLlmCalls, at as nextEvalId, ct as runInExistingEvalScope, dt as startEvalBackgroundJob, et as getEvalCaseInput, gt as extractApiCalls, ht as extractCacheHits, it as mergeEvalOutput, lt as setEvalOutput, mt as extractCacheEntries, nt as isInEvalScope, ot as runInEvalRuntimeScope, q as EvalAssertionError, st as runInEvalScope, tt as incrementEvalOutput, ut as setScopeCacheContext, vt as simulateLlmCallCost, xt as getNestedAttribute, yt as simulateTokenAllocation, z as deserializeCacheRecording } from "./runOrchestration-CokPQet7.mjs";
2
- import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-R7_V6YWa.mjs";
3
- import { n as matchesEvalTags, t as defineEval } from "./src-B43qR0Ea.mjs";
1
+ import { $ as getCurrentScope, B as deserializeCacheValue, F as evalSpan, G as readManualInputFile, H as serializeCacheValue, I as evalTracer, J as appendToEvalOutput, K as evalExpect, L as hashCacheKey, Lt as getEvalRegistry, M as z, N as buildTraceTree, P as captureEvalSpanError, Q as evalTime, R as hashCacheKeySync, U as repoFile, V as serializeCacheRecording, W as manualInputFileValueSchema, X as evalAssert, Z as evalLog, _t as extractLlmCalls, at as nextEvalId, ct as runInExistingEvalScope, dt as startEvalBackgroundJob, et as getEvalCaseInput, gt as extractApiCalls, ht as extractCacheHits, it as mergeEvalOutput, lt as setEvalOutput, mt as extractCacheEntries, nt as isInEvalScope, ot as runInEvalRuntimeScope, q as EvalAssertionError, st as runInEvalScope, tt as incrementEvalOutput, ut as setScopeCacheContext, vt as simulateLlmCallCost, xt as getNestedAttribute, yt as simulateTokenAllocation, z as deserializeCacheRecording } from "./runOrchestration-o38J7uZO.mjs";
2
+ import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-Cvs7tc2v.mjs";
3
+ import { n as matchesEvalTags, t as defineEval } from "./src-Jahivm6d.mjs";
4
4
  export { EvalAssertionError, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, z };
package/dist/runChild.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { At as buildEvalKey, D as loadConfig, Dt as runSummarySchema, Et as runManifestSchema, Ft as columnDefSchema, Mt as evalStatsConfigSchema, Nt as manualInputDescriptorSchema, Pt as evalChartsConfigSchema, T as parseEvalDiscovery, Y as configureEvalRunLogs, ft as createRunRequestSchema, h as persistRunState, j as createFsCacheStore, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-CokPQet7.mjs";
1
+ import { At as buildEvalKey, D as loadConfig, Dt as runSummarySchema, Et as runManifestSchema, Ft as columnDefSchema, Mt as evalStatsConfigSchema, Nt as manualInputDescriptorSchema, Pt as evalChartsConfigSchema, T as parseEvalDiscovery, Y as configureEvalRunLogs, ft as createRunRequestSchema, h as persistRunState, j as createFsCacheStore, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-o38J7uZO.mjs";
2
2
  import { z } from "zod/v4";
3
3
  import { readFile } from "node:fs/promises";
4
4
  import { relative } from "node:path";