@ls-stack/agent-eval 0.32.0 → 0.34.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,8 +25,8 @@
25
25
  href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600&family=JetBrains+Mono:wght@400;500&display=swap"
26
26
  rel="stylesheet"
27
27
  />
28
- <script type="module" crossorigin src="/assets/index-BNQnbfi0.js"></script>
29
- <link rel="stylesheet" crossorigin href="/assets/index-BPMMRktE.css">
28
+ <script type="module" crossorigin src="/assets/index-ol64metU.js"></script>
29
+ <link rel="stylesheet" crossorigin href="/assets/index-BJpxc61J.css">
30
30
  </head>
31
31
  <body>
32
32
  <div id="root"></div>
package/dist/bin.mjs CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env node
2
- import { t as runCli } from "./cli-huuJbDNb.mjs";
2
+ import { t as runCli } from "./cli-CG66f0GN.mjs";
3
3
  import { spawn } from "node:child_process";
4
4
  //#region src/bin.ts
5
5
  const moduleMocksFlag = "--experimental-test-module-mocks";
@@ -1,4 +1,4 @@
1
- import { C as loadConfig, D as createFsCacheStore, E as validateCharts, H as getEvalDisplayStatus, S as resolveEvalDefaultConfig, T as normalizeScoreDef, U as deriveScopedSummaryFromCases, V as getEvalTitle, _ as buildManualInputDescriptor, a as getLastRunStatuses, b as loadEvalModule, bt as getCaseRowEvalKey, c as loadPersistedRunSnapshots, d as persistRunState, f as recomputeEvalStatusesInRuns, g as resolveArtifactPath, h as resolveTracePresentation, i as generateRunId, l as nextShortIdFromSnapshots, m as runTouchesEval, mt as resolveLlmCallsConfig, n as getTargetEvalKeys, o as getLatestRunInfos, p as recomputePersistedCaseStatus, pr as getEvalRegistry, pt as resolveApiCallsConfig, q as runSummarySchema, s as loadPersistedRunSnapshot, u as persistCaseDetail, v as parseManualInputValues, vt as buildEvalKey, w as buildDeclaredColumnDefs, x as parseEvalDiscovery, y as deriveEvalFreshness, yt as getCaseRowCaseKey, z as applyDerivedCallAttributes } from "./runOrchestration-ZpN7xty_.mjs";
1
+ import { C as loadConfig, D as createFsCacheStore, E as validateCharts, H as getEvalDisplayStatus, S as resolveEvalDefaultConfig, T as normalizeScoreDef, U as deriveScopedSummaryFromCases, V as getEvalTitle, _ as buildManualInputDescriptor, a as getLastRunStatuses, b as loadEvalModule, bt as getCaseRowEvalKey, c as loadPersistedRunSnapshots, d as persistRunState, f as recomputeEvalStatusesInRuns, g as resolveArtifactPath, h as resolveTracePresentation, i as generateRunId, l as nextShortIdFromSnapshots, m as runTouchesEval, mt as resolveLlmCallsConfig, n as getTargetEvalKeys, o as getLatestRunInfos, p as recomputePersistedCaseStatus, pr as getEvalRegistry, pt as resolveApiCallsConfig, q as runSummarySchema, s as loadPersistedRunSnapshot, u as persistCaseDetail, v as parseManualInputValues, vt as buildEvalKey, w as buildDeclaredColumnDefs, x as parseEvalDiscovery, y as deriveEvalFreshness, yt as getCaseRowCaseKey, z as applyDerivedCallAttributes } from "./runOrchestration-CYhn4DJ0.mjs";
2
2
  import { createHash, randomUUID } from "node:crypto";
3
3
  import { copyFile, mkdir, readFile, rm, writeFile } from "node:fs/promises";
4
4
  import { basename, dirname, extname, isAbsolute, join, relative, resolve, sep } from "node:path";
@@ -1940,8 +1940,8 @@ async function commandApp(args) {
1940
1940
  const { serve } = await import("@hono/node-server");
1941
1941
  const bundledWebDist = resolve(currentDir, "apps/web/dist");
1942
1942
  if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
1943
- const appModule = await import("./app-Dc6vvHRL.mjs");
1944
- const runnerModule = await import("./runner-BPXPvinB.mjs");
1943
+ const appModule = await import("./app-DOE-crd0.mjs");
1944
+ const runnerModule = await import("./runner-D5i7HLcE.mjs");
1945
1945
  if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
1946
1946
  if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
1947
1947
  await runnerModule.initRunner();
package/dist/index.d.mts CHANGED
@@ -3024,8 +3024,8 @@ declare const traceAttributeDisplaySchema: z$1.ZodObject<{
3024
3024
  }>>;
3025
3025
  mode: z$1.ZodOptional<z$1.ZodEnum<{
3026
3026
  all: "all";
3027
- last: "last";
3028
3027
  sum: "sum";
3028
+ last: "last";
3029
3029
  }>>;
3030
3030
  }, z$1.core.$strip>;
3031
3031
  /**
@@ -3060,8 +3060,8 @@ declare const traceDisplayConfigSchema: z$1.ZodObject<{
3060
3060
  }>>;
3061
3061
  mode: z$1.ZodOptional<z$1.ZodEnum<{
3062
3062
  all: "all";
3063
- last: "last";
3064
3063
  sum: "sum";
3064
+ last: "last";
3065
3065
  }>>;
3066
3066
  }, z$1.core.$strip>>>;
3067
3067
  }, z$1.core.$strip>;
@@ -3100,8 +3100,8 @@ declare const traceAttributeDisplayInputSchema: z$1.ZodObject<{
3100
3100
  }>>;
3101
3101
  mode: z$1.ZodOptional<z$1.ZodEnum<{
3102
3102
  all: "all";
3103
- last: "last";
3104
3103
  sum: "sum";
3104
+ last: "last";
3105
3105
  }>>;
3106
3106
  transform: z$1.ZodOptional<z$1.ZodCustom<TraceAttributeTransform, TraceAttributeTransform>>;
3107
3107
  }, z$1.core.$strip>;
@@ -3138,8 +3138,8 @@ declare const traceDisplayInputConfigSchema: z$1.ZodObject<{
3138
3138
  }>>;
3139
3139
  mode: z$1.ZodOptional<z$1.ZodEnum<{
3140
3140
  all: "all";
3141
- last: "last";
3142
3141
  sum: "sum";
3142
+ last: "last";
3143
3143
  }>>;
3144
3144
  transform: z$1.ZodOptional<z$1.ZodCustom<TraceAttributeTransform, TraceAttributeTransform>>;
3145
3145
  }, z$1.core.$strip>>>;
@@ -3176,8 +3176,8 @@ declare const traceSpanSchema: z$1.ZodObject<{
3176
3176
  status: z$1.ZodEnum<{
3177
3177
  error: "error";
3178
3178
  running: "running";
3179
- ok: "ok";
3180
3179
  cancelled: "cancelled";
3180
+ ok: "ok";
3181
3181
  }>;
3182
3182
  attributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
3183
3183
  error: z$1.ZodOptional<z$1.ZodObject<{
@@ -3218,11 +3218,11 @@ declare const evalFreshnessStatusSchema: z$1.ZodEnum<{
3218
3218
  type EvalFreshnessStatus = z$1.infer<typeof evalFreshnessStatusSchema>;
3219
3219
  /** Reducer used to collapse a column's per-case values into a single stat. */
3220
3220
  declare const evalStatAggregateSchema: z$1.ZodEnum<{
3221
- last: "last";
3222
- sum: "sum";
3223
3221
  avg: "avg";
3222
+ sum: "sum";
3224
3223
  min: "min";
3225
3224
  max: "max";
3225
+ last: "last";
3226
3226
  }>;
3227
3227
  /** Reducer used to collapse a column's per-case values into a single stat. */
3228
3228
  type EvalStatAggregate = z$1.infer<typeof evalStatAggregateSchema>;
@@ -3246,11 +3246,11 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
3246
3246
  key: z$1.ZodString;
3247
3247
  label: z$1.ZodOptional<z$1.ZodString>;
3248
3248
  aggregate: z$1.ZodEnum<{
3249
- last: "last";
3250
- sum: "sum";
3251
3249
  avg: "avg";
3250
+ sum: "sum";
3252
3251
  min: "min";
3253
3252
  max: "max";
3253
+ last: "last";
3254
3254
  }>;
3255
3255
  format: z$1.ZodOptional<z$1.ZodEnum<{
3256
3256
  number: "number";
@@ -3288,11 +3288,11 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
3288
3288
  key: z$1.ZodString;
3289
3289
  label: z$1.ZodOptional<z$1.ZodString>;
3290
3290
  aggregate: z$1.ZodEnum<{
3291
- last: "last";
3292
- sum: "sum";
3293
3291
  avg: "avg";
3292
+ sum: "sum";
3294
3293
  min: "min";
3295
3294
  max: "max";
3295
+ last: "last";
3296
3296
  }>;
3297
3297
  format: z$1.ZodOptional<z$1.ZodEnum<{
3298
3298
  number: "number";
@@ -3389,11 +3389,11 @@ declare const evalSummarySchema: z$1.ZodObject<{
3389
3389
  key: z$1.ZodString;
3390
3390
  label: z$1.ZodOptional<z$1.ZodString>;
3391
3391
  aggregate: z$1.ZodEnum<{
3392
- last: "last";
3393
- sum: "sum";
3394
3392
  avg: "avg";
3393
+ sum: "sum";
3395
3394
  min: "min";
3396
3395
  max: "max";
3396
+ last: "last";
3397
3397
  }>;
3398
3398
  format: z$1.ZodOptional<z$1.ZodEnum<{
3399
3399
  number: "number";
@@ -3431,9 +3431,9 @@ declare const evalSummarySchema: z$1.ZodObject<{
3431
3431
  color: z$1.ZodOptional<z$1.ZodEnum<{
3432
3432
  success: "success";
3433
3433
  error: "error";
3434
- warning: "warning";
3435
3434
  accent: "accent";
3436
3435
  accentDim: "accentDim";
3436
+ warning: "warning";
3437
3437
  textMuted: "textMuted";
3438
3438
  }>>;
3439
3439
  axis: z$1.ZodOptional<z$1.ZodEnum<{
@@ -3444,8 +3444,8 @@ declare const evalSummarySchema: z$1.ZodObject<{
3444
3444
  source: z$1.ZodLiteral<"column">;
3445
3445
  key: z$1.ZodString;
3446
3446
  aggregate: z$1.ZodEnum<{
3447
- sum: "sum";
3448
3447
  avg: "avg";
3448
+ sum: "sum";
3449
3449
  min: "min";
3450
3450
  max: "max";
3451
3451
  latest: "latest";
@@ -3455,9 +3455,9 @@ declare const evalSummarySchema: z$1.ZodObject<{
3455
3455
  color: z$1.ZodOptional<z$1.ZodEnum<{
3456
3456
  success: "success";
3457
3457
  error: "error";
3458
- warning: "warning";
3459
3458
  accent: "accent";
3460
3459
  accentDim: "accentDim";
3460
+ warning: "warning";
3461
3461
  textMuted: "textMuted";
3462
3462
  }>>;
3463
3463
  axis: z$1.ZodOptional<z$1.ZodEnum<{
@@ -3486,8 +3486,8 @@ declare const evalSummarySchema: z$1.ZodObject<{
3486
3486
  source: z$1.ZodLiteral<"column">;
3487
3487
  key: z$1.ZodString;
3488
3488
  aggregate: z$1.ZodEnum<{
3489
- sum: "sum";
3490
3489
  avg: "avg";
3490
+ sum: "sum";
3491
3491
  min: "min";
3492
3492
  max: "max";
3493
3493
  latest: "latest";
@@ -3585,11 +3585,11 @@ declare const caseRowSchema: z$1.ZodObject<{
3585
3585
  evalId: z$1.ZodString;
3586
3586
  status: z$1.ZodEnum<{
3587
3587
  error: "error";
3588
+ pending: "pending";
3588
3589
  running: "running";
3589
3590
  cancelled: "cancelled";
3590
3591
  pass: "pass";
3591
3592
  fail: "fail";
3592
- pending: "pending";
3593
3593
  }>;
3594
3594
  durationMs: z$1.ZodNullable<z$1.ZodNumber>;
3595
3595
  costUsd: z$1.ZodOptional<z$1.ZodNullable<z$1.ZodNumber>>;
@@ -3681,8 +3681,8 @@ declare const scoreTraceSchema: z$1.ZodObject<{
3681
3681
  status: z$1.ZodEnum<{
3682
3682
  error: "error";
3683
3683
  running: "running";
3684
- ok: "ok";
3685
3684
  cancelled: "cancelled";
3685
+ ok: "ok";
3686
3686
  }>;
3687
3687
  attributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
3688
3688
  error: z$1.ZodOptional<z$1.ZodObject<{
@@ -3733,8 +3733,8 @@ declare const scoreTraceSchema: z$1.ZodObject<{
3733
3733
  }>>;
3734
3734
  mode: z$1.ZodOptional<z$1.ZodEnum<{
3735
3735
  all: "all";
3736
- last: "last";
3737
3736
  sum: "sum";
3737
+ last: "last";
3738
3738
  }>>;
3739
3739
  }, z$1.core.$strip>>>;
3740
3740
  }, z$1.core.$strip>;
@@ -3749,11 +3749,11 @@ declare const caseDetailSchema: z$1.ZodObject<{
3749
3749
  evalId: z$1.ZodString;
3750
3750
  status: z$1.ZodEnum<{
3751
3751
  error: "error";
3752
+ pending: "pending";
3752
3753
  running: "running";
3753
3754
  cancelled: "cancelled";
3754
3755
  pass: "pass";
3755
3756
  fail: "fail";
3756
- pending: "pending";
3757
3757
  }>;
3758
3758
  input: z$1.ZodUnknown;
3759
3759
  trace: z$1.ZodArray<z$1.ZodObject<{
@@ -3767,8 +3767,8 @@ declare const caseDetailSchema: z$1.ZodObject<{
3767
3767
  status: z$1.ZodEnum<{
3768
3768
  error: "error";
3769
3769
  running: "running";
3770
- ok: "ok";
3771
3770
  cancelled: "cancelled";
3771
+ ok: "ok";
3772
3772
  }>;
3773
3773
  attributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
3774
3774
  error: z$1.ZodOptional<z$1.ZodObject<{
@@ -3819,8 +3819,8 @@ declare const caseDetailSchema: z$1.ZodObject<{
3819
3819
  }>>;
3820
3820
  mode: z$1.ZodOptional<z$1.ZodEnum<{
3821
3821
  all: "all";
3822
- last: "last";
3823
3822
  sum: "sum";
3823
+ last: "last";
3824
3824
  }>>;
3825
3825
  }, z$1.core.$strip>>>;
3826
3826
  }, z$1.core.$strip>;
@@ -3836,8 +3836,8 @@ declare const caseDetailSchema: z$1.ZodObject<{
3836
3836
  status: z$1.ZodEnum<{
3837
3837
  error: "error";
3838
3838
  running: "running";
3839
- ok: "ok";
3840
3839
  cancelled: "cancelled";
3840
+ ok: "ok";
3841
3841
  }>;
3842
3842
  attributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
3843
3843
  error: z$1.ZodOptional<z$1.ZodObject<{
@@ -3888,8 +3888,8 @@ declare const caseDetailSchema: z$1.ZodObject<{
3888
3888
  }>>;
3889
3889
  mode: z$1.ZodOptional<z$1.ZodEnum<{
3890
3890
  all: "all";
3891
- last: "last";
3892
3891
  sum: "sum";
3892
+ last: "last";
3893
3893
  }>>;
3894
3894
  }, z$1.core.$strip>>>;
3895
3895
  }, z$1.core.$strip>;
@@ -3949,10 +3949,10 @@ declare const caseDetailSchema: z$1.ZodObject<{
3949
3949
  namespace: z$1.ZodString;
3950
3950
  key: z$1.ZodString;
3951
3951
  status: z$1.ZodEnum<{
3952
+ bypass: "bypass";
3953
+ refresh: "refresh";
3952
3954
  hit: "hit";
3953
3955
  miss: "miss";
3954
- refresh: "refresh";
3955
- bypass: "bypass";
3956
3956
  }>;
3957
3957
  read: z$1.ZodOptional<z$1.ZodBoolean>;
3958
3958
  stored: z$1.ZodOptional<z$1.ZodBoolean>;
@@ -4023,8 +4023,8 @@ declare const evalChartBuiltinMetricSchema: z$1.ZodEnum<{
4023
4023
  type EvalChartBuiltinMetric = z$1.infer<typeof evalChartBuiltinMetricSchema>;
4024
4024
  /** Reducer applied to a numeric column across all cases of a single run. */
4025
4025
  declare const evalChartAggregateSchema: z$1.ZodEnum<{
4026
- sum: "sum";
4027
4026
  avg: "avg";
4027
+ sum: "sum";
4028
4028
  min: "min";
4029
4029
  max: "max";
4030
4030
  latest: "latest";
@@ -4039,9 +4039,9 @@ type EvalChartAggregate = z$1.infer<typeof evalChartAggregateSchema>;
4039
4039
  declare const evalChartColorSchema: z$1.ZodEnum<{
4040
4040
  success: "success";
4041
4041
  error: "error";
4042
- warning: "warning";
4043
4042
  accent: "accent";
4044
4043
  accentDim: "accentDim";
4044
+ warning: "warning";
4045
4045
  textMuted: "textMuted";
4046
4046
  }>;
4047
4047
  /** Semantic color token resolved to a theme color by the web UI. */
@@ -4068,9 +4068,9 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
4068
4068
  color: z$1.ZodOptional<z$1.ZodEnum<{
4069
4069
  success: "success";
4070
4070
  error: "error";
4071
- warning: "warning";
4072
4071
  accent: "accent";
4073
4072
  accentDim: "accentDim";
4073
+ warning: "warning";
4074
4074
  textMuted: "textMuted";
4075
4075
  }>>;
4076
4076
  axis: z$1.ZodOptional<z$1.ZodEnum<{
@@ -4081,8 +4081,8 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
4081
4081
  source: z$1.ZodLiteral<"column">;
4082
4082
  key: z$1.ZodString;
4083
4083
  aggregate: z$1.ZodEnum<{
4084
- sum: "sum";
4085
4084
  avg: "avg";
4085
+ sum: "sum";
4086
4086
  min: "min";
4087
4087
  max: "max";
4088
4088
  latest: "latest";
@@ -4092,9 +4092,9 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
4092
4092
  color: z$1.ZodOptional<z$1.ZodEnum<{
4093
4093
  success: "success";
4094
4094
  error: "error";
4095
- warning: "warning";
4096
4095
  accent: "accent";
4097
4096
  accentDim: "accentDim";
4097
+ warning: "warning";
4098
4098
  textMuted: "textMuted";
4099
4099
  }>>;
4100
4100
  axis: z$1.ZodOptional<z$1.ZodEnum<{
@@ -4116,8 +4116,8 @@ declare const evalChartTooltipExtraSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObj
4116
4116
  source: z$1.ZodLiteral<"column">;
4117
4117
  key: z$1.ZodString;
4118
4118
  aggregate: z$1.ZodEnum<{
4119
- sum: "sum";
4120
4119
  avg: "avg";
4120
+ sum: "sum";
4121
4121
  min: "min";
4122
4122
  max: "max";
4123
4123
  latest: "latest";
@@ -4151,9 +4151,9 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
4151
4151
  color: z$1.ZodOptional<z$1.ZodEnum<{
4152
4152
  success: "success";
4153
4153
  error: "error";
4154
- warning: "warning";
4155
4154
  accent: "accent";
4156
4155
  accentDim: "accentDim";
4156
+ warning: "warning";
4157
4157
  textMuted: "textMuted";
4158
4158
  }>>;
4159
4159
  axis: z$1.ZodOptional<z$1.ZodEnum<{
@@ -4164,8 +4164,8 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
4164
4164
  source: z$1.ZodLiteral<"column">;
4165
4165
  key: z$1.ZodString;
4166
4166
  aggregate: z$1.ZodEnum<{
4167
- sum: "sum";
4168
4167
  avg: "avg";
4168
+ sum: "sum";
4169
4169
  min: "min";
4170
4170
  max: "max";
4171
4171
  latest: "latest";
@@ -4175,9 +4175,9 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
4175
4175
  color: z$1.ZodOptional<z$1.ZodEnum<{
4176
4176
  success: "success";
4177
4177
  error: "error";
4178
- warning: "warning";
4179
4178
  accent: "accent";
4180
4179
  accentDim: "accentDim";
4180
+ warning: "warning";
4181
4181
  textMuted: "textMuted";
4182
4182
  }>>;
4183
4183
  axis: z$1.ZodOptional<z$1.ZodEnum<{
@@ -4206,8 +4206,8 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
4206
4206
  source: z$1.ZodLiteral<"column">;
4207
4207
  key: z$1.ZodString;
4208
4208
  aggregate: z$1.ZodEnum<{
4209
- sum: "sum";
4210
4209
  avg: "avg";
4210
+ sum: "sum";
4211
4211
  min: "min";
4212
4212
  max: "max";
4213
4213
  latest: "latest";
@@ -4241,9 +4241,9 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
4241
4241
  color: z$1.ZodOptional<z$1.ZodEnum<{
4242
4242
  success: "success";
4243
4243
  error: "error";
4244
- warning: "warning";
4245
4244
  accent: "accent";
4246
4245
  accentDim: "accentDim";
4246
+ warning: "warning";
4247
4247
  textMuted: "textMuted";
4248
4248
  }>>;
4249
4249
  axis: z$1.ZodOptional<z$1.ZodEnum<{
@@ -4254,8 +4254,8 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
4254
4254
  source: z$1.ZodLiteral<"column">;
4255
4255
  key: z$1.ZodString;
4256
4256
  aggregate: z$1.ZodEnum<{
4257
- sum: "sum";
4258
4257
  avg: "avg";
4258
+ sum: "sum";
4259
4259
  min: "min";
4260
4260
  max: "max";
4261
4261
  latest: "latest";
@@ -4265,9 +4265,9 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
4265
4265
  color: z$1.ZodOptional<z$1.ZodEnum<{
4266
4266
  success: "success";
4267
4267
  error: "error";
4268
- warning: "warning";
4269
4268
  accent: "accent";
4270
4269
  accentDim: "accentDim";
4270
+ warning: "warning";
4271
4271
  textMuted: "textMuted";
4272
4272
  }>>;
4273
4273
  axis: z$1.ZodOptional<z$1.ZodEnum<{
@@ -4296,8 +4296,8 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
4296
4296
  source: z$1.ZodLiteral<"column">;
4297
4297
  key: z$1.ZodString;
4298
4298
  aggregate: z$1.ZodEnum<{
4299
- sum: "sum";
4300
4299
  avg: "avg";
4300
+ sum: "sum";
4301
4301
  min: "min";
4302
4302
  max: "max";
4303
4303
  latest: "latest";
@@ -4315,10 +4315,10 @@ declare const runManifestSchema: z$1.ZodObject<{
4315
4315
  shortId: z$1.ZodString;
4316
4316
  status: z$1.ZodEnum<{
4317
4317
  error: "error";
4318
- running: "running";
4319
- cancelled: "cancelled";
4320
4318
  pending: "pending";
4319
+ running: "running";
4321
4320
  completed: "completed";
4321
+ cancelled: "cancelled";
4322
4322
  }>;
4323
4323
  startedAt: z$1.ZodString;
4324
4324
  endedAt: z$1.ZodNullable<z$1.ZodString>;
@@ -4341,9 +4341,9 @@ declare const runManifestSchema: z$1.ZodObject<{
4341
4341
  median: "median";
4342
4342
  }>>>;
4343
4343
  cacheMode: z$1.ZodOptional<z$1.ZodEnum<{
4344
- refresh: "refresh";
4345
- bypass: "bypass";
4346
4344
  use: "use";
4345
+ bypass: "bypass";
4346
+ refresh: "refresh";
4347
4347
  }>>;
4348
4348
  }, z$1.core.$strip>;
4349
4349
  /** Persisted lifecycle metadata for a single eval run. */
@@ -4353,10 +4353,10 @@ declare const runSummarySchema: z$1.ZodObject<{
4353
4353
  runId: z$1.ZodString;
4354
4354
  status: z$1.ZodEnum<{
4355
4355
  error: "error";
4356
- running: "running";
4357
- cancelled: "cancelled";
4358
4356
  pending: "pending";
4357
+ running: "running";
4359
4358
  completed: "completed";
4359
+ cancelled: "cancelled";
4360
4360
  }>;
4361
4361
  totalCases: z$1.ZodNumber;
4362
4362
  passedCases: z$1.ZodNumber;
@@ -5150,8 +5150,8 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
5150
5150
  }>>;
5151
5151
  mode: z$1.ZodOptional<z$1.ZodEnum<{
5152
5152
  all: "all";
5153
- last: "last";
5154
5153
  sum: "sum";
5154
+ last: "last";
5155
5155
  }>>;
5156
5156
  transform: z$1.ZodOptional<z$1.ZodCustom<TraceAttributeTransform, TraceAttributeTransform>>;
5157
5157
  }, z$1.core.$strip>>>;
@@ -5174,11 +5174,11 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
5174
5174
  key: z$1.ZodString;
5175
5175
  label: z$1.ZodOptional<z$1.ZodString>;
5176
5176
  aggregate: z$1.ZodEnum<{
5177
- last: "last";
5178
- sum: "sum";
5179
5177
  avg: "avg";
5178
+ sum: "sum";
5180
5179
  min: "min";
5181
5180
  max: "max";
5181
+ last: "last";
5182
5182
  }>;
5183
5183
  format: z$1.ZodOptional<z$1.ZodEnum<{
5184
5184
  number: "number";
@@ -5527,9 +5527,9 @@ declare function extractApiCalls(spans: EvalTraceSpan[], config: ResolvedApiCall
5527
5527
  * - `refresh`: never read, always write (forces re-execution and overwrites).
5528
5528
  */
5529
5529
  declare const cacheModeSchema: z$1.ZodEnum<{
5530
- refresh: "refresh";
5531
- bypass: "bypass";
5532
5530
  use: "use";
5531
+ bypass: "bypass";
5532
+ refresh: "refresh";
5533
5533
  }>;
5534
5534
  /** Mode controlling how cached spans behave during a run. */
5535
5535
  type CacheMode = z$1.infer<typeof cacheModeSchema>;
@@ -5550,10 +5550,10 @@ declare const cacheOperationTypeSchema: z$1.ZodEnum<{
5550
5550
  type CacheOperationType = z$1.infer<typeof cacheOperationTypeSchema>;
5551
5551
  /** Status of a cache lookup recorded on a span or case scope. */
5552
5552
  declare const cacheStatusSchema: z$1.ZodEnum<{
5553
+ bypass: "bypass";
5554
+ refresh: "refresh";
5553
5555
  hit: "hit";
5554
5556
  miss: "miss";
5555
- refresh: "refresh";
5556
- bypass: "bypass";
5557
5557
  }>;
5558
5558
  /** Status of a cache lookup recorded on a span or case scope. */
5559
5559
  type CacheStatus = z$1.infer<typeof cacheStatusSchema>;
@@ -5570,10 +5570,10 @@ declare const traceCacheRefSchema: z$1.ZodObject<{
5570
5570
  namespace: z$1.ZodString;
5571
5571
  key: z$1.ZodString;
5572
5572
  status: z$1.ZodEnum<{
5573
+ bypass: "bypass";
5574
+ refresh: "refresh";
5573
5575
  hit: "hit";
5574
5576
  miss: "miss";
5575
- refresh: "refresh";
5576
- bypass: "bypass";
5577
5577
  }>;
5578
5578
  read: z$1.ZodOptional<z$1.ZodBoolean>;
5579
5579
  stored: z$1.ZodOptional<z$1.ZodBoolean>;
@@ -5651,8 +5651,8 @@ declare const cacheRecordingSchema: z$1.ZodObject<{
5651
5651
  finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
5652
5652
  error: "error";
5653
5653
  running: "running";
5654
- ok: "ok";
5655
5654
  cancelled: "cancelled";
5655
+ ok: "ok";
5656
5656
  }>>;
5657
5657
  finalError: z$1.ZodOptional<z$1.ZodObject<{
5658
5658
  name: z$1.ZodOptional<z$1.ZodString>;
@@ -5724,8 +5724,8 @@ declare const cacheEntrySchema: z$1.ZodObject<{
5724
5724
  finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
5725
5725
  error: "error";
5726
5726
  running: "running";
5727
- ok: "ok";
5728
5727
  cancelled: "cancelled";
5728
+ ok: "ok";
5729
5729
  }>>;
5730
5730
  finalError: z$1.ZodOptional<z$1.ZodObject<{
5731
5731
  name: z$1.ZodOptional<z$1.ZodString>;
@@ -5813,8 +5813,8 @@ declare const cacheEntryWithDebugKeySchema: z$1.ZodObject<{
5813
5813
  finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
5814
5814
  error: "error";
5815
5815
  running: "running";
5816
- ok: "ok";
5817
5816
  cancelled: "cancelled";
5817
+ ok: "ok";
5818
5818
  }>>;
5819
5819
  finalError: z$1.ZodOptional<z$1.ZodObject<{
5820
5820
  name: z$1.ZodOptional<z$1.ZodString>;
@@ -5902,8 +5902,8 @@ declare const cacheFileSchema: z$1.ZodObject<{
5902
5902
  finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
5903
5903
  error: "error";
5904
5904
  running: "running";
5905
- ok: "ok";
5906
5905
  cancelled: "cancelled";
5906
+ ok: "ok";
5907
5907
  }>>;
5908
5908
  finalError: z$1.ZodOptional<z$1.ZodObject<{
5909
5909
  name: z$1.ZodOptional<z$1.ZodString>;
@@ -6089,9 +6089,9 @@ declare const createRunRequestSchema: z$1.ZodObject<{
6089
6089
  trials: z$1.ZodNumber;
6090
6090
  cache: z$1.ZodOptional<z$1.ZodObject<{
6091
6091
  mode: z$1.ZodDefault<z$1.ZodEnum<{
6092
- refresh: "refresh";
6093
- bypass: "bypass";
6094
6092
  use: "use";
6093
+ bypass: "bypass";
6094
+ refresh: "refresh";
6095
6095
  }>>;
6096
6096
  }, z$1.core.$strip>>;
6097
6097
  manualInputs: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
package/dist/index.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { $ as apiCallMetricSchema, $n as getCurrentScope, $t as cacheDebugKeyEntrySchema, A as createRunRequestSchema, An as repoFileRefSchema, At as runLogEntrySchema, B as getNestedAttribute, Bn as deserializeCacheValue, Bt as manualInputNumberFieldSchema, Cn as cellValueSchema, Ct as caseRowSchema, Dn as fileRefSchema, Dt as evalStatItemSchema, En as columnKindSchema, Et as evalStatAggregateSchema, F as extractApiCalls, Fn as evalSpan, Ft as manualInputBooleanFieldSchema, G as deriveStatusFromChildStatuses, Gn as readManualInputFile, Gt as evalChartAxisSchema, H as getEvalDisplayStatus, Hn as serializeCacheValue, Ht as manualInputSelectOptionSchema, I as extractLlmCalls, In as evalTracer, It as manualInputDescriptorSchema, J as DEFAULT_API_CALLS_CONFIG, Jn as advanceEvalTime, Jt as evalChartConfigSchema, K as runManifestSchema, Kn as evalExpect, Kt as evalChartBuiltinMetricSchema, L as simulateLlmCallCost, Ln as hashCacheKey, Lt as manualInputFieldDescriptorSchema, M as sseEnvelopeSchema, Mn as z, Mt as runLogLocationSchema, N as extractCacheEntries, Nn as buildTraceTree, Nt as runLogPhaseSchema, O as configReloadStateSchema, On as jsonCellSchema, Ot as evalStatsConfigSchema, P as extractCacheHits, Pn as captureEvalSpanError, Pt as scoreTraceSchema, Q as apiCallMetricPlacementSchema, Qn as evalLog, Qt as evalChartsConfigSchema, R as simulateTokenAllocation, Rn as hashCacheKeySync, Rt as manualInputJsonFieldSchema, Sn as traceSpanWarningSchema, St as caseDetailSchema, Tn as columnFormatSchema, Tt as evalFreshnessStatusSchema, U as deriveScopedSummaryFromCases, Un as repoFile, Ut as manualInputTextFieldSchema, V as getEvalTitle, Vn as serializeCacheRecording, Vt as manualInputSelectFieldSchema, W as deriveStatusFromCaseRows, Wn as manualInputFileValueSchema, Wt as evalChartAggregateSchema, X as agentEvalsConfigSchema, Xt as evalChartTooltipExtraSchema, Y as DEFAULT_LLM_CALLS_CONFIG, Yn as appendToEvalOutput, Yt as evalChartMetricSchema, Z as apiCallMetricFormatSchema, Zn as evalAssert, Zt as evalChartTypeSchema, _n as traceDisplayConfigSchema, _t as buildCaseKey, an as cacheModeSchema, ar as nextEvalId, at as llmCallCostCurrencySchema, bn as traceSpanKindSchema, bt as getCaseRowEvalKey, cn as cacheRecordingSchema, cr as runInExistingEvalScope, ct as llmCallMetricSchema, dn as spanCacheOptionsSchema, dr as startEvalBackgroundJob, dt as llmCallsConfigSchema, en as cacheDebugKeyFileSchema, er as getEvalCaseInput, et as apiCallsConfigSchema, fn as traceCacheRefSchema, fr as defineEval, ft as removeDefaultConfigSchema, gn as traceAttributeDisplaySchema, gt as trialSelectionModeSchema, hn as traceAttributeDisplayPlacementSchema, ht as runLogsConfigSchema, in as cacheListItemSchema, ir as mergeEvalOutput, it as evalDeriveConfigSchema, j as updateManualScoreRequestSchema, jn as runArtifactRefSchema, jt as runLogLevelSchema, k as configReloadStatusSchema, kn as numberDisplayOptionsSchema, kt as evalSummarySchema, ln as cacheStatusSchema, lr as setEvalOutput, lt as llmCallPricingRateSchema, mn as traceAttributeDisplayInputSchema, mt as resolveLlmCallsConfig, nn as cacheEntryWithDebugKeySchema, nr as incrementEvalOutput, nt as evalColumnOverrideSchema, on as cacheOperationTypeSchema, or as runInEvalRuntimeScope, ot as llmCallMetricFormatSchema, pn as traceAttributeDisplayFormatSchema, pr as getEvalRegistry, pt as resolveApiCallsConfig, q as runSummarySchema, qn as EvalAssertionError, qt as evalChartColorSchema, rn as cacheFileSchema, rr as isInEvalScope, rt as evalColumnsSchema, sn as cacheRecordingOpSchema, sr as runInEvalScope, st as llmCallMetricPlacementSchema, tn as cacheEntrySchema, tr as getEvalStartTime, tt as defaultConfigKeySchema, un as serializedCacheSpanSchema, ur as setScopeCacheContext, ut as llmCallPricingSchema, vn as traceDisplayInputConfigSchema, vt as buildEvalKey, wn as columnDefSchema, wt as discoveryIssueSchema, xn as traceSpanSchema, xt as assertionFailureSchema, yn as traceSpanErrorSchema, yt as getCaseRowCaseKey, z as applyDerivedCallAttributes, zn as deserializeCacheRecording, zt as manualInputMultilineFieldSchema } from "./runOrchestration-ZpN7xty_.mjs";
2
- import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-huuJbDNb.mjs";
3
- import "./src-1Qvuh0NH.mjs";
1
+ import { $ as apiCallMetricSchema, $n as getCurrentScope, $t as cacheDebugKeyEntrySchema, A as createRunRequestSchema, An as repoFileRefSchema, At as runLogEntrySchema, B as getNestedAttribute, Bn as deserializeCacheValue, Bt as manualInputNumberFieldSchema, Cn as cellValueSchema, Ct as caseRowSchema, Dn as fileRefSchema, Dt as evalStatItemSchema, En as columnKindSchema, Et as evalStatAggregateSchema, F as extractApiCalls, Fn as evalSpan, Ft as manualInputBooleanFieldSchema, G as deriveStatusFromChildStatuses, Gn as readManualInputFile, Gt as evalChartAxisSchema, H as getEvalDisplayStatus, Hn as serializeCacheValue, Ht as manualInputSelectOptionSchema, I as extractLlmCalls, In as evalTracer, It as manualInputDescriptorSchema, J as DEFAULT_API_CALLS_CONFIG, Jn as advanceEvalTime, Jt as evalChartConfigSchema, K as runManifestSchema, Kn as evalExpect, Kt as evalChartBuiltinMetricSchema, L as simulateLlmCallCost, Ln as hashCacheKey, Lt as manualInputFieldDescriptorSchema, M as sseEnvelopeSchema, Mn as z, Mt as runLogLocationSchema, N as extractCacheEntries, Nn as buildTraceTree, Nt as runLogPhaseSchema, O as configReloadStateSchema, On as jsonCellSchema, Ot as evalStatsConfigSchema, P as extractCacheHits, Pn as captureEvalSpanError, Pt as scoreTraceSchema, Q as apiCallMetricPlacementSchema, Qn as evalLog, Qt as evalChartsConfigSchema, R as simulateTokenAllocation, Rn as hashCacheKeySync, Rt as manualInputJsonFieldSchema, Sn as traceSpanWarningSchema, St as caseDetailSchema, Tn as columnFormatSchema, Tt as evalFreshnessStatusSchema, U as deriveScopedSummaryFromCases, Un as repoFile, Ut as manualInputTextFieldSchema, V as getEvalTitle, Vn as serializeCacheRecording, Vt as manualInputSelectFieldSchema, W as deriveStatusFromCaseRows, Wn as manualInputFileValueSchema, Wt as evalChartAggregateSchema, X as agentEvalsConfigSchema, Xt as evalChartTooltipExtraSchema, Y as DEFAULT_LLM_CALLS_CONFIG, Yn as appendToEvalOutput, Yt as evalChartMetricSchema, Z as apiCallMetricFormatSchema, Zn as evalAssert, Zt as evalChartTypeSchema, _n as traceDisplayConfigSchema, _t as buildCaseKey, an as cacheModeSchema, ar as nextEvalId, at as llmCallCostCurrencySchema, bn as traceSpanKindSchema, bt as getCaseRowEvalKey, cn as cacheRecordingSchema, cr as runInExistingEvalScope, ct as llmCallMetricSchema, dn as spanCacheOptionsSchema, dr as startEvalBackgroundJob, dt as llmCallsConfigSchema, en as cacheDebugKeyFileSchema, er as getEvalCaseInput, et as apiCallsConfigSchema, fn as traceCacheRefSchema, fr as defineEval, ft as removeDefaultConfigSchema, gn as traceAttributeDisplaySchema, gt as trialSelectionModeSchema, hn as traceAttributeDisplayPlacementSchema, ht as runLogsConfigSchema, in as cacheListItemSchema, ir as mergeEvalOutput, it as evalDeriveConfigSchema, j as updateManualScoreRequestSchema, jn as runArtifactRefSchema, jt as runLogLevelSchema, k as configReloadStatusSchema, kn as numberDisplayOptionsSchema, kt as evalSummarySchema, ln as cacheStatusSchema, lr as setEvalOutput, lt as llmCallPricingRateSchema, mn as traceAttributeDisplayInputSchema, mt as resolveLlmCallsConfig, nn as cacheEntryWithDebugKeySchema, nr as incrementEvalOutput, nt as evalColumnOverrideSchema, on as cacheOperationTypeSchema, or as runInEvalRuntimeScope, ot as llmCallMetricFormatSchema, pn as traceAttributeDisplayFormatSchema, pr as getEvalRegistry, pt as resolveApiCallsConfig, q as runSummarySchema, qn as EvalAssertionError, qt as evalChartColorSchema, rn as cacheFileSchema, rr as isInEvalScope, rt as evalColumnsSchema, sn as cacheRecordingOpSchema, sr as runInEvalScope, st as llmCallMetricPlacementSchema, tn as cacheEntrySchema, tr as getEvalStartTime, tt as defaultConfigKeySchema, un as serializedCacheSpanSchema, ur as setScopeCacheContext, ut as llmCallPricingSchema, vn as traceDisplayInputConfigSchema, vt as buildEvalKey, wn as columnDefSchema, wt as discoveryIssueSchema, xn as traceSpanSchema, xt as assertionFailureSchema, yn as traceSpanErrorSchema, yt as getCaseRowCaseKey, z as applyDerivedCallAttributes, zn as deserializeCacheRecording, zt as manualInputMultilineFieldSchema } from "./runOrchestration-CYhn4DJ0.mjs";
2
+ import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-CG66f0GN.mjs";
3
+ import "./src-B0b1vgk0.mjs";
4
4
  export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, advanceEvalTime, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, applyDerivedCallAttributes, assertionFailureSchema, buildCaseKey, buildEvalKey, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, cleanupStagedManualInputFiles, columnDefSchema, columnFormatSchema, columnKindSchema, configReloadStateSchema, configReloadStatusSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, discoveryIssueSchema, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalColumnOverrideSchema, evalColumnsSchema, evalDeriveConfigSchema, evalExpect, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCaseRowCaseKey, getCaseRowEvalKey, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalStartTime, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, jsonCellSchema, llmCallCostCurrencySchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingRateSchema, llmCallPricingSchema, llmCallsConfigSchema, manualInputBooleanFieldSchema, manualInputDescriptorSchema, manualInputFieldDescriptorSchema, manualInputFileValueSchema, manualInputJsonFieldSchema, manualInputMultilineFieldSchema, manualInputNumberFieldSchema, manualInputSelectFieldSchema, manualInputSelectOptionSchema, manualInputTextFieldSchema, materializeManualInputFiles, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, readManualInputFile, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, spanCacheOptionsSchema, sseEnvelopeSchema, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
package/dist/runChild.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { A as createRunRequestSchema, C as loadConfig, D as createFsCacheStore, It as manualInputDescriptorSchema, K as runManifestSchema, Ot as evalStatsConfigSchema, Qt as evalChartsConfigSchema, Xn as configureEvalRunLogs, q as runSummarySchema, r as getTargetEvals$1, t as executeRun, vt as buildEvalKey, wn as columnDefSchema, x as parseEvalDiscovery } from "./runOrchestration-ZpN7xty_.mjs";
1
+ import { A as createRunRequestSchema, C as loadConfig, D as createFsCacheStore, It as manualInputDescriptorSchema, K as runManifestSchema, Ot as evalStatsConfigSchema, Qt as evalChartsConfigSchema, Xn as configureEvalRunLogs, q as runSummarySchema, r as getTargetEvals$1, t as executeRun, vt as buildEvalKey, wn as columnDefSchema, x as parseEvalDiscovery } from "./runOrchestration-CYhn4DJ0.mjs";
2
2
  import { createHash } from "node:crypto";
3
3
  import { readFile } from "node:fs/promises";
4
4
  import { relative } from "node:path";