@ls-stack/agent-eval 0.33.0 → 0.35.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,8 +25,8 @@
25
25
  href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600&family=JetBrains+Mono:wght@400;500&display=swap"
26
26
  rel="stylesheet"
27
27
  />
28
- <script type="module" crossorigin src="/assets/index-BV_DM8fZ.js"></script>
29
- <link rel="stylesheet" crossorigin href="/assets/index-BPMMRktE.css">
28
+ <script type="module" crossorigin src="/assets/index-sWPMWjFJ.js"></script>
29
+ <link rel="stylesheet" crossorigin href="/assets/index-BJpxc61J.css">
30
30
  </head>
31
31
  <body>
32
32
  <div id="root"></div>
package/dist/bin.mjs CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env node
2
- import { t as runCli } from "./cli-huuJbDNb.mjs";
2
+ import { t as runCli } from "./cli-CVwIjcsX.mjs";
3
3
  import { spawn } from "node:child_process";
4
4
  //#region src/bin.ts
5
5
  const moduleMocksFlag = "--experimental-test-module-mocks";
@@ -1,4 +1,4 @@
1
- import { C as loadConfig, D as createFsCacheStore, E as validateCharts, H as getEvalDisplayStatus, S as resolveEvalDefaultConfig, T as normalizeScoreDef, U as deriveScopedSummaryFromCases, V as getEvalTitle, _ as buildManualInputDescriptor, a as getLastRunStatuses, b as loadEvalModule, bt as getCaseRowEvalKey, c as loadPersistedRunSnapshots, d as persistRunState, f as recomputeEvalStatusesInRuns, g as resolveArtifactPath, h as resolveTracePresentation, i as generateRunId, l as nextShortIdFromSnapshots, m as runTouchesEval, mt as resolveLlmCallsConfig, n as getTargetEvalKeys, o as getLatestRunInfos, p as recomputePersistedCaseStatus, pr as getEvalRegistry, pt as resolveApiCallsConfig, q as runSummarySchema, s as loadPersistedRunSnapshot, u as persistCaseDetail, v as parseManualInputValues, vt as buildEvalKey, w as buildDeclaredColumnDefs, x as parseEvalDiscovery, y as deriveEvalFreshness, yt as getCaseRowCaseKey, z as applyDerivedCallAttributes } from "./runOrchestration-ZpN7xty_.mjs";
1
+ import { C as loadConfig, D as createFsCacheStore, E as validateCharts, H as getEvalDisplayStatus, S as resolveEvalDefaultConfig, T as normalizeScoreDef, U as deriveScopedSummaryFromCases, V as getEvalTitle, _ as buildManualInputDescriptor, a as getLastRunStatuses, b as loadEvalModule, bt as getCaseRowEvalKey, c as loadPersistedRunSnapshots, d as persistRunState, f as recomputeEvalStatusesInRuns, g as resolveArtifactPath, h as resolveTracePresentation, i as generateRunId, l as nextShortIdFromSnapshots, m as runTouchesEval, mt as resolveLlmCallsConfig, n as getTargetEvalKeys, o as getLatestRunInfos, p as recomputePersistedCaseStatus, pr as getEvalRegistry, pt as resolveApiCallsConfig, q as runSummarySchema, s as loadPersistedRunSnapshot, u as persistCaseDetail, v as parseManualInputValues, vt as buildEvalKey, w as buildDeclaredColumnDefs, x as parseEvalDiscovery, y as deriveEvalFreshness, yt as getCaseRowCaseKey, z as applyDerivedCallAttributes } from "./runOrchestration-DoslE_Oo.mjs";
2
2
  import { createHash, randomUUID } from "node:crypto";
3
3
  import { copyFile, mkdir, readFile, rm, writeFile } from "node:fs/promises";
4
4
  import { basename, dirname, extname, isAbsolute, join, relative, resolve, sep } from "node:path";
@@ -1940,8 +1940,8 @@ async function commandApp(args) {
1940
1940
  const { serve } = await import("@hono/node-server");
1941
1941
  const bundledWebDist = resolve(currentDir, "apps/web/dist");
1942
1942
  if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
1943
- const appModule = await import("./app-Dc6vvHRL.mjs");
1944
- const runnerModule = await import("./runner-BPXPvinB.mjs");
1943
+ const appModule = await import("./app-CcZv9l_q.mjs");
1944
+ const runnerModule = await import("./runner-DA_o115w.mjs");
1945
1945
  if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
1946
1946
  if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
1947
1947
  await runnerModule.initRunner();
package/dist/index.d.mts CHANGED
@@ -2908,13 +2908,13 @@ type ColumnKind = z$1.infer<typeof columnKindSchema>;
2908
2908
  declare const columnFormatSchema: z$1.ZodEnum<{
2909
2909
  number: "number";
2910
2910
  boolean: "boolean";
2911
- duration: "duration";
2912
- json: "json";
2913
2911
  file: "file";
2914
2912
  markdown: "markdown";
2913
+ json: "json";
2915
2914
  image: "image";
2916
2915
  audio: "audio";
2917
2916
  video: "video";
2917
+ duration: "duration";
2918
2918
  percent: "percent";
2919
2919
  passFail: "passFail";
2920
2920
  stars: "stars";
@@ -2933,13 +2933,13 @@ declare const columnDefSchema: z$1.ZodObject<{
2933
2933
  format: z$1.ZodOptional<z$1.ZodEnum<{
2934
2934
  number: "number";
2935
2935
  boolean: "boolean";
2936
- duration: "duration";
2937
- json: "json";
2938
2936
  file: "file";
2939
2937
  markdown: "markdown";
2938
+ json: "json";
2940
2939
  image: "image";
2941
2940
  audio: "audio";
2942
2941
  video: "video";
2942
+ duration: "duration";
2943
2943
  percent: "percent";
2944
2944
  passFail: "passFail";
2945
2945
  stars: "stars";
@@ -2984,8 +2984,8 @@ declare const traceSpanKindSchema: z$1.ZodString;
2984
2984
  declare const traceAttributeDisplayFormatSchema: z$1.ZodEnum<{
2985
2985
  string: "string";
2986
2986
  number: "number";
2987
- duration: "duration";
2988
2987
  json: "json";
2988
+ duration: "duration";
2989
2989
  }>;
2990
2990
  /**
2991
2991
  * Formatting hint for trace attribute values rendered by the UI.
@@ -3009,8 +3009,8 @@ declare const traceAttributeDisplaySchema: z$1.ZodObject<{
3009
3009
  format: z$1.ZodOptional<z$1.ZodEnum<{
3010
3010
  string: "string";
3011
3011
  number: "number";
3012
- duration: "duration";
3013
3012
  json: "json";
3013
+ duration: "duration";
3014
3014
  }>>;
3015
3015
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
3016
3016
  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
@@ -3045,8 +3045,8 @@ declare const traceDisplayConfigSchema: z$1.ZodObject<{
3045
3045
  format: z$1.ZodOptional<z$1.ZodEnum<{
3046
3046
  string: "string";
3047
3047
  number: "number";
3048
- duration: "duration";
3049
3048
  json: "json";
3049
+ duration: "duration";
3050
3050
  }>>;
3051
3051
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
3052
3052
  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
@@ -3085,8 +3085,8 @@ declare const traceAttributeDisplayInputSchema: z$1.ZodObject<{
3085
3085
  format: z$1.ZodOptional<z$1.ZodEnum<{
3086
3086
  string: "string";
3087
3087
  number: "number";
3088
- duration: "duration";
3089
3088
  json: "json";
3089
+ duration: "duration";
3090
3090
  }>>;
3091
3091
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
3092
3092
  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
@@ -3123,8 +3123,8 @@ declare const traceDisplayInputConfigSchema: z$1.ZodObject<{
3123
3123
  format: z$1.ZodOptional<z$1.ZodEnum<{
3124
3124
  string: "string";
3125
3125
  number: "number";
3126
- duration: "duration";
3127
3126
  json: "json";
3127
+ duration: "duration";
3128
3128
  }>>;
3129
3129
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
3130
3130
  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
@@ -3255,13 +3255,13 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
3255
3255
  format: z$1.ZodOptional<z$1.ZodEnum<{
3256
3256
  number: "number";
3257
3257
  boolean: "boolean";
3258
- duration: "duration";
3259
- json: "json";
3260
3258
  file: "file";
3261
3259
  markdown: "markdown";
3260
+ json: "json";
3262
3261
  image: "image";
3263
3262
  audio: "audio";
3264
3263
  video: "video";
3264
+ duration: "duration";
3265
3265
  percent: "percent";
3266
3266
  passFail: "passFail";
3267
3267
  stars: "stars";
@@ -3297,13 +3297,13 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
3297
3297
  format: z$1.ZodOptional<z$1.ZodEnum<{
3298
3298
  number: "number";
3299
3299
  boolean: "boolean";
3300
- duration: "duration";
3301
- json: "json";
3302
3300
  file: "file";
3303
3301
  markdown: "markdown";
3302
+ json: "json";
3304
3303
  image: "image";
3305
3304
  audio: "audio";
3306
3305
  video: "video";
3306
+ duration: "duration";
3307
3307
  percent: "percent";
3308
3308
  passFail: "passFail";
3309
3309
  stars: "stars";
@@ -3340,13 +3340,13 @@ declare const evalSummarySchema: z$1.ZodObject<{
3340
3340
  format: z$1.ZodOptional<z$1.ZodEnum<{
3341
3341
  number: "number";
3342
3342
  boolean: "boolean";
3343
- duration: "duration";
3344
- json: "json";
3345
3343
  file: "file";
3346
3344
  markdown: "markdown";
3345
+ json: "json";
3347
3346
  image: "image";
3348
3347
  audio: "audio";
3349
3348
  video: "video";
3349
+ duration: "duration";
3350
3350
  percent: "percent";
3351
3351
  passFail: "passFail";
3352
3352
  stars: "stars";
@@ -3398,13 +3398,13 @@ declare const evalSummarySchema: z$1.ZodObject<{
3398
3398
  format: z$1.ZodOptional<z$1.ZodEnum<{
3399
3399
  number: "number";
3400
3400
  boolean: "boolean";
3401
- duration: "duration";
3402
- json: "json";
3403
3401
  file: "file";
3404
3402
  markdown: "markdown";
3403
+ json: "json";
3405
3404
  image: "image";
3406
3405
  audio: "audio";
3407
3406
  video: "video";
3407
+ duration: "duration";
3408
3408
  percent: "percent";
3409
3409
  passFail: "passFail";
3410
3410
  stars: "stars";
@@ -3429,8 +3429,8 @@ declare const evalSummarySchema: z$1.ZodObject<{
3429
3429
  }>;
3430
3430
  label: z$1.ZodOptional<z$1.ZodString>;
3431
3431
  color: z$1.ZodOptional<z$1.ZodEnum<{
3432
- error: "error";
3433
3432
  success: "success";
3433
+ error: "error";
3434
3434
  warning: "warning";
3435
3435
  accent: "accent";
3436
3436
  accentDim: "accentDim";
@@ -3453,8 +3453,8 @@ declare const evalSummarySchema: z$1.ZodObject<{
3453
3453
  }>;
3454
3454
  label: z$1.ZodOptional<z$1.ZodString>;
3455
3455
  color: z$1.ZodOptional<z$1.ZodEnum<{
3456
- error: "error";
3457
3456
  success: "success";
3457
+ error: "error";
3458
3458
  warning: "warning";
3459
3459
  accent: "accent";
3460
3460
  accentDim: "accentDim";
@@ -3718,8 +3718,8 @@ declare const scoreTraceSchema: z$1.ZodObject<{
3718
3718
  format: z$1.ZodOptional<z$1.ZodEnum<{
3719
3719
  string: "string";
3720
3720
  number: "number";
3721
- duration: "duration";
3722
3721
  json: "json";
3722
+ duration: "duration";
3723
3723
  }>>;
3724
3724
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
3725
3725
  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
@@ -3804,8 +3804,8 @@ declare const caseDetailSchema: z$1.ZodObject<{
3804
3804
  format: z$1.ZodOptional<z$1.ZodEnum<{
3805
3805
  string: "string";
3806
3806
  number: "number";
3807
- duration: "duration";
3808
3807
  json: "json";
3808
+ duration: "duration";
3809
3809
  }>>;
3810
3810
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
3811
3811
  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
@@ -3873,8 +3873,8 @@ declare const caseDetailSchema: z$1.ZodObject<{
3873
3873
  format: z$1.ZodOptional<z$1.ZodEnum<{
3874
3874
  string: "string";
3875
3875
  number: "number";
3876
- duration: "duration";
3877
3876
  json: "json";
3877
+ duration: "duration";
3878
3878
  }>>;
3879
3879
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
3880
3880
  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
@@ -4037,8 +4037,8 @@ type EvalChartAggregate = z$1.infer<typeof evalChartAggregateSchema>;
4037
4037
  * not emit raw hex so authored evals stay decoupled from the web theme.
4038
4038
  */
4039
4039
  declare const evalChartColorSchema: z$1.ZodEnum<{
4040
- error: "error";
4041
4040
  success: "success";
4041
+ error: "error";
4042
4042
  warning: "warning";
4043
4043
  accent: "accent";
4044
4044
  accentDim: "accentDim";
@@ -4066,8 +4066,8 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
4066
4066
  }>;
4067
4067
  label: z$1.ZodOptional<z$1.ZodString>;
4068
4068
  color: z$1.ZodOptional<z$1.ZodEnum<{
4069
- error: "error";
4070
4069
  success: "success";
4070
+ error: "error";
4071
4071
  warning: "warning";
4072
4072
  accent: "accent";
4073
4073
  accentDim: "accentDim";
@@ -4090,8 +4090,8 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
4090
4090
  }>;
4091
4091
  label: z$1.ZodOptional<z$1.ZodString>;
4092
4092
  color: z$1.ZodOptional<z$1.ZodEnum<{
4093
- error: "error";
4094
4093
  success: "success";
4094
+ error: "error";
4095
4095
  warning: "warning";
4096
4096
  accent: "accent";
4097
4097
  accentDim: "accentDim";
@@ -4149,8 +4149,8 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
4149
4149
  }>;
4150
4150
  label: z$1.ZodOptional<z$1.ZodString>;
4151
4151
  color: z$1.ZodOptional<z$1.ZodEnum<{
4152
- error: "error";
4153
4152
  success: "success";
4153
+ error: "error";
4154
4154
  warning: "warning";
4155
4155
  accent: "accent";
4156
4156
  accentDim: "accentDim";
@@ -4173,8 +4173,8 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
4173
4173
  }>;
4174
4174
  label: z$1.ZodOptional<z$1.ZodString>;
4175
4175
  color: z$1.ZodOptional<z$1.ZodEnum<{
4176
- error: "error";
4177
4176
  success: "success";
4177
+ error: "error";
4178
4178
  warning: "warning";
4179
4179
  accent: "accent";
4180
4180
  accentDim: "accentDim";
@@ -4239,8 +4239,8 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
4239
4239
  }>;
4240
4240
  label: z$1.ZodOptional<z$1.ZodString>;
4241
4241
  color: z$1.ZodOptional<z$1.ZodEnum<{
4242
- error: "error";
4243
4242
  success: "success";
4243
+ error: "error";
4244
4244
  warning: "warning";
4245
4245
  accent: "accent";
4246
4246
  accentDim: "accentDim";
@@ -4263,8 +4263,8 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
4263
4263
  }>;
4264
4264
  label: z$1.ZodOptional<z$1.ZodString>;
4265
4265
  color: z$1.ZodOptional<z$1.ZodEnum<{
4266
- error: "error";
4267
4266
  success: "success";
4267
+ error: "error";
4268
4268
  warning: "warning";
4269
4269
  accent: "accent";
4270
4270
  accentDim: "accentDim";
@@ -4574,8 +4574,8 @@ declare const llmCallMetricFormatSchema: z$1.ZodEnum<{
4574
4574
  string: "string";
4575
4575
  number: "number";
4576
4576
  boolean: "boolean";
4577
- duration: "duration";
4578
4577
  json: "json";
4578
+ duration: "duration";
4579
4579
  }>;
4580
4580
  /** Render format applied to an LLM-call metric value. */
4581
4581
  type LlmCallMetricFormat = z$1.infer<typeof llmCallMetricFormatSchema>;
@@ -4584,8 +4584,8 @@ declare const apiCallMetricFormatSchema: z$1.ZodEnum<{
4584
4584
  string: "string";
4585
4585
  number: "number";
4586
4586
  boolean: "boolean";
4587
- duration: "duration";
4588
4587
  json: "json";
4588
+ duration: "duration";
4589
4589
  }>;
4590
4590
  /** Render format applied to an API-call metric value. */
4591
4591
  type ApiCallMetricFormat = z$1.infer<typeof apiCallMetricFormatSchema>;
@@ -4654,8 +4654,8 @@ declare const llmCallMetricSchema: z$1.ZodObject<{
4654
4654
  string: "string";
4655
4655
  number: "number";
4656
4656
  boolean: "boolean";
4657
- duration: "duration";
4658
4657
  json: "json";
4658
+ duration: "duration";
4659
4659
  }>>;
4660
4660
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
4661
4661
  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
@@ -4683,8 +4683,8 @@ declare const apiCallMetricSchema: z$1.ZodObject<{
4683
4683
  string: "string";
4684
4684
  number: "number";
4685
4685
  boolean: "boolean";
4686
- duration: "duration";
4687
4686
  json: "json";
4687
+ duration: "duration";
4688
4688
  }>>;
4689
4689
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
4690
4690
  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
@@ -4797,8 +4797,8 @@ declare const llmCallsConfigSchema: z$1.ZodObject<{
4797
4797
  string: "string";
4798
4798
  number: "number";
4799
4799
  boolean: "boolean";
4800
- duration: "duration";
4801
4800
  json: "json";
4801
+ duration: "duration";
4802
4802
  }>>;
4803
4803
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
4804
4804
  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
@@ -4833,8 +4833,8 @@ declare const apiCallsConfigSchema: z$1.ZodObject<{
4833
4833
  string: "string";
4834
4834
  number: "number";
4835
4835
  boolean: "boolean";
4836
- duration: "duration";
4837
4836
  json: "json";
4837
+ duration: "duration";
4838
4838
  }>>;
4839
4839
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
4840
4840
  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
@@ -5135,8 +5135,8 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
5135
5135
  format: z$1.ZodOptional<z$1.ZodEnum<{
5136
5136
  string: "string";
5137
5137
  number: "number";
5138
- duration: "duration";
5139
5138
  json: "json";
5139
+ duration: "duration";
5140
5140
  }>>;
5141
5141
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
5142
5142
  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
@@ -5183,13 +5183,13 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
5183
5183
  format: z$1.ZodOptional<z$1.ZodEnum<{
5184
5184
  number: "number";
5185
5185
  boolean: "boolean";
5186
- duration: "duration";
5187
- json: "json";
5188
5186
  file: "file";
5189
5187
  markdown: "markdown";
5188
+ json: "json";
5190
5189
  image: "image";
5191
5190
  audio: "audio";
5192
5191
  video: "video";
5192
+ duration: "duration";
5193
5193
  percent: "percent";
5194
5194
  passFail: "passFail";
5195
5195
  stars: "stars";
@@ -5248,8 +5248,8 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
5248
5248
  string: "string";
5249
5249
  number: "number";
5250
5250
  boolean: "boolean";
5251
- duration: "duration";
5252
5251
  json: "json";
5252
+ duration: "duration";
5253
5253
  }>>;
5254
5254
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
5255
5255
  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
@@ -5293,8 +5293,8 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
5293
5293
  string: "string";
5294
5294
  number: "number";
5295
5295
  boolean: "boolean";
5296
- duration: "duration";
5297
5296
  json: "json";
5297
+ duration: "duration";
5298
5298
  }>>;
5299
5299
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
5300
5300
  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
package/dist/index.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { $ as apiCallMetricSchema, $n as getCurrentScope, $t as cacheDebugKeyEntrySchema, A as createRunRequestSchema, An as repoFileRefSchema, At as runLogEntrySchema, B as getNestedAttribute, Bn as deserializeCacheValue, Bt as manualInputNumberFieldSchema, Cn as cellValueSchema, Ct as caseRowSchema, Dn as fileRefSchema, Dt as evalStatItemSchema, En as columnKindSchema, Et as evalStatAggregateSchema, F as extractApiCalls, Fn as evalSpan, Ft as manualInputBooleanFieldSchema, G as deriveStatusFromChildStatuses, Gn as readManualInputFile, Gt as evalChartAxisSchema, H as getEvalDisplayStatus, Hn as serializeCacheValue, Ht as manualInputSelectOptionSchema, I as extractLlmCalls, In as evalTracer, It as manualInputDescriptorSchema, J as DEFAULT_API_CALLS_CONFIG, Jn as advanceEvalTime, Jt as evalChartConfigSchema, K as runManifestSchema, Kn as evalExpect, Kt as evalChartBuiltinMetricSchema, L as simulateLlmCallCost, Ln as hashCacheKey, Lt as manualInputFieldDescriptorSchema, M as sseEnvelopeSchema, Mn as z, Mt as runLogLocationSchema, N as extractCacheEntries, Nn as buildTraceTree, Nt as runLogPhaseSchema, O as configReloadStateSchema, On as jsonCellSchema, Ot as evalStatsConfigSchema, P as extractCacheHits, Pn as captureEvalSpanError, Pt as scoreTraceSchema, Q as apiCallMetricPlacementSchema, Qn as evalLog, Qt as evalChartsConfigSchema, R as simulateTokenAllocation, Rn as hashCacheKeySync, Rt as manualInputJsonFieldSchema, Sn as traceSpanWarningSchema, St as caseDetailSchema, Tn as columnFormatSchema, Tt as evalFreshnessStatusSchema, U as deriveScopedSummaryFromCases, Un as repoFile, Ut as manualInputTextFieldSchema, V as getEvalTitle, Vn as serializeCacheRecording, Vt as manualInputSelectFieldSchema, W as deriveStatusFromCaseRows, Wn as manualInputFileValueSchema, Wt as evalChartAggregateSchema, X as agentEvalsConfigSchema, Xt as evalChartTooltipExtraSchema, Y as DEFAULT_LLM_CALLS_CONFIG, Yn as appendToEvalOutput, Yt as evalChartMetricSchema, Z as apiCallMetricFormatSchema, Zn as evalAssert, Zt as evalChartTypeSchema, _n as traceDisplayConfigSchema, _t as buildCaseKey, an as cacheModeSchema, ar as nextEvalId, at as llmCallCostCurrencySchema, bn as traceSpanKindSchema, bt as getCaseRowEvalKey, cn as cacheRecordingSchema, cr as runInExistingEvalScope, ct as llmCallMetricSchema, dn as spanCacheOptionsSchema, dr as startEvalBackgroundJob, dt as llmCallsConfigSchema, en as cacheDebugKeyFileSchema, er as getEvalCaseInput, et as apiCallsConfigSchema, fn as traceCacheRefSchema, fr as defineEval, ft as removeDefaultConfigSchema, gn as traceAttributeDisplaySchema, gt as trialSelectionModeSchema, hn as traceAttributeDisplayPlacementSchema, ht as runLogsConfigSchema, in as cacheListItemSchema, ir as mergeEvalOutput, it as evalDeriveConfigSchema, j as updateManualScoreRequestSchema, jn as runArtifactRefSchema, jt as runLogLevelSchema, k as configReloadStatusSchema, kn as numberDisplayOptionsSchema, kt as evalSummarySchema, ln as cacheStatusSchema, lr as setEvalOutput, lt as llmCallPricingRateSchema, mn as traceAttributeDisplayInputSchema, mt as resolveLlmCallsConfig, nn as cacheEntryWithDebugKeySchema, nr as incrementEvalOutput, nt as evalColumnOverrideSchema, on as cacheOperationTypeSchema, or as runInEvalRuntimeScope, ot as llmCallMetricFormatSchema, pn as traceAttributeDisplayFormatSchema, pr as getEvalRegistry, pt as resolveApiCallsConfig, q as runSummarySchema, qn as EvalAssertionError, qt as evalChartColorSchema, rn as cacheFileSchema, rr as isInEvalScope, rt as evalColumnsSchema, sn as cacheRecordingOpSchema, sr as runInEvalScope, st as llmCallMetricPlacementSchema, tn as cacheEntrySchema, tr as getEvalStartTime, tt as defaultConfigKeySchema, un as serializedCacheSpanSchema, ur as setScopeCacheContext, ut as llmCallPricingSchema, vn as traceDisplayInputConfigSchema, vt as buildEvalKey, wn as columnDefSchema, wt as discoveryIssueSchema, xn as traceSpanSchema, xt as assertionFailureSchema, yn as traceSpanErrorSchema, yt as getCaseRowCaseKey, z as applyDerivedCallAttributes, zn as deserializeCacheRecording, zt as manualInputMultilineFieldSchema } from "./runOrchestration-ZpN7xty_.mjs";
2
- import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-huuJbDNb.mjs";
3
- import "./src-1Qvuh0NH.mjs";
1
+ import { $ as apiCallMetricSchema, $n as getCurrentScope, $t as cacheDebugKeyEntrySchema, A as createRunRequestSchema, An as repoFileRefSchema, At as runLogEntrySchema, B as getNestedAttribute, Bn as deserializeCacheValue, Bt as manualInputNumberFieldSchema, Cn as cellValueSchema, Ct as caseRowSchema, Dn as fileRefSchema, Dt as evalStatItemSchema, En as columnKindSchema, Et as evalStatAggregateSchema, F as extractApiCalls, Fn as evalSpan, Ft as manualInputBooleanFieldSchema, G as deriveStatusFromChildStatuses, Gn as readManualInputFile, Gt as evalChartAxisSchema, H as getEvalDisplayStatus, Hn as serializeCacheValue, Ht as manualInputSelectOptionSchema, I as extractLlmCalls, In as evalTracer, It as manualInputDescriptorSchema, J as DEFAULT_API_CALLS_CONFIG, Jn as advanceEvalTime, Jt as evalChartConfigSchema, K as runManifestSchema, Kn as evalExpect, Kt as evalChartBuiltinMetricSchema, L as simulateLlmCallCost, Ln as hashCacheKey, Lt as manualInputFieldDescriptorSchema, M as sseEnvelopeSchema, Mn as z, Mt as runLogLocationSchema, N as extractCacheEntries, Nn as buildTraceTree, Nt as runLogPhaseSchema, O as configReloadStateSchema, On as jsonCellSchema, Ot as evalStatsConfigSchema, P as extractCacheHits, Pn as captureEvalSpanError, Pt as scoreTraceSchema, Q as apiCallMetricPlacementSchema, Qn as evalLog, Qt as evalChartsConfigSchema, R as simulateTokenAllocation, Rn as hashCacheKeySync, Rt as manualInputJsonFieldSchema, Sn as traceSpanWarningSchema, St as caseDetailSchema, Tn as columnFormatSchema, Tt as evalFreshnessStatusSchema, U as deriveScopedSummaryFromCases, Un as repoFile, Ut as manualInputTextFieldSchema, V as getEvalTitle, Vn as serializeCacheRecording, Vt as manualInputSelectFieldSchema, W as deriveStatusFromCaseRows, Wn as manualInputFileValueSchema, Wt as evalChartAggregateSchema, X as agentEvalsConfigSchema, Xt as evalChartTooltipExtraSchema, Y as DEFAULT_LLM_CALLS_CONFIG, Yn as appendToEvalOutput, Yt as evalChartMetricSchema, Z as apiCallMetricFormatSchema, Zn as evalAssert, Zt as evalChartTypeSchema, _n as traceDisplayConfigSchema, _t as buildCaseKey, an as cacheModeSchema, ar as nextEvalId, at as llmCallCostCurrencySchema, bn as traceSpanKindSchema, bt as getCaseRowEvalKey, cn as cacheRecordingSchema, cr as runInExistingEvalScope, ct as llmCallMetricSchema, dn as spanCacheOptionsSchema, dr as startEvalBackgroundJob, dt as llmCallsConfigSchema, en as cacheDebugKeyFileSchema, er as getEvalCaseInput, et as apiCallsConfigSchema, fn as traceCacheRefSchema, fr as defineEval, ft as removeDefaultConfigSchema, gn as traceAttributeDisplaySchema, gt as trialSelectionModeSchema, hn as traceAttributeDisplayPlacementSchema, ht as runLogsConfigSchema, in as cacheListItemSchema, ir as mergeEvalOutput, it as evalDeriveConfigSchema, j as updateManualScoreRequestSchema, jn as runArtifactRefSchema, jt as runLogLevelSchema, k as configReloadStatusSchema, kn as numberDisplayOptionsSchema, kt as evalSummarySchema, ln as cacheStatusSchema, lr as setEvalOutput, lt as llmCallPricingRateSchema, mn as traceAttributeDisplayInputSchema, mt as resolveLlmCallsConfig, nn as cacheEntryWithDebugKeySchema, nr as incrementEvalOutput, nt as evalColumnOverrideSchema, on as cacheOperationTypeSchema, or as runInEvalRuntimeScope, ot as llmCallMetricFormatSchema, pn as traceAttributeDisplayFormatSchema, pr as getEvalRegistry, pt as resolveApiCallsConfig, q as runSummarySchema, qn as EvalAssertionError, qt as evalChartColorSchema, rn as cacheFileSchema, rr as isInEvalScope, rt as evalColumnsSchema, sn as cacheRecordingOpSchema, sr as runInEvalScope, st as llmCallMetricPlacementSchema, tn as cacheEntrySchema, tr as getEvalStartTime, tt as defaultConfigKeySchema, un as serializedCacheSpanSchema, ur as setScopeCacheContext, ut as llmCallPricingSchema, vn as traceDisplayInputConfigSchema, vt as buildEvalKey, wn as columnDefSchema, wt as discoveryIssueSchema, xn as traceSpanSchema, xt as assertionFailureSchema, yn as traceSpanErrorSchema, yt as getCaseRowCaseKey, z as applyDerivedCallAttributes, zn as deserializeCacheRecording, zt as manualInputMultilineFieldSchema } from "./runOrchestration-DoslE_Oo.mjs";
2
+ import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-CVwIjcsX.mjs";
3
+ import "./src-Bcc2ZHK8.mjs";
4
4
  export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, advanceEvalTime, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, applyDerivedCallAttributes, assertionFailureSchema, buildCaseKey, buildEvalKey, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, cleanupStagedManualInputFiles, columnDefSchema, columnFormatSchema, columnKindSchema, configReloadStateSchema, configReloadStatusSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, discoveryIssueSchema, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalColumnOverrideSchema, evalColumnsSchema, evalDeriveConfigSchema, evalExpect, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCaseRowCaseKey, getCaseRowEvalKey, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalStartTime, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, jsonCellSchema, llmCallCostCurrencySchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingRateSchema, llmCallPricingSchema, llmCallsConfigSchema, manualInputBooleanFieldSchema, manualInputDescriptorSchema, manualInputFieldDescriptorSchema, manualInputFileValueSchema, manualInputJsonFieldSchema, manualInputMultilineFieldSchema, manualInputNumberFieldSchema, manualInputSelectFieldSchema, manualInputSelectOptionSchema, manualInputTextFieldSchema, materializeManualInputFiles, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, readManualInputFile, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, spanCacheOptionsSchema, sseEnvelopeSchema, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
package/dist/runChild.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { A as createRunRequestSchema, C as loadConfig, D as createFsCacheStore, It as manualInputDescriptorSchema, K as runManifestSchema, Ot as evalStatsConfigSchema, Qt as evalChartsConfigSchema, Xn as configureEvalRunLogs, q as runSummarySchema, r as getTargetEvals$1, t as executeRun, vt as buildEvalKey, wn as columnDefSchema, x as parseEvalDiscovery } from "./runOrchestration-ZpN7xty_.mjs";
1
+ import { A as createRunRequestSchema, C as loadConfig, D as createFsCacheStore, It as manualInputDescriptorSchema, K as runManifestSchema, Ot as evalStatsConfigSchema, Qt as evalChartsConfigSchema, Xn as configureEvalRunLogs, q as runSummarySchema, r as getTargetEvals$1, t as executeRun, vt as buildEvalKey, wn as columnDefSchema, x as parseEvalDiscovery } from "./runOrchestration-DoslE_Oo.mjs";
2
2
  import { createHash } from "node:crypto";
3
3
  import { readFile } from "node:fs/promises";
4
4
  import { relative } from "node:path";
@@ -1688,7 +1688,7 @@ function createTraceCache(generateSpanId) {
1688
1688
  const hit = await cacheCtx.adapter.lookup(namespace, keyHash);
1689
1689
  if (hit) {
1690
1690
  const storedAt = hit.storedAt;
1691
- const age = Date.now() - new Date(storedAt).getTime();
1691
+ const age = getRealDateNowMs() - new Date(storedAt).getTime();
1692
1692
  recordCacheRef(scope, activeSpan, {
1693
1693
  type: "value",
1694
1694
  name: info.name,
@@ -2149,7 +2149,7 @@ async function traceSpanInternal(info, fn) {
2149
2149
  mergeSpanAttributes(spanRecord, {
2150
2150
  "cache.status": "hit",
2151
2151
  "cache.storedAt": storedAt,
2152
- "cache.age": Date.now() - new Date(storedAt).getTime()
2152
+ "cache.age": getRealDateNowMs() - new Date(storedAt).getTime()
2153
2153
  });
2154
2154
  const recording = deserializeCacheRecording(hit.recording);
2155
2155
  replayRecording(scope, spanRecord, recording, { generateSpanId });
@@ -5025,30 +5025,6 @@ function mergeOverrides(base, override) {
5025
5025
  };
5026
5026
  }
5027
5027
  /**
5028
- * Populate `target` with `ColumnDef` entries for any keys in `columns`
5029
- * that aren't already present, applying user-supplied `overrides` and
5030
- * flagging score columns declared via `scores`.
5031
- */
5032
- function mergeColumnDefs(target, columns, overrides, scores, manualScores) {
5033
- const scoreKeys = new Set(Object.keys(scores ?? {}));
5034
- const manualScoreKeys = new Set(Object.keys(manualScores ?? {}));
5035
- const overrideMap = overrides ?? {};
5036
- for (const [key, value] of Object.entries(columns)) {
5037
- if (target.has(key)) continue;
5038
- const override = mergeOverrides(getScoreOverride(scores?.[key]) ?? manualScores?.[key], overrideMap[key]);
5039
- const isScore = scoreKeys.has(key) || manualScoreKeys.has(key);
5040
- target.set(key, createColumnDef({
5041
- key,
5042
- override,
5043
- scoreDef: scores?.[key],
5044
- manualScoreDef: manualScores?.[key],
5045
- inferredKind: isScore ? "number" : inferKind(value),
5046
- isScore,
5047
- isManualScore: manualScoreKeys.has(key)
5048
- }));
5049
- }
5050
- }
5051
- /**
5052
5028
  * Build the column definitions declared directly on an eval before any runtime
5053
5029
  * output values exist. This lets discovery metadata describe authored rich
5054
5030
  * output columns even for runs created by another process.
@@ -5092,29 +5068,18 @@ function buildDeclaredColumnDefs(overrides, scores, manualScores) {
5092
5068
  }
5093
5069
  return [...declaredDefs.values()];
5094
5070
  }
5095
- /** Infer a `ColumnKind` from a runtime value when no override is set. */
5096
- function inferKind(value) {
5097
- if (typeof value === "number") return "number";
5098
- if (typeof value === "boolean") return "boolean";
5099
- return "string";
5100
- }
5101
5071
  /**
5102
5072
  * Coerce an arbitrary runtime value into a serializable `CellValue`.
5103
- * Non-primitive values fall back to `JSON.stringify`.
5073
+ * Runtime values use the SDK's tagged serializer so saved run artifacts keep
5074
+ * structured data instead of storing JSON strings. Native binary/file root
5075
+ * values are handled before this helper.
5104
5076
  */
5105
- function toCellValue(value, override = void 0) {
5106
- if (value === null) return null;
5107
- if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") return value;
5108
- if (value === void 0) return void 0;
5109
- if (override?.format === "image" || override?.format === "audio" || override?.format === "video" || override?.format === "file") {
5110
- const parsed = fileRefSchema.safeParse(value);
5111
- if (parsed.success) return parsed.data;
5112
- }
5113
- if (override?.format === "json") {
5114
- const parsed = jsonCellSchema.safeParse(value);
5115
- if (parsed.success) return parsed.data;
5116
- }
5117
- return JSON.stringify(value);
5077
+ async function toCellValue(value) {
5078
+ const fileRef = fileRefSchema.safeParse(value);
5079
+ if (fileRef.success) return fileRef.data;
5080
+ const serialized = await serializeCacheValue(value, { preserveUndefined: true });
5081
+ const parsed = jsonCellSchema.safeParse(serialized);
5082
+ if (parsed.success) return parsed.data;
5118
5083
  }
5119
5084
  function inferKindFromFormat(format) {
5120
5085
  if (format === "boolean") return "boolean";
@@ -6586,7 +6551,7 @@ async function runDeriveFromTracingConfig(params) {
6586
6551
  }
6587
6552
  }
6588
6553
  async function runCase(params) {
6589
- const { evalDef, evalId, evalKey = evalId, evalCase, globalTraceDisplay, globalColumns, globalDeriveFromTracing, llmCallsConfig = resolveLlmCallsConfig(void 0), apiCallsConfig = resolveApiCallsConfig(void 0), globalRemoveDefaultConfig, trial, startTime, cacheAdapter, cacheMode, moduleIsolation, evalFilePath, evalFileRelativePath = evalFilePath, workspaceRoot, artifactDir, runId } = params;
6554
+ const { evalDef, evalId, evalKey = evalId, evalCase, globalTraceDisplay, globalDeriveFromTracing, llmCallsConfig = resolveLlmCallsConfig(void 0), apiCallsConfig = resolveApiCallsConfig(void 0), globalRemoveDefaultConfig, trial, startTime, cacheAdapter, cacheMode, moduleIsolation, evalFilePath, evalFileRelativePath = evalFilePath, workspaceRoot, artifactDir, runId } = params;
6590
6555
  const scopedIdPrefix = buildScopedEvalIdPrefix({
6591
6556
  evalId,
6592
6557
  evalFilePath,
@@ -6739,12 +6704,6 @@ async function runCase(params) {
6739
6704
  const status = nonAssertError ? "error" : passed ? "pass" : "fail";
6740
6705
  const { trace: displayTrace, traceDisplay } = resolveTracePresentation(spansWithDerivedAttributes, globalTraceDisplay, evalDef.traceDisplay);
6741
6706
  const columns = {};
6742
- const columnOverrides = mergeDefaultColumns({
6743
- globalColumns,
6744
- columns: evalDef.columns,
6745
- globalRemove: globalRemoveDefaultConfig,
6746
- evalRemove: evalDef.removeDefaultConfig
6747
- });
6748
6707
  for (const [key, value] of Object.entries(scope.outputs)) {
6749
6708
  const cell = isBlob(value) ? await persistInlineArtifact({
6750
6709
  artifactDir,
@@ -6753,7 +6712,7 @@ async function runCase(params) {
6753
6712
  outputKey: key,
6754
6713
  trial,
6755
6714
  value
6756
- }) : toCellValue(value, columnOverrides?.[key]);
6715
+ }) : await toCellValue(value);
6757
6716
  if (cell !== void 0) columns[key] = cell;
6758
6717
  }
6759
6718
  for (const key of Object.keys(evalDef.manualScores ?? {})) columns[key] = null;
@@ -6989,7 +6948,6 @@ async function finalizePreparedCase(params) {
6989
6948
  const artifactFileId = getCaseArtifactFileId(runState, winningTrial.caseRow);
6990
6949
  runState.cases.push(winningTrial.caseRow);
6991
6950
  runState.caseDetails.set(getCaseRowCaseKey(winningTrial.caseRow), winningTrial.caseDetail);
6992
- preparedEval.mergeColumns(winningTrial.caseDetail.columns);
6993
6951
  if (winningTrial.caseRow.status === "pass") runState.summary.passedCases++;
6994
6952
  else if (winningTrial.caseRow.status === "error") runState.summary.errorCases++;
6995
6953
  else runState.summary.failedCases++;
@@ -7106,13 +7064,13 @@ async function executeRun({ runState, request, runDir, config, cacheStore, lastR
7106
7064
  globalRemove: config.removeDefaultConfig
7107
7065
  });
7108
7066
  const declaredColumnDefs = buildDeclaredColumnDefs(defaultConfig.columns, evalDef.scores, evalDef.manualScores);
7109
- const accumulatedColumns = new Map(declaredColumnDefs.map((def) => [def.key, def]));
7110
7067
  const validatedCharts = validateCharts({
7111
7068
  charts: defaultConfig.charts,
7112
7069
  columnDefs: declaredColumnDefs,
7113
7070
  evalId: evalMeta.id
7114
7071
  });
7115
7072
  for (const warning of validatedCharts.warnings) console.warn(warning);
7073
+ evalMeta.columnDefs = declaredColumnDefs;
7116
7074
  evalMeta.stats = defaultConfig.stats;
7117
7075
  evalMeta.charts = validatedCharts.charts;
7118
7076
  const evalCaseRows = [];
@@ -7121,13 +7079,9 @@ async function executeRun({ runState, request, runDir, config, cacheStore, lastR
7121
7079
  const manualScoreKeys = Object.freeze(Object.keys(evalDef.manualScores ?? {}));
7122
7080
  const preparedEval = {
7123
7081
  evalMeta,
7124
- accumulatedColumns,
7125
7082
  evalCaseRows,
7126
7083
  preparedCases,
7127
- scoreKeys: Object.freeze([...scoreKeys, ...manualScoreKeys]),
7128
- mergeColumns: (columns) => {
7129
- mergeColumnDefs(accumulatedColumns, columns, defaultConfig.columns, evalDef.scores, evalDef.manualScores);
7130
- }
7084
+ scoreKeys: Object.freeze([...scoreKeys, ...manualScoreKeys])
7131
7085
  };
7132
7086
  preparedEvals.push(preparedEval);
7133
7087
  for (const evalCase of cases) {
@@ -7236,7 +7190,6 @@ async function executeRun({ runState, request, runDir, config, cacheStore, lastR
7236
7190
  onCaseFinished,
7237
7191
  emitEvent
7238
7192
  });
7239
- preparedEval.evalMeta.columnDefs = [...preparedEval.accumulatedColumns.values()];
7240
7193
  lastRunStatusMap.set(preparedEval.evalMeta.key, toLastRunStatus(deriveStatusFromCaseRows({ caseRows: preparedEval.evalCaseRows })));
7241
7194
  const latestStatus = lastRunStatusMap.get(preparedEval.evalMeta.key) ?? null;
7242
7195
  latestRunInfoMap.set(preparedEval.evalMeta.key, {
@@ -1,5 +1,5 @@
1
- import { n as createRunner } from "./cli-huuJbDNb.mjs";
2
- import "./src-1Qvuh0NH.mjs";
1
+ import { n as createRunner } from "./cli-CVwIjcsX.mjs";
2
+ import "./src-Bcc2ZHK8.mjs";
3
3
  //#region ../../apps/server/src/runner.ts
4
4
  let runnerInstance = null;
5
5
  function getRunnerInstance() {
@@ -1,2 +1,2 @@
1
- import { n as initRunner, t as getRunnerInstance } from "./runner-Dkol2ukD.mjs";
1
+ import { n as initRunner, t as getRunnerInstance } from "./runner-ChHgWruW.mjs";
2
2
  export { getRunnerInstance, initRunner };
@@ -0,0 +1,3 @@
1
+ import "./runOrchestration-DoslE_Oo.mjs";
2
+ import "./cli-CVwIjcsX.mjs";
3
+ export {};
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ls-stack/agent-eval",
3
- "version": "0.33.0",
3
+ "version": "0.35.0",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "agent-evals": "./dist/bin.mjs"
@@ -32,8 +32,8 @@
32
32
  "@types/node": "^24.7.2",
33
33
  "typescript": "^5.9.2",
34
34
  "@agent-evals/runner": "0.0.1",
35
- "@agent-evals/sdk": "0.0.1",
36
- "@agent-evals/shared": "0.0.1"
35
+ "@agent-evals/shared": "0.0.1",
36
+ "@agent-evals/sdk": "0.0.1"
37
37
  },
38
38
  "scripts": {
39
39
  "build": "pnpm --filter @agent-evals/web build && pnpm --filter @agent-evals/shared build && pnpm --filter @agent-evals/sdk build && pnpm --filter @agent-evals/runner build && tsdown --filter cli-js && tsdown --filter cli-types",