@ls-stack/agent-eval 0.36.0 → 0.37.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- import { C as loadConfig, D as createFsCacheStore, E as validateCharts, H as getEvalDisplayStatus, S as resolveEvalDefaultConfig, T as normalizeScoreDef, U as deriveScopedSummaryFromCases, V as getEvalTitle, _ as buildManualInputDescriptor, a as getLastRunStatuses, b as loadEvalModule, bt as getCaseRowEvalKey, c as loadPersistedRunSnapshots, d as persistRunState, f as recomputeEvalStatusesInRuns, g as resolveArtifactPath, h as resolveTracePresentation, i as generateRunId, l as nextShortIdFromSnapshots, m as runTouchesEval, mt as resolveLlmCallsConfig, n as getTargetEvalKeys, o as getLatestRunInfos, p as recomputePersistedCaseStatus, pr as getEvalRegistry, pt as resolveApiCallsConfig, q as runSummarySchema, s as loadPersistedRunSnapshot, u as persistCaseDetail, v as parseManualInputValues, vt as buildEvalKey, w as buildDeclaredColumnDefs, x as parseEvalDiscovery, y as deriveEvalFreshness, yt as getCaseRowCaseKey, z as applyDerivedCallAttributes } from "./runOrchestration-V1TxX8es.mjs";
1
+ import { C as parseEvalDiscovery, D as normalizeScoreDef, E as buildDeclaredColumnDefs, G as deriveScopedSummaryFromCases, O as validateCharts, S as loadEvalModule, St as getCaseRowEvalKey, T as loadConfig, U as getEvalTitle, V as applyDerivedCallAttributes, W as getEvalDisplayStatus, Y as runSummarySchema, _ as resolveTracePresentation, a as generateRunId, b as parseManualInputValues, bt as buildEvalKey, c as loadPersistedRunSnapshot, d as persistCaseDetail, f as deleteTemporaryRuns, g as runTouchesEval, gt as resolveLlmCallsConfig, h as recomputePersistedCaseStatus, hr as getEvalRegistry, ht as resolveApiCallsConfig, i as stripTerminalControlCodes, k as createFsCacheStore, l as loadPersistedRunSnapshots, m as recomputeEvalStatusesInRuns, n as getTargetEvalKeys, o as getLastRunStatuses, p as persistRunState, s as getLatestRunInfos, u as nextShortIdFromSnapshots, v as resolveArtifactPath, w as resolveEvalDefaultConfig, x as deriveEvalFreshness, xt as getCaseRowCaseKey, y as buildManualInputDescriptor } from "./runOrchestration-C4o5TcIu.mjs";
2
2
  import { createHash, randomUUID } from "node:crypto";
3
3
  import { copyFile, mkdir, readFile, rm, writeFile } from "node:fs/promises";
4
4
  import { basename, dirname, extname, isAbsolute, join, relative, resolve, sep } from "node:path";
@@ -536,6 +536,7 @@ function isRunChildMessage(value) {
536
536
  const runChildInspectArgEnv = "AGENT_EVALS_RUN_CHILD_INSPECT_ARG";
537
537
  const inspectFlagPrefix = "--inspect";
538
538
  const inspectBrkFlagPrefix = "--inspect-brk";
539
+ const childOutputTailMaxLength = 12e3;
539
540
  function startRunChild(params) {
540
541
  const child = spawn(process.execPath, [
541
542
  ...getRunChildExecArgv(),
@@ -546,11 +547,12 @@ function startRunChild(params) {
546
547
  env: process.env,
547
548
  stdio: [
548
549
  "ignore",
549
- "inherit",
550
- "inherit",
550
+ "pipe",
551
+ "pipe",
551
552
  "ipc"
552
553
  ]
553
554
  });
555
+ const outputTail = createRunChildOutputTail(child);
554
556
  params.runState.childProcess = child;
555
557
  child.on("message", (message) => {
556
558
  if (!isRunChildMessage(message)) return;
@@ -564,8 +566,39 @@ function startRunChild(params) {
564
566
  if (params.runState.childProcess === child) params.runState.childProcess = void 0;
565
567
  if (params.runState.manifest.status !== "running" || params.runState.childTerminalReceived) return;
566
568
  const reason = signal !== null ? `Run child exited with signal ${signal}` : `Run child exited with code ${String(code)}`;
567
- markRunErrored(params.runState, reason, params.managerContext);
569
+ markRunErrored(params.runState, formatUnexpectedRunChildExit(reason, outputTail), params.managerContext);
570
+ });
571
+ }
572
+ function createRunChildOutputTail(child) {
573
+ const tail = {
574
+ stdout: "",
575
+ stderr: ""
576
+ };
577
+ child.stdout?.on("data", (chunk) => {
578
+ process.stdout.write(chunk);
579
+ tail.stdout = appendOutputTail(tail.stdout, chunkToText(chunk));
580
+ });
581
+ child.stderr?.on("data", (chunk) => {
582
+ process.stderr.write(chunk);
583
+ tail.stderr = appendOutputTail(tail.stderr, chunkToText(chunk));
568
584
  });
585
+ return tail;
586
+ }
587
+ function chunkToText(chunk) {
588
+ return typeof chunk === "string" ? chunk : chunk.toString("utf-8");
589
+ }
590
+ function appendOutputTail(current, next) {
591
+ const combined = current + next;
592
+ if (combined.length <= childOutputTailMaxLength) return combined;
593
+ return combined.slice(combined.length - childOutputTailMaxLength);
594
+ }
595
+ function formatUnexpectedRunChildExit(reason, outputTail) {
596
+ const sections = [reason];
597
+ const stderr = stripTerminalControlCodes(outputTail.stderr).trim();
598
+ const stdout = stripTerminalControlCodes(outputTail.stdout).trim();
599
+ if (stderr.length > 0) sections.push(`Child stderr (last ${String(stderr.length)} chars):\n${stderr}`);
600
+ if (stdout.length > 0) sections.push(`Child stdout (last ${String(stdout.length)} chars):\n${stdout}`);
601
+ return sections.join("\n\n");
569
602
  }
570
603
  function getRunChildExecArgv() {
571
604
  const execArgv = [];
@@ -647,6 +680,7 @@ function applyChildEvalMetas(evals, childMetas) {
647
680
  }
648
681
  evalMeta.columnDefs = childMeta.columnDefs;
649
682
  evalMeta.caseCount = childMeta.caseCount;
683
+ evalMeta.caseIds = childMeta.caseIds;
650
684
  evalMeta.stats = childMeta.stats;
651
685
  evalMeta.charts = childMeta.charts;
652
686
  evalMeta.sourceFingerprint = childMeta.sourceFingerprint;
@@ -1064,6 +1098,7 @@ function createRunner({ watchForChanges = true } = {}) {
1064
1098
  sourceFingerprint,
1065
1099
  columnDefs,
1066
1100
  caseCount: null,
1101
+ caseIds: void 0,
1067
1102
  stats,
1068
1103
  charts,
1069
1104
  manualInputDescriptor,
@@ -1074,18 +1109,24 @@ function createRunner({ watchForChanges = true } = {}) {
1074
1109
  emitDiscoveryEvent();
1075
1110
  },
1076
1111
  async startRun(request) {
1112
+ const deletedTemporaryRuns = await deleteTemporaryRuns({
1113
+ runs,
1114
+ cancelRunningRun: killRunChild
1115
+ });
1077
1116
  const runId = generateRunId();
1078
1117
  const shortId = `r${String(nextShortIdNum++)}`;
1079
1118
  const now = (/* @__PURE__ */ new Date()).toISOString();
1080
1119
  const cacheMode = request.cache?.mode ?? "use";
1081
1120
  const runDir = join(localStateDir, "runs", runId);
1121
+ const gitState = readGitWorktreeState(workspaceRoot);
1082
1122
  const manifest = {
1083
1123
  id: runId,
1084
1124
  shortId,
1085
1125
  status: "running",
1126
+ temporary: request.temporary === true,
1086
1127
  startedAt: now,
1087
1128
  endedAt: null,
1088
- commitSha: readGitWorktreeState(workspaceRoot).commitSha,
1129
+ commitSha: gitState.commitSha,
1089
1130
  evalSourceFingerprints: {},
1090
1131
  target: request.target,
1091
1132
  trials: request.trials,
@@ -1162,6 +1203,7 @@ function createRunner({ watchForChanges = true } = {}) {
1162
1203
  emitDiscoveryEvent
1163
1204
  }
1164
1205
  });
1206
+ if (deletedTemporaryRuns > 0) emitDiscoveryEvent();
1165
1207
  return {
1166
1208
  manifest,
1167
1209
  summary,
@@ -1445,6 +1487,7 @@ Flags:
1445
1487
  --no-cache Shortcut for --cache bypass
1446
1488
  --refresh-cache Shortcut for --cache refresh
1447
1489
  --clear-cache Clear the cache before starting the run
1490
+ --temporary Persist until the next run starts, then delete
1448
1491
  --input <json> Manual input value for a single targeted eval
1449
1492
  that declares manualInput
1450
1493
  --input-file <path> JSON object keyed by eval key (or eval id) with
@@ -1515,6 +1558,7 @@ Options:
1515
1558
  --no-cache Shortcut for --cache bypass
1516
1559
  --refresh-cache Shortcut for --cache refresh
1517
1560
  --clear-cache Clear the cache before starting the run
1561
+ --temporary Persist until the next run starts, then delete
1518
1562
  --no-env Disable automatic .env loading
1519
1563
  --help, -h Show help
1520
1564
  `);
@@ -1753,6 +1797,7 @@ function parseArgs(argv) {
1753
1797
  port: 4100,
1754
1798
  cacheMode: "use",
1755
1799
  clearCache: false,
1800
+ temporary: false,
1756
1801
  all: false,
1757
1802
  loadEnv: normalizedArgv.length === argv.length,
1758
1803
  inputJson: void 0,
@@ -1803,6 +1848,7 @@ function parseArgs(argv) {
1803
1848
  } else if (arg === "--no-cache") args.cacheMode = "bypass";
1804
1849
  else if (arg === "--refresh-cache") args.cacheMode = "refresh";
1805
1850
  else if (arg === "--clear-cache") args.clearCache = true;
1851
+ else if (arg === "--temporary") args.temporary = true;
1806
1852
  else if (arg === "--input" && next !== void 0) {
1807
1853
  args.inputJson = next;
1808
1854
  i++;
@@ -1940,8 +1986,8 @@ async function commandApp(args) {
1940
1986
  const { serve } = await import("@hono/node-server");
1941
1987
  const bundledWebDist = resolve(currentDir, "apps/web/dist");
1942
1988
  if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
1943
- const appModule = await import("./app-BlNzXWDM.mjs");
1944
- const runnerModule = await import("./runner-znY6PY1M.mjs");
1989
+ const appModule = await import("./app-C7ON9Wdh.mjs");
1990
+ const runnerModule = await import("./runner-LqeHPID6.mjs");
1945
1991
  if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
1946
1992
  if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
1947
1993
  await runnerModule.initRunner();
@@ -2030,6 +2076,7 @@ async function commandRun(args) {
2030
2076
  const run = await runner.startRun({
2031
2077
  target,
2032
2078
  trials: args.trials,
2079
+ temporary: args.temporary,
2033
2080
  cache: { mode: args.cacheMode },
2034
2081
  manualInputs: manualInputsResult.value
2035
2082
  });
@@ -2037,6 +2084,7 @@ async function commandRun(args) {
2037
2084
  console.info(`Run started: ${run.manifest.id}`);
2038
2085
  console.info(`Trials: ${String(args.trials)}`);
2039
2086
  if (args.cacheMode !== "use") console.info(`Cache mode: ${args.cacheMode}`);
2087
+ if (args.temporary) console.info("Temporary: yes");
2040
2088
  console.info("");
2041
2089
  }
2042
2090
  await waitForRunCompletion(runner, run.manifest.id);
@@ -2152,6 +2200,7 @@ function buildRunFileIndex(workspaceRoot, run) {
2152
2200
  id: run.manifest.id,
2153
2201
  shortId: run.manifest.shortId,
2154
2202
  status: run.manifest.status,
2203
+ temporary: run.manifest.temporary,
2155
2204
  startedAt: run.manifest.startedAt,
2156
2205
  endedAt: run.manifest.endedAt,
2157
2206
  target: run.manifest.target,
@@ -2212,7 +2261,7 @@ function printRunFileIndexes(indexes) {
2212
2261
  }
2213
2262
  }
2214
2263
  function printRunFileIndex(index) {
2215
- console.info(`${index.shortId} (${index.id}) ${index.status} ${formatCaseCounts(index.summary)}`);
2264
+ console.info(`${index.shortId} (${index.id}) ${index.status}${index.temporary ? " temporary" : ""} ${formatCaseCounts(index.summary)}`);
2216
2265
  console.info(` dir: ${index.files.dir}`);
2217
2266
  console.info(` run: ${index.files.run}`);
2218
2267
  console.info(` summary: ${index.files.summary}`);
package/dist/index.d.mts CHANGED
@@ -127,6 +127,7 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
127
127
  }>>;
128
128
  }, z$1.core.$strip>>;
129
129
  caseCount: z$1.ZodNullable<z$1.ZodNumber>;
130
+ caseIds: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
130
131
  lastRunStatus: z$1.ZodNullable<z$1.ZodEnum<{
131
132
  error: "error";
132
133
  pass: "pass";
@@ -617,6 +618,7 @@ declare const runManifestSchema$1: z$1.ZodObject<{
617
618
  cancelled: "cancelled";
618
619
  error: "error";
619
620
  }>;
621
+ temporary: z$1.ZodDefault<z$1.ZodOptional<z$1.ZodBoolean>>;
620
622
  startedAt: z$1.ZodString;
621
623
  endedAt: z$1.ZodNullable<z$1.ZodString>;
622
624
  commitSha: z$1.ZodDefault<z$1.ZodOptional<z$1.ZodNullable<z$1.ZodString>>>;
@@ -963,6 +965,7 @@ declare const createRunRequestSchema$1: z$1.ZodObject<{
963
965
  caseIds: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
964
966
  }, z$1.core.$strip>;
965
967
  trials: z$1.ZodNumber;
968
+ temporary: z$1.ZodOptional<z$1.ZodBoolean>;
966
969
  cache: z$1.ZodOptional<z$1.ZodObject<{
967
970
  mode: z$1.ZodDefault<z$1.ZodEnum<{
968
971
  use: "use";
@@ -2916,12 +2919,12 @@ declare const columnFormatSchema: z$1.ZodEnum<{
2916
2919
  number: "number";
2917
2920
  boolean: "boolean";
2918
2921
  file: "file";
2919
- duration: "duration";
2920
2922
  markdown: "markdown";
2921
2923
  json: "json";
2922
2924
  image: "image";
2923
2925
  audio: "audio";
2924
2926
  video: "video";
2927
+ duration: "duration";
2925
2928
  percent: "percent";
2926
2929
  passFail: "passFail";
2927
2930
  stars: "stars";
@@ -2941,12 +2944,12 @@ declare const columnDefSchema: z$1.ZodObject<{
2941
2944
  number: "number";
2942
2945
  boolean: "boolean";
2943
2946
  file: "file";
2944
- duration: "duration";
2945
2947
  markdown: "markdown";
2946
2948
  json: "json";
2947
2949
  image: "image";
2948
2950
  audio: "audio";
2949
2951
  video: "video";
2952
+ duration: "duration";
2950
2953
  percent: "percent";
2951
2954
  passFail: "passFail";
2952
2955
  stars: "stars";
@@ -2991,8 +2994,8 @@ declare const traceSpanKindSchema: z$1.ZodString;
2991
2994
  declare const traceAttributeDisplayFormatSchema: z$1.ZodEnum<{
2992
2995
  string: "string";
2993
2996
  number: "number";
2994
- duration: "duration";
2995
2997
  json: "json";
2998
+ duration: "duration";
2996
2999
  }>;
2997
3000
  /**
2998
3001
  * Formatting hint for trace attribute values rendered by the UI.
@@ -3016,8 +3019,8 @@ declare const traceAttributeDisplaySchema: z$1.ZodObject<{
3016
3019
  format: z$1.ZodOptional<z$1.ZodEnum<{
3017
3020
  string: "string";
3018
3021
  number: "number";
3019
- duration: "duration";
3020
3022
  json: "json";
3023
+ duration: "duration";
3021
3024
  }>>;
3022
3025
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
3023
3026
  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
@@ -3052,8 +3055,8 @@ declare const traceDisplayConfigSchema: z$1.ZodObject<{
3052
3055
  format: z$1.ZodOptional<z$1.ZodEnum<{
3053
3056
  string: "string";
3054
3057
  number: "number";
3055
- duration: "duration";
3056
3058
  json: "json";
3059
+ duration: "duration";
3057
3060
  }>>;
3058
3061
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
3059
3062
  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
@@ -3092,8 +3095,8 @@ declare const traceAttributeDisplayInputSchema: z$1.ZodObject<{
3092
3095
  format: z$1.ZodOptional<z$1.ZodEnum<{
3093
3096
  string: "string";
3094
3097
  number: "number";
3095
- duration: "duration";
3096
3098
  json: "json";
3099
+ duration: "duration";
3097
3100
  }>>;
3098
3101
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
3099
3102
  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
@@ -3130,8 +3133,8 @@ declare const traceDisplayInputConfigSchema: z$1.ZodObject<{
3130
3133
  format: z$1.ZodOptional<z$1.ZodEnum<{
3131
3134
  string: "string";
3132
3135
  number: "number";
3133
- duration: "duration";
3134
3136
  json: "json";
3137
+ duration: "duration";
3135
3138
  }>>;
3136
3139
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
3137
3140
  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
@@ -3263,12 +3266,12 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
3263
3266
  number: "number";
3264
3267
  boolean: "boolean";
3265
3268
  file: "file";
3266
- duration: "duration";
3267
3269
  markdown: "markdown";
3268
3270
  json: "json";
3269
3271
  image: "image";
3270
3272
  audio: "audio";
3271
3273
  video: "video";
3274
+ duration: "duration";
3272
3275
  percent: "percent";
3273
3276
  passFail: "passFail";
3274
3277
  stars: "stars";
@@ -3305,12 +3308,12 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
3305
3308
  number: "number";
3306
3309
  boolean: "boolean";
3307
3310
  file: "file";
3308
- duration: "duration";
3309
3311
  markdown: "markdown";
3310
3312
  json: "json";
3311
3313
  image: "image";
3312
3314
  audio: "audio";
3313
3315
  video: "video";
3316
+ duration: "duration";
3314
3317
  percent: "percent";
3315
3318
  passFail: "passFail";
3316
3319
  stars: "stars";
@@ -3348,12 +3351,12 @@ declare const evalSummarySchema: z$1.ZodObject<{
3348
3351
  number: "number";
3349
3352
  boolean: "boolean";
3350
3353
  file: "file";
3351
- duration: "duration";
3352
3354
  markdown: "markdown";
3353
3355
  json: "json";
3354
3356
  image: "image";
3355
3357
  audio: "audio";
3356
3358
  video: "video";
3359
+ duration: "duration";
3357
3360
  percent: "percent";
3358
3361
  passFail: "passFail";
3359
3362
  stars: "stars";
@@ -3372,6 +3375,7 @@ declare const evalSummarySchema: z$1.ZodObject<{
3372
3375
  }>>;
3373
3376
  }, z$1.core.$strip>>;
3374
3377
  caseCount: z$1.ZodNullable<z$1.ZodNumber>;
3378
+ caseIds: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
3375
3379
  lastRunStatus: z$1.ZodNullable<z$1.ZodEnum<{
3376
3380
  error: "error";
3377
3381
  pass: "pass";
@@ -3406,12 +3410,12 @@ declare const evalSummarySchema: z$1.ZodObject<{
3406
3410
  number: "number";
3407
3411
  boolean: "boolean";
3408
3412
  file: "file";
3409
- duration: "duration";
3410
3413
  markdown: "markdown";
3411
3414
  json: "json";
3412
3415
  image: "image";
3413
3416
  audio: "audio";
3414
3417
  video: "video";
3418
+ duration: "duration";
3415
3419
  percent: "percent";
3416
3420
  passFail: "passFail";
3417
3421
  stars: "stars";
@@ -3437,8 +3441,8 @@ declare const evalSummarySchema: z$1.ZodObject<{
3437
3441
  label: z$1.ZodOptional<z$1.ZodString>;
3438
3442
  color: z$1.ZodOptional<z$1.ZodEnum<{
3439
3443
  success: "success";
3440
- accent: "accent";
3441
3444
  error: "error";
3445
+ accent: "accent";
3442
3446
  accentDim: "accentDim";
3443
3447
  warning: "warning";
3444
3448
  textMuted: "textMuted";
@@ -3461,8 +3465,8 @@ declare const evalSummarySchema: z$1.ZodObject<{
3461
3465
  label: z$1.ZodOptional<z$1.ZodString>;
3462
3466
  color: z$1.ZodOptional<z$1.ZodEnum<{
3463
3467
  success: "success";
3464
- accent: "accent";
3465
3468
  error: "error";
3469
+ accent: "accent";
3466
3470
  accentDim: "accentDim";
3467
3471
  warning: "warning";
3468
3472
  textMuted: "textMuted";
@@ -3725,8 +3729,8 @@ declare const scoreTraceSchema: z$1.ZodObject<{
3725
3729
  format: z$1.ZodOptional<z$1.ZodEnum<{
3726
3730
  string: "string";
3727
3731
  number: "number";
3728
- duration: "duration";
3729
3732
  json: "json";
3733
+ duration: "duration";
3730
3734
  }>>;
3731
3735
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
3732
3736
  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
@@ -3811,8 +3815,8 @@ declare const caseDetailSchema: z$1.ZodObject<{
3811
3815
  format: z$1.ZodOptional<z$1.ZodEnum<{
3812
3816
  string: "string";
3813
3817
  number: "number";
3814
- duration: "duration";
3815
3818
  json: "json";
3819
+ duration: "duration";
3816
3820
  }>>;
3817
3821
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
3818
3822
  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
@@ -3880,8 +3884,8 @@ declare const caseDetailSchema: z$1.ZodObject<{
3880
3884
  format: z$1.ZodOptional<z$1.ZodEnum<{
3881
3885
  string: "string";
3882
3886
  number: "number";
3883
- duration: "duration";
3884
3887
  json: "json";
3888
+ duration: "duration";
3885
3889
  }>>;
3886
3890
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
3887
3891
  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
@@ -4045,8 +4049,8 @@ type EvalChartAggregate = z$1.infer<typeof evalChartAggregateSchema>;
4045
4049
  */
4046
4050
  declare const evalChartColorSchema: z$1.ZodEnum<{
4047
4051
  success: "success";
4048
- accent: "accent";
4049
4052
  error: "error";
4053
+ accent: "accent";
4050
4054
  accentDim: "accentDim";
4051
4055
  warning: "warning";
4052
4056
  textMuted: "textMuted";
@@ -4074,8 +4078,8 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
4074
4078
  label: z$1.ZodOptional<z$1.ZodString>;
4075
4079
  color: z$1.ZodOptional<z$1.ZodEnum<{
4076
4080
  success: "success";
4077
- accent: "accent";
4078
4081
  error: "error";
4082
+ accent: "accent";
4079
4083
  accentDim: "accentDim";
4080
4084
  warning: "warning";
4081
4085
  textMuted: "textMuted";
@@ -4098,8 +4102,8 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
4098
4102
  label: z$1.ZodOptional<z$1.ZodString>;
4099
4103
  color: z$1.ZodOptional<z$1.ZodEnum<{
4100
4104
  success: "success";
4101
- accent: "accent";
4102
4105
  error: "error";
4106
+ accent: "accent";
4103
4107
  accentDim: "accentDim";
4104
4108
  warning: "warning";
4105
4109
  textMuted: "textMuted";
@@ -4157,8 +4161,8 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
4157
4161
  label: z$1.ZodOptional<z$1.ZodString>;
4158
4162
  color: z$1.ZodOptional<z$1.ZodEnum<{
4159
4163
  success: "success";
4160
- accent: "accent";
4161
4164
  error: "error";
4165
+ accent: "accent";
4162
4166
  accentDim: "accentDim";
4163
4167
  warning: "warning";
4164
4168
  textMuted: "textMuted";
@@ -4181,8 +4185,8 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
4181
4185
  label: z$1.ZodOptional<z$1.ZodString>;
4182
4186
  color: z$1.ZodOptional<z$1.ZodEnum<{
4183
4187
  success: "success";
4184
- accent: "accent";
4185
4188
  error: "error";
4189
+ accent: "accent";
4186
4190
  accentDim: "accentDim";
4187
4191
  warning: "warning";
4188
4192
  textMuted: "textMuted";
@@ -4247,8 +4251,8 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
4247
4251
  label: z$1.ZodOptional<z$1.ZodString>;
4248
4252
  color: z$1.ZodOptional<z$1.ZodEnum<{
4249
4253
  success: "success";
4250
- accent: "accent";
4251
4254
  error: "error";
4255
+ accent: "accent";
4252
4256
  accentDim: "accentDim";
4253
4257
  warning: "warning";
4254
4258
  textMuted: "textMuted";
@@ -4271,8 +4275,8 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
4271
4275
  label: z$1.ZodOptional<z$1.ZodString>;
4272
4276
  color: z$1.ZodOptional<z$1.ZodEnum<{
4273
4277
  success: "success";
4274
- accent: "accent";
4275
4278
  error: "error";
4279
+ accent: "accent";
4276
4280
  accentDim: "accentDim";
4277
4281
  warning: "warning";
4278
4282
  textMuted: "textMuted";
@@ -4327,15 +4331,16 @@ declare const runManifestSchema: z$1.ZodObject<{
4327
4331
  pending: "pending";
4328
4332
  completed: "completed";
4329
4333
  }>;
4334
+ temporary: z$1.ZodDefault<z$1.ZodOptional<z$1.ZodBoolean>>;
4330
4335
  startedAt: z$1.ZodString;
4331
4336
  endedAt: z$1.ZodNullable<z$1.ZodString>;
4332
4337
  commitSha: z$1.ZodDefault<z$1.ZodOptional<z$1.ZodNullable<z$1.ZodString>>>;
4333
4338
  evalSourceFingerprints: z$1.ZodDefault<z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodString>>>;
4334
4339
  target: z$1.ZodObject<{
4335
4340
  mode: z$1.ZodEnum<{
4341
+ caseIds: "caseIds";
4336
4342
  all: "all";
4337
4343
  evalIds: "evalIds";
4338
- caseIds: "caseIds";
4339
4344
  }>;
4340
4345
  evalKeys: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
4341
4346
  files: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
@@ -4581,8 +4586,8 @@ declare const llmCallMetricFormatSchema: z$1.ZodEnum<{
4581
4586
  string: "string";
4582
4587
  number: "number";
4583
4588
  boolean: "boolean";
4584
- duration: "duration";
4585
4589
  json: "json";
4590
+ duration: "duration";
4586
4591
  }>;
4587
4592
  /** Render format applied to an LLM-call metric value. */
4588
4593
  type LlmCallMetricFormat = z$1.infer<typeof llmCallMetricFormatSchema>;
@@ -4591,8 +4596,8 @@ declare const apiCallMetricFormatSchema: z$1.ZodEnum<{
4591
4596
  string: "string";
4592
4597
  number: "number";
4593
4598
  boolean: "boolean";
4594
- duration: "duration";
4595
4599
  json: "json";
4600
+ duration: "duration";
4596
4601
  }>;
4597
4602
  /** Render format applied to an API-call metric value. */
4598
4603
  type ApiCallMetricFormat = z$1.infer<typeof apiCallMetricFormatSchema>;
@@ -4661,8 +4666,8 @@ declare const llmCallMetricSchema: z$1.ZodObject<{
4661
4666
  string: "string";
4662
4667
  number: "number";
4663
4668
  boolean: "boolean";
4664
- duration: "duration";
4665
4669
  json: "json";
4670
+ duration: "duration";
4666
4671
  }>>;
4667
4672
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
4668
4673
  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
@@ -4690,8 +4695,8 @@ declare const apiCallMetricSchema: z$1.ZodObject<{
4690
4695
  string: "string";
4691
4696
  number: "number";
4692
4697
  boolean: "boolean";
4693
- duration: "duration";
4694
4698
  json: "json";
4699
+ duration: "duration";
4695
4700
  }>>;
4696
4701
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
4697
4702
  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
@@ -4804,8 +4809,8 @@ declare const llmCallsConfigSchema: z$1.ZodObject<{
4804
4809
  string: "string";
4805
4810
  number: "number";
4806
4811
  boolean: "boolean";
4807
- duration: "duration";
4808
4812
  json: "json";
4813
+ duration: "duration";
4809
4814
  }>>;
4810
4815
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
4811
4816
  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
@@ -4840,8 +4845,8 @@ declare const apiCallsConfigSchema: z$1.ZodObject<{
4840
4845
  string: "string";
4841
4846
  number: "number";
4842
4847
  boolean: "boolean";
4843
- duration: "duration";
4844
4848
  json: "json";
4849
+ duration: "duration";
4845
4850
  }>>;
4846
4851
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
4847
4852
  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
@@ -5142,8 +5147,8 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
5142
5147
  format: z$1.ZodOptional<z$1.ZodEnum<{
5143
5148
  string: "string";
5144
5149
  number: "number";
5145
- duration: "duration";
5146
5150
  json: "json";
5151
+ duration: "duration";
5147
5152
  }>>;
5148
5153
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
5149
5154
  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
@@ -5191,12 +5196,12 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
5191
5196
  number: "number";
5192
5197
  boolean: "boolean";
5193
5198
  file: "file";
5194
- duration: "duration";
5195
5199
  markdown: "markdown";
5196
5200
  json: "json";
5197
5201
  image: "image";
5198
5202
  audio: "audio";
5199
5203
  video: "video";
5204
+ duration: "duration";
5200
5205
  percent: "percent";
5201
5206
  passFail: "passFail";
5202
5207
  stars: "stars";
@@ -5255,8 +5260,8 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
5255
5260
  string: "string";
5256
5261
  number: "number";
5257
5262
  boolean: "boolean";
5258
- duration: "duration";
5259
5263
  json: "json";
5264
+ duration: "duration";
5260
5265
  }>>;
5261
5266
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
5262
5267
  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
@@ -5300,8 +5305,8 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
5300
5305
  string: "string";
5301
5306
  number: "number";
5302
5307
  boolean: "boolean";
5303
- duration: "duration";
5304
5308
  json: "json";
5309
+ duration: "duration";
5305
5310
  }>>;
5306
5311
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
5307
5312
  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
@@ -6084,9 +6089,9 @@ type ConfigReloadState = z$1.infer<typeof configReloadStateSchema>;
6084
6089
  declare const createRunRequestSchema: z$1.ZodObject<{
6085
6090
  target: z$1.ZodObject<{
6086
6091
  mode: z$1.ZodEnum<{
6092
+ caseIds: "caseIds";
6087
6093
  all: "all";
6088
6094
  evalIds: "evalIds";
6089
- caseIds: "caseIds";
6090
6095
  }>;
6091
6096
  evalKeys: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
6092
6097
  files: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
@@ -6094,6 +6099,7 @@ declare const createRunRequestSchema: z$1.ZodObject<{
6094
6099
  caseIds: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
6095
6100
  }, z$1.core.$strip>;
6096
6101
  trials: z$1.ZodNumber;
6102
+ temporary: z$1.ZodOptional<z$1.ZodBoolean>;
6097
6103
  cache: z$1.ZodOptional<z$1.ZodObject<{
6098
6104
  mode: z$1.ZodDefault<z$1.ZodEnum<{
6099
6105
  refresh: "refresh";
package/dist/index.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { $ as apiCallMetricSchema, $n as getCurrentScope, $t as cacheDebugKeyEntrySchema, A as createRunRequestSchema, An as repoFileRefSchema, At as runLogEntrySchema, B as getNestedAttribute, Bn as deserializeCacheValue, Bt as manualInputNumberFieldSchema, Cn as cellValueSchema, Ct as caseRowSchema, Dn as fileRefSchema, Dt as evalStatItemSchema, En as columnKindSchema, Et as evalStatAggregateSchema, F as extractApiCalls, Fn as evalSpan, Ft as manualInputBooleanFieldSchema, G as deriveStatusFromChildStatuses, Gn as readManualInputFile, Gt as evalChartAxisSchema, H as getEvalDisplayStatus, Hn as serializeCacheValue, Ht as manualInputSelectOptionSchema, I as extractLlmCalls, In as evalTracer, It as manualInputDescriptorSchema, J as DEFAULT_API_CALLS_CONFIG, Jn as advanceEvalTime, Jt as evalChartConfigSchema, K as runManifestSchema, Kn as evalExpect, Kt as evalChartBuiltinMetricSchema, L as simulateLlmCallCost, Ln as hashCacheKey, Lt as manualInputFieldDescriptorSchema, M as sseEnvelopeSchema, Mn as z, Mt as runLogLocationSchema, N as extractCacheEntries, Nn as buildTraceTree, Nt as runLogPhaseSchema, O as configReloadStateSchema, On as jsonCellSchema, Ot as evalStatsConfigSchema, P as extractCacheHits, Pn as captureEvalSpanError, Pt as scoreTraceSchema, Q as apiCallMetricPlacementSchema, Qn as evalLog, Qt as evalChartsConfigSchema, R as simulateTokenAllocation, Rn as hashCacheKeySync, Rt as manualInputJsonFieldSchema, Sn as traceSpanWarningSchema, St as caseDetailSchema, Tn as columnFormatSchema, Tt as evalFreshnessStatusSchema, U as deriveScopedSummaryFromCases, Un as repoFile, Ut as manualInputTextFieldSchema, V as getEvalTitle, Vn as serializeCacheRecording, Vt as manualInputSelectFieldSchema, W as deriveStatusFromCaseRows, Wn as manualInputFileValueSchema, Wt as evalChartAggregateSchema, X as agentEvalsConfigSchema, Xt as evalChartTooltipExtraSchema, Y as DEFAULT_LLM_CALLS_CONFIG, Yn as appendToEvalOutput, Yt as evalChartMetricSchema, Z as apiCallMetricFormatSchema, Zn as evalAssert, Zt as evalChartTypeSchema, _n as traceDisplayConfigSchema, _t as buildCaseKey, an as cacheModeSchema, ar as nextEvalId, at as llmCallCostCurrencySchema, bn as traceSpanKindSchema, bt as getCaseRowEvalKey, cn as cacheRecordingSchema, cr as runInExistingEvalScope, ct as llmCallMetricSchema, dn as spanCacheOptionsSchema, dr as startEvalBackgroundJob, dt as llmCallsConfigSchema, en as cacheDebugKeyFileSchema, er as getEvalCaseInput, et as apiCallsConfigSchema, fn as traceCacheRefSchema, fr as defineEval, ft as removeDefaultConfigSchema, gn as traceAttributeDisplaySchema, gt as trialSelectionModeSchema, hn as traceAttributeDisplayPlacementSchema, ht as runLogsConfigSchema, in as cacheListItemSchema, ir as mergeEvalOutput, it as evalDeriveConfigSchema, j as updateManualScoreRequestSchema, jn as runArtifactRefSchema, jt as runLogLevelSchema, k as configReloadStatusSchema, kn as numberDisplayOptionsSchema, kt as evalSummarySchema, ln as cacheStatusSchema, lr as setEvalOutput, lt as llmCallPricingRateSchema, mn as traceAttributeDisplayInputSchema, mt as resolveLlmCallsConfig, nn as cacheEntryWithDebugKeySchema, nr as incrementEvalOutput, nt as evalColumnOverrideSchema, on as cacheOperationTypeSchema, or as runInEvalRuntimeScope, ot as llmCallMetricFormatSchema, pn as traceAttributeDisplayFormatSchema, pr as getEvalRegistry, pt as resolveApiCallsConfig, q as runSummarySchema, qn as EvalAssertionError, qt as evalChartColorSchema, rn as cacheFileSchema, rr as isInEvalScope, rt as evalColumnsSchema, sn as cacheRecordingOpSchema, sr as runInEvalScope, st as llmCallMetricPlacementSchema, tn as cacheEntrySchema, tr as getEvalStartTime, tt as defaultConfigKeySchema, un as serializedCacheSpanSchema, ur as setScopeCacheContext, ut as llmCallPricingSchema, vn as traceDisplayInputConfigSchema, vt as buildEvalKey, wn as columnDefSchema, wt as discoveryIssueSchema, xn as traceSpanSchema, xt as assertionFailureSchema, yn as traceSpanErrorSchema, yt as getCaseRowCaseKey, z as applyDerivedCallAttributes, zn as deserializeCacheRecording, zt as manualInputMultilineFieldSchema } from "./runOrchestration-V1TxX8es.mjs";
2
- import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-Dg3abrOv.mjs";
3
- import "./src-DBypR4TV.mjs";
1
+ import { $ as apiCallMetricFormatSchema, $n as evalAssert, $t as evalChartTypeSchema, A as configReloadStateSchema, An as jsonCellSchema, At as evalStatsConfigSchema, B as simulateTokenAllocation, Bn as hashCacheKeySync, Bt as manualInputJsonFieldSchema, Cn as traceSpanSchema, Ct as assertionFailureSchema, Dn as columnFormatSchema, Dt as evalFreshnessStatusSchema, En as columnDefSchema, Et as discoveryIssueSchema, F as extractCacheEntries, Fn as buildTraceTree, Ft as runLogPhaseSchema, G as deriveScopedSummaryFromCases, Gn as repoFile, Gt as manualInputTextFieldSchema, H as getNestedAttribute, Hn as deserializeCacheValue, Ht as manualInputNumberFieldSchema, I as extractCacheHits, In as captureEvalSpanError, It as scoreTraceSchema, J as runManifestSchema, Jn as evalExpect, Jt as evalChartBuiltinMetricSchema, K as deriveStatusFromCaseRows, Kn as manualInputFileValueSchema, Kt as evalChartAggregateSchema, L as extractApiCalls, Ln as evalSpan, Lt as manualInputBooleanFieldSchema, M as createRunRequestSchema, Mn as repoFileRefSchema, Mt as runLogEntrySchema, N as updateManualScoreRequestSchema, Nn as runArtifactRefSchema, Nt as runLogLevelSchema, On as columnKindSchema, Ot as evalStatAggregateSchema, P as sseEnvelopeSchema, Pn as z, Pt as runLogLocationSchema, Q as agentEvalsConfigSchema, Qt as evalChartTooltipExtraSchema, R as extractLlmCalls, Rn as evalTracer, Rt as manualInputDescriptorSchema, Sn as traceSpanKindSchema, St as getCaseRowEvalKey, Tn as cellValueSchema, Tt as caseRowSchema, U as getEvalTitle, Un as serializeCacheRecording, Ut as manualInputSelectFieldSchema, V as applyDerivedCallAttributes, Vn as deserializeCacheRecording, Vt as manualInputMultilineFieldSchema, W as getEvalDisplayStatus, Wn as serializeCacheValue, Wt as manualInputSelectOptionSchema, X as DEFAULT_API_CALLS_CONFIG, Xn as advanceEvalTime, Xt as evalChartConfigSchema, Y as runSummarySchema, Yn as EvalAssertionError, Yt as evalChartColorSchema, Z as DEFAULT_LLM_CALLS_CONFIG, Zn as appendToEvalOutput, Zt as evalChartMetricSchema, _n as traceAttributeDisplayPlacementSchema, _t as runLogsConfigSchema, an as cacheFileSchema, ar as isInEvalScope, at as evalColumnsSchema, bn as traceDisplayInputConfigSchema, bt as buildEvalKey, cn as cacheOperationTypeSchema, cr as runInEvalRuntimeScope, ct as llmCallMetricFormatSchema, dn as cacheStatusSchema, dr as setEvalOutput, dt as llmCallPricingRateSchema, en as evalChartsConfigSchema, er as evalLog, et as apiCallMetricPlacementSchema, fn as serializedCacheSpanSchema, fr as setScopeCacheContext, ft as llmCallPricingSchema, gn as traceAttributeDisplayInputSchema, gt as resolveLlmCallsConfig, hn as traceAttributeDisplayFormatSchema, hr as getEvalRegistry, ht as resolveApiCallsConfig, in as cacheEntryWithDebugKeySchema, ir as incrementEvalOutput, it as evalColumnOverrideSchema, j as configReloadStatusSchema, jn as numberDisplayOptionsSchema, jt as evalSummarySchema, kn as fileRefSchema, kt as evalStatItemSchema, ln as cacheRecordingOpSchema, lr as runInEvalScope, lt as llmCallMetricPlacementSchema, mn as traceCacheRefSchema, mr as defineEval, mt as removeDefaultConfigSchema, nn as cacheDebugKeyFileSchema, nr as getEvalCaseInput, nt as apiCallsConfigSchema, on as cacheListItemSchema, or as mergeEvalOutput, ot as evalDeriveConfigSchema, pn as spanCacheOptionsSchema, pr as startEvalBackgroundJob, pt as llmCallsConfigSchema, q as deriveStatusFromChildStatuses, qn as readManualInputFile, qt as evalChartAxisSchema, rn as cacheEntrySchema, rr as getEvalStartTime, rt as defaultConfigKeySchema, sn as cacheModeSchema, sr as nextEvalId, st as llmCallCostCurrencySchema, tn as cacheDebugKeyEntrySchema, tr as getCurrentScope, tt as apiCallMetricSchema, un as cacheRecordingSchema, ur as runInExistingEvalScope, ut as llmCallMetricSchema, vn as traceAttributeDisplaySchema, vt as trialSelectionModeSchema, wn as traceSpanWarningSchema, wt as caseDetailSchema, xn as traceSpanErrorSchema, xt as getCaseRowCaseKey, yn as traceDisplayConfigSchema, yt as buildCaseKey, z as simulateLlmCallCost, zn as hashCacheKey, zt as manualInputFieldDescriptorSchema } from "./runOrchestration-C4o5TcIu.mjs";
2
+ import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-CwGcJYWe.mjs";
3
+ import "./src--13_4uDG.mjs";
4
4
  export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, advanceEvalTime, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, applyDerivedCallAttributes, assertionFailureSchema, buildCaseKey, buildEvalKey, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, cleanupStagedManualInputFiles, columnDefSchema, columnFormatSchema, columnKindSchema, configReloadStateSchema, configReloadStatusSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, discoveryIssueSchema, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalColumnOverrideSchema, evalColumnsSchema, evalDeriveConfigSchema, evalExpect, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCaseRowCaseKey, getCaseRowEvalKey, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalStartTime, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, jsonCellSchema, llmCallCostCurrencySchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingRateSchema, llmCallPricingSchema, llmCallsConfigSchema, manualInputBooleanFieldSchema, manualInputDescriptorSchema, manualInputFieldDescriptorSchema, manualInputFileValueSchema, manualInputJsonFieldSchema, manualInputMultilineFieldSchema, manualInputNumberFieldSchema, manualInputSelectFieldSchema, manualInputSelectOptionSchema, manualInputTextFieldSchema, materializeManualInputFiles, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, readManualInputFile, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, spanCacheOptionsSchema, sseEnvelopeSchema, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };