@ls-stack/agent-eval 0.28.0 → 0.29.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-mBbAN-Gt.mjs → app-D6-msfKP.mjs} +33 -6
- package/dist/apps/web/dist/assets/index-BCr6J8Uj.js +118 -0
- package/dist/apps/web/dist/assets/index-DjUTm3M-.css +1 -0
- package/dist/apps/web/dist/index.html +2 -2
- package/dist/bin.mjs +1 -1
- package/dist/{cli-BQwRbqsL.mjs → cli-CIc_gBNM.mjs} +893 -166
- package/dist/index.d.mts +5758 -3550
- package/dist/index.mjs +4 -4
- package/dist/runChild.mjs +4 -2
- package/dist/{runOrchestration-ClWYWPen.mjs → runOrchestration-CIARrLs6.mjs} +619 -25
- package/dist/{runner-DbVB66h9.mjs → runner-1F8MeY5V.mjs} +2 -2
- package/dist/{runner-BQn_xf36.mjs → runner-Bq1f9B9d.mjs} +1 -1
- package/dist/src-CkWT1iSu.mjs +3 -0
- package/package.json +4 -33
- package/skills/agent-eval/SKILL.md +55 -3
- package/dist/apps/web/dist/assets/index-8VE7b6RK.css +0 -1
- package/dist/apps/web/dist/assets/index-Czer_MdN.js +0 -118
- package/dist/src-CuirVcPY.mjs +0 -3
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
import { createRequire, registerHooks } from "node:module";
|
|
2
|
-
import { createHash } from "node:crypto";
|
|
2
|
+
import { createHash, randomUUID } from "node:crypto";
|
|
3
3
|
import { mkdir, readFile, readdir, rename, rm, stat, writeFile } from "node:fs/promises";
|
|
4
4
|
import { extname, isAbsolute, join, relative, resolve } from "node:path";
|
|
5
5
|
import { formatWithOptions, isDeepStrictEqual, stripVTControlCharacters } from "node:util";
|
|
6
6
|
import { AsyncLocalStorage } from "node:async_hooks";
|
|
7
7
|
import { z, z as z$1 } from "zod/v4";
|
|
8
|
-
import { Buffer as Buffer$1 } from "node:buffer";
|
|
8
|
+
import { Blob as Blob$1, Buffer as Buffer$1, File as File$1 } from "node:buffer";
|
|
9
9
|
import { gunzipSync, gzipSync } from "node:zlib";
|
|
10
10
|
import { getCompositeKey } from "@ls-stack/utils/getCompositeKey";
|
|
11
11
|
import { existsSync } from "node:fs";
|
|
12
|
-
import { resultify } from "t-result";
|
|
12
|
+
import { Result, resultify } from "t-result";
|
|
13
13
|
import { fileURLToPath, pathToFileURL } from "node:url";
|
|
14
14
|
//#region ../sdk/src/defineEval.ts
|
|
15
15
|
const evalRegistry = /* @__PURE__ */ new Map();
|
|
@@ -787,6 +787,67 @@ function evalExpect(value) {
|
|
|
787
787
|
return new EvalExpectationImpl(value, false);
|
|
788
788
|
}
|
|
789
789
|
//#endregion
|
|
790
|
+
//#region ../sdk/src/manualInputFile.ts
|
|
791
|
+
/**
|
|
792
|
+
* Zod schema describing one file uploaded through the manual-input modal.
|
|
793
|
+
*
|
|
794
|
+
* Use this as the field type on your `manualInput.schema` whenever you mark
|
|
795
|
+
* a field with `{ asFile: true }` in `manualInput.fields`. The UI / CLI stages
|
|
796
|
+
* the selected file on disk, the runner materializes it into the run artifacts
|
|
797
|
+
* directory, and the server validates this JSON metadata against the schema
|
|
798
|
+
* before flowing it into the case input.
|
|
799
|
+
*
|
|
800
|
+
* @example
|
|
801
|
+
* ```ts
|
|
802
|
+
* const schema = z.object({
|
|
803
|
+
* image: manualInputFileValueSchema,
|
|
804
|
+
* note: z.string().optional(),
|
|
805
|
+
* });
|
|
806
|
+
*
|
|
807
|
+
* defineEval({
|
|
808
|
+
* id: 'image-analyzer',
|
|
809
|
+
* manualInput: {
|
|
810
|
+
* schema,
|
|
811
|
+
* fields: { image: { asFile: true, accept: 'image/*' } },
|
|
812
|
+
* },
|
|
813
|
+
* // ...
|
|
814
|
+
* });
|
|
815
|
+
* ```
|
|
816
|
+
*/
|
|
817
|
+
const manualInputFileValueSchema = z.object({
|
|
818
|
+
name: z.string(),
|
|
819
|
+
mimeType: z.string(),
|
|
820
|
+
sizeBytes: z.number().int().nonnegative(),
|
|
821
|
+
sha256: z.string().regex(/^[a-f0-9]{64}$/),
|
|
822
|
+
path: z.string().min(1)
|
|
823
|
+
});
|
|
824
|
+
/**
|
|
825
|
+
* Read a manual-input file artifact from disk and expose common byte, Blob,
|
|
826
|
+
* File, text, and JSON views for eval code.
|
|
827
|
+
*
|
|
828
|
+
* @param value Manual-input file metadata received by an eval.
|
|
829
|
+
* @param options.cwd Directory used to resolve relative paths. Defaults to `process.cwd()`.
|
|
830
|
+
* @returns File bytes plus convenience views for common file-processing flows.
|
|
831
|
+
*/
|
|
832
|
+
async function readManualInputFile(value, options = {}) {
|
|
833
|
+
const absolutePath = resolve(options.cwd ?? process.cwd(), value.path);
|
|
834
|
+
const bytes = new Uint8Array(await readFile(absolutePath));
|
|
835
|
+
const arrayBuffer = bytes.buffer.slice(bytes.byteOffset, bytes.byteOffset + bytes.byteLength);
|
|
836
|
+
const blob = new Blob$1([bytes], { type: value.mimeType });
|
|
837
|
+
return {
|
|
838
|
+
value,
|
|
839
|
+
absolutePath,
|
|
840
|
+
bytes,
|
|
841
|
+
arrayBuffer,
|
|
842
|
+
blob,
|
|
843
|
+
file: new File$1([bytes], value.name, { type: value.mimeType }),
|
|
844
|
+
text: async () => await blob.text(),
|
|
845
|
+
json: async () => {
|
|
846
|
+
return JSON.parse(await blob.text());
|
|
847
|
+
}
|
|
848
|
+
};
|
|
849
|
+
}
|
|
850
|
+
//#endregion
|
|
790
851
|
//#region ../sdk/src/repoFile.ts
|
|
791
852
|
/**
|
|
792
853
|
* Create a file reference that can be emitted via `setEvalOutput(...)` and rendered
|
|
@@ -2688,6 +2749,11 @@ const evalChartConfigSchema = z.object({
|
|
|
2688
2749
|
* the rendered history window.
|
|
2689
2750
|
*/
|
|
2690
2751
|
hideIfNoValue: z.boolean().optional(),
|
|
2752
|
+
/**
|
|
2753
|
+
* Drop consecutive history points whose plotted metrics and tooltip extras
|
|
2754
|
+
* have the same values as the previous kept point.
|
|
2755
|
+
*/
|
|
2756
|
+
dedupeConsecutiveValues: z.boolean().optional(),
|
|
2691
2757
|
type: evalChartTypeSchema,
|
|
2692
2758
|
/** At least one series must be declared. */
|
|
2693
2759
|
metrics: z.array(evalChartMetricSchema).min(1),
|
|
@@ -2713,6 +2779,122 @@ const evalChartConfigSchema = z.object({
|
|
|
2713
2779
|
*/
|
|
2714
2780
|
const evalChartsConfigSchema = z.array(evalChartConfigSchema);
|
|
2715
2781
|
//#endregion
|
|
2782
|
+
//#region ../shared/src/schemas/manualInput.ts
|
|
2783
|
+
/**
|
|
2784
|
+
* Common metadata shared by every manual-input field descriptor exposed to
|
|
2785
|
+
* the web UI. The runner builds these from the eval's authored Zod schema and
|
|
2786
|
+
* any per-field overrides, so the client never needs the schema itself.
|
|
2787
|
+
*/
|
|
2788
|
+
const manualInputFieldBaseSchema = z.object({
|
|
2789
|
+
/** Top-level key on the eval input object that this field writes to. */
|
|
2790
|
+
key: z.string(),
|
|
2791
|
+
/** Human-readable label rendered next to the field in the modal. */
|
|
2792
|
+
label: z.string(),
|
|
2793
|
+
/** Optional helper text rendered under the label. */
|
|
2794
|
+
description: z.string().optional(),
|
|
2795
|
+
/** Optional placeholder rendered inside the input element. */
|
|
2796
|
+
placeholder: z.string().optional(),
|
|
2797
|
+
/** Whether the field must be filled before the run can be submitted. */
|
|
2798
|
+
required: z.boolean(),
|
|
2799
|
+
/**
|
|
2800
|
+
* Default value used to prefill the field. Type matches the underlying
|
|
2801
|
+
* widget kind (`string` for text/multiline/select, `number` for number,
|
|
2802
|
+
* `boolean` for boolean, JSON-serialisable for `json`).
|
|
2803
|
+
*/
|
|
2804
|
+
defaultValue: z.unknown().optional()
|
|
2805
|
+
});
|
|
2806
|
+
/** One option rendered by the `select` widget. */
|
|
2807
|
+
const manualInputSelectOptionSchema = z.object({
|
|
2808
|
+
value: z.string(),
|
|
2809
|
+
label: z.string()
|
|
2810
|
+
});
|
|
2811
|
+
/** Single line text widget descriptor. */
|
|
2812
|
+
const manualInputTextFieldSchema = manualInputFieldBaseSchema.extend({
|
|
2813
|
+
kind: z.literal("text"),
|
|
2814
|
+
/** Optional minimum character length enforced client-side. */
|
|
2815
|
+
minLength: z.number().int().min(0).optional(),
|
|
2816
|
+
/** Optional maximum character length enforced client-side. */
|
|
2817
|
+
maxLength: z.number().int().min(0).optional()
|
|
2818
|
+
});
|
|
2819
|
+
/** Multi-line textarea widget descriptor. */
|
|
2820
|
+
const manualInputMultilineFieldSchema = manualInputFieldBaseSchema.extend({
|
|
2821
|
+
kind: z.literal("multiline"),
|
|
2822
|
+
/** Optional minimum character length enforced client-side. */
|
|
2823
|
+
minLength: z.number().int().min(0).optional(),
|
|
2824
|
+
/** Optional maximum character length enforced client-side. */
|
|
2825
|
+
maxLength: z.number().int().min(0).optional(),
|
|
2826
|
+
/** Suggested number of visible textarea rows; UI may clamp this. */
|
|
2827
|
+
rows: z.number().int().min(1).optional()
|
|
2828
|
+
});
|
|
2829
|
+
/** Numeric input widget descriptor. */
|
|
2830
|
+
const manualInputNumberFieldSchema = manualInputFieldBaseSchema.extend({
|
|
2831
|
+
kind: z.literal("number"),
|
|
2832
|
+
/** Optional inclusive lower bound. */
|
|
2833
|
+
min: z.number().optional(),
|
|
2834
|
+
/** Optional inclusive upper bound. */
|
|
2835
|
+
max: z.number().optional(),
|
|
2836
|
+
/** Optional UI step increment. */
|
|
2837
|
+
step: z.number().positive().optional(),
|
|
2838
|
+
/** Whether the value must be an integer. */
|
|
2839
|
+
integer: z.boolean().optional()
|
|
2840
|
+
});
|
|
2841
|
+
/** Boolean checkbox/toggle widget descriptor. */
|
|
2842
|
+
const manualInputBooleanFieldSchema = manualInputFieldBaseSchema.extend({ kind: z.literal("boolean") });
|
|
2843
|
+
/** Single-select dropdown widget descriptor. */
|
|
2844
|
+
const manualInputSelectFieldSchema = manualInputFieldBaseSchema.extend({
|
|
2845
|
+
kind: z.literal("select"),
|
|
2846
|
+
options: z.array(manualInputSelectOptionSchema)
|
|
2847
|
+
});
|
|
2848
|
+
/** JSON textarea widget descriptor used for nested objects, arrays, and unions. */
|
|
2849
|
+
const manualInputJsonFieldSchema = manualInputFieldBaseSchema.extend({
|
|
2850
|
+
kind: z.literal("json"),
|
|
2851
|
+
/** Suggested number of visible textarea rows; UI may clamp this. */
|
|
2852
|
+
rows: z.number().int().min(1).optional()
|
|
2853
|
+
});
|
|
2854
|
+
/**
|
|
2855
|
+
* File / image upload widget descriptor. The widget supports clicking to
|
|
2856
|
+
* pick a file, drag-and-drop onto the dropzone, and pasting an image from
|
|
2857
|
+
* the system clipboard. The submitted value references a staged file artifact.
|
|
2858
|
+
*/
|
|
2859
|
+
const manualInputFileFieldSchema = manualInputFieldBaseSchema.extend({
|
|
2860
|
+
kind: z.literal("file"),
|
|
2861
|
+
/**
|
|
2862
|
+
* Browser `accept` attribute (e.g. `image/*`, `image/png,image/jpeg`,
|
|
2863
|
+
* `.pdf`). When omitted the picker accepts any file type.
|
|
2864
|
+
*/
|
|
2865
|
+
accept: z.string().optional(),
|
|
2866
|
+
/** Optional client-side maximum file size in bytes. */
|
|
2867
|
+
maxSizeBytes: z.number().int().positive().optional()
|
|
2868
|
+
});
|
|
2869
|
+
/**
|
|
2870
|
+
* Discriminated union of all supported manual-input widget kinds. The web UI
|
|
2871
|
+
* dispatches to the matching field component based on `kind`.
|
|
2872
|
+
*/
|
|
2873
|
+
const manualInputFieldDescriptorSchema = z.discriminatedUnion("kind", [
|
|
2874
|
+
manualInputTextFieldSchema,
|
|
2875
|
+
manualInputMultilineFieldSchema,
|
|
2876
|
+
manualInputNumberFieldSchema,
|
|
2877
|
+
manualInputBooleanFieldSchema,
|
|
2878
|
+
manualInputSelectFieldSchema,
|
|
2879
|
+
manualInputJsonFieldSchema,
|
|
2880
|
+
manualInputFileFieldSchema
|
|
2881
|
+
]);
|
|
2882
|
+
/**
|
|
2883
|
+
* Wire-format descriptor attached to an `EvalSummary` when the eval declares
|
|
2884
|
+
* `manualInput`. Carries the ordered list of fields the modal renders and
|
|
2885
|
+
* basic context shown in the modal header.
|
|
2886
|
+
*/
|
|
2887
|
+
const manualInputDescriptorSchema = z.object({
|
|
2888
|
+
/** Optional title shown in the modal header. Defaults to the eval title. */
|
|
2889
|
+
title: z.string().optional(),
|
|
2890
|
+
/** Optional helper text shown above the form. */
|
|
2891
|
+
description: z.string().optional(),
|
|
2892
|
+
/** Optional submit button label. Defaults to `Run`. */
|
|
2893
|
+
submitLabel: z.string().optional(),
|
|
2894
|
+
/** Ordered list of fields rendered in the modal. */
|
|
2895
|
+
fields: z.array(manualInputFieldDescriptorSchema)
|
|
2896
|
+
});
|
|
2897
|
+
//#endregion
|
|
2716
2898
|
//#region ../shared/src/schemas/eval.ts
|
|
2717
2899
|
/** Freshness signal derived from the latest relevant run plus git state. */
|
|
2718
2900
|
const evalFreshnessStatusSchema = z.enum([
|
|
@@ -2810,7 +2992,13 @@ const evalSummarySchema = z.object({
|
|
|
2810
2992
|
* Ordered per-eval history chart configuration for the EvalCard. Opt-in:
|
|
2811
2993
|
* when omitted or empty, the UI renders no history chart at all.
|
|
2812
2994
|
*/
|
|
2813
|
-
charts: evalChartsConfigSchema.optional()
|
|
2995
|
+
charts: evalChartsConfigSchema.optional(),
|
|
2996
|
+
/**
|
|
2997
|
+
* Manual-input form descriptor when the eval declares `manualInput`. The
|
|
2998
|
+
* web UI renders these fields in a modal before kicking off a run; the
|
|
2999
|
+
* runner consumes the validated values as the case input.
|
|
3000
|
+
*/
|
|
3001
|
+
manualInput: manualInputDescriptorSchema.optional()
|
|
2814
3002
|
});
|
|
2815
3003
|
/** Schema for one case row in an eval run result table. */
|
|
2816
3004
|
const caseRowSchema = z.object({
|
|
@@ -2950,7 +3138,7 @@ const caseDetailSchema = z.object({
|
|
|
2950
3138
|
});
|
|
2951
3139
|
/** Schema for discovery problems that should be shown before running evals. */
|
|
2952
3140
|
const discoveryIssueSchema = z.object({
|
|
2953
|
-
type: z.enum(["duplicate-eval-id"]),
|
|
3141
|
+
type: z.enum(["duplicate-eval-id", "manual-input-with-cases"]),
|
|
2954
3142
|
severity: z.enum(["error"]),
|
|
2955
3143
|
filePath: z.string(),
|
|
2956
3144
|
evalId: z.string(),
|
|
@@ -4128,6 +4316,7 @@ function isCacheHitEntry(entry) {
|
|
|
4128
4316
|
}
|
|
4129
4317
|
z.enum([
|
|
4130
4318
|
"discovery.updated",
|
|
4319
|
+
"config.reload",
|
|
4131
4320
|
"run.started",
|
|
4132
4321
|
"run.summary",
|
|
4133
4322
|
"case.started",
|
|
@@ -4147,6 +4336,19 @@ const sseEnvelopeSchema = z.object({
|
|
|
4147
4336
|
});
|
|
4148
4337
|
//#endregion
|
|
4149
4338
|
//#region ../shared/src/schemas/api.ts
|
|
4339
|
+
/** Lifecycle state for an app config reload triggered by `agent-evals.config.ts`. */
|
|
4340
|
+
const configReloadStatusSchema = z.enum([
|
|
4341
|
+
"idle",
|
|
4342
|
+
"pending",
|
|
4343
|
+
"reloading"
|
|
4344
|
+
]);
|
|
4345
|
+
/** UI/API-visible state for config reloads in `agent-evals app`. */
|
|
4346
|
+
const configReloadStateSchema = z.object({
|
|
4347
|
+
status: configReloadStatusSchema,
|
|
4348
|
+
activeRunCount: z.number().int().min(0),
|
|
4349
|
+
lastChangedAt: z.string().nullable(),
|
|
4350
|
+
lastReloadedAt: z.string().nullable()
|
|
4351
|
+
});
|
|
4150
4352
|
/** Schema for the API request that starts a new eval run. */
|
|
4151
4353
|
const createRunRequestSchema = z.object({
|
|
4152
4354
|
target: z.object({
|
|
@@ -4167,7 +4369,14 @@ const createRunRequestSchema = z.object({
|
|
|
4167
4369
|
* Optional cache controls for the run. When omitted, the cache is used in
|
|
4168
4370
|
* its default read-through / write-on-miss mode.
|
|
4169
4371
|
*/
|
|
4170
|
-
cache: z.object({ mode: cacheModeSchema.default("use") }).optional()
|
|
4372
|
+
cache: z.object({ mode: cacheModeSchema.default("use") }).optional(),
|
|
4373
|
+
/**
|
|
4374
|
+
* Manual-input values keyed by eval `key` (workspace-relative file path
|
|
4375
|
+
* plus authored eval id). Required for any targeted eval that declares
|
|
4376
|
+
* `manualInput` in its definition; the server validates each entry against
|
|
4377
|
+
* the eval's authored Zod schema before starting the run.
|
|
4378
|
+
*/
|
|
4379
|
+
manualInputs: z.record(z.string(), z.unknown()).optional()
|
|
4171
4380
|
});
|
|
4172
4381
|
/** Schema for updating a UI-authored manual score on one persisted case. */
|
|
4173
4382
|
const updateManualScoreRequestSchema = z.object({ value: z.number().min(0).max(1).nullable() });
|
|
@@ -4819,7 +5028,9 @@ async function loadConfig() {
|
|
|
4819
5028
|
const configPath = resolve(process.cwd(), "agent-evals.config.ts");
|
|
4820
5029
|
if (!existsSync(configPath)) return defaultConfig;
|
|
4821
5030
|
try {
|
|
4822
|
-
const
|
|
5031
|
+
const configUrl = pathToFileURL(configPath);
|
|
5032
|
+
configUrl.searchParams.set("v", randomUUID());
|
|
5033
|
+
const imported = await import(configUrl.href);
|
|
4823
5034
|
const configModule = configModuleSchema.parse(imported);
|
|
4824
5035
|
const userConfig = configModule.default ?? configModule.config;
|
|
4825
5036
|
if (!userConfig) return defaultConfig;
|
|
@@ -4997,6 +5208,7 @@ function appendDefaultCharts(params) {
|
|
|
4997
5208
|
if (activeKeys.has("costUsd")) defaults.push({
|
|
4998
5209
|
heading: "LLM Cost",
|
|
4999
5210
|
hideIfNoValue: true,
|
|
5211
|
+
dedupeConsecutiveValues: true,
|
|
5000
5212
|
type: "area",
|
|
5001
5213
|
metrics: [{
|
|
5002
5214
|
source: "column",
|
|
@@ -5006,7 +5218,7 @@ function appendDefaultCharts(params) {
|
|
|
5006
5218
|
color: "warning"
|
|
5007
5219
|
}]
|
|
5008
5220
|
});
|
|
5009
|
-
const
|
|
5221
|
+
const inputTokenMetrics = [
|
|
5010
5222
|
activeKeys.has("inputTokens") ? {
|
|
5011
5223
|
source: "column",
|
|
5012
5224
|
key: "inputTokens",
|
|
@@ -5014,13 +5226,6 @@ function appendDefaultCharts(params) {
|
|
|
5014
5226
|
label: "Input",
|
|
5015
5227
|
color: "accent"
|
|
5016
5228
|
} : null,
|
|
5017
|
-
activeKeys.has("outputTokens") ? {
|
|
5018
|
-
source: "column",
|
|
5019
|
-
key: "outputTokens",
|
|
5020
|
-
aggregate: "avg",
|
|
5021
|
-
label: "Output",
|
|
5022
|
-
color: "success"
|
|
5023
|
-
} : null,
|
|
5024
5229
|
activeKeys.has("cachedInputTokens") ? {
|
|
5025
5230
|
source: "column",
|
|
5026
5231
|
key: "cachedInputTokens",
|
|
@@ -5036,17 +5241,25 @@ function appendDefaultCharts(params) {
|
|
|
5036
5241
|
color: "warning"
|
|
5037
5242
|
} : null
|
|
5038
5243
|
].filter((metric) => metric !== null);
|
|
5039
|
-
if (
|
|
5040
|
-
heading: "LLM Tokens",
|
|
5244
|
+
if (inputTokenMetrics.length > 0) defaults.push({
|
|
5245
|
+
heading: "LLM Input Tokens",
|
|
5041
5246
|
hideIfNoValue: true,
|
|
5247
|
+
dedupeConsecutiveValues: true,
|
|
5042
5248
|
type: "bar",
|
|
5043
|
-
metrics:
|
|
5044
|
-
|
|
5249
|
+
metrics: inputTokenMetrics
|
|
5250
|
+
});
|
|
5251
|
+
if (activeKeys.has("outputTokens")) defaults.push({
|
|
5252
|
+
heading: "LLM Output Tokens",
|
|
5253
|
+
hideIfNoValue: true,
|
|
5254
|
+
dedupeConsecutiveValues: true,
|
|
5255
|
+
type: "bar",
|
|
5256
|
+
metrics: [{
|
|
5045
5257
|
source: "column",
|
|
5046
|
-
key: "
|
|
5258
|
+
key: "outputTokens",
|
|
5047
5259
|
aggregate: "avg",
|
|
5048
|
-
label: "
|
|
5049
|
-
|
|
5260
|
+
label: "Output",
|
|
5261
|
+
color: "success"
|
|
5262
|
+
}]
|
|
5050
5263
|
});
|
|
5051
5264
|
const merged = [...params.charts ?? [], ...defaults];
|
|
5052
5265
|
return merged.length > 0 ? merged : void 0;
|
|
@@ -5316,6 +5529,371 @@ function getRunFreshnessTimestamp(manifest) {
|
|
|
5316
5529
|
return manifest.endedAt ?? manifest.startedAt;
|
|
5317
5530
|
}
|
|
5318
5531
|
//#endregion
|
|
5532
|
+
//#region ../runner/src/manualInput/walker.ts
|
|
5533
|
+
function isObject(value) {
|
|
5534
|
+
return typeof value === "object" && value !== null;
|
|
5535
|
+
}
|
|
5536
|
+
function getZodDef(schema) {
|
|
5537
|
+
if (!isObject(schema)) return null;
|
|
5538
|
+
const zodHolder = schema._zod;
|
|
5539
|
+
if (!isObject(zodHolder)) return null;
|
|
5540
|
+
const def = zodHolder.def;
|
|
5541
|
+
if (!isObject(def)) return null;
|
|
5542
|
+
if (typeof def.type !== "string") return null;
|
|
5543
|
+
return {
|
|
5544
|
+
...def,
|
|
5545
|
+
type: def.type
|
|
5546
|
+
};
|
|
5547
|
+
}
|
|
5548
|
+
function getDescription(schema) {
|
|
5549
|
+
if (!isObject(schema)) return void 0;
|
|
5550
|
+
const description = schema.description;
|
|
5551
|
+
return typeof description === "string" ? description : void 0;
|
|
5552
|
+
}
|
|
5553
|
+
function getInnerSchema(def) {
|
|
5554
|
+
return def.innerType;
|
|
5555
|
+
}
|
|
5556
|
+
function getChecks(def) {
|
|
5557
|
+
const checks = def.checks;
|
|
5558
|
+
if (!Array.isArray(checks)) return [];
|
|
5559
|
+
const out = [];
|
|
5560
|
+
for (const check of checks) {
|
|
5561
|
+
if (!isObject(check)) continue;
|
|
5562
|
+
const zodHolder = check._zod;
|
|
5563
|
+
if (!isObject(zodHolder)) continue;
|
|
5564
|
+
const checkDef = zodHolder.def;
|
|
5565
|
+
if (!isObject(checkDef)) continue;
|
|
5566
|
+
if (typeof checkDef.check !== "string") continue;
|
|
5567
|
+
out.push({
|
|
5568
|
+
...checkDef,
|
|
5569
|
+
check: checkDef.check
|
|
5570
|
+
});
|
|
5571
|
+
}
|
|
5572
|
+
return out;
|
|
5573
|
+
}
|
|
5574
|
+
function findCheck(checks, name) {
|
|
5575
|
+
return checks.find((check) => check.check === name);
|
|
5576
|
+
}
|
|
5577
|
+
function unwrap(schema) {
|
|
5578
|
+
let current = schema;
|
|
5579
|
+
let required = true;
|
|
5580
|
+
let defaultValue = void 0;
|
|
5581
|
+
for (let depth = 0; depth < 8; depth += 1) {
|
|
5582
|
+
const def = getZodDef(current);
|
|
5583
|
+
if (!def) return null;
|
|
5584
|
+
if (def.type === "optional" || def.type === "nullable") {
|
|
5585
|
+
required = false;
|
|
5586
|
+
current = getInnerSchema(def);
|
|
5587
|
+
continue;
|
|
5588
|
+
}
|
|
5589
|
+
if (def.type === "nullish") {
|
|
5590
|
+
required = false;
|
|
5591
|
+
current = getInnerSchema(def);
|
|
5592
|
+
continue;
|
|
5593
|
+
}
|
|
5594
|
+
if (def.type === "default" || def.type === "prefault") {
|
|
5595
|
+
const raw = def.defaultValue;
|
|
5596
|
+
if (typeof raw === "function") defaultValue = Reflect.apply(raw, void 0, []);
|
|
5597
|
+
else defaultValue = raw;
|
|
5598
|
+
current = getInnerSchema(def);
|
|
5599
|
+
continue;
|
|
5600
|
+
}
|
|
5601
|
+
if (def.type === "readonly" || def.type === "pipe") {
|
|
5602
|
+
current = getInnerSchema(def) ?? def.in;
|
|
5603
|
+
continue;
|
|
5604
|
+
}
|
|
5605
|
+
return {
|
|
5606
|
+
schema: current,
|
|
5607
|
+
def,
|
|
5608
|
+
required,
|
|
5609
|
+
defaultValue
|
|
5610
|
+
};
|
|
5611
|
+
}
|
|
5612
|
+
return null;
|
|
5613
|
+
}
|
|
5614
|
+
function humaniseKey(key) {
|
|
5615
|
+
const spaced = key.replace(/([a-z0-9])([A-Z])/g, "$1 $2").replace(/[_-]+/g, " ").trim();
|
|
5616
|
+
if (!spaced) return key;
|
|
5617
|
+
const lowered = spaced.toLowerCase();
|
|
5618
|
+
return lowered.charAt(0).toUpperCase() + lowered.slice(1);
|
|
5619
|
+
}
|
|
5620
|
+
function normaliseSelectOptions(raw) {
|
|
5621
|
+
if (!raw) return void 0;
|
|
5622
|
+
return raw.map((entry) => {
|
|
5623
|
+
if (typeof entry === "string") return {
|
|
5624
|
+
value: entry,
|
|
5625
|
+
label: entry
|
|
5626
|
+
};
|
|
5627
|
+
return {
|
|
5628
|
+
value: entry.value,
|
|
5629
|
+
label: entry.label ?? entry.value
|
|
5630
|
+
};
|
|
5631
|
+
});
|
|
5632
|
+
}
|
|
5633
|
+
function enumOptionsFromEntries(def) {
|
|
5634
|
+
const entries = def.entries;
|
|
5635
|
+
if (!isObject(entries)) return null;
|
|
5636
|
+
const out = [];
|
|
5637
|
+
for (const [label, value] of Object.entries(entries)) if (typeof value === "string") out.push({
|
|
5638
|
+
value,
|
|
5639
|
+
label
|
|
5640
|
+
});
|
|
5641
|
+
else if (typeof value === "number") out.push({
|
|
5642
|
+
value: String(value),
|
|
5643
|
+
label
|
|
5644
|
+
});
|
|
5645
|
+
else return null;
|
|
5646
|
+
return out;
|
|
5647
|
+
}
|
|
5648
|
+
function literalUnionOptions(def) {
|
|
5649
|
+
const options = def.options;
|
|
5650
|
+
if (!Array.isArray(options)) return null;
|
|
5651
|
+
const out = [];
|
|
5652
|
+
for (const option of options) {
|
|
5653
|
+
const optDef = getZodDef(option);
|
|
5654
|
+
if (optDef?.type !== "literal") return null;
|
|
5655
|
+
const values = optDef.values;
|
|
5656
|
+
if (!Array.isArray(values) || values.length !== 1) return null;
|
|
5657
|
+
const value = values[0];
|
|
5658
|
+
if (typeof value === "string") out.push({
|
|
5659
|
+
value,
|
|
5660
|
+
label: value
|
|
5661
|
+
});
|
|
5662
|
+
else if (typeof value === "number") {
|
|
5663
|
+
const stringValue = String(value);
|
|
5664
|
+
out.push({
|
|
5665
|
+
value: stringValue,
|
|
5666
|
+
label: stringValue
|
|
5667
|
+
});
|
|
5668
|
+
} else return null;
|
|
5669
|
+
}
|
|
5670
|
+
return out.length > 0 ? out : null;
|
|
5671
|
+
}
|
|
5672
|
+
function literalSelectOptions(def) {
|
|
5673
|
+
const values = def.values;
|
|
5674
|
+
if (!Array.isArray(values)) return null;
|
|
5675
|
+
const out = [];
|
|
5676
|
+
for (const value of values) if (typeof value === "string") out.push({
|
|
5677
|
+
value,
|
|
5678
|
+
label: value
|
|
5679
|
+
});
|
|
5680
|
+
else if (typeof value === "number") {
|
|
5681
|
+
const stringValue = String(value);
|
|
5682
|
+
out.push({
|
|
5683
|
+
value: stringValue,
|
|
5684
|
+
label: stringValue
|
|
5685
|
+
});
|
|
5686
|
+
} else return null;
|
|
5687
|
+
return out;
|
|
5688
|
+
}
|
|
5689
|
+
function readStringChecks(def) {
|
|
5690
|
+
const checks = getChecks(def);
|
|
5691
|
+
const out = {};
|
|
5692
|
+
const min = findCheck(checks, "min_length");
|
|
5693
|
+
if (min && typeof min.minimum === "number") out.minLength = min.minimum;
|
|
5694
|
+
const max = findCheck(checks, "max_length");
|
|
5695
|
+
if (max && typeof max.maximum === "number") out.maxLength = max.maximum;
|
|
5696
|
+
return out;
|
|
5697
|
+
}
|
|
5698
|
+
const integerNumberFormats = new Set([
|
|
5699
|
+
"int",
|
|
5700
|
+
"safeint",
|
|
5701
|
+
"int32",
|
|
5702
|
+
"uint32",
|
|
5703
|
+
"int64",
|
|
5704
|
+
"uint64"
|
|
5705
|
+
]);
|
|
5706
|
+
function readNumberChecks(def) {
|
|
5707
|
+
const checks = getChecks(def);
|
|
5708
|
+
const out = {};
|
|
5709
|
+
const gt = findCheck(checks, "greater_than");
|
|
5710
|
+
if (gt && typeof gt.value === "number" && gt.inclusive === true) out.min = gt.value;
|
|
5711
|
+
const lt = findCheck(checks, "less_than");
|
|
5712
|
+
if (lt && typeof lt.value === "number" && lt.inclusive === true) out.max = lt.value;
|
|
5713
|
+
const format = findCheck(checks, "number_format");
|
|
5714
|
+
if (format && typeof format.format === "string" && integerNumberFormats.has(format.format)) out.integer = true;
|
|
5715
|
+
return out;
|
|
5716
|
+
}
|
|
5717
|
+
function buildField(key, fieldSchema, override) {
|
|
5718
|
+
const unwrapped = unwrap(fieldSchema);
|
|
5719
|
+
if (!unwrapped) return Result.err(/* @__PURE__ */ new Error(`manualInput: field "${key}" uses an unsupported Zod schema (could not introspect)`));
|
|
5720
|
+
const inner = unwrapped.def;
|
|
5721
|
+
const description = override?.description ?? getDescription(unwrapped.schema);
|
|
5722
|
+
const base = {
|
|
5723
|
+
key,
|
|
5724
|
+
label: override?.label ?? humaniseKey(key),
|
|
5725
|
+
description,
|
|
5726
|
+
placeholder: override?.placeholder,
|
|
5727
|
+
required: unwrapped.required,
|
|
5728
|
+
defaultValue: override?.defaultValue !== void 0 ? override.defaultValue : unwrapped.defaultValue
|
|
5729
|
+
};
|
|
5730
|
+
if (override?.asJson === true) {
|
|
5731
|
+
const rows = override.rows;
|
|
5732
|
+
return Result.ok({
|
|
5733
|
+
...base,
|
|
5734
|
+
kind: "json",
|
|
5735
|
+
rows
|
|
5736
|
+
});
|
|
5737
|
+
}
|
|
5738
|
+
if (override?.asFile === true) return Result.ok({
|
|
5739
|
+
...base,
|
|
5740
|
+
kind: "file",
|
|
5741
|
+
accept: override.accept,
|
|
5742
|
+
maxSizeBytes: override.maxSizeBytes
|
|
5743
|
+
});
|
|
5744
|
+
const overrideOptions = normaliseSelectOptions(override?.options);
|
|
5745
|
+
if (overrideOptions) return Result.ok({
|
|
5746
|
+
...base,
|
|
5747
|
+
kind: "select",
|
|
5748
|
+
options: overrideOptions
|
|
5749
|
+
});
|
|
5750
|
+
switch (inner.type) {
|
|
5751
|
+
case "string": {
|
|
5752
|
+
const checks = readStringChecks(inner);
|
|
5753
|
+
if (override?.multiline === true) return Result.ok({
|
|
5754
|
+
...base,
|
|
5755
|
+
kind: "multiline",
|
|
5756
|
+
rows: override.rows,
|
|
5757
|
+
minLength: checks.minLength,
|
|
5758
|
+
maxLength: checks.maxLength
|
|
5759
|
+
});
|
|
5760
|
+
return Result.ok({
|
|
5761
|
+
...base,
|
|
5762
|
+
kind: "text",
|
|
5763
|
+
minLength: checks.minLength,
|
|
5764
|
+
maxLength: checks.maxLength
|
|
5765
|
+
});
|
|
5766
|
+
}
|
|
5767
|
+
case "number":
|
|
5768
|
+
case "int":
|
|
5769
|
+
case "bigint": {
|
|
5770
|
+
const checks = readNumberChecks(inner);
|
|
5771
|
+
return Result.ok({
|
|
5772
|
+
...base,
|
|
5773
|
+
kind: "number",
|
|
5774
|
+
min: checks.min,
|
|
5775
|
+
max: checks.max,
|
|
5776
|
+
integer: checks.integer
|
|
5777
|
+
});
|
|
5778
|
+
}
|
|
5779
|
+
case "boolean": return Result.ok({
|
|
5780
|
+
...base,
|
|
5781
|
+
kind: "boolean"
|
|
5782
|
+
});
|
|
5783
|
+
case "enum": {
|
|
5784
|
+
const options = enumOptionsFromEntries(inner);
|
|
5785
|
+
if (options) return Result.ok({
|
|
5786
|
+
...base,
|
|
5787
|
+
kind: "select",
|
|
5788
|
+
options
|
|
5789
|
+
});
|
|
5790
|
+
return Result.ok({
|
|
5791
|
+
...base,
|
|
5792
|
+
kind: "json",
|
|
5793
|
+
rows: override?.rows
|
|
5794
|
+
});
|
|
5795
|
+
}
|
|
5796
|
+
case "literal": {
|
|
5797
|
+
const options = literalSelectOptions(inner);
|
|
5798
|
+
if (options && options.length > 0) return Result.ok({
|
|
5799
|
+
...base,
|
|
5800
|
+
kind: "select",
|
|
5801
|
+
options
|
|
5802
|
+
});
|
|
5803
|
+
return Result.ok({
|
|
5804
|
+
...base,
|
|
5805
|
+
kind: "json",
|
|
5806
|
+
rows: override?.rows
|
|
5807
|
+
});
|
|
5808
|
+
}
|
|
5809
|
+
case "union": {
|
|
5810
|
+
const options = literalUnionOptions(inner);
|
|
5811
|
+
if (options) return Result.ok({
|
|
5812
|
+
...base,
|
|
5813
|
+
kind: "select",
|
|
5814
|
+
options
|
|
5815
|
+
});
|
|
5816
|
+
return Result.ok({
|
|
5817
|
+
...base,
|
|
5818
|
+
kind: "json",
|
|
5819
|
+
rows: override?.rows
|
|
5820
|
+
});
|
|
5821
|
+
}
|
|
5822
|
+
default: return Result.ok({
|
|
5823
|
+
...base,
|
|
5824
|
+
kind: "json",
|
|
5825
|
+
rows: override?.rows
|
|
5826
|
+
});
|
|
5827
|
+
}
|
|
5828
|
+
}
|
|
5829
|
+
function getObjectShape(schema) {
|
|
5830
|
+
const def = getZodDef(schema);
|
|
5831
|
+
if (!def) return null;
|
|
5832
|
+
if (def.type !== "object") return null;
|
|
5833
|
+
const shape = def.shape;
|
|
5834
|
+
if (!isObject(shape)) return null;
|
|
5835
|
+
return shape;
|
|
5836
|
+
}
|
|
5837
|
+
/**
 * Walk an eval's `manualInput` configuration and produce the wire-format
 * descriptor consumed by the web UI. The schema must resolve to a top-level
 * `z.object(...)`; nested objects, arrays, unions, and other unsupported
 * shapes inside fields fall back to the JSON textarea widget.
 *
 * Returns a `Result` so the caller (eval discovery) can surface a discovery
 * issue without throwing when the schema is incompatible.
 *
 * @param config - Manual-input configuration. Reads `schema`, the optional
 *   per-field `fields` overrides, and `title` / `description` / `submitLabel`.
 * @returns `Result.ok` with `{ title, description, submitLabel, fields }`, or
 *   an error result when the schema is not a top-level object or when
 *   `buildField` fails for any field (the first failure is returned).
 */
function buildManualInputDescriptor(config) {
  const shape = getObjectShape(config.schema);
  if (!shape) return Result.err(new Error("manualInput.schema must be a top-level z.object(...). Wrap nested types in an object schema."));
  // Overrides live in a Map rather than a plain object: a schema field named
  // e.g. "toString" or "constructor" must not resolve to an inherited
  // Object.prototype member, and an own "__proto__" key in `config.fields`
  // must not re-parent the lookup table.
  const overrides = new Map();
  const rawOverrides = config.fields;
  if (rawOverrides) {
    for (const [key, override] of Object.entries(rawOverrides)) {
      // Falsy entries (undefined / null) mean "no override for this field".
      if (override) overrides.set(key, override);
    }
  }
  const fields = [];
  for (const [key, fieldSchema] of Object.entries(shape)) {
    const fieldResult = buildField(key, fieldSchema, overrides.get(key));
    if (fieldResult.error) return fieldResult.errorResult();
    fields.push(fieldResult.value);
  }
  return Result.ok({
    title: config.title,
    description: config.description,
    submitLabel: config.submitLabel,
    fields
  });
}
|
|
5867
|
+
/**
 * Validate a raw user submission against the eval's `manualInput` schema.
 *
 * @param config - Manual-input configuration; its `schema.safeParse` is used.
 * @param raw - Untrusted value submitted by the user.
 * @returns `Result.ok` carrying the parsed data on success, otherwise
 *   `Result.err` wrapping a `ManualInputValidationError` built from the
 *   schema issues after they pass through `formatIssue`.
 */
function parseManualInputValues(config, raw) {
  const outcome = config.schema.safeParse(raw);
  if (outcome.success) return Result.ok(outcome.data);
  const issues = outcome.error.issues.map(formatIssue);
  return Result.err(new ManualInputValidationError(issues));
}
|
|
5877
|
+
/**
 * Error describing manual-input values that failed validation against the
 * eval's `manualInput.schema`.
 *
 * The formatted issues are kept on `issues` so callers can report them
 * per field; the `message` is a "; "-joined summary in which each issue is
 * rendered as `path: message`, or just `message` when the path is empty.
 */
var ManualInputValidationError = class extends Error {
  /** Formatted issues (`{ path, message }`) exactly as passed to the constructor. */
  issues;
  /**
   * @param issues - Array of `{ path, message }` entries; may be empty.
   */
  constructor(issues) {
    let text = "manualInput validation failed";
    if (issues.length !== 0) {
      const summary = issues.map(({ path, message }) => path ? `${path}: ${message}` : message).join("; ");
      text = `manualInput validation failed: ${summary}`;
    }
    super(text);
    this.name = "ManualInputValidationError";
    this.issues = issues;
  }
};
|
|
5890
|
+
/**
 * Reduce a schema issue to the `{ path, message }` pair used for reporting.
 *
 * @param issue - Object with a `path` array and a `message`.
 * @returns `path` is the dot-joined string/number segments of `issue.path`;
 *   segments of any other type, and empty-string segments, are dropped.
 *   `message` is passed through unchanged.
 */
function formatIssue(issue) {
  const path = issue.path.flatMap((segment) => {
    const isKeyLike = typeof segment === "string" || typeof segment === "number";
    const text = isKeyLike ? String(segment) : "";
    // Returning [] removes the segment from the flattened result.
    return text === "" ? [] : [text];
  }).join(".");
  return {
    path,
    message: issue.message
  };
}
|
|
5896
|
+
//#endregion
|
|
5319
5897
|
//#region ../runner/src/outputArtifacts.ts
|
|
5320
5898
|
const mimeTypeExtensionMap = {
|
|
5321
5899
|
"application/json": ".json",
|
|
@@ -6347,8 +6925,24 @@ async function executeRun({ runState, request, runDir, config, cacheStore, lastR
|
|
|
6347
6925
|
await runWithModuleIsolation(moduleIsolation, async () => {
|
|
6348
6926
|
await runInEvalRuntimeScope("cases", async () => {
|
|
6349
6927
|
await entry.use(async (evalDef) => {
|
|
6350
|
-
|
|
6351
|
-
|
|
6928
|
+
if (evalDef.manualInput && evalDef.cases !== void 0) throw new Error(`Eval "${evalMeta.id}" cannot declare both "cases" and "manualInput". Remove one of them.`);
|
|
6929
|
+
let manualInputCase = null;
|
|
6930
|
+
if (evalDef.manualInput) {
|
|
6931
|
+
const rawValue = request.manualInputs?.[evalMeta.key];
|
|
6932
|
+
if (rawValue === void 0) throw new Error(`Eval "${evalMeta.id}" requires manual input. Provide it via the run modal in the web UI or "--input" / "--input-file" on the CLI.`);
|
|
6933
|
+
const parsed = parseManualInputValues(evalDef.manualInput, rawValue);
|
|
6934
|
+
if (parsed.error) {
|
|
6935
|
+
const formatted = parsed.error.issues.map((issue) => issue.path ? `${issue.path}: ${issue.message}` : issue.message).join("; ");
|
|
6936
|
+
throw new Error(`Invalid manual input for eval "${evalMeta.id}": ${formatted}`);
|
|
6937
|
+
}
|
|
6938
|
+
manualInputCase = {
|
|
6939
|
+
id: `${evalMeta.id}-manual`,
|
|
6940
|
+
input: parsed.value
|
|
6941
|
+
};
|
|
6942
|
+
}
|
|
6943
|
+
const evalCases = manualInputCase ? [manualInputCase] : await runWithEvalClock(evalDef.startTime, async () => typeof evalDef.cases === "function" ? await evalDef.cases() : evalDef.cases ?? [], { freezeTime: evalDef.freezeTime });
|
|
6944
|
+
const runnableCases = manualInputCase ? evalCases : resolveRunnableEvalCases({
|
|
6945
|
+
cases: evalCases,
|
|
6352
6946
|
evalId: evalMeta.id
|
|
6353
6947
|
});
|
|
6354
6948
|
const duplicateCaseIds = findDuplicateCaseIds(runnableCases);
|
|
@@ -6567,4 +7161,4 @@ function toLastRunStatus(status) {
|
|
|
6567
7161
|
return status === "pending" ? null : status;
|
|
6568
7162
|
}
|
|
6569
7163
|
//#endregion
|
|
6570
|
-
export {
|
|
7164
|
+
export { defaultConfigKeySchema as $, incrementEvalOutput as $n, cacheEntryWithDebugKeySchema as $t, createRunRequestSchema as A, buildTraceTree as An, runLogPhaseSchema as At, getEvalDisplayStatus as B, repoFile as Bn, manualInputTextFieldSchema as Bt, loadConfig as C, columnKindSchema as Cn, evalStatAggregateSchema as Ct, createFsCacheStore as D, repoFileRefSchema as Dn, runLogEntrySchema as Dt, validateCharts as E, numberDisplayOptionsSchema as En, evalSummarySchema as Et, extractApiCalls as F, hashCacheKeySync as Fn, manualInputJsonFieldSchema as Ft, runSummarySchema as G, advanceEvalTime as Gn, evalChartConfigSchema as Gt, deriveStatusFromCaseRows as H, readManualInputFile as Hn, evalChartAxisSchema as Ht, extractLlmCalls as I, deserializeCacheRecording as In, manualInputMultilineFieldSchema as It, agentEvalsConfigSchema as J, evalAssert as Jn, evalChartTypeSchema as Jt, DEFAULT_API_CALLS_CONFIG as K, appendToEvalOutput as Kn, evalChartMetricSchema as Kt, applyDerivedCallAttributes as L, deserializeCacheValue as Ln, manualInputNumberFieldSchema as Lt, sseEnvelopeSchema as M, evalSpan as Mn, manualInputBooleanFieldSchema as Mt, extractCacheEntries as N, evalTracer as Nn, manualInputDescriptorSchema as Nt, configReloadStateSchema as O, runArtifactRefSchema as On, runLogLevelSchema as Ot, extractCacheHits as P, hashCacheKey as Pn, manualInputFieldDescriptorSchema as Pt, apiCallsConfigSchema as Q, getEvalStartTime as Qn, cacheEntrySchema as Qt, getNestedAttribute as R, serializeCacheRecording as Rn, manualInputSelectFieldSchema as Rt, resolveEvalDefaultConfig as S, columnFormatSchema as Sn, evalFreshnessStatusSchema as St, normalizeScoreDef as T, jsonCellSchema as Tn, evalStatsConfigSchema as Tt, deriveStatusFromChildStatuses as U, evalExpect as Un, evalChartBuiltinMetricSchema as Ut, deriveScopedSummaryFromCases as V, manualInputFileValueSchema as Vn, evalChartAggregateSchema as Vt, runManifestSchema as W, EvalAssertionError as Wn, evalChartColorSchema as Wt, 
apiCallMetricPlacementSchema as X, getCurrentScope as Xn, cacheDebugKeyEntrySchema as Xt, apiCallMetricFormatSchema as Y, evalLog as Yn, evalChartsConfigSchema as Yt, apiCallMetricSchema as Z, getEvalCaseInput as Zn, cacheDebugKeyFileSchema as Zt, buildManualInputDescriptor as _, traceSpanKindSchema as _n, getCaseRowEvalKey as _t, getLastRunStatuses as a, cacheRecordingSchema as an, runInExistingEvalScope as ar, llmCallMetricSchema as at, loadEvalModule as b, cellValueSchema as bn, caseRowSchema as bt, loadPersistedRunSnapshots as c, spanCacheOptionsSchema as cn, startEvalBackgroundJob as cr, llmCallsConfigSchema as ct, persistRunState as d, traceAttributeDisplayInputSchema as dn, resolveLlmCallsConfig as dt, cacheFileSchema as en, isInEvalScope as er, evalColumnOverrideSchema as et, recomputeEvalStatusesInRuns as f, traceAttributeDisplayPlacementSchema as fn, runLogsConfigSchema as ft, resolveArtifactPath as g, traceSpanErrorSchema as gn, getCaseRowCaseKey as gt, resolveTracePresentation as h, traceDisplayInputConfigSchema as hn, buildEvalKey as ht, generateRunId as i, cacheRecordingOpSchema as in, runInEvalScope as ir, llmCallMetricPlacementSchema as it, updateManualScoreRequestSchema as j, captureEvalSpanError as jn, scoreTraceSchema as jt, configReloadStatusSchema as k, z$1 as kn, runLogLocationSchema as kt, nextShortIdFromSnapshots as l, traceCacheRefSchema as ln, defineEval as lr, removeDefaultConfigSchema as lt, runTouchesEval as m, traceDisplayConfigSchema as mn, buildCaseKey as mt, getTargetEvalKeys as n, cacheModeSchema as nn, nextEvalId as nr, evalDeriveConfigSchema as nt, getLatestRunInfos as o, cacheStatusSchema as on, setEvalOutput as or, llmCallPricingRateSchema as ot, recomputePersistedCaseStatus as p, traceAttributeDisplaySchema as pn, trialSelectionModeSchema as pt, DEFAULT_LLM_CALLS_CONFIG as q, configureEvalRunLogs as qn, evalChartTooltipExtraSchema as qt, getTargetEvals as r, cacheOperationTypeSchema as rn, runInEvalRuntimeScope as rr, 
llmCallMetricFormatSchema as rt, loadPersistedRunSnapshot as s, serializedCacheSpanSchema as sn, setScopeCacheContext as sr, llmCallPricingSchema as st, executeRun as t, cacheListItemSchema as tn, mergeEvalOutput as tr, evalColumnsSchema as tt, persistCaseDetail as u, traceAttributeDisplayFormatSchema as un, getEvalRegistry as ur, resolveApiCallsConfig as ut, parseManualInputValues as v, traceSpanSchema as vn, assertionFailureSchema as vt, buildDeclaredColumnDefs as w, fileRefSchema as wn, evalStatItemSchema as wt, parseEvalDiscovery as x, columnDefSchema as xn, discoveryIssueSchema as xt, deriveEvalFreshness as y, traceSpanWarningSchema as yn, caseDetailSchema as yt, getEvalTitle as z, serializeCacheValue as zn, manualInputSelectOptionSchema as zt };
|