@ls-stack/agent-eval 0.32.0 → 0.34.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-Dc6vvHRL.mjs → app-DOE-crd0.mjs} +4 -4
- package/dist/apps/web/dist/assets/index-BJpxc61J.css +1 -0
- package/dist/apps/web/dist/assets/index-ol64metU.js +140 -0
- package/dist/apps/web/dist/index.html +2 -2
- package/dist/bin.mjs +1 -1
- package/dist/{cli-huuJbDNb.mjs → cli-CG66f0GN.mjs} +3 -3
- package/dist/index.d.mts +62 -62
- package/dist/index.mjs +3 -3
- package/dist/runChild.mjs +1 -1
- package/dist/{runOrchestration-ZpN7xty_.mjs → runOrchestration-CYhn4DJ0.mjs} +19 -55
- package/dist/{runner-BPXPvinB.mjs → runner-D5i7HLcE.mjs} +1 -1
- package/dist/{runner-Dkol2ukD.mjs → runner-j9b6g0h0.mjs} +2 -2
- package/dist/src-B0b1vgk0.mjs +3 -0
- package/package.json +3 -3
- package/skills/agent-eval/SKILL.md +7 -4
- package/dist/apps/web/dist/assets/index-BNQnbfi0.js +0 -118
- package/dist/apps/web/dist/assets/index-BPMMRktE.css +0 -1
- package/dist/src-1Qvuh0NH.mjs +0 -3
|
@@ -25,8 +25,8 @@
|
|
|
25
25
|
href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600&family=JetBrains+Mono:wght@400;500&display=swap"
|
|
26
26
|
rel="stylesheet"
|
|
27
27
|
/>
|
|
28
|
-
<script type="module" crossorigin src="/assets/index-BNQnbfi0.js"></script>
|
|
29
|
-
<link rel="stylesheet" crossorigin href="/assets/index-BPMMRktE.css">
|
|
28
|
+
<script type="module" crossorigin src="/assets/index-ol64metU.js"></script>
|
|
29
|
+
<link rel="stylesheet" crossorigin href="/assets/index-BJpxc61J.css">
|
|
30
30
|
</head>
|
|
31
31
|
<body>
|
|
32
32
|
<div id="root"></div>
|
package/dist/bin.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { C as loadConfig, D as createFsCacheStore, E as validateCharts, H as getEvalDisplayStatus, S as resolveEvalDefaultConfig, T as normalizeScoreDef, U as deriveScopedSummaryFromCases, V as getEvalTitle, _ as buildManualInputDescriptor, a as getLastRunStatuses, b as loadEvalModule, bt as getCaseRowEvalKey, c as loadPersistedRunSnapshots, d as persistRunState, f as recomputeEvalStatusesInRuns, g as resolveArtifactPath, h as resolveTracePresentation, i as generateRunId, l as nextShortIdFromSnapshots, m as runTouchesEval, mt as resolveLlmCallsConfig, n as getTargetEvalKeys, o as getLatestRunInfos, p as recomputePersistedCaseStatus, pr as getEvalRegistry, pt as resolveApiCallsConfig, q as runSummarySchema, s as loadPersistedRunSnapshot, u as persistCaseDetail, v as parseManualInputValues, vt as buildEvalKey, w as buildDeclaredColumnDefs, x as parseEvalDiscovery, y as deriveEvalFreshness, yt as getCaseRowCaseKey, z as applyDerivedCallAttributes } from "./runOrchestration-ZpN7xty_.mjs";
|
|
1
|
+
import { C as loadConfig, D as createFsCacheStore, E as validateCharts, H as getEvalDisplayStatus, S as resolveEvalDefaultConfig, T as normalizeScoreDef, U as deriveScopedSummaryFromCases, V as getEvalTitle, _ as buildManualInputDescriptor, a as getLastRunStatuses, b as loadEvalModule, bt as getCaseRowEvalKey, c as loadPersistedRunSnapshots, d as persistRunState, f as recomputeEvalStatusesInRuns, g as resolveArtifactPath, h as resolveTracePresentation, i as generateRunId, l as nextShortIdFromSnapshots, m as runTouchesEval, mt as resolveLlmCallsConfig, n as getTargetEvalKeys, o as getLatestRunInfos, p as recomputePersistedCaseStatus, pr as getEvalRegistry, pt as resolveApiCallsConfig, q as runSummarySchema, s as loadPersistedRunSnapshot, u as persistCaseDetail, v as parseManualInputValues, vt as buildEvalKey, w as buildDeclaredColumnDefs, x as parseEvalDiscovery, y as deriveEvalFreshness, yt as getCaseRowCaseKey, z as applyDerivedCallAttributes } from "./runOrchestration-CYhn4DJ0.mjs";
|
|
2
2
|
import { createHash, randomUUID } from "node:crypto";
|
|
3
3
|
import { copyFile, mkdir, readFile, rm, writeFile } from "node:fs/promises";
|
|
4
4
|
import { basename, dirname, extname, isAbsolute, join, relative, resolve, sep } from "node:path";
|
|
@@ -1940,8 +1940,8 @@ async function commandApp(args) {
|
|
|
1940
1940
|
const { serve } = await import("@hono/node-server");
|
|
1941
1941
|
const bundledWebDist = resolve(currentDir, "apps/web/dist");
|
|
1942
1942
|
if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
|
|
1943
|
-
const appModule = await import("./app-Dc6vvHRL.mjs");
|
|
1944
|
-
const runnerModule = await import("./runner-BPXPvinB.mjs");
|
|
1943
|
+
const appModule = await import("./app-DOE-crd0.mjs");
|
|
1944
|
+
const runnerModule = await import("./runner-D5i7HLcE.mjs");
|
|
1945
1945
|
if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
|
|
1946
1946
|
if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
|
|
1947
1947
|
await runnerModule.initRunner();
|
package/dist/index.d.mts
CHANGED
|
@@ -3024,8 +3024,8 @@ declare const traceAttributeDisplaySchema: z$1.ZodObject<{
|
|
|
3024
3024
|
}>>;
|
|
3025
3025
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3026
3026
|
all: "all";
|
|
3027
|
-
last: "last";
|
|
3028
3027
|
sum: "sum";
|
|
3028
|
+
last: "last";
|
|
3029
3029
|
}>>;
|
|
3030
3030
|
}, z$1.core.$strip>;
|
|
3031
3031
|
/**
|
|
@@ -3060,8 +3060,8 @@ declare const traceDisplayConfigSchema: z$1.ZodObject<{
|
|
|
3060
3060
|
}>>;
|
|
3061
3061
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3062
3062
|
all: "all";
|
|
3063
|
-
last: "last";
|
|
3064
3063
|
sum: "sum";
|
|
3064
|
+
last: "last";
|
|
3065
3065
|
}>>;
|
|
3066
3066
|
}, z$1.core.$strip>>>;
|
|
3067
3067
|
}, z$1.core.$strip>;
|
|
@@ -3100,8 +3100,8 @@ declare const traceAttributeDisplayInputSchema: z$1.ZodObject<{
|
|
|
3100
3100
|
}>>;
|
|
3101
3101
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3102
3102
|
all: "all";
|
|
3103
|
-
last: "last";
|
|
3104
3103
|
sum: "sum";
|
|
3104
|
+
last: "last";
|
|
3105
3105
|
}>>;
|
|
3106
3106
|
transform: z$1.ZodOptional<z$1.ZodCustom<TraceAttributeTransform, TraceAttributeTransform>>;
|
|
3107
3107
|
}, z$1.core.$strip>;
|
|
@@ -3138,8 +3138,8 @@ declare const traceDisplayInputConfigSchema: z$1.ZodObject<{
|
|
|
3138
3138
|
}>>;
|
|
3139
3139
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3140
3140
|
all: "all";
|
|
3141
|
-
last: "last";
|
|
3142
3141
|
sum: "sum";
|
|
3142
|
+
last: "last";
|
|
3143
3143
|
}>>;
|
|
3144
3144
|
transform: z$1.ZodOptional<z$1.ZodCustom<TraceAttributeTransform, TraceAttributeTransform>>;
|
|
3145
3145
|
}, z$1.core.$strip>>>;
|
|
@@ -3176,8 +3176,8 @@ declare const traceSpanSchema: z$1.ZodObject<{
|
|
|
3176
3176
|
status: z$1.ZodEnum<{
|
|
3177
3177
|
error: "error";
|
|
3178
3178
|
running: "running";
|
|
3179
|
-
ok: "ok";
|
|
3180
3179
|
cancelled: "cancelled";
|
|
3180
|
+
ok: "ok";
|
|
3181
3181
|
}>;
|
|
3182
3182
|
attributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
|
|
3183
3183
|
error: z$1.ZodOptional<z$1.ZodObject<{
|
|
@@ -3218,11 +3218,11 @@ declare const evalFreshnessStatusSchema: z$1.ZodEnum<{
|
|
|
3218
3218
|
type EvalFreshnessStatus = z$1.infer<typeof evalFreshnessStatusSchema>;
|
|
3219
3219
|
/** Reducer used to collapse a column's per-case values into a single stat. */
|
|
3220
3220
|
declare const evalStatAggregateSchema: z$1.ZodEnum<{
|
|
3221
|
-
last: "last";
|
|
3222
|
-
sum: "sum";
|
|
3223
3221
|
avg: "avg";
|
|
3222
|
+
sum: "sum";
|
|
3224
3223
|
min: "min";
|
|
3225
3224
|
max: "max";
|
|
3225
|
+
last: "last";
|
|
3226
3226
|
}>;
|
|
3227
3227
|
/** Reducer used to collapse a column's per-case values into a single stat. */
|
|
3228
3228
|
type EvalStatAggregate = z$1.infer<typeof evalStatAggregateSchema>;
|
|
@@ -3246,11 +3246,11 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
3246
3246
|
key: z$1.ZodString;
|
|
3247
3247
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
3248
3248
|
aggregate: z$1.ZodEnum<{
|
|
3249
|
-
last: "last";
|
|
3250
|
-
sum: "sum";
|
|
3251
3249
|
avg: "avg";
|
|
3250
|
+
sum: "sum";
|
|
3252
3251
|
min: "min";
|
|
3253
3252
|
max: "max";
|
|
3253
|
+
last: "last";
|
|
3254
3254
|
}>;
|
|
3255
3255
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3256
3256
|
number: "number";
|
|
@@ -3288,11 +3288,11 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
|
|
|
3288
3288
|
key: z$1.ZodString;
|
|
3289
3289
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
3290
3290
|
aggregate: z$1.ZodEnum<{
|
|
3291
|
-
last: "last";
|
|
3292
|
-
sum: "sum";
|
|
3293
3291
|
avg: "avg";
|
|
3292
|
+
sum: "sum";
|
|
3294
3293
|
min: "min";
|
|
3295
3294
|
max: "max";
|
|
3295
|
+
last: "last";
|
|
3296
3296
|
}>;
|
|
3297
3297
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3298
3298
|
number: "number";
|
|
@@ -3389,11 +3389,11 @@ declare const evalSummarySchema: z$1.ZodObject<{
|
|
|
3389
3389
|
key: z$1.ZodString;
|
|
3390
3390
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
3391
3391
|
aggregate: z$1.ZodEnum<{
|
|
3392
|
-
last: "last";
|
|
3393
|
-
sum: "sum";
|
|
3394
3392
|
avg: "avg";
|
|
3393
|
+
sum: "sum";
|
|
3395
3394
|
min: "min";
|
|
3396
3395
|
max: "max";
|
|
3396
|
+
last: "last";
|
|
3397
3397
|
}>;
|
|
3398
3398
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3399
3399
|
number: "number";
|
|
@@ -3431,9 +3431,9 @@ declare const evalSummarySchema: z$1.ZodObject<{
|
|
|
3431
3431
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3432
3432
|
success: "success";
|
|
3433
3433
|
error: "error";
|
|
3434
|
-
warning: "warning";
|
|
3435
3434
|
accent: "accent";
|
|
3436
3435
|
accentDim: "accentDim";
|
|
3436
|
+
warning: "warning";
|
|
3437
3437
|
textMuted: "textMuted";
|
|
3438
3438
|
}>>;
|
|
3439
3439
|
axis: z$1.ZodOptional<z$1.ZodEnum<{
|
|
@@ -3444,8 +3444,8 @@ declare const evalSummarySchema: z$1.ZodObject<{
|
|
|
3444
3444
|
source: z$1.ZodLiteral<"column">;
|
|
3445
3445
|
key: z$1.ZodString;
|
|
3446
3446
|
aggregate: z$1.ZodEnum<{
|
|
3447
|
-
sum: "sum";
|
|
3448
3447
|
avg: "avg";
|
|
3448
|
+
sum: "sum";
|
|
3449
3449
|
min: "min";
|
|
3450
3450
|
max: "max";
|
|
3451
3451
|
latest: "latest";
|
|
@@ -3455,9 +3455,9 @@ declare const evalSummarySchema: z$1.ZodObject<{
|
|
|
3455
3455
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3456
3456
|
success: "success";
|
|
3457
3457
|
error: "error";
|
|
3458
|
-
warning: "warning";
|
|
3459
3458
|
accent: "accent";
|
|
3460
3459
|
accentDim: "accentDim";
|
|
3460
|
+
warning: "warning";
|
|
3461
3461
|
textMuted: "textMuted";
|
|
3462
3462
|
}>>;
|
|
3463
3463
|
axis: z$1.ZodOptional<z$1.ZodEnum<{
|
|
@@ -3486,8 +3486,8 @@ declare const evalSummarySchema: z$1.ZodObject<{
|
|
|
3486
3486
|
source: z$1.ZodLiteral<"column">;
|
|
3487
3487
|
key: z$1.ZodString;
|
|
3488
3488
|
aggregate: z$1.ZodEnum<{
|
|
3489
|
-
sum: "sum";
|
|
3490
3489
|
avg: "avg";
|
|
3490
|
+
sum: "sum";
|
|
3491
3491
|
min: "min";
|
|
3492
3492
|
max: "max";
|
|
3493
3493
|
latest: "latest";
|
|
@@ -3585,11 +3585,11 @@ declare const caseRowSchema: z$1.ZodObject<{
|
|
|
3585
3585
|
evalId: z$1.ZodString;
|
|
3586
3586
|
status: z$1.ZodEnum<{
|
|
3587
3587
|
error: "error";
|
|
3588
|
+
pending: "pending";
|
|
3588
3589
|
running: "running";
|
|
3589
3590
|
cancelled: "cancelled";
|
|
3590
3591
|
pass: "pass";
|
|
3591
3592
|
fail: "fail";
|
|
3592
|
-
pending: "pending";
|
|
3593
3593
|
}>;
|
|
3594
3594
|
durationMs: z$1.ZodNullable<z$1.ZodNumber>;
|
|
3595
3595
|
costUsd: z$1.ZodOptional<z$1.ZodNullable<z$1.ZodNumber>>;
|
|
@@ -3681,8 +3681,8 @@ declare const scoreTraceSchema: z$1.ZodObject<{
|
|
|
3681
3681
|
status: z$1.ZodEnum<{
|
|
3682
3682
|
error: "error";
|
|
3683
3683
|
running: "running";
|
|
3684
|
-
ok: "ok";
|
|
3685
3684
|
cancelled: "cancelled";
|
|
3685
|
+
ok: "ok";
|
|
3686
3686
|
}>;
|
|
3687
3687
|
attributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
|
|
3688
3688
|
error: z$1.ZodOptional<z$1.ZodObject<{
|
|
@@ -3733,8 +3733,8 @@ declare const scoreTraceSchema: z$1.ZodObject<{
|
|
|
3733
3733
|
}>>;
|
|
3734
3734
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3735
3735
|
all: "all";
|
|
3736
|
-
last: "last";
|
|
3737
3736
|
sum: "sum";
|
|
3737
|
+
last: "last";
|
|
3738
3738
|
}>>;
|
|
3739
3739
|
}, z$1.core.$strip>>>;
|
|
3740
3740
|
}, z$1.core.$strip>;
|
|
@@ -3749,11 +3749,11 @@ declare const caseDetailSchema: z$1.ZodObject<{
|
|
|
3749
3749
|
evalId: z$1.ZodString;
|
|
3750
3750
|
status: z$1.ZodEnum<{
|
|
3751
3751
|
error: "error";
|
|
3752
|
+
pending: "pending";
|
|
3752
3753
|
running: "running";
|
|
3753
3754
|
cancelled: "cancelled";
|
|
3754
3755
|
pass: "pass";
|
|
3755
3756
|
fail: "fail";
|
|
3756
|
-
pending: "pending";
|
|
3757
3757
|
}>;
|
|
3758
3758
|
input: z$1.ZodUnknown;
|
|
3759
3759
|
trace: z$1.ZodArray<z$1.ZodObject<{
|
|
@@ -3767,8 +3767,8 @@ declare const caseDetailSchema: z$1.ZodObject<{
|
|
|
3767
3767
|
status: z$1.ZodEnum<{
|
|
3768
3768
|
error: "error";
|
|
3769
3769
|
running: "running";
|
|
3770
|
-
ok: "ok";
|
|
3771
3770
|
cancelled: "cancelled";
|
|
3771
|
+
ok: "ok";
|
|
3772
3772
|
}>;
|
|
3773
3773
|
attributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
|
|
3774
3774
|
error: z$1.ZodOptional<z$1.ZodObject<{
|
|
@@ -3819,8 +3819,8 @@ declare const caseDetailSchema: z$1.ZodObject<{
|
|
|
3819
3819
|
}>>;
|
|
3820
3820
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3821
3821
|
all: "all";
|
|
3822
|
-
last: "last";
|
|
3823
3822
|
sum: "sum";
|
|
3823
|
+
last: "last";
|
|
3824
3824
|
}>>;
|
|
3825
3825
|
}, z$1.core.$strip>>>;
|
|
3826
3826
|
}, z$1.core.$strip>;
|
|
@@ -3836,8 +3836,8 @@ declare const caseDetailSchema: z$1.ZodObject<{
|
|
|
3836
3836
|
status: z$1.ZodEnum<{
|
|
3837
3837
|
error: "error";
|
|
3838
3838
|
running: "running";
|
|
3839
|
-
ok: "ok";
|
|
3840
3839
|
cancelled: "cancelled";
|
|
3840
|
+
ok: "ok";
|
|
3841
3841
|
}>;
|
|
3842
3842
|
attributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
|
|
3843
3843
|
error: z$1.ZodOptional<z$1.ZodObject<{
|
|
@@ -3888,8 +3888,8 @@ declare const caseDetailSchema: z$1.ZodObject<{
|
|
|
3888
3888
|
}>>;
|
|
3889
3889
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3890
3890
|
all: "all";
|
|
3891
|
-
last: "last";
|
|
3892
3891
|
sum: "sum";
|
|
3892
|
+
last: "last";
|
|
3893
3893
|
}>>;
|
|
3894
3894
|
}, z$1.core.$strip>>>;
|
|
3895
3895
|
}, z$1.core.$strip>;
|
|
@@ -3949,10 +3949,10 @@ declare const caseDetailSchema: z$1.ZodObject<{
|
|
|
3949
3949
|
namespace: z$1.ZodString;
|
|
3950
3950
|
key: z$1.ZodString;
|
|
3951
3951
|
status: z$1.ZodEnum<{
|
|
3952
|
+
bypass: "bypass";
|
|
3953
|
+
refresh: "refresh";
|
|
3952
3954
|
hit: "hit";
|
|
3953
3955
|
miss: "miss";
|
|
3954
|
-
refresh: "refresh";
|
|
3955
|
-
bypass: "bypass";
|
|
3956
3956
|
}>;
|
|
3957
3957
|
read: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
3958
3958
|
stored: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
@@ -4023,8 +4023,8 @@ declare const evalChartBuiltinMetricSchema: z$1.ZodEnum<{
|
|
|
4023
4023
|
type EvalChartBuiltinMetric = z$1.infer<typeof evalChartBuiltinMetricSchema>;
|
|
4024
4024
|
/** Reducer applied to a numeric column across all cases of a single run. */
|
|
4025
4025
|
declare const evalChartAggregateSchema: z$1.ZodEnum<{
|
|
4026
|
-
sum: "sum";
|
|
4027
4026
|
avg: "avg";
|
|
4027
|
+
sum: "sum";
|
|
4028
4028
|
min: "min";
|
|
4029
4029
|
max: "max";
|
|
4030
4030
|
latest: "latest";
|
|
@@ -4039,9 +4039,9 @@ type EvalChartAggregate = z$1.infer<typeof evalChartAggregateSchema>;
|
|
|
4039
4039
|
declare const evalChartColorSchema: z$1.ZodEnum<{
|
|
4040
4040
|
success: "success";
|
|
4041
4041
|
error: "error";
|
|
4042
|
-
warning: "warning";
|
|
4043
4042
|
accent: "accent";
|
|
4044
4043
|
accentDim: "accentDim";
|
|
4044
|
+
warning: "warning";
|
|
4045
4045
|
textMuted: "textMuted";
|
|
4046
4046
|
}>;
|
|
4047
4047
|
/** Semantic color token resolved to a theme color by the web UI. */
|
|
@@ -4068,9 +4068,9 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
4068
4068
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4069
4069
|
success: "success";
|
|
4070
4070
|
error: "error";
|
|
4071
|
-
warning: "warning";
|
|
4072
4071
|
accent: "accent";
|
|
4073
4072
|
accentDim: "accentDim";
|
|
4073
|
+
warning: "warning";
|
|
4074
4074
|
textMuted: "textMuted";
|
|
4075
4075
|
}>>;
|
|
4076
4076
|
axis: z$1.ZodOptional<z$1.ZodEnum<{
|
|
@@ -4081,8 +4081,8 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
4081
4081
|
source: z$1.ZodLiteral<"column">;
|
|
4082
4082
|
key: z$1.ZodString;
|
|
4083
4083
|
aggregate: z$1.ZodEnum<{
|
|
4084
|
-
sum: "sum";
|
|
4085
4084
|
avg: "avg";
|
|
4085
|
+
sum: "sum";
|
|
4086
4086
|
min: "min";
|
|
4087
4087
|
max: "max";
|
|
4088
4088
|
latest: "latest";
|
|
@@ -4092,9 +4092,9 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
4092
4092
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4093
4093
|
success: "success";
|
|
4094
4094
|
error: "error";
|
|
4095
|
-
warning: "warning";
|
|
4096
4095
|
accent: "accent";
|
|
4097
4096
|
accentDim: "accentDim";
|
|
4097
|
+
warning: "warning";
|
|
4098
4098
|
textMuted: "textMuted";
|
|
4099
4099
|
}>>;
|
|
4100
4100
|
axis: z$1.ZodOptional<z$1.ZodEnum<{
|
|
@@ -4116,8 +4116,8 @@ declare const evalChartTooltipExtraSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObj
|
|
|
4116
4116
|
source: z$1.ZodLiteral<"column">;
|
|
4117
4117
|
key: z$1.ZodString;
|
|
4118
4118
|
aggregate: z$1.ZodEnum<{
|
|
4119
|
-
sum: "sum";
|
|
4120
4119
|
avg: "avg";
|
|
4120
|
+
sum: "sum";
|
|
4121
4121
|
min: "min";
|
|
4122
4122
|
max: "max";
|
|
4123
4123
|
latest: "latest";
|
|
@@ -4151,9 +4151,9 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
|
|
|
4151
4151
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4152
4152
|
success: "success";
|
|
4153
4153
|
error: "error";
|
|
4154
|
-
warning: "warning";
|
|
4155
4154
|
accent: "accent";
|
|
4156
4155
|
accentDim: "accentDim";
|
|
4156
|
+
warning: "warning";
|
|
4157
4157
|
textMuted: "textMuted";
|
|
4158
4158
|
}>>;
|
|
4159
4159
|
axis: z$1.ZodOptional<z$1.ZodEnum<{
|
|
@@ -4164,8 +4164,8 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
|
|
|
4164
4164
|
source: z$1.ZodLiteral<"column">;
|
|
4165
4165
|
key: z$1.ZodString;
|
|
4166
4166
|
aggregate: z$1.ZodEnum<{
|
|
4167
|
-
sum: "sum";
|
|
4168
4167
|
avg: "avg";
|
|
4168
|
+
sum: "sum";
|
|
4169
4169
|
min: "min";
|
|
4170
4170
|
max: "max";
|
|
4171
4171
|
latest: "latest";
|
|
@@ -4175,9 +4175,9 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
|
|
|
4175
4175
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4176
4176
|
success: "success";
|
|
4177
4177
|
error: "error";
|
|
4178
|
-
warning: "warning";
|
|
4179
4178
|
accent: "accent";
|
|
4180
4179
|
accentDim: "accentDim";
|
|
4180
|
+
warning: "warning";
|
|
4181
4181
|
textMuted: "textMuted";
|
|
4182
4182
|
}>>;
|
|
4183
4183
|
axis: z$1.ZodOptional<z$1.ZodEnum<{
|
|
@@ -4206,8 +4206,8 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
|
|
|
4206
4206
|
source: z$1.ZodLiteral<"column">;
|
|
4207
4207
|
key: z$1.ZodString;
|
|
4208
4208
|
aggregate: z$1.ZodEnum<{
|
|
4209
|
-
sum: "sum";
|
|
4210
4209
|
avg: "avg";
|
|
4210
|
+
sum: "sum";
|
|
4211
4211
|
min: "min";
|
|
4212
4212
|
max: "max";
|
|
4213
4213
|
latest: "latest";
|
|
@@ -4241,9 +4241,9 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
|
|
|
4241
4241
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4242
4242
|
success: "success";
|
|
4243
4243
|
error: "error";
|
|
4244
|
-
warning: "warning";
|
|
4245
4244
|
accent: "accent";
|
|
4246
4245
|
accentDim: "accentDim";
|
|
4246
|
+
warning: "warning";
|
|
4247
4247
|
textMuted: "textMuted";
|
|
4248
4248
|
}>>;
|
|
4249
4249
|
axis: z$1.ZodOptional<z$1.ZodEnum<{
|
|
@@ -4254,8 +4254,8 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
|
|
|
4254
4254
|
source: z$1.ZodLiteral<"column">;
|
|
4255
4255
|
key: z$1.ZodString;
|
|
4256
4256
|
aggregate: z$1.ZodEnum<{
|
|
4257
|
-
sum: "sum";
|
|
4258
4257
|
avg: "avg";
|
|
4258
|
+
sum: "sum";
|
|
4259
4259
|
min: "min";
|
|
4260
4260
|
max: "max";
|
|
4261
4261
|
latest: "latest";
|
|
@@ -4265,9 +4265,9 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
|
|
|
4265
4265
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4266
4266
|
success: "success";
|
|
4267
4267
|
error: "error";
|
|
4268
|
-
warning: "warning";
|
|
4269
4268
|
accent: "accent";
|
|
4270
4269
|
accentDim: "accentDim";
|
|
4270
|
+
warning: "warning";
|
|
4271
4271
|
textMuted: "textMuted";
|
|
4272
4272
|
}>>;
|
|
4273
4273
|
axis: z$1.ZodOptional<z$1.ZodEnum<{
|
|
@@ -4296,8 +4296,8 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
|
|
|
4296
4296
|
source: z$1.ZodLiteral<"column">;
|
|
4297
4297
|
key: z$1.ZodString;
|
|
4298
4298
|
aggregate: z$1.ZodEnum<{
|
|
4299
|
-
sum: "sum";
|
|
4300
4299
|
avg: "avg";
|
|
4300
|
+
sum: "sum";
|
|
4301
4301
|
min: "min";
|
|
4302
4302
|
max: "max";
|
|
4303
4303
|
latest: "latest";
|
|
@@ -4315,10 +4315,10 @@ declare const runManifestSchema: z$1.ZodObject<{
|
|
|
4315
4315
|
shortId: z$1.ZodString;
|
|
4316
4316
|
status: z$1.ZodEnum<{
|
|
4317
4317
|
error: "error";
|
|
4318
|
-
running: "running";
|
|
4319
|
-
cancelled: "cancelled";
|
|
4320
4318
|
pending: "pending";
|
|
4319
|
+
running: "running";
|
|
4321
4320
|
completed: "completed";
|
|
4321
|
+
cancelled: "cancelled";
|
|
4322
4322
|
}>;
|
|
4323
4323
|
startedAt: z$1.ZodString;
|
|
4324
4324
|
endedAt: z$1.ZodNullable<z$1.ZodString>;
|
|
@@ -4341,9 +4341,9 @@ declare const runManifestSchema: z$1.ZodObject<{
|
|
|
4341
4341
|
median: "median";
|
|
4342
4342
|
}>>>;
|
|
4343
4343
|
cacheMode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4344
|
-
refresh: "refresh";
|
|
4345
|
-
bypass: "bypass";
|
|
4346
4344
|
use: "use";
|
|
4345
|
+
bypass: "bypass";
|
|
4346
|
+
refresh: "refresh";
|
|
4347
4347
|
}>>;
|
|
4348
4348
|
}, z$1.core.$strip>;
|
|
4349
4349
|
/** Persisted lifecycle metadata for a single eval run. */
|
|
@@ -4353,10 +4353,10 @@ declare const runSummarySchema: z$1.ZodObject<{
|
|
|
4353
4353
|
runId: z$1.ZodString;
|
|
4354
4354
|
status: z$1.ZodEnum<{
|
|
4355
4355
|
error: "error";
|
|
4356
|
-
running: "running";
|
|
4357
|
-
cancelled: "cancelled";
|
|
4358
4356
|
pending: "pending";
|
|
4357
|
+
running: "running";
|
|
4359
4358
|
completed: "completed";
|
|
4359
|
+
cancelled: "cancelled";
|
|
4360
4360
|
}>;
|
|
4361
4361
|
totalCases: z$1.ZodNumber;
|
|
4362
4362
|
passedCases: z$1.ZodNumber;
|
|
@@ -5150,8 +5150,8 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
|
|
|
5150
5150
|
}>>;
|
|
5151
5151
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
5152
5152
|
all: "all";
|
|
5153
|
-
last: "last";
|
|
5154
5153
|
sum: "sum";
|
|
5154
|
+
last: "last";
|
|
5155
5155
|
}>>;
|
|
5156
5156
|
transform: z$1.ZodOptional<z$1.ZodCustom<TraceAttributeTransform, TraceAttributeTransform>>;
|
|
5157
5157
|
}, z$1.core.$strip>>>;
|
|
@@ -5174,11 +5174,11 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
|
|
|
5174
5174
|
key: z$1.ZodString;
|
|
5175
5175
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
5176
5176
|
aggregate: z$1.ZodEnum<{
|
|
5177
|
-
last: "last";
|
|
5178
|
-
sum: "sum";
|
|
5179
5177
|
avg: "avg";
|
|
5178
|
+
sum: "sum";
|
|
5180
5179
|
min: "min";
|
|
5181
5180
|
max: "max";
|
|
5181
|
+
last: "last";
|
|
5182
5182
|
}>;
|
|
5183
5183
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
5184
5184
|
number: "number";
|
|
@@ -5527,9 +5527,9 @@ declare function extractApiCalls(spans: EvalTraceSpan[], config: ResolvedApiCall
|
|
|
5527
5527
|
* - `refresh`: never read, always write (forces re-execution and overwrites).
|
|
5528
5528
|
*/
|
|
5529
5529
|
declare const cacheModeSchema: z$1.ZodEnum<{
|
|
5530
|
-
refresh: "refresh";
|
|
5531
|
-
bypass: "bypass";
|
|
5532
5530
|
use: "use";
|
|
5531
|
+
bypass: "bypass";
|
|
5532
|
+
refresh: "refresh";
|
|
5533
5533
|
}>;
|
|
5534
5534
|
/** Mode controlling how cached spans behave during a run. */
|
|
5535
5535
|
type CacheMode = z$1.infer<typeof cacheModeSchema>;
|
|
@@ -5550,10 +5550,10 @@ declare const cacheOperationTypeSchema: z$1.ZodEnum<{
|
|
|
5550
5550
|
type CacheOperationType = z$1.infer<typeof cacheOperationTypeSchema>;
|
|
5551
5551
|
/** Status of a cache lookup recorded on a span or case scope. */
|
|
5552
5552
|
declare const cacheStatusSchema: z$1.ZodEnum<{
|
|
5553
|
+
bypass: "bypass";
|
|
5554
|
+
refresh: "refresh";
|
|
5553
5555
|
hit: "hit";
|
|
5554
5556
|
miss: "miss";
|
|
5555
|
-
refresh: "refresh";
|
|
5556
|
-
bypass: "bypass";
|
|
5557
5557
|
}>;
|
|
5558
5558
|
/** Status of a cache lookup recorded on a span or case scope. */
|
|
5559
5559
|
type CacheStatus = z$1.infer<typeof cacheStatusSchema>;
|
|
@@ -5570,10 +5570,10 @@ declare const traceCacheRefSchema: z$1.ZodObject<{
|
|
|
5570
5570
|
namespace: z$1.ZodString;
|
|
5571
5571
|
key: z$1.ZodString;
|
|
5572
5572
|
status: z$1.ZodEnum<{
|
|
5573
|
+
bypass: "bypass";
|
|
5574
|
+
refresh: "refresh";
|
|
5573
5575
|
hit: "hit";
|
|
5574
5576
|
miss: "miss";
|
|
5575
|
-
refresh: "refresh";
|
|
5576
|
-
bypass: "bypass";
|
|
5577
5577
|
}>;
|
|
5578
5578
|
read: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
5579
5579
|
stored: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
@@ -5651,8 +5651,8 @@ declare const cacheRecordingSchema: z$1.ZodObject<{
|
|
|
5651
5651
|
finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
|
|
5652
5652
|
error: "error";
|
|
5653
5653
|
running: "running";
|
|
5654
|
-
ok: "ok";
|
|
5655
5654
|
cancelled: "cancelled";
|
|
5655
|
+
ok: "ok";
|
|
5656
5656
|
}>>;
|
|
5657
5657
|
finalError: z$1.ZodOptional<z$1.ZodObject<{
|
|
5658
5658
|
name: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -5724,8 +5724,8 @@ declare const cacheEntrySchema: z$1.ZodObject<{
|
|
|
5724
5724
|
finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
|
|
5725
5725
|
error: "error";
|
|
5726
5726
|
running: "running";
|
|
5727
|
-
ok: "ok";
|
|
5728
5727
|
cancelled: "cancelled";
|
|
5728
|
+
ok: "ok";
|
|
5729
5729
|
}>>;
|
|
5730
5730
|
finalError: z$1.ZodOptional<z$1.ZodObject<{
|
|
5731
5731
|
name: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -5813,8 +5813,8 @@ declare const cacheEntryWithDebugKeySchema: z$1.ZodObject<{
|
|
|
5813
5813
|
finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
|
|
5814
5814
|
error: "error";
|
|
5815
5815
|
running: "running";
|
|
5816
|
-
ok: "ok";
|
|
5817
5816
|
cancelled: "cancelled";
|
|
5817
|
+
ok: "ok";
|
|
5818
5818
|
}>>;
|
|
5819
5819
|
finalError: z$1.ZodOptional<z$1.ZodObject<{
|
|
5820
5820
|
name: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -5902,8 +5902,8 @@ declare const cacheFileSchema: z$1.ZodObject<{
|
|
|
5902
5902
|
finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
|
|
5903
5903
|
error: "error";
|
|
5904
5904
|
running: "running";
|
|
5905
|
-
ok: "ok";
|
|
5906
5905
|
cancelled: "cancelled";
|
|
5906
|
+
ok: "ok";
|
|
5907
5907
|
}>>;
|
|
5908
5908
|
finalError: z$1.ZodOptional<z$1.ZodObject<{
|
|
5909
5909
|
name: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -6089,9 +6089,9 @@ declare const createRunRequestSchema: z$1.ZodObject<{
|
|
|
6089
6089
|
trials: z$1.ZodNumber;
|
|
6090
6090
|
cache: z$1.ZodOptional<z$1.ZodObject<{
|
|
6091
6091
|
mode: z$1.ZodDefault<z$1.ZodEnum<{
|
|
6092
|
-
refresh: "refresh";
|
|
6093
|
-
bypass: "bypass";
|
|
6094
6092
|
use: "use";
|
|
6093
|
+
bypass: "bypass";
|
|
6094
|
+
refresh: "refresh";
|
|
6095
6095
|
}>>;
|
|
6096
6096
|
}, z$1.core.$strip>>;
|
|
6097
6097
|
manualInputs: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
|
package/dist/index.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { $ as apiCallMetricSchema, $n as getCurrentScope, $t as cacheDebugKeyEntrySchema, A as createRunRequestSchema, An as repoFileRefSchema, At as runLogEntrySchema, B as getNestedAttribute, Bn as deserializeCacheValue, Bt as manualInputNumberFieldSchema, Cn as cellValueSchema, Ct as caseRowSchema, Dn as fileRefSchema, Dt as evalStatItemSchema, En as columnKindSchema, Et as evalStatAggregateSchema, F as extractApiCalls, Fn as evalSpan, Ft as manualInputBooleanFieldSchema, G as deriveStatusFromChildStatuses, Gn as readManualInputFile, Gt as evalChartAxisSchema, H as getEvalDisplayStatus, Hn as serializeCacheValue, Ht as manualInputSelectOptionSchema, I as extractLlmCalls, In as evalTracer, It as manualInputDescriptorSchema, J as DEFAULT_API_CALLS_CONFIG, Jn as advanceEvalTime, Jt as evalChartConfigSchema, K as runManifestSchema, Kn as evalExpect, Kt as evalChartBuiltinMetricSchema, L as simulateLlmCallCost, Ln as hashCacheKey, Lt as manualInputFieldDescriptorSchema, M as sseEnvelopeSchema, Mn as z, Mt as runLogLocationSchema, N as extractCacheEntries, Nn as buildTraceTree, Nt as runLogPhaseSchema, O as configReloadStateSchema, On as jsonCellSchema, Ot as evalStatsConfigSchema, P as extractCacheHits, Pn as captureEvalSpanError, Pt as scoreTraceSchema, Q as apiCallMetricPlacementSchema, Qn as evalLog, Qt as evalChartsConfigSchema, R as simulateTokenAllocation, Rn as hashCacheKeySync, Rt as manualInputJsonFieldSchema, Sn as traceSpanWarningSchema, St as caseDetailSchema, Tn as columnFormatSchema, Tt as evalFreshnessStatusSchema, U as deriveScopedSummaryFromCases, Un as repoFile, Ut as manualInputTextFieldSchema, V as getEvalTitle, Vn as serializeCacheRecording, Vt as manualInputSelectFieldSchema, W as deriveStatusFromCaseRows, Wn as manualInputFileValueSchema, Wt as evalChartAggregateSchema, X as agentEvalsConfigSchema, Xt as evalChartTooltipExtraSchema, Y as DEFAULT_LLM_CALLS_CONFIG, Yn as appendToEvalOutput, Yt as evalChartMetricSchema, Z as 
apiCallMetricFormatSchema, Zn as evalAssert, Zt as evalChartTypeSchema, _n as traceDisplayConfigSchema, _t as buildCaseKey, an as cacheModeSchema, ar as nextEvalId, at as llmCallCostCurrencySchema, bn as traceSpanKindSchema, bt as getCaseRowEvalKey, cn as cacheRecordingSchema, cr as runInExistingEvalScope, ct as llmCallMetricSchema, dn as spanCacheOptionsSchema, dr as startEvalBackgroundJob, dt as llmCallsConfigSchema, en as cacheDebugKeyFileSchema, er as getEvalCaseInput, et as apiCallsConfigSchema, fn as traceCacheRefSchema, fr as defineEval, ft as removeDefaultConfigSchema, gn as traceAttributeDisplaySchema, gt as trialSelectionModeSchema, hn as traceAttributeDisplayPlacementSchema, ht as runLogsConfigSchema, in as cacheListItemSchema, ir as mergeEvalOutput, it as evalDeriveConfigSchema, j as updateManualScoreRequestSchema, jn as runArtifactRefSchema, jt as runLogLevelSchema, k as configReloadStatusSchema, kn as numberDisplayOptionsSchema, kt as evalSummarySchema, ln as cacheStatusSchema, lr as setEvalOutput, lt as llmCallPricingRateSchema, mn as traceAttributeDisplayInputSchema, mt as resolveLlmCallsConfig, nn as cacheEntryWithDebugKeySchema, nr as incrementEvalOutput, nt as evalColumnOverrideSchema, on as cacheOperationTypeSchema, or as runInEvalRuntimeScope, ot as llmCallMetricFormatSchema, pn as traceAttributeDisplayFormatSchema, pr as getEvalRegistry, pt as resolveApiCallsConfig, q as runSummarySchema, qn as EvalAssertionError, qt as evalChartColorSchema, rn as cacheFileSchema, rr as isInEvalScope, rt as evalColumnsSchema, sn as cacheRecordingOpSchema, sr as runInEvalScope, st as llmCallMetricPlacementSchema, tn as cacheEntrySchema, tr as getEvalStartTime, tt as defaultConfigKeySchema, un as serializedCacheSpanSchema, ur as setScopeCacheContext, ut as llmCallPricingSchema, vn as traceDisplayInputConfigSchema, vt as buildEvalKey, wn as columnDefSchema, wt as discoveryIssueSchema, xn as traceSpanSchema, xt as assertionFailureSchema, yn as 
traceSpanErrorSchema, yt as getCaseRowCaseKey, z as applyDerivedCallAttributes, zn as deserializeCacheRecording, zt as manualInputMultilineFieldSchema } from "./runOrchestration-ZpN7xty_.mjs";
|
|
2
|
-
import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-huuJbDNb.mjs";
|
|
3
|
-
import "./src-
|
|
1
|
+
import { $ as apiCallMetricSchema, $n as getCurrentScope, $t as cacheDebugKeyEntrySchema, A as createRunRequestSchema, An as repoFileRefSchema, At as runLogEntrySchema, B as getNestedAttribute, Bn as deserializeCacheValue, Bt as manualInputNumberFieldSchema, Cn as cellValueSchema, Ct as caseRowSchema, Dn as fileRefSchema, Dt as evalStatItemSchema, En as columnKindSchema, Et as evalStatAggregateSchema, F as extractApiCalls, Fn as evalSpan, Ft as manualInputBooleanFieldSchema, G as deriveStatusFromChildStatuses, Gn as readManualInputFile, Gt as evalChartAxisSchema, H as getEvalDisplayStatus, Hn as serializeCacheValue, Ht as manualInputSelectOptionSchema, I as extractLlmCalls, In as evalTracer, It as manualInputDescriptorSchema, J as DEFAULT_API_CALLS_CONFIG, Jn as advanceEvalTime, Jt as evalChartConfigSchema, K as runManifestSchema, Kn as evalExpect, Kt as evalChartBuiltinMetricSchema, L as simulateLlmCallCost, Ln as hashCacheKey, Lt as manualInputFieldDescriptorSchema, M as sseEnvelopeSchema, Mn as z, Mt as runLogLocationSchema, N as extractCacheEntries, Nn as buildTraceTree, Nt as runLogPhaseSchema, O as configReloadStateSchema, On as jsonCellSchema, Ot as evalStatsConfigSchema, P as extractCacheHits, Pn as captureEvalSpanError, Pt as scoreTraceSchema, Q as apiCallMetricPlacementSchema, Qn as evalLog, Qt as evalChartsConfigSchema, R as simulateTokenAllocation, Rn as hashCacheKeySync, Rt as manualInputJsonFieldSchema, Sn as traceSpanWarningSchema, St as caseDetailSchema, Tn as columnFormatSchema, Tt as evalFreshnessStatusSchema, U as deriveScopedSummaryFromCases, Un as repoFile, Ut as manualInputTextFieldSchema, V as getEvalTitle, Vn as serializeCacheRecording, Vt as manualInputSelectFieldSchema, W as deriveStatusFromCaseRows, Wn as manualInputFileValueSchema, Wt as evalChartAggregateSchema, X as agentEvalsConfigSchema, Xt as evalChartTooltipExtraSchema, Y as DEFAULT_LLM_CALLS_CONFIG, Yn as appendToEvalOutput, Yt as evalChartMetricSchema, Z as 
apiCallMetricFormatSchema, Zn as evalAssert, Zt as evalChartTypeSchema, _n as traceDisplayConfigSchema, _t as buildCaseKey, an as cacheModeSchema, ar as nextEvalId, at as llmCallCostCurrencySchema, bn as traceSpanKindSchema, bt as getCaseRowEvalKey, cn as cacheRecordingSchema, cr as runInExistingEvalScope, ct as llmCallMetricSchema, dn as spanCacheOptionsSchema, dr as startEvalBackgroundJob, dt as llmCallsConfigSchema, en as cacheDebugKeyFileSchema, er as getEvalCaseInput, et as apiCallsConfigSchema, fn as traceCacheRefSchema, fr as defineEval, ft as removeDefaultConfigSchema, gn as traceAttributeDisplaySchema, gt as trialSelectionModeSchema, hn as traceAttributeDisplayPlacementSchema, ht as runLogsConfigSchema, in as cacheListItemSchema, ir as mergeEvalOutput, it as evalDeriveConfigSchema, j as updateManualScoreRequestSchema, jn as runArtifactRefSchema, jt as runLogLevelSchema, k as configReloadStatusSchema, kn as numberDisplayOptionsSchema, kt as evalSummarySchema, ln as cacheStatusSchema, lr as setEvalOutput, lt as llmCallPricingRateSchema, mn as traceAttributeDisplayInputSchema, mt as resolveLlmCallsConfig, nn as cacheEntryWithDebugKeySchema, nr as incrementEvalOutput, nt as evalColumnOverrideSchema, on as cacheOperationTypeSchema, or as runInEvalRuntimeScope, ot as llmCallMetricFormatSchema, pn as traceAttributeDisplayFormatSchema, pr as getEvalRegistry, pt as resolveApiCallsConfig, q as runSummarySchema, qn as EvalAssertionError, qt as evalChartColorSchema, rn as cacheFileSchema, rr as isInEvalScope, rt as evalColumnsSchema, sn as cacheRecordingOpSchema, sr as runInEvalScope, st as llmCallMetricPlacementSchema, tn as cacheEntrySchema, tr as getEvalStartTime, tt as defaultConfigKeySchema, un as serializedCacheSpanSchema, ur as setScopeCacheContext, ut as llmCallPricingSchema, vn as traceDisplayInputConfigSchema, vt as buildEvalKey, wn as columnDefSchema, wt as discoveryIssueSchema, xn as traceSpanSchema, xt as assertionFailureSchema, yn as 
traceSpanErrorSchema, yt as getCaseRowCaseKey, z as applyDerivedCallAttributes, zn as deserializeCacheRecording, zt as manualInputMultilineFieldSchema } from "./runOrchestration-CYhn4DJ0.mjs";
|
|
2
|
+
import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-CG66f0GN.mjs";
|
|
3
|
+
import "./src-B0b1vgk0.mjs";
|
|
4
4
|
export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, advanceEvalTime, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, applyDerivedCallAttributes, assertionFailureSchema, buildCaseKey, buildEvalKey, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, cleanupStagedManualInputFiles, columnDefSchema, columnFormatSchema, columnKindSchema, configReloadStateSchema, configReloadStatusSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, discoveryIssueSchema, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalColumnOverrideSchema, evalColumnsSchema, evalDeriveConfigSchema, evalExpect, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCaseRowCaseKey, getCaseRowEvalKey, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalStartTime, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, jsonCellSchema, llmCallCostCurrencySchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingRateSchema, llmCallPricingSchema, 
llmCallsConfigSchema, manualInputBooleanFieldSchema, manualInputDescriptorSchema, manualInputFieldDescriptorSchema, manualInputFileValueSchema, manualInputJsonFieldSchema, manualInputMultilineFieldSchema, manualInputNumberFieldSchema, manualInputSelectFieldSchema, manualInputSelectOptionSchema, manualInputTextFieldSchema, materializeManualInputFiles, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, readManualInputFile, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, spanCacheOptionsSchema, sseEnvelopeSchema, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
|
package/dist/runChild.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { A as createRunRequestSchema, C as loadConfig, D as createFsCacheStore, It as manualInputDescriptorSchema, K as runManifestSchema, Ot as evalStatsConfigSchema, Qt as evalChartsConfigSchema, Xn as configureEvalRunLogs, q as runSummarySchema, r as getTargetEvals$1, t as executeRun, vt as buildEvalKey, wn as columnDefSchema, x as parseEvalDiscovery } from "./runOrchestration-
|
|
1
|
+
import { A as createRunRequestSchema, C as loadConfig, D as createFsCacheStore, It as manualInputDescriptorSchema, K as runManifestSchema, Ot as evalStatsConfigSchema, Qt as evalChartsConfigSchema, Xn as configureEvalRunLogs, q as runSummarySchema, r as getTargetEvals$1, t as executeRun, vt as buildEvalKey, wn as columnDefSchema, x as parseEvalDiscovery } from "./runOrchestration-CYhn4DJ0.mjs";
|
|
2
2
|
import { createHash } from "node:crypto";
|
|
3
3
|
import { readFile } from "node:fs/promises";
|
|
4
4
|
import { relative } from "node:path";
|