@ls-stack/agent-eval 0.29.0 → 0.30.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-D6-msfKP.mjs → app-CbOZBHju.mjs} +4 -4
- package/dist/apps/web/dist/assets/{index-BCr6J8Uj.js → index-DEikHy2a.js} +35 -35
- package/dist/apps/web/dist/index.html +1 -1
- package/dist/bin.mjs +1 -1
- package/dist/{cli-CIc_gBNM.mjs → cli-CiFOqMwS.mjs} +3 -3
- package/dist/index.d.mts +133 -109
- package/dist/index.mjs +3 -3
- package/dist/runChild.mjs +1 -1
- package/dist/{runOrchestration-CIARrLs6.mjs → runOrchestration-CO3Vf0cQ.mjs} +35 -9
- package/dist/{runner-Bq1f9B9d.mjs → runner-4pF_Qrc9.mjs} +1 -1
- package/dist/{runner-1F8MeY5V.mjs → runner-CXHkf7ih.mjs} +2 -2
- package/dist/src-BiPLv9ya.mjs +3 -0
- package/package.json +1 -1
- package/skills/agent-eval/SKILL.md +8 -5
- package/dist/src-CkWT1iSu.mjs +0 -3
package/dist/index.d.mts
CHANGED
|
@@ -700,10 +700,10 @@ declare const apiCallMetricPlacementSchema$1: z$1.ZodEnum<{
|
|
|
700
700
|
}>;
|
|
701
701
|
/** Placement option for an API-call metric. */
|
|
702
702
|
type ApiCallMetricPlacement$1 = z$1.infer<typeof apiCallMetricPlacementSchema$1>;
|
|
703
|
-
/** Context passed to
|
|
703
|
+
/** Context passed to LLM/API-call derived attribute functions. */
|
|
704
704
|
type CallDerivedAttributeContext$1 = {
|
|
705
|
-
/**
|
|
706
|
-
span: EvalTraceSpan$2; /** Dot-path helper for reading from
|
|
705
|
+
/** Current attributes from the matching trace span. */attributes: Record<string, unknown> | undefined; /** Matching trace span. */
|
|
706
|
+
span: EvalTraceSpan$2; /** Dot-path helper for reading from the current span attributes. */
|
|
707
707
|
get: (path: string) => unknown;
|
|
708
708
|
};
|
|
709
709
|
/**
|
|
@@ -711,14 +711,25 @@ type CallDerivedAttributeContext$1 = {
|
|
|
711
711
|
* LLM/API-call span. Return `undefined` to omit the attribute for that span.
|
|
712
712
|
*/
|
|
713
713
|
type CallDerivedAttribute$1 = (ctx: CallDerivedAttributeContext$1) => unknown;
|
|
714
|
+
/**
|
|
715
|
+
* Runner-side function used to derive multiple span attributes from a matching
|
|
716
|
+
* LLM/API-call span. Returned object keys are dot-paths under
|
|
717
|
+
* `span.attributes`; `undefined` values are skipped.
|
|
718
|
+
*/
|
|
719
|
+
type CallDerivedAttributesFn$1 = (ctx: CallDerivedAttributeContext$1) => Record<string, unknown> | undefined;
|
|
714
720
|
/** One resolved derived span attribute rule. */
|
|
715
721
|
type ResolvedCallDerivedAttribute$1 = {
|
|
716
|
-
/** Dot-path where
|
|
722
|
+
/** Dot-path where one derived value is persisted on `span.attributes`. */path?: string;
|
|
717
723
|
/**
|
|
718
|
-
* Function that derives
|
|
724
|
+
* Function that derives one persisted value for each matching span. Omitted
|
|
719
725
|
* after this config is serialized to the browser.
|
|
720
726
|
*/
|
|
721
727
|
compute?: CallDerivedAttribute$1;
|
|
728
|
+
/**
|
|
729
|
+
* Function that derives multiple persisted values for each matching span.
|
|
730
|
+
* Omitted after this config is serialized to the browser.
|
|
731
|
+
*/
|
|
732
|
+
computeMany?: CallDerivedAttributesFn$1;
|
|
722
733
|
};
|
|
723
734
|
/** Resolved LLM-calls config sent to the UI with all defaults applied. */
|
|
724
735
|
type ResolvedLlmCallsConfig$1 = {
|
|
@@ -3004,9 +3015,9 @@ declare const traceAttributeDisplaySchema: z$1.ZodObject<{
|
|
|
3004
3015
|
subtree: "subtree";
|
|
3005
3016
|
}>>;
|
|
3006
3017
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3007
|
-
sum: "sum";
|
|
3008
|
-
last: "last";
|
|
3009
3018
|
all: "all";
|
|
3019
|
+
last: "last";
|
|
3020
|
+
sum: "sum";
|
|
3010
3021
|
}>>;
|
|
3011
3022
|
}, z$1.core.$strip>;
|
|
3012
3023
|
/**
|
|
@@ -3040,9 +3051,9 @@ declare const traceDisplayConfigSchema: z$1.ZodObject<{
|
|
|
3040
3051
|
subtree: "subtree";
|
|
3041
3052
|
}>>;
|
|
3042
3053
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3043
|
-
sum: "sum";
|
|
3044
|
-
last: "last";
|
|
3045
3054
|
all: "all";
|
|
3055
|
+
last: "last";
|
|
3056
|
+
sum: "sum";
|
|
3046
3057
|
}>>;
|
|
3047
3058
|
}, z$1.core.$strip>>>;
|
|
3048
3059
|
}, z$1.core.$strip>;
|
|
@@ -3080,9 +3091,9 @@ declare const traceAttributeDisplayInputSchema: z$1.ZodObject<{
|
|
|
3080
3091
|
subtree: "subtree";
|
|
3081
3092
|
}>>;
|
|
3082
3093
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3083
|
-
sum: "sum";
|
|
3084
|
-
last: "last";
|
|
3085
3094
|
all: "all";
|
|
3095
|
+
last: "last";
|
|
3096
|
+
sum: "sum";
|
|
3086
3097
|
}>>;
|
|
3087
3098
|
transform: z$1.ZodOptional<z$1.ZodCustom<TraceAttributeTransform, TraceAttributeTransform>>;
|
|
3088
3099
|
}, z$1.core.$strip>;
|
|
@@ -3118,9 +3129,9 @@ declare const traceDisplayInputConfigSchema: z$1.ZodObject<{
|
|
|
3118
3129
|
subtree: "subtree";
|
|
3119
3130
|
}>>;
|
|
3120
3131
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3121
|
-
sum: "sum";
|
|
3122
|
-
last: "last";
|
|
3123
3132
|
all: "all";
|
|
3133
|
+
last: "last";
|
|
3134
|
+
sum: "sum";
|
|
3124
3135
|
}>>;
|
|
3125
3136
|
transform: z$1.ZodOptional<z$1.ZodCustom<TraceAttributeTransform, TraceAttributeTransform>>;
|
|
3126
3137
|
}, z$1.core.$strip>>>;
|
|
@@ -3199,11 +3210,11 @@ declare const evalFreshnessStatusSchema: z$1.ZodEnum<{
|
|
|
3199
3210
|
type EvalFreshnessStatus = z$1.infer<typeof evalFreshnessStatusSchema>;
|
|
3200
3211
|
/** Reducer used to collapse a column's per-case values into a single stat. */
|
|
3201
3212
|
declare const evalStatAggregateSchema: z$1.ZodEnum<{
|
|
3202
|
-
|
|
3213
|
+
last: "last";
|
|
3214
|
+
sum: "sum";
|
|
3203
3215
|
min: "min";
|
|
3204
3216
|
max: "max";
|
|
3205
|
-
|
|
3206
|
-
last: "last";
|
|
3217
|
+
avg: "avg";
|
|
3207
3218
|
}>;
|
|
3208
3219
|
/** Reducer used to collapse a column's per-case values into a single stat. */
|
|
3209
3220
|
type EvalStatAggregate = z$1.infer<typeof evalStatAggregateSchema>;
|
|
@@ -3227,11 +3238,11 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
3227
3238
|
key: z$1.ZodString;
|
|
3228
3239
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
3229
3240
|
aggregate: z$1.ZodEnum<{
|
|
3230
|
-
|
|
3241
|
+
last: "last";
|
|
3242
|
+
sum: "sum";
|
|
3231
3243
|
min: "min";
|
|
3232
3244
|
max: "max";
|
|
3233
|
-
|
|
3234
|
-
last: "last";
|
|
3245
|
+
avg: "avg";
|
|
3235
3246
|
}>;
|
|
3236
3247
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3237
3248
|
number: "number";
|
|
@@ -3269,11 +3280,11 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
|
|
|
3269
3280
|
key: z$1.ZodString;
|
|
3270
3281
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
3271
3282
|
aggregate: z$1.ZodEnum<{
|
|
3272
|
-
|
|
3283
|
+
last: "last";
|
|
3284
|
+
sum: "sum";
|
|
3273
3285
|
min: "min";
|
|
3274
3286
|
max: "max";
|
|
3275
|
-
|
|
3276
|
-
last: "last";
|
|
3287
|
+
avg: "avg";
|
|
3277
3288
|
}>;
|
|
3278
3289
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3279
3290
|
number: "number";
|
|
@@ -3348,10 +3359,10 @@ declare const evalSummarySchema: z$1.ZodObject<{
|
|
|
3348
3359
|
caseCount: z$1.ZodNullable<z$1.ZodNumber>;
|
|
3349
3360
|
lastRunStatus: z$1.ZodNullable<z$1.ZodEnum<{
|
|
3350
3361
|
error: "error";
|
|
3351
|
-
pass: "pass";
|
|
3352
|
-
fail: "fail";
|
|
3353
3362
|
running: "running";
|
|
3354
3363
|
cancelled: "cancelled";
|
|
3364
|
+
pass: "pass";
|
|
3365
|
+
fail: "fail";
|
|
3355
3366
|
unscored: "unscored";
|
|
3356
3367
|
}>>;
|
|
3357
3368
|
stats: z$1.ZodOptional<z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
@@ -3370,11 +3381,11 @@ declare const evalSummarySchema: z$1.ZodObject<{
|
|
|
3370
3381
|
key: z$1.ZodString;
|
|
3371
3382
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
3372
3383
|
aggregate: z$1.ZodEnum<{
|
|
3373
|
-
|
|
3384
|
+
last: "last";
|
|
3385
|
+
sum: "sum";
|
|
3374
3386
|
min: "min";
|
|
3375
3387
|
max: "max";
|
|
3376
|
-
|
|
3377
|
-
last: "last";
|
|
3388
|
+
avg: "avg";
|
|
3378
3389
|
}>;
|
|
3379
3390
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3380
3391
|
number: "number";
|
|
@@ -3405,16 +3416,16 @@ declare const evalSummarySchema: z$1.ZodObject<{
|
|
|
3405
3416
|
metrics: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
3406
3417
|
source: z$1.ZodLiteral<"builtin">;
|
|
3407
3418
|
metric: z$1.ZodEnum<{
|
|
3408
|
-
passRate: "passRate";
|
|
3409
3419
|
durationMs: "durationMs";
|
|
3420
|
+
passRate: "passRate";
|
|
3410
3421
|
}>;
|
|
3411
3422
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
3412
3423
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3413
3424
|
success: "success";
|
|
3414
3425
|
error: "error";
|
|
3426
|
+
warning: "warning";
|
|
3415
3427
|
accent: "accent";
|
|
3416
3428
|
accentDim: "accentDim";
|
|
3417
|
-
warning: "warning";
|
|
3418
3429
|
textMuted: "textMuted";
|
|
3419
3430
|
}>>;
|
|
3420
3431
|
axis: z$1.ZodOptional<z$1.ZodEnum<{
|
|
@@ -3425,10 +3436,10 @@ declare const evalSummarySchema: z$1.ZodObject<{
|
|
|
3425
3436
|
source: z$1.ZodLiteral<"column">;
|
|
3426
3437
|
key: z$1.ZodString;
|
|
3427
3438
|
aggregate: z$1.ZodEnum<{
|
|
3428
|
-
|
|
3439
|
+
sum: "sum";
|
|
3429
3440
|
min: "min";
|
|
3430
3441
|
max: "max";
|
|
3431
|
-
|
|
3442
|
+
avg: "avg";
|
|
3432
3443
|
latest: "latest";
|
|
3433
3444
|
passThresholdRate: "passThresholdRate";
|
|
3434
3445
|
}>;
|
|
@@ -3436,9 +3447,9 @@ declare const evalSummarySchema: z$1.ZodObject<{
|
|
|
3436
3447
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3437
3448
|
success: "success";
|
|
3438
3449
|
error: "error";
|
|
3450
|
+
warning: "warning";
|
|
3439
3451
|
accent: "accent";
|
|
3440
3452
|
accentDim: "accentDim";
|
|
3441
|
-
warning: "warning";
|
|
3442
3453
|
textMuted: "textMuted";
|
|
3443
3454
|
}>>;
|
|
3444
3455
|
axis: z$1.ZodOptional<z$1.ZodEnum<{
|
|
@@ -3459,18 +3470,18 @@ declare const evalSummarySchema: z$1.ZodObject<{
|
|
|
3459
3470
|
tooltipExtras: z$1.ZodOptional<z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
3460
3471
|
source: z$1.ZodLiteral<"builtin">;
|
|
3461
3472
|
metric: z$1.ZodEnum<{
|
|
3462
|
-
passRate: "passRate";
|
|
3463
3473
|
durationMs: "durationMs";
|
|
3474
|
+
passRate: "passRate";
|
|
3464
3475
|
}>;
|
|
3465
3476
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
3466
3477
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
3467
3478
|
source: z$1.ZodLiteral<"column">;
|
|
3468
3479
|
key: z$1.ZodString;
|
|
3469
3480
|
aggregate: z$1.ZodEnum<{
|
|
3470
|
-
|
|
3481
|
+
sum: "sum";
|
|
3471
3482
|
min: "min";
|
|
3472
3483
|
max: "max";
|
|
3473
|
-
|
|
3484
|
+
avg: "avg";
|
|
3474
3485
|
latest: "latest";
|
|
3475
3486
|
passThresholdRate: "passThresholdRate";
|
|
3476
3487
|
}>;
|
|
@@ -3566,11 +3577,11 @@ declare const caseRowSchema: z$1.ZodObject<{
|
|
|
3566
3577
|
evalId: z$1.ZodString;
|
|
3567
3578
|
status: z$1.ZodEnum<{
|
|
3568
3579
|
error: "error";
|
|
3569
|
-
|
|
3570
|
-
fail: "fail";
|
|
3580
|
+
pending: "pending";
|
|
3571
3581
|
running: "running";
|
|
3572
3582
|
cancelled: "cancelled";
|
|
3573
|
-
|
|
3583
|
+
pass: "pass";
|
|
3584
|
+
fail: "fail";
|
|
3574
3585
|
}>;
|
|
3575
3586
|
durationMs: z$1.ZodNullable<z$1.ZodNumber>;
|
|
3576
3587
|
costUsd: z$1.ZodOptional<z$1.ZodNullable<z$1.ZodNumber>>;
|
|
@@ -3713,9 +3724,9 @@ declare const scoreTraceSchema: z$1.ZodObject<{
|
|
|
3713
3724
|
subtree: "subtree";
|
|
3714
3725
|
}>>;
|
|
3715
3726
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3716
|
-
sum: "sum";
|
|
3717
|
-
last: "last";
|
|
3718
3727
|
all: "all";
|
|
3728
|
+
last: "last";
|
|
3729
|
+
sum: "sum";
|
|
3719
3730
|
}>>;
|
|
3720
3731
|
}, z$1.core.$strip>>>;
|
|
3721
3732
|
}, z$1.core.$strip>;
|
|
@@ -3730,11 +3741,11 @@ declare const caseDetailSchema: z$1.ZodObject<{
|
|
|
3730
3741
|
evalId: z$1.ZodString;
|
|
3731
3742
|
status: z$1.ZodEnum<{
|
|
3732
3743
|
error: "error";
|
|
3733
|
-
|
|
3734
|
-
fail: "fail";
|
|
3744
|
+
pending: "pending";
|
|
3735
3745
|
running: "running";
|
|
3736
3746
|
cancelled: "cancelled";
|
|
3737
|
-
|
|
3747
|
+
pass: "pass";
|
|
3748
|
+
fail: "fail";
|
|
3738
3749
|
}>;
|
|
3739
3750
|
input: z$1.ZodUnknown;
|
|
3740
3751
|
trace: z$1.ZodArray<z$1.ZodObject<{
|
|
@@ -3799,9 +3810,9 @@ declare const caseDetailSchema: z$1.ZodObject<{
|
|
|
3799
3810
|
subtree: "subtree";
|
|
3800
3811
|
}>>;
|
|
3801
3812
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3802
|
-
sum: "sum";
|
|
3803
|
-
last: "last";
|
|
3804
3813
|
all: "all";
|
|
3814
|
+
last: "last";
|
|
3815
|
+
sum: "sum";
|
|
3805
3816
|
}>>;
|
|
3806
3817
|
}, z$1.core.$strip>>>;
|
|
3807
3818
|
}, z$1.core.$strip>;
|
|
@@ -3868,9 +3879,9 @@ declare const caseDetailSchema: z$1.ZodObject<{
|
|
|
3868
3879
|
subtree: "subtree";
|
|
3869
3880
|
}>>;
|
|
3870
3881
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3871
|
-
sum: "sum";
|
|
3872
|
-
last: "last";
|
|
3873
3882
|
all: "all";
|
|
3883
|
+
last: "last";
|
|
3884
|
+
sum: "sum";
|
|
3874
3885
|
}>>;
|
|
3875
3886
|
}, z$1.core.$strip>>>;
|
|
3876
3887
|
}, z$1.core.$strip>;
|
|
@@ -3930,10 +3941,10 @@ declare const caseDetailSchema: z$1.ZodObject<{
|
|
|
3930
3941
|
namespace: z$1.ZodString;
|
|
3931
3942
|
key: z$1.ZodString;
|
|
3932
3943
|
status: z$1.ZodEnum<{
|
|
3944
|
+
bypass: "bypass";
|
|
3945
|
+
refresh: "refresh";
|
|
3933
3946
|
hit: "hit";
|
|
3934
3947
|
miss: "miss";
|
|
3935
|
-
refresh: "refresh";
|
|
3936
|
-
bypass: "bypass";
|
|
3937
3948
|
}>;
|
|
3938
3949
|
read: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
3939
3950
|
stored: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
@@ -3994,8 +4005,8 @@ type EvalChartType = z$1.infer<typeof evalChartTypeSchema>;
|
|
|
3994
4005
|
* than from a per-case column.
|
|
3995
4006
|
*/
|
|
3996
4007
|
declare const evalChartBuiltinMetricSchema: z$1.ZodEnum<{
|
|
3997
|
-
passRate: "passRate";
|
|
3998
4008
|
durationMs: "durationMs";
|
|
4009
|
+
passRate: "passRate";
|
|
3999
4010
|
}>;
|
|
4000
4011
|
/**
|
|
4001
4012
|
* Run-level metric sourced from the aggregated `RunSummary` for a run, rather
|
|
@@ -4004,10 +4015,10 @@ declare const evalChartBuiltinMetricSchema: z$1.ZodEnum<{
|
|
|
4004
4015
|
type EvalChartBuiltinMetric = z$1.infer<typeof evalChartBuiltinMetricSchema>;
|
|
4005
4016
|
/** Reducer applied to a numeric column across all cases of a single run. */
|
|
4006
4017
|
declare const evalChartAggregateSchema: z$1.ZodEnum<{
|
|
4007
|
-
|
|
4018
|
+
sum: "sum";
|
|
4008
4019
|
min: "min";
|
|
4009
4020
|
max: "max";
|
|
4010
|
-
|
|
4021
|
+
avg: "avg";
|
|
4011
4022
|
latest: "latest";
|
|
4012
4023
|
passThresholdRate: "passThresholdRate";
|
|
4013
4024
|
}>;
|
|
@@ -4020,9 +4031,9 @@ type EvalChartAggregate = z$1.infer<typeof evalChartAggregateSchema>;
|
|
|
4020
4031
|
declare const evalChartColorSchema: z$1.ZodEnum<{
|
|
4021
4032
|
success: "success";
|
|
4022
4033
|
error: "error";
|
|
4034
|
+
warning: "warning";
|
|
4023
4035
|
accent: "accent";
|
|
4024
4036
|
accentDim: "accentDim";
|
|
4025
|
-
warning: "warning";
|
|
4026
4037
|
textMuted: "textMuted";
|
|
4027
4038
|
}>;
|
|
4028
4039
|
/** Semantic color token resolved to a theme color by the web UI. */
|
|
@@ -4042,16 +4053,16 @@ type EvalChartAxis = z$1.infer<typeof evalChartAxisSchema>;
|
|
|
4042
4053
|
declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
4043
4054
|
source: z$1.ZodLiteral<"builtin">;
|
|
4044
4055
|
metric: z$1.ZodEnum<{
|
|
4045
|
-
passRate: "passRate";
|
|
4046
4056
|
durationMs: "durationMs";
|
|
4057
|
+
passRate: "passRate";
|
|
4047
4058
|
}>;
|
|
4048
4059
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
4049
4060
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4050
4061
|
success: "success";
|
|
4051
4062
|
error: "error";
|
|
4063
|
+
warning: "warning";
|
|
4052
4064
|
accent: "accent";
|
|
4053
4065
|
accentDim: "accentDim";
|
|
4054
|
-
warning: "warning";
|
|
4055
4066
|
textMuted: "textMuted";
|
|
4056
4067
|
}>>;
|
|
4057
4068
|
axis: z$1.ZodOptional<z$1.ZodEnum<{
|
|
@@ -4062,10 +4073,10 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
4062
4073
|
source: z$1.ZodLiteral<"column">;
|
|
4063
4074
|
key: z$1.ZodString;
|
|
4064
4075
|
aggregate: z$1.ZodEnum<{
|
|
4065
|
-
|
|
4076
|
+
sum: "sum";
|
|
4066
4077
|
min: "min";
|
|
4067
4078
|
max: "max";
|
|
4068
|
-
|
|
4079
|
+
avg: "avg";
|
|
4069
4080
|
latest: "latest";
|
|
4070
4081
|
passThresholdRate: "passThresholdRate";
|
|
4071
4082
|
}>;
|
|
@@ -4073,9 +4084,9 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
4073
4084
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4074
4085
|
success: "success";
|
|
4075
4086
|
error: "error";
|
|
4087
|
+
warning: "warning";
|
|
4076
4088
|
accent: "accent";
|
|
4077
4089
|
accentDim: "accentDim";
|
|
4078
|
-
warning: "warning";
|
|
4079
4090
|
textMuted: "textMuted";
|
|
4080
4091
|
}>>;
|
|
4081
4092
|
axis: z$1.ZodOptional<z$1.ZodEnum<{
|
|
@@ -4089,18 +4100,18 @@ type EvalChartMetric = z$1.infer<typeof evalChartMetricSchema>;
|
|
|
4089
4100
|
declare const evalChartTooltipExtraSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
4090
4101
|
source: z$1.ZodLiteral<"builtin">;
|
|
4091
4102
|
metric: z$1.ZodEnum<{
|
|
4092
|
-
passRate: "passRate";
|
|
4093
4103
|
durationMs: "durationMs";
|
|
4104
|
+
passRate: "passRate";
|
|
4094
4105
|
}>;
|
|
4095
4106
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
4096
4107
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
4097
4108
|
source: z$1.ZodLiteral<"column">;
|
|
4098
4109
|
key: z$1.ZodString;
|
|
4099
4110
|
aggregate: z$1.ZodEnum<{
|
|
4100
|
-
|
|
4111
|
+
sum: "sum";
|
|
4101
4112
|
min: "min";
|
|
4102
4113
|
max: "max";
|
|
4103
|
-
|
|
4114
|
+
avg: "avg";
|
|
4104
4115
|
latest: "latest";
|
|
4105
4116
|
passThresholdRate: "passThresholdRate";
|
|
4106
4117
|
}>;
|
|
@@ -4125,16 +4136,16 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
|
|
|
4125
4136
|
metrics: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
4126
4137
|
source: z$1.ZodLiteral<"builtin">;
|
|
4127
4138
|
metric: z$1.ZodEnum<{
|
|
4128
|
-
passRate: "passRate";
|
|
4129
4139
|
durationMs: "durationMs";
|
|
4140
|
+
passRate: "passRate";
|
|
4130
4141
|
}>;
|
|
4131
4142
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
4132
4143
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4133
4144
|
success: "success";
|
|
4134
4145
|
error: "error";
|
|
4146
|
+
warning: "warning";
|
|
4135
4147
|
accent: "accent";
|
|
4136
4148
|
accentDim: "accentDim";
|
|
4137
|
-
warning: "warning";
|
|
4138
4149
|
textMuted: "textMuted";
|
|
4139
4150
|
}>>;
|
|
4140
4151
|
axis: z$1.ZodOptional<z$1.ZodEnum<{
|
|
@@ -4145,10 +4156,10 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
|
|
|
4145
4156
|
source: z$1.ZodLiteral<"column">;
|
|
4146
4157
|
key: z$1.ZodString;
|
|
4147
4158
|
aggregate: z$1.ZodEnum<{
|
|
4148
|
-
|
|
4159
|
+
sum: "sum";
|
|
4149
4160
|
min: "min";
|
|
4150
4161
|
max: "max";
|
|
4151
|
-
|
|
4162
|
+
avg: "avg";
|
|
4152
4163
|
latest: "latest";
|
|
4153
4164
|
passThresholdRate: "passThresholdRate";
|
|
4154
4165
|
}>;
|
|
@@ -4156,9 +4167,9 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
|
|
|
4156
4167
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4157
4168
|
success: "success";
|
|
4158
4169
|
error: "error";
|
|
4170
|
+
warning: "warning";
|
|
4159
4171
|
accent: "accent";
|
|
4160
4172
|
accentDim: "accentDim";
|
|
4161
|
-
warning: "warning";
|
|
4162
4173
|
textMuted: "textMuted";
|
|
4163
4174
|
}>>;
|
|
4164
4175
|
axis: z$1.ZodOptional<z$1.ZodEnum<{
|
|
@@ -4179,18 +4190,18 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
|
|
|
4179
4190
|
tooltipExtras: z$1.ZodOptional<z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
4180
4191
|
source: z$1.ZodLiteral<"builtin">;
|
|
4181
4192
|
metric: z$1.ZodEnum<{
|
|
4182
|
-
passRate: "passRate";
|
|
4183
4193
|
durationMs: "durationMs";
|
|
4194
|
+
passRate: "passRate";
|
|
4184
4195
|
}>;
|
|
4185
4196
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
4186
4197
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
4187
4198
|
source: z$1.ZodLiteral<"column">;
|
|
4188
4199
|
key: z$1.ZodString;
|
|
4189
4200
|
aggregate: z$1.ZodEnum<{
|
|
4190
|
-
|
|
4201
|
+
sum: "sum";
|
|
4191
4202
|
min: "min";
|
|
4192
4203
|
max: "max";
|
|
4193
|
-
|
|
4204
|
+
avg: "avg";
|
|
4194
4205
|
latest: "latest";
|
|
4195
4206
|
passThresholdRate: "passThresholdRate";
|
|
4196
4207
|
}>;
|
|
@@ -4215,16 +4226,16 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
|
|
|
4215
4226
|
metrics: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
4216
4227
|
source: z$1.ZodLiteral<"builtin">;
|
|
4217
4228
|
metric: z$1.ZodEnum<{
|
|
4218
|
-
passRate: "passRate";
|
|
4219
4229
|
durationMs: "durationMs";
|
|
4230
|
+
passRate: "passRate";
|
|
4220
4231
|
}>;
|
|
4221
4232
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
4222
4233
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4223
4234
|
success: "success";
|
|
4224
4235
|
error: "error";
|
|
4236
|
+
warning: "warning";
|
|
4225
4237
|
accent: "accent";
|
|
4226
4238
|
accentDim: "accentDim";
|
|
4227
|
-
warning: "warning";
|
|
4228
4239
|
textMuted: "textMuted";
|
|
4229
4240
|
}>>;
|
|
4230
4241
|
axis: z$1.ZodOptional<z$1.ZodEnum<{
|
|
@@ -4235,10 +4246,10 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
|
|
|
4235
4246
|
source: z$1.ZodLiteral<"column">;
|
|
4236
4247
|
key: z$1.ZodString;
|
|
4237
4248
|
aggregate: z$1.ZodEnum<{
|
|
4238
|
-
|
|
4249
|
+
sum: "sum";
|
|
4239
4250
|
min: "min";
|
|
4240
4251
|
max: "max";
|
|
4241
|
-
|
|
4252
|
+
avg: "avg";
|
|
4242
4253
|
latest: "latest";
|
|
4243
4254
|
passThresholdRate: "passThresholdRate";
|
|
4244
4255
|
}>;
|
|
@@ -4246,9 +4257,9 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
|
|
|
4246
4257
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4247
4258
|
success: "success";
|
|
4248
4259
|
error: "error";
|
|
4260
|
+
warning: "warning";
|
|
4249
4261
|
accent: "accent";
|
|
4250
4262
|
accentDim: "accentDim";
|
|
4251
|
-
warning: "warning";
|
|
4252
4263
|
textMuted: "textMuted";
|
|
4253
4264
|
}>>;
|
|
4254
4265
|
axis: z$1.ZodOptional<z$1.ZodEnum<{
|
|
@@ -4269,18 +4280,18 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
|
|
|
4269
4280
|
tooltipExtras: z$1.ZodOptional<z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
4270
4281
|
source: z$1.ZodLiteral<"builtin">;
|
|
4271
4282
|
metric: z$1.ZodEnum<{
|
|
4272
|
-
passRate: "passRate";
|
|
4273
4283
|
durationMs: "durationMs";
|
|
4284
|
+
passRate: "passRate";
|
|
4274
4285
|
}>;
|
|
4275
4286
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
4276
4287
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
4277
4288
|
source: z$1.ZodLiteral<"column">;
|
|
4278
4289
|
key: z$1.ZodString;
|
|
4279
4290
|
aggregate: z$1.ZodEnum<{
|
|
4280
|
-
|
|
4291
|
+
sum: "sum";
|
|
4281
4292
|
min: "min";
|
|
4282
4293
|
max: "max";
|
|
4283
|
-
|
|
4294
|
+
avg: "avg";
|
|
4284
4295
|
latest: "latest";
|
|
4285
4296
|
passThresholdRate: "passThresholdRate";
|
|
4286
4297
|
}>;
|
|
@@ -4296,10 +4307,10 @@ declare const runManifestSchema: z$1.ZodObject<{
|
|
|
4296
4307
|
shortId: z$1.ZodString;
|
|
4297
4308
|
status: z$1.ZodEnum<{
|
|
4298
4309
|
error: "error";
|
|
4299
|
-
running: "running";
|
|
4300
|
-
cancelled: "cancelled";
|
|
4301
4310
|
pending: "pending";
|
|
4311
|
+
running: "running";
|
|
4302
4312
|
completed: "completed";
|
|
4313
|
+
cancelled: "cancelled";
|
|
4303
4314
|
}>;
|
|
4304
4315
|
startedAt: z$1.ZodString;
|
|
4305
4316
|
endedAt: z$1.ZodNullable<z$1.ZodString>;
|
|
@@ -4322,9 +4333,9 @@ declare const runManifestSchema: z$1.ZodObject<{
|
|
|
4322
4333
|
median: "median";
|
|
4323
4334
|
}>>>;
|
|
4324
4335
|
cacheMode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4325
|
-
refresh: "refresh";
|
|
4326
|
-
bypass: "bypass";
|
|
4327
4336
|
use: "use";
|
|
4337
|
+
bypass: "bypass";
|
|
4338
|
+
refresh: "refresh";
|
|
4328
4339
|
}>>;
|
|
4329
4340
|
}, z$1.core.$strip>;
|
|
4330
4341
|
/** Persisted lifecycle metadata for a single eval run. */
|
|
@@ -4334,10 +4345,10 @@ declare const runSummarySchema: z$1.ZodObject<{
|
|
|
4334
4345
|
runId: z$1.ZodString;
|
|
4335
4346
|
status: z$1.ZodEnum<{
|
|
4336
4347
|
error: "error";
|
|
4337
|
-
running: "running";
|
|
4338
|
-
cancelled: "cancelled";
|
|
4339
4348
|
pending: "pending";
|
|
4349
|
+
running: "running";
|
|
4340
4350
|
completed: "completed";
|
|
4351
|
+
cancelled: "cancelled";
|
|
4341
4352
|
}>;
|
|
4342
4353
|
totalCases: z$1.ZodNumber;
|
|
4343
4354
|
passedCases: z$1.ZodNumber;
|
|
@@ -4447,8 +4458,8 @@ declare const trialSelectionModeSchema: z$1.ZodEnum<{
|
|
|
4447
4458
|
type TrialSelectionMode = z$1.infer<typeof trialSelectionModeSchema>;
|
|
4448
4459
|
/** Built-in eval-level output/column keys. */
|
|
4449
4460
|
declare const defaultConfigKeySchema: z$1.ZodEnum<{
|
|
4450
|
-
costUsd: "costUsd";
|
|
4451
4461
|
apiCalls: "apiCalls";
|
|
4462
|
+
costUsd: "costUsd";
|
|
4452
4463
|
llmTurns: "llmTurns";
|
|
4453
4464
|
inputTokens: "inputTokens";
|
|
4454
4465
|
outputTokens: "outputTokens";
|
|
@@ -4461,8 +4472,8 @@ declare const defaultConfigKeySchema: z$1.ZodEnum<{
|
|
|
4461
4472
|
/** Built-in eval-level output/column key. */
|
|
4462
4473
|
/** Removal config for built-in eval-level outputs and UI metadata. */
|
|
4463
4474
|
declare const removeDefaultConfigSchema: z$1.ZodUnion<readonly [z$1.ZodLiteral<true>, z$1.ZodArray<z$1.ZodEnum<{
|
|
4464
|
-
costUsd: "costUsd";
|
|
4465
4475
|
apiCalls: "apiCalls";
|
|
4476
|
+
costUsd: "costUsd";
|
|
4466
4477
|
llmTurns: "llmTurns";
|
|
4467
4478
|
inputTokens: "inputTokens";
|
|
4468
4479
|
outputTokens: "outputTokens";
|
|
@@ -4584,10 +4595,10 @@ declare const apiCallMetricPlacementSchema: z$1.ZodEnum<{
|
|
|
4584
4595
|
}>;
|
|
4585
4596
|
/** Placement option for an API-call metric. */
|
|
4586
4597
|
type ApiCallMetricPlacement = z$1.infer<typeof apiCallMetricPlacementSchema>;
|
|
4587
|
-
/** Context passed to
|
|
4598
|
+
/** Context passed to LLM/API-call derived attribute functions. */
|
|
4588
4599
|
type CallDerivedAttributeContext = {
|
|
4589
|
-
/**
|
|
4590
|
-
span: EvalTraceSpan; /** Dot-path helper for reading from
|
|
4600
|
+
/** Current attributes from the matching trace span. */attributes: Record<string, unknown> | undefined; /** Matching trace span. */
|
|
4601
|
+
span: EvalTraceSpan; /** Dot-path helper for reading from the current span attributes. */
|
|
4591
4602
|
get: (path: string) => unknown;
|
|
4592
4603
|
};
|
|
4593
4604
|
/**
|
|
@@ -4595,14 +4606,27 @@ type CallDerivedAttributeContext = {
|
|
|
4595
4606
|
* LLM/API-call span. Return `undefined` to omit the attribute for that span.
|
|
4596
4607
|
*/
|
|
4597
4608
|
type CallDerivedAttribute = (ctx: CallDerivedAttributeContext) => unknown;
|
|
4609
|
+
/**
|
|
4610
|
+
* Runner-side function used to derive multiple span attributes from a matching
|
|
4611
|
+
* LLM/API-call span. Returned object keys are dot-paths under
|
|
4612
|
+
* `span.attributes`; `undefined` values are skipped.
|
|
4613
|
+
*/
|
|
4614
|
+
type CallDerivedAttributesFn = (ctx: CallDerivedAttributeContext) => Record<string, unknown> | undefined;
|
|
4615
|
+
/** Authored LLM/API-call derived-attributes config. */
|
|
4616
|
+
type CallDerivedAttributesConfig = Record<string, CallDerivedAttribute> | CallDerivedAttributesFn;
|
|
4598
4617
|
/** One resolved derived span attribute rule. */
|
|
4599
4618
|
type ResolvedCallDerivedAttribute = {
|
|
4600
|
-
/** Dot-path where
|
|
4619
|
+
/** Dot-path where one derived value is persisted on `span.attributes`. */path?: string;
|
|
4601
4620
|
/**
|
|
4602
|
-
* Function that derives
|
|
4621
|
+
* Function that derives one persisted value for each matching span. Omitted
|
|
4603
4622
|
* after this config is serialized to the browser.
|
|
4604
4623
|
*/
|
|
4605
4624
|
compute?: CallDerivedAttribute;
|
|
4625
|
+
/**
|
|
4626
|
+
* Function that derives multiple persisted values for each matching span.
|
|
4627
|
+
* Omitted after this config is serialized to the browser.
|
|
4628
|
+
*/
|
|
4629
|
+
computeMany?: CallDerivedAttributesFn;
|
|
4606
4630
|
};
|
|
4607
4631
|
/**
|
|
4608
4632
|
* Schema for a single user-defined metric attached to LLM call rows.
|
|
@@ -4721,7 +4745,7 @@ declare const llmCallsConfigSchema: z$1.ZodObject<{
|
|
|
4721
4745
|
reasoning: z$1.ZodOptional<z$1.ZodString>;
|
|
4722
4746
|
toolCalls: z$1.ZodOptional<z$1.ZodString>;
|
|
4723
4747
|
}, z$1.core.$strip>>;
|
|
4724
|
-
derivedAttributes: z$1.ZodOptional<z$1.
|
|
4748
|
+
derivedAttributes: z$1.ZodOptional<z$1.ZodType<CallDerivedAttributesConfig, unknown, z$1.core.$ZodTypeInternals<CallDerivedAttributesConfig, unknown>>>;
|
|
4725
4749
|
pricing: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodObject<{
|
|
4726
4750
|
inputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
4727
4751
|
outputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
@@ -4774,7 +4798,7 @@ declare const apiCallsConfigSchema: z$1.ZodObject<{
|
|
|
4774
4798
|
durationMs: z$1.ZodOptional<z$1.ZodString>;
|
|
4775
4799
|
error: z$1.ZodOptional<z$1.ZodString>;
|
|
4776
4800
|
}, z$1.core.$strip>>;
|
|
4777
|
-
derivedAttributes: z$1.ZodOptional<z$1.
|
|
4801
|
+
derivedAttributes: z$1.ZodOptional<z$1.ZodType<CallDerivedAttributesConfig, unknown, z$1.core.$ZodTypeInternals<CallDerivedAttributesConfig, unknown>>>;
|
|
4778
4802
|
metrics: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
|
|
4779
4803
|
label: z$1.ZodString;
|
|
4780
4804
|
tooltip: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -5086,9 +5110,9 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
|
|
|
5086
5110
|
subtree: "subtree";
|
|
5087
5111
|
}>>;
|
|
5088
5112
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
5089
|
-
sum: "sum";
|
|
5090
|
-
last: "last";
|
|
5091
5113
|
all: "all";
|
|
5114
|
+
last: "last";
|
|
5115
|
+
sum: "sum";
|
|
5092
5116
|
}>>;
|
|
5093
5117
|
transform: z$1.ZodOptional<z$1.ZodCustom<TraceAttributeTransform, TraceAttributeTransform>>;
|
|
5094
5118
|
}, z$1.core.$strip>>>;
|
|
@@ -5111,11 +5135,11 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
|
|
|
5111
5135
|
key: z$1.ZodString;
|
|
5112
5136
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
5113
5137
|
aggregate: z$1.ZodEnum<{
|
|
5114
|
-
|
|
5138
|
+
last: "last";
|
|
5139
|
+
sum: "sum";
|
|
5115
5140
|
min: "min";
|
|
5116
5141
|
max: "max";
|
|
5117
|
-
|
|
5118
|
-
last: "last";
|
|
5142
|
+
avg: "avg";
|
|
5119
5143
|
}>;
|
|
5120
5144
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
5121
5145
|
number: "number";
|
|
@@ -5153,7 +5177,7 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
|
|
|
5153
5177
|
reasoning: z$1.ZodOptional<z$1.ZodString>;
|
|
5154
5178
|
toolCalls: z$1.ZodOptional<z$1.ZodString>;
|
|
5155
5179
|
}, z$1.core.$strip>>;
|
|
5156
|
-
derivedAttributes: z$1.ZodOptional<z$1.
|
|
5180
|
+
derivedAttributes: z$1.ZodOptional<z$1.ZodType<CallDerivedAttributesConfig, unknown, z$1.core.$ZodTypeInternals<CallDerivedAttributesConfig, unknown>>>;
|
|
5157
5181
|
pricing: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodObject<{
|
|
5158
5182
|
inputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
5159
5183
|
outputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
@@ -5190,8 +5214,8 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
|
|
|
5190
5214
|
}, z$1.core.$strip>>>;
|
|
5191
5215
|
}, z$1.core.$strip>>;
|
|
5192
5216
|
removeDefaultConfig: z$1.ZodOptional<z$1.ZodUnion<readonly [z$1.ZodLiteral<true>, z$1.ZodArray<z$1.ZodEnum<{
|
|
5193
|
-
costUsd: "costUsd";
|
|
5194
5217
|
apiCalls: "apiCalls";
|
|
5218
|
+
costUsd: "costUsd";
|
|
5195
5219
|
llmTurns: "llmTurns";
|
|
5196
5220
|
inputTokens: "inputTokens";
|
|
5197
5221
|
outputTokens: "outputTokens";
|
|
@@ -5215,7 +5239,7 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
|
|
|
5215
5239
|
durationMs: z$1.ZodOptional<z$1.ZodString>;
|
|
5216
5240
|
error: z$1.ZodOptional<z$1.ZodString>;
|
|
5217
5241
|
}, z$1.core.$strip>>;
|
|
5218
|
-
derivedAttributes: z$1.ZodOptional<z$1.
|
|
5242
|
+
derivedAttributes: z$1.ZodOptional<z$1.ZodType<CallDerivedAttributesConfig, unknown, z$1.core.$ZodTypeInternals<CallDerivedAttributesConfig, unknown>>>;
|
|
5219
5243
|
metrics: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
|
|
5220
5244
|
label: z$1.ZodString;
|
|
5221
5245
|
tooltip: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -5380,9 +5404,9 @@ declare function extractApiCalls(spans: EvalTraceSpan[], config: ResolvedApiCall
|
|
|
5380
5404
|
* - `refresh`: never read, always write (forces re-execution and overwrites).
|
|
5381
5405
|
*/
|
|
5382
5406
|
declare const cacheModeSchema: z$1.ZodEnum<{
|
|
5383
|
-
refresh: "refresh";
|
|
5384
|
-
bypass: "bypass";
|
|
5385
5407
|
use: "use";
|
|
5408
|
+
bypass: "bypass";
|
|
5409
|
+
refresh: "refresh";
|
|
5386
5410
|
}>;
|
|
5387
5411
|
/** Mode controlling how cached spans behave during a run. */
|
|
5388
5412
|
type CacheMode = z$1.infer<typeof cacheModeSchema>;
|
|
@@ -5403,10 +5427,10 @@ declare const cacheOperationTypeSchema: z$1.ZodEnum<{
|
|
|
5403
5427
|
type CacheOperationType = z$1.infer<typeof cacheOperationTypeSchema>;
|
|
5404
5428
|
/** Status of a cache lookup recorded on a span or case scope. */
|
|
5405
5429
|
declare const cacheStatusSchema: z$1.ZodEnum<{
|
|
5430
|
+
bypass: "bypass";
|
|
5431
|
+
refresh: "refresh";
|
|
5406
5432
|
hit: "hit";
|
|
5407
5433
|
miss: "miss";
|
|
5408
|
-
refresh: "refresh";
|
|
5409
|
-
bypass: "bypass";
|
|
5410
5434
|
}>;
|
|
5411
5435
|
/** Status of a cache lookup recorded on a span or case scope. */
|
|
5412
5436
|
type CacheStatus = z$1.infer<typeof cacheStatusSchema>;
|
|
@@ -5423,10 +5447,10 @@ declare const traceCacheRefSchema: z$1.ZodObject<{
|
|
|
5423
5447
|
namespace: z$1.ZodString;
|
|
5424
5448
|
key: z$1.ZodString;
|
|
5425
5449
|
status: z$1.ZodEnum<{
|
|
5450
|
+
bypass: "bypass";
|
|
5451
|
+
refresh: "refresh";
|
|
5426
5452
|
hit: "hit";
|
|
5427
5453
|
miss: "miss";
|
|
5428
|
-
refresh: "refresh";
|
|
5429
|
-
bypass: "bypass";
|
|
5430
5454
|
}>;
|
|
5431
5455
|
read: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
5432
5456
|
stored: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
@@ -5942,9 +5966,9 @@ declare const createRunRequestSchema: z$1.ZodObject<{
|
|
|
5942
5966
|
trials: z$1.ZodNumber;
|
|
5943
5967
|
cache: z$1.ZodOptional<z$1.ZodObject<{
|
|
5944
5968
|
mode: z$1.ZodDefault<z$1.ZodEnum<{
|
|
5945
|
-
refresh: "refresh";
|
|
5946
|
-
bypass: "bypass";
|
|
5947
5969
|
use: "use";
|
|
5970
|
+
bypass: "bypass";
|
|
5971
|
+
refresh: "refresh";
|
|
5948
5972
|
}>>;
|
|
5949
5973
|
}, z$1.core.$strip>>;
|
|
5950
5974
|
manualInputs: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
|
|
@@ -6214,4 +6238,4 @@ type ManualInputDescriptor = z$1.infer<typeof manualInputDescriptorSchema>; //#e
|
|
|
6214
6238
|
*/
|
|
6215
6239
|
declare function runCli(argv: string[]): Promise<void>;
|
|
6216
6240
|
//#endregion
|
|
6217
|
-
export { type AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheActivityEntry, type CacheAdapter, type CacheDebugKeyEntry, type CacheDebugKeyFile, type CacheDebugKeyWrite, type CacheEntry, type CacheEntryWithDebugKey, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CacheSerializationOptions, type CacheStatus, type CallDerivedAttribute, type CallDerivedAttributeContext, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type ConfigReloadState, type ConfigReloadStatus, type CreateRunRequest, DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, type DefaultConfigKey, type DerivedStatus, type DiscoveryIssue, EvalAssertionError, type EvalCacheConfig, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveConfig, type EvalDeriveContext, type EvalDeriveFn, type EvalDeriveMap, type EvalDeriveValueFn, type EvalDisplayStatus, type EvalExecuteContext, type EvalExpectation, type EvalFreshnessStatus, type EvalManualInputConfig, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalRuntimeScope, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStartTime, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, type EvalTimeUnit, type EvalTraceSpan, type EvalTraceSpanError, type EvalTraceSpanWarning, type EvalTraceTree, type FileRef, type JsonCell, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallPricing, type LlmCallPricingRate, type LlmCallPricingRegistry, type LlmCallsConfigInput, type ManualInputDescriptor, type ManualInputFieldDescriptor, type ManualInputFieldKind, type ManualInputFieldOverride, type ManualInputFieldsConfig, type ManualInputFileValue, type ManualInputSelectOption, type MaterializeManualInputFilesResult, type NumberDisplayOptions, type ReadManualInputFileResult, type RemoveDefaultConfig, type RepoFileRef, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedCallDerivedAttribute, type ResolvedLlmCallMetric, type ResolvedLlmCallPricing, type ResolvedLlmCallsConfig, type RunArtifactRef, type RunInEvalScopeOptions, type RunLogEntry, type RunLogLevel, type RunLogLocation, type RunLogPhase, type RunLogsConfigInput, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SerializedCacheValue, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, advanceEvalTime, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, applyDerivedCallAttributes, assertionFailureSchema, buildCaseKey, buildEvalKey, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, cleanupStagedManualInputFiles, columnDefSchema, columnFormatSchema, columnKindSchema, configReloadStateSchema, configReloadStatusSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, discoveryIssueSchema, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalColumnOverrideSchema, evalColumnsSchema, evalDeriveConfigSchema, evalExpect, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCaseRowCaseKey, getCaseRowEvalKey, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalStartTime, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingRateSchema, llmCallPricingSchema, llmCallsConfigSchema, manualInputBooleanFieldSchema, manualInputDescriptorSchema, manualInputFieldDescriptorSchema, manualInputFileValueSchema, manualInputJsonFieldSchema, manualInputMultilineFieldSchema, manualInputNumberFieldSchema, manualInputSelectFieldSchema, manualInputSelectOptionSchema, manualInputTextFieldSchema, materializeManualInputFiles, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, readManualInputFile, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
|
|
6241
|
+
export { type AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheActivityEntry, type CacheAdapter, type CacheDebugKeyEntry, type CacheDebugKeyFile, type CacheDebugKeyWrite, type CacheEntry, type CacheEntryWithDebugKey, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CacheSerializationOptions, type CacheStatus, type CallDerivedAttribute, type CallDerivedAttributeContext, type CallDerivedAttributesConfig, type CallDerivedAttributesFn, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type ConfigReloadState, type ConfigReloadStatus, type CreateRunRequest, DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, type DefaultConfigKey, type DerivedStatus, type DiscoveryIssue, EvalAssertionError, type EvalCacheConfig, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveConfig, type EvalDeriveContext, type EvalDeriveFn, type EvalDeriveMap, type EvalDeriveValueFn, type EvalDisplayStatus, type EvalExecuteContext, type EvalExpectation, type EvalFreshnessStatus, type EvalManualInputConfig, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalRuntimeScope, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStartTime, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, type EvalTimeUnit, type EvalTraceSpan, type EvalTraceSpanError, type EvalTraceSpanWarning, type EvalTraceTree, type FileRef, type JsonCell, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallPricing, type LlmCallPricingRate, type LlmCallPricingRegistry, type LlmCallsConfigInput, type ManualInputDescriptor, type ManualInputFieldDescriptor, type ManualInputFieldKind, type ManualInputFieldOverride, type ManualInputFieldsConfig, type ManualInputFileValue, type ManualInputSelectOption, type MaterializeManualInputFilesResult, type NumberDisplayOptions, type ReadManualInputFileResult, type RemoveDefaultConfig, type RepoFileRef, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedCallDerivedAttribute, type ResolvedLlmCallMetric, type ResolvedLlmCallPricing, type ResolvedLlmCallsConfig, type RunArtifactRef, type RunInEvalScopeOptions, type RunLogEntry, type RunLogLevel, type RunLogLocation, type RunLogPhase, type RunLogsConfigInput, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SerializedCacheValue, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, advanceEvalTime, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, applyDerivedCallAttributes, assertionFailureSchema, buildCaseKey, buildEvalKey, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, cleanupStagedManualInputFiles, columnDefSchema, columnFormatSchema, columnKindSchema, configReloadStateSchema, configReloadStatusSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, discoveryIssueSchema, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalColumnOverrideSchema, evalColumnsSchema, evalDeriveConfigSchema, evalExpect, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCaseRowCaseKey, getCaseRowEvalKey, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalStartTime, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingRateSchema, llmCallPricingSchema, llmCallsConfigSchema, manualInputBooleanFieldSchema, manualInputDescriptorSchema, manualInputFieldDescriptorSchema, manualInputFileValueSchema, manualInputJsonFieldSchema, manualInputMultilineFieldSchema, manualInputNumberFieldSchema, manualInputSelectFieldSchema, manualInputSelectOptionSchema, manualInputTextFieldSchema, materializeManualInputFiles, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, readManualInputFile, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
|