@ls-stack/agent-eval 0.30.0 → 0.32.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-CbOZBHju.mjs → app-Dc6vvHRL.mjs} +4 -4
- package/dist/apps/web/dist/assets/index-BNQnbfi0.js +118 -0
- package/dist/apps/web/dist/assets/index-BPMMRktE.css +1 -0
- package/dist/apps/web/dist/index.html +2 -2
- package/dist/bin.mjs +1 -1
- package/dist/{cli-CiFOqMwS.mjs → cli-huuJbDNb.mjs} +3 -3
- package/dist/index.d.mts +176 -53
- package/dist/index.mjs +4 -4
- package/dist/runChild.mjs +1 -1
- package/dist/{runOrchestration-CO3Vf0cQ.mjs → runOrchestration-ZpN7xty_.mjs} +127 -3
- package/dist/{runner-4pF_Qrc9.mjs → runner-BPXPvinB.mjs} +1 -1
- package/dist/{runner-CXHkf7ih.mjs → runner-Dkol2ukD.mjs} +2 -2
- package/dist/src-1Qvuh0NH.mjs +3 -0
- package/package.json +2 -2
- package/skills/agent-eval/SKILL.md +4 -3
- package/dist/apps/web/dist/assets/index-DEikHy2a.js +0 -118
- package/dist/apps/web/dist/assets/index-DjUTm3M-.css +0 -1
- package/dist/src-BiPLv9ya.mjs +0 -3
package/dist/index.d.mts
CHANGED
|
@@ -754,6 +754,7 @@ type ResolvedLlmCallsConfig$1 = {
|
|
|
754
754
|
derivedAttributes: ResolvedCallDerivedAttribute$1[];
|
|
755
755
|
metrics: ResolvedLlmCallMetric$1[];
|
|
756
756
|
pricing: ResolvedLlmCallPricing$1[];
|
|
757
|
+
costCurrencies: ResolvedLlmCallCostCurrency$1[];
|
|
757
758
|
};
|
|
758
759
|
/** Resolved API-calls config sent to the UI with all defaults applied. */
|
|
759
760
|
type ResolvedApiCallsConfig$1 = {
|
|
@@ -801,6 +802,13 @@ type ResolvedLlmCallPricing$1 = {
|
|
|
801
802
|
cacheCreationInputUsdPerMillion?: number;
|
|
802
803
|
cacheCreationInput1hUsdPerMillion?: number;
|
|
803
804
|
reasoningUsdPerMillion?: number;
|
|
805
|
+
};
|
|
806
|
+
/** Fully-resolved extra currency used by the LLM calls tab. */
|
|
807
|
+
type ResolvedLlmCallCostCurrency$1 = {
|
|
808
|
+
code: string;
|
|
809
|
+
label?: string;
|
|
810
|
+
usdToCurrencyRate: number;
|
|
811
|
+
numberFormat?: NumberDisplayOptions$2;
|
|
804
812
|
}; //#endregion
|
|
805
813
|
//#region ../shared/src/schemas/cache.d.ts
|
|
806
814
|
/** Summary of a single persisted cache entry, used by list/delete endpoints. */
|
|
@@ -3168,8 +3176,8 @@ declare const traceSpanSchema: z$1.ZodObject<{
|
|
|
3168
3176
|
status: z$1.ZodEnum<{
|
|
3169
3177
|
error: "error";
|
|
3170
3178
|
running: "running";
|
|
3171
|
-
cancelled: "cancelled";
|
|
3172
3179
|
ok: "ok";
|
|
3180
|
+
cancelled: "cancelled";
|
|
3173
3181
|
}>;
|
|
3174
3182
|
attributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
|
|
3175
3183
|
error: z$1.ZodOptional<z$1.ZodObject<{
|
|
@@ -3212,9 +3220,9 @@ type EvalFreshnessStatus = z$1.infer<typeof evalFreshnessStatusSchema>;
|
|
|
3212
3220
|
declare const evalStatAggregateSchema: z$1.ZodEnum<{
|
|
3213
3221
|
last: "last";
|
|
3214
3222
|
sum: "sum";
|
|
3223
|
+
avg: "avg";
|
|
3215
3224
|
min: "min";
|
|
3216
3225
|
max: "max";
|
|
3217
|
-
avg: "avg";
|
|
3218
3226
|
}>;
|
|
3219
3227
|
/** Reducer used to collapse a column's per-case values into a single stat. */
|
|
3220
3228
|
type EvalStatAggregate = z$1.infer<typeof evalStatAggregateSchema>;
|
|
@@ -3240,9 +3248,9 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
3240
3248
|
aggregate: z$1.ZodEnum<{
|
|
3241
3249
|
last: "last";
|
|
3242
3250
|
sum: "sum";
|
|
3251
|
+
avg: "avg";
|
|
3243
3252
|
min: "min";
|
|
3244
3253
|
max: "max";
|
|
3245
|
-
avg: "avg";
|
|
3246
3254
|
}>;
|
|
3247
3255
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3248
3256
|
number: "number";
|
|
@@ -3282,9 +3290,9 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
|
|
|
3282
3290
|
aggregate: z$1.ZodEnum<{
|
|
3283
3291
|
last: "last";
|
|
3284
3292
|
sum: "sum";
|
|
3293
|
+
avg: "avg";
|
|
3285
3294
|
min: "min";
|
|
3286
3295
|
max: "max";
|
|
3287
|
-
avg: "avg";
|
|
3288
3296
|
}>;
|
|
3289
3297
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3290
3298
|
number: "number";
|
|
@@ -3383,9 +3391,9 @@ declare const evalSummarySchema: z$1.ZodObject<{
|
|
|
3383
3391
|
aggregate: z$1.ZodEnum<{
|
|
3384
3392
|
last: "last";
|
|
3385
3393
|
sum: "sum";
|
|
3394
|
+
avg: "avg";
|
|
3386
3395
|
min: "min";
|
|
3387
3396
|
max: "max";
|
|
3388
|
-
avg: "avg";
|
|
3389
3397
|
}>;
|
|
3390
3398
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3391
3399
|
number: "number";
|
|
@@ -3416,8 +3424,8 @@ declare const evalSummarySchema: z$1.ZodObject<{
|
|
|
3416
3424
|
metrics: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
3417
3425
|
source: z$1.ZodLiteral<"builtin">;
|
|
3418
3426
|
metric: z$1.ZodEnum<{
|
|
3419
|
-
durationMs: "durationMs";
|
|
3420
3427
|
passRate: "passRate";
|
|
3428
|
+
durationMs: "durationMs";
|
|
3421
3429
|
}>;
|
|
3422
3430
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
3423
3431
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
@@ -3437,9 +3445,9 @@ declare const evalSummarySchema: z$1.ZodObject<{
|
|
|
3437
3445
|
key: z$1.ZodString;
|
|
3438
3446
|
aggregate: z$1.ZodEnum<{
|
|
3439
3447
|
sum: "sum";
|
|
3448
|
+
avg: "avg";
|
|
3440
3449
|
min: "min";
|
|
3441
3450
|
max: "max";
|
|
3442
|
-
avg: "avg";
|
|
3443
3451
|
latest: "latest";
|
|
3444
3452
|
passThresholdRate: "passThresholdRate";
|
|
3445
3453
|
}>;
|
|
@@ -3470,8 +3478,8 @@ declare const evalSummarySchema: z$1.ZodObject<{
|
|
|
3470
3478
|
tooltipExtras: z$1.ZodOptional<z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
3471
3479
|
source: z$1.ZodLiteral<"builtin">;
|
|
3472
3480
|
metric: z$1.ZodEnum<{
|
|
3473
|
-
durationMs: "durationMs";
|
|
3474
3481
|
passRate: "passRate";
|
|
3482
|
+
durationMs: "durationMs";
|
|
3475
3483
|
}>;
|
|
3476
3484
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
3477
3485
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
@@ -3479,9 +3487,9 @@ declare const evalSummarySchema: z$1.ZodObject<{
|
|
|
3479
3487
|
key: z$1.ZodString;
|
|
3480
3488
|
aggregate: z$1.ZodEnum<{
|
|
3481
3489
|
sum: "sum";
|
|
3490
|
+
avg: "avg";
|
|
3482
3491
|
min: "min";
|
|
3483
3492
|
max: "max";
|
|
3484
|
-
avg: "avg";
|
|
3485
3493
|
latest: "latest";
|
|
3486
3494
|
passThresholdRate: "passThresholdRate";
|
|
3487
3495
|
}>;
|
|
@@ -3577,11 +3585,11 @@ declare const caseRowSchema: z$1.ZodObject<{
|
|
|
3577
3585
|
evalId: z$1.ZodString;
|
|
3578
3586
|
status: z$1.ZodEnum<{
|
|
3579
3587
|
error: "error";
|
|
3580
|
-
pending: "pending";
|
|
3581
3588
|
running: "running";
|
|
3582
3589
|
cancelled: "cancelled";
|
|
3583
3590
|
pass: "pass";
|
|
3584
3591
|
fail: "fail";
|
|
3592
|
+
pending: "pending";
|
|
3585
3593
|
}>;
|
|
3586
3594
|
durationMs: z$1.ZodNullable<z$1.ZodNumber>;
|
|
3587
3595
|
costUsd: z$1.ZodOptional<z$1.ZodNullable<z$1.ZodNumber>>;
|
|
@@ -3673,8 +3681,8 @@ declare const scoreTraceSchema: z$1.ZodObject<{
|
|
|
3673
3681
|
status: z$1.ZodEnum<{
|
|
3674
3682
|
error: "error";
|
|
3675
3683
|
running: "running";
|
|
3676
|
-
cancelled: "cancelled";
|
|
3677
3684
|
ok: "ok";
|
|
3685
|
+
cancelled: "cancelled";
|
|
3678
3686
|
}>;
|
|
3679
3687
|
attributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
|
|
3680
3688
|
error: z$1.ZodOptional<z$1.ZodObject<{
|
|
@@ -3741,11 +3749,11 @@ declare const caseDetailSchema: z$1.ZodObject<{
|
|
|
3741
3749
|
evalId: z$1.ZodString;
|
|
3742
3750
|
status: z$1.ZodEnum<{
|
|
3743
3751
|
error: "error";
|
|
3744
|
-
pending: "pending";
|
|
3745
3752
|
running: "running";
|
|
3746
3753
|
cancelled: "cancelled";
|
|
3747
3754
|
pass: "pass";
|
|
3748
3755
|
fail: "fail";
|
|
3756
|
+
pending: "pending";
|
|
3749
3757
|
}>;
|
|
3750
3758
|
input: z$1.ZodUnknown;
|
|
3751
3759
|
trace: z$1.ZodArray<z$1.ZodObject<{
|
|
@@ -3759,8 +3767,8 @@ declare const caseDetailSchema: z$1.ZodObject<{
|
|
|
3759
3767
|
status: z$1.ZodEnum<{
|
|
3760
3768
|
error: "error";
|
|
3761
3769
|
running: "running";
|
|
3762
|
-
cancelled: "cancelled";
|
|
3763
3770
|
ok: "ok";
|
|
3771
|
+
cancelled: "cancelled";
|
|
3764
3772
|
}>;
|
|
3765
3773
|
attributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
|
|
3766
3774
|
error: z$1.ZodOptional<z$1.ZodObject<{
|
|
@@ -3828,8 +3836,8 @@ declare const caseDetailSchema: z$1.ZodObject<{
|
|
|
3828
3836
|
status: z$1.ZodEnum<{
|
|
3829
3837
|
error: "error";
|
|
3830
3838
|
running: "running";
|
|
3831
|
-
cancelled: "cancelled";
|
|
3832
3839
|
ok: "ok";
|
|
3840
|
+
cancelled: "cancelled";
|
|
3833
3841
|
}>;
|
|
3834
3842
|
attributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
|
|
3835
3843
|
error: z$1.ZodOptional<z$1.ZodObject<{
|
|
@@ -3941,10 +3949,10 @@ declare const caseDetailSchema: z$1.ZodObject<{
|
|
|
3941
3949
|
namespace: z$1.ZodString;
|
|
3942
3950
|
key: z$1.ZodString;
|
|
3943
3951
|
status: z$1.ZodEnum<{
|
|
3944
|
-
bypass: "bypass";
|
|
3945
|
-
refresh: "refresh";
|
|
3946
3952
|
hit: "hit";
|
|
3947
3953
|
miss: "miss";
|
|
3954
|
+
refresh: "refresh";
|
|
3955
|
+
bypass: "bypass";
|
|
3948
3956
|
}>;
|
|
3949
3957
|
read: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
3950
3958
|
stored: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
@@ -4005,8 +4013,8 @@ type EvalChartType = z$1.infer<typeof evalChartTypeSchema>;
|
|
|
4005
4013
|
* than from a per-case column.
|
|
4006
4014
|
*/
|
|
4007
4015
|
declare const evalChartBuiltinMetricSchema: z$1.ZodEnum<{
|
|
4008
|
-
durationMs: "durationMs";
|
|
4009
4016
|
passRate: "passRate";
|
|
4017
|
+
durationMs: "durationMs";
|
|
4010
4018
|
}>;
|
|
4011
4019
|
/**
|
|
4012
4020
|
* Run-level metric sourced from the aggregated `RunSummary` for a run, rather
|
|
@@ -4016,9 +4024,9 @@ type EvalChartBuiltinMetric = z$1.infer<typeof evalChartBuiltinMetricSchema>;
|
|
|
4016
4024
|
/** Reducer applied to a numeric column across all cases of a single run. */
|
|
4017
4025
|
declare const evalChartAggregateSchema: z$1.ZodEnum<{
|
|
4018
4026
|
sum: "sum";
|
|
4027
|
+
avg: "avg";
|
|
4019
4028
|
min: "min";
|
|
4020
4029
|
max: "max";
|
|
4021
|
-
avg: "avg";
|
|
4022
4030
|
latest: "latest";
|
|
4023
4031
|
passThresholdRate: "passThresholdRate";
|
|
4024
4032
|
}>;
|
|
@@ -4053,8 +4061,8 @@ type EvalChartAxis = z$1.infer<typeof evalChartAxisSchema>;
|
|
|
4053
4061
|
declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
4054
4062
|
source: z$1.ZodLiteral<"builtin">;
|
|
4055
4063
|
metric: z$1.ZodEnum<{
|
|
4056
|
-
durationMs: "durationMs";
|
|
4057
4064
|
passRate: "passRate";
|
|
4065
|
+
durationMs: "durationMs";
|
|
4058
4066
|
}>;
|
|
4059
4067
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
4060
4068
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
@@ -4074,9 +4082,9 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
4074
4082
|
key: z$1.ZodString;
|
|
4075
4083
|
aggregate: z$1.ZodEnum<{
|
|
4076
4084
|
sum: "sum";
|
|
4085
|
+
avg: "avg";
|
|
4077
4086
|
min: "min";
|
|
4078
4087
|
max: "max";
|
|
4079
|
-
avg: "avg";
|
|
4080
4088
|
latest: "latest";
|
|
4081
4089
|
passThresholdRate: "passThresholdRate";
|
|
4082
4090
|
}>;
|
|
@@ -4100,8 +4108,8 @@ type EvalChartMetric = z$1.infer<typeof evalChartMetricSchema>;
|
|
|
4100
4108
|
declare const evalChartTooltipExtraSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
4101
4109
|
source: z$1.ZodLiteral<"builtin">;
|
|
4102
4110
|
metric: z$1.ZodEnum<{
|
|
4103
|
-
durationMs: "durationMs";
|
|
4104
4111
|
passRate: "passRate";
|
|
4112
|
+
durationMs: "durationMs";
|
|
4105
4113
|
}>;
|
|
4106
4114
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
4107
4115
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
@@ -4109,9 +4117,9 @@ declare const evalChartTooltipExtraSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObj
|
|
|
4109
4117
|
key: z$1.ZodString;
|
|
4110
4118
|
aggregate: z$1.ZodEnum<{
|
|
4111
4119
|
sum: "sum";
|
|
4120
|
+
avg: "avg";
|
|
4112
4121
|
min: "min";
|
|
4113
4122
|
max: "max";
|
|
4114
|
-
avg: "avg";
|
|
4115
4123
|
latest: "latest";
|
|
4116
4124
|
passThresholdRate: "passThresholdRate";
|
|
4117
4125
|
}>;
|
|
@@ -4136,8 +4144,8 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
|
|
|
4136
4144
|
metrics: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
4137
4145
|
source: z$1.ZodLiteral<"builtin">;
|
|
4138
4146
|
metric: z$1.ZodEnum<{
|
|
4139
|
-
durationMs: "durationMs";
|
|
4140
4147
|
passRate: "passRate";
|
|
4148
|
+
durationMs: "durationMs";
|
|
4141
4149
|
}>;
|
|
4142
4150
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
4143
4151
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
@@ -4157,9 +4165,9 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
|
|
|
4157
4165
|
key: z$1.ZodString;
|
|
4158
4166
|
aggregate: z$1.ZodEnum<{
|
|
4159
4167
|
sum: "sum";
|
|
4168
|
+
avg: "avg";
|
|
4160
4169
|
min: "min";
|
|
4161
4170
|
max: "max";
|
|
4162
|
-
avg: "avg";
|
|
4163
4171
|
latest: "latest";
|
|
4164
4172
|
passThresholdRate: "passThresholdRate";
|
|
4165
4173
|
}>;
|
|
@@ -4190,8 +4198,8 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
|
|
|
4190
4198
|
tooltipExtras: z$1.ZodOptional<z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
4191
4199
|
source: z$1.ZodLiteral<"builtin">;
|
|
4192
4200
|
metric: z$1.ZodEnum<{
|
|
4193
|
-
durationMs: "durationMs";
|
|
4194
4201
|
passRate: "passRate";
|
|
4202
|
+
durationMs: "durationMs";
|
|
4195
4203
|
}>;
|
|
4196
4204
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
4197
4205
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
@@ -4199,9 +4207,9 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
|
|
|
4199
4207
|
key: z$1.ZodString;
|
|
4200
4208
|
aggregate: z$1.ZodEnum<{
|
|
4201
4209
|
sum: "sum";
|
|
4210
|
+
avg: "avg";
|
|
4202
4211
|
min: "min";
|
|
4203
4212
|
max: "max";
|
|
4204
|
-
avg: "avg";
|
|
4205
4213
|
latest: "latest";
|
|
4206
4214
|
passThresholdRate: "passThresholdRate";
|
|
4207
4215
|
}>;
|
|
@@ -4226,8 +4234,8 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
|
|
|
4226
4234
|
metrics: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
4227
4235
|
source: z$1.ZodLiteral<"builtin">;
|
|
4228
4236
|
metric: z$1.ZodEnum<{
|
|
4229
|
-
durationMs: "durationMs";
|
|
4230
4237
|
passRate: "passRate";
|
|
4238
|
+
durationMs: "durationMs";
|
|
4231
4239
|
}>;
|
|
4232
4240
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
4233
4241
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
@@ -4247,9 +4255,9 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
|
|
|
4247
4255
|
key: z$1.ZodString;
|
|
4248
4256
|
aggregate: z$1.ZodEnum<{
|
|
4249
4257
|
sum: "sum";
|
|
4258
|
+
avg: "avg";
|
|
4250
4259
|
min: "min";
|
|
4251
4260
|
max: "max";
|
|
4252
|
-
avg: "avg";
|
|
4253
4261
|
latest: "latest";
|
|
4254
4262
|
passThresholdRate: "passThresholdRate";
|
|
4255
4263
|
}>;
|
|
@@ -4280,8 +4288,8 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
|
|
|
4280
4288
|
tooltipExtras: z$1.ZodOptional<z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
4281
4289
|
source: z$1.ZodLiteral<"builtin">;
|
|
4282
4290
|
metric: z$1.ZodEnum<{
|
|
4283
|
-
durationMs: "durationMs";
|
|
4284
4291
|
passRate: "passRate";
|
|
4292
|
+
durationMs: "durationMs";
|
|
4285
4293
|
}>;
|
|
4286
4294
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
4287
4295
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
@@ -4289,9 +4297,9 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
|
|
|
4289
4297
|
key: z$1.ZodString;
|
|
4290
4298
|
aggregate: z$1.ZodEnum<{
|
|
4291
4299
|
sum: "sum";
|
|
4300
|
+
avg: "avg";
|
|
4292
4301
|
min: "min";
|
|
4293
4302
|
max: "max";
|
|
4294
|
-
avg: "avg";
|
|
4295
4303
|
latest: "latest";
|
|
4296
4304
|
passThresholdRate: "passThresholdRate";
|
|
4297
4305
|
}>;
|
|
@@ -4307,10 +4315,10 @@ declare const runManifestSchema: z$1.ZodObject<{
|
|
|
4307
4315
|
shortId: z$1.ZodString;
|
|
4308
4316
|
status: z$1.ZodEnum<{
|
|
4309
4317
|
error: "error";
|
|
4310
|
-
pending: "pending";
|
|
4311
4318
|
running: "running";
|
|
4312
|
-
completed: "completed";
|
|
4313
4319
|
cancelled: "cancelled";
|
|
4320
|
+
pending: "pending";
|
|
4321
|
+
completed: "completed";
|
|
4314
4322
|
}>;
|
|
4315
4323
|
startedAt: z$1.ZodString;
|
|
4316
4324
|
endedAt: z$1.ZodNullable<z$1.ZodString>;
|
|
@@ -4333,9 +4341,9 @@ declare const runManifestSchema: z$1.ZodObject<{
|
|
|
4333
4341
|
median: "median";
|
|
4334
4342
|
}>>>;
|
|
4335
4343
|
cacheMode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4336
|
-
use: "use";
|
|
4337
|
-
bypass: "bypass";
|
|
4338
4344
|
refresh: "refresh";
|
|
4345
|
+
bypass: "bypass";
|
|
4346
|
+
use: "use";
|
|
4339
4347
|
}>>;
|
|
4340
4348
|
}, z$1.core.$strip>;
|
|
4341
4349
|
/** Persisted lifecycle metadata for a single eval run. */
|
|
@@ -4345,10 +4353,10 @@ declare const runSummarySchema: z$1.ZodObject<{
|
|
|
4345
4353
|
runId: z$1.ZodString;
|
|
4346
4354
|
status: z$1.ZodEnum<{
|
|
4347
4355
|
error: "error";
|
|
4348
|
-
pending: "pending";
|
|
4349
4356
|
running: "running";
|
|
4350
|
-
completed: "completed";
|
|
4351
4357
|
cancelled: "cancelled";
|
|
4358
|
+
pending: "pending";
|
|
4359
|
+
completed: "completed";
|
|
4352
4360
|
}>;
|
|
4353
4361
|
totalCases: z$1.ZodNumber;
|
|
4354
4362
|
passedCases: z$1.ZodNumber;
|
|
@@ -4458,8 +4466,8 @@ declare const trialSelectionModeSchema: z$1.ZodEnum<{
|
|
|
4458
4466
|
type TrialSelectionMode = z$1.infer<typeof trialSelectionModeSchema>;
|
|
4459
4467
|
/** Built-in eval-level output/column keys. */
|
|
4460
4468
|
declare const defaultConfigKeySchema: z$1.ZodEnum<{
|
|
4461
|
-
apiCalls: "apiCalls";
|
|
4462
4469
|
costUsd: "costUsd";
|
|
4470
|
+
apiCalls: "apiCalls";
|
|
4463
4471
|
llmTurns: "llmTurns";
|
|
4464
4472
|
inputTokens: "inputTokens";
|
|
4465
4473
|
outputTokens: "outputTokens";
|
|
@@ -4472,8 +4480,8 @@ declare const defaultConfigKeySchema: z$1.ZodEnum<{
|
|
|
4472
4480
|
/** Built-in eval-level output/column key. */
|
|
4473
4481
|
/** Removal config for built-in eval-level outputs and UI metadata. */
|
|
4474
4482
|
declare const removeDefaultConfigSchema: z$1.ZodUnion<readonly [z$1.ZodLiteral<true>, z$1.ZodArray<z$1.ZodEnum<{
|
|
4475
|
-
apiCalls: "apiCalls";
|
|
4476
4483
|
costUsd: "costUsd";
|
|
4484
|
+
apiCalls: "apiCalls";
|
|
4477
4485
|
llmTurns: "llmTurns";
|
|
4478
4486
|
inputTokens: "inputTokens";
|
|
4479
4487
|
outputTokens: "outputTokens";
|
|
@@ -4725,6 +4733,18 @@ declare const llmCallPricingSchema: z$1.ZodObject<{
|
|
|
4725
4733
|
type LlmCallPricing = z$1.infer<typeof llmCallPricingSchema>;
|
|
4726
4734
|
/** Model-keyed pricing registry authored in `agent-evals.config.ts`. */
|
|
4727
4735
|
type LlmCallPricingRegistry = Record<string, LlmCallPricing>;
|
|
4736
|
+
/**
|
|
4737
|
+
* Schema for extra currencies displayed in the LLM calls breakdown table.
|
|
4738
|
+
* Costs are still derived in USD, then multiplied by `usdToCurrencyRate`.
|
|
4739
|
+
*/
|
|
4740
|
+
declare const llmCallCostCurrencySchema: z$1.ZodObject<{
|
|
4741
|
+
code: z$1.ZodString;
|
|
4742
|
+
label: z$1.ZodOptional<z$1.ZodString>;
|
|
4743
|
+
usdToCurrencyRate: z$1.ZodNumber;
|
|
4744
|
+
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
4745
|
+
}, z$1.core.$strip>;
|
|
4746
|
+
/** Extra LLM-call cost currency authored in `agent-evals.config.ts`. */
|
|
4747
|
+
type LlmCallCostCurrency = z$1.infer<typeof llmCallCostCurrencySchema>;
|
|
4728
4748
|
/** Schema for the global LLM calls config block in `agent-evals.config.ts`. */
|
|
4729
4749
|
declare const llmCallsConfigSchema: z$1.ZodObject<{
|
|
4730
4750
|
kinds: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
@@ -4763,6 +4783,12 @@ declare const llmCallsConfigSchema: z$1.ZodObject<{
|
|
|
4763
4783
|
reasoningUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
4764
4784
|
}, z$1.core.$strip>>>;
|
|
4765
4785
|
}, z$1.core.$strip>>>;
|
|
4786
|
+
costCurrencies: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
|
|
4787
|
+
code: z$1.ZodString;
|
|
4788
|
+
label: z$1.ZodOptional<z$1.ZodString>;
|
|
4789
|
+
usdToCurrencyRate: z$1.ZodNumber;
|
|
4790
|
+
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
4791
|
+
}, z$1.core.$strip>>>;
|
|
4766
4792
|
metrics: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
|
|
4767
4793
|
label: z$1.ZodString;
|
|
4768
4794
|
tooltip: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -4848,6 +4874,7 @@ type ResolvedLlmCallsConfig = {
|
|
|
4848
4874
|
derivedAttributes: ResolvedCallDerivedAttribute[];
|
|
4849
4875
|
metrics: ResolvedLlmCallMetric[];
|
|
4850
4876
|
pricing: ResolvedLlmCallPricing[];
|
|
4877
|
+
costCurrencies: ResolvedLlmCallCostCurrency[];
|
|
4851
4878
|
};
|
|
4852
4879
|
/** Resolved API-calls config sent to the UI with all defaults applied. */
|
|
4853
4880
|
type ResolvedApiCallsConfig = {
|
|
@@ -4896,6 +4923,13 @@ type ResolvedLlmCallPricing = {
|
|
|
4896
4923
|
cacheCreationInput1hUsdPerMillion?: number;
|
|
4897
4924
|
reasoningUsdPerMillion?: number;
|
|
4898
4925
|
};
|
|
4926
|
+
/** Fully-resolved extra currency used by the LLM calls tab. */
|
|
4927
|
+
type ResolvedLlmCallCostCurrency = {
|
|
4928
|
+
code: string;
|
|
4929
|
+
label?: string;
|
|
4930
|
+
usdToCurrencyRate: number;
|
|
4931
|
+
numberFormat?: NumberDisplayOptions;
|
|
4932
|
+
};
|
|
4899
4933
|
/** Default LLM-calls config the UI uses before the workspace fetch resolves. */
|
|
4900
4934
|
declare const DEFAULT_LLM_CALLS_CONFIG: ResolvedLlmCallsConfig;
|
|
4901
4935
|
/** Default API-calls config the UI uses before the workspace fetch resolves. */
|
|
@@ -4911,6 +4945,8 @@ declare const DEFAULT_API_CALLS_CONFIG: ResolvedApiCallsConfig;
|
|
|
4911
4945
|
* - Missing `metrics[].placements` defaults to `['body']`.
|
|
4912
4946
|
* - Missing `pricing` defaults to an empty registry; built-in costs are only
|
|
4913
4947
|
* derived from configured model-keyed pricing and token counts.
|
|
4948
|
+
* - Missing `costCurrencies` defaults to an empty list; extra currencies only
|
|
4949
|
+
* affect the expanded LLM calls breakdown table.
|
|
4914
4950
|
*/
|
|
4915
4951
|
declare function resolveLlmCallsConfig(input: LlmCallsConfigInput | undefined): ResolvedLlmCallsConfig;
|
|
4916
4952
|
/**
|
|
@@ -5009,6 +5045,9 @@ type AgentEvalsConfig = {
|
|
|
5009
5045
|
* outputUsdPerMillion: 0.6,
|
|
5010
5046
|
* },
|
|
5011
5047
|
* },
|
|
5048
|
+
* costCurrencies: [
|
|
5049
|
+
* { code: 'BRL', usdToCurrencyRate: 5.7, numberFormat: { prefix: 'R$ ' } },
|
|
5050
|
+
* ],
|
|
5012
5051
|
* }
|
|
5013
5052
|
* ```
|
|
5014
5053
|
*/
|
|
@@ -5137,9 +5176,9 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
|
|
|
5137
5176
|
aggregate: z$1.ZodEnum<{
|
|
5138
5177
|
last: "last";
|
|
5139
5178
|
sum: "sum";
|
|
5179
|
+
avg: "avg";
|
|
5140
5180
|
min: "min";
|
|
5141
5181
|
max: "max";
|
|
5142
|
-
avg: "avg";
|
|
5143
5182
|
}>;
|
|
5144
5183
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
5145
5184
|
number: "number";
|
|
@@ -5195,6 +5234,12 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
|
|
|
5195
5234
|
reasoningUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
5196
5235
|
}, z$1.core.$strip>>>;
|
|
5197
5236
|
}, z$1.core.$strip>>>;
|
|
5237
|
+
costCurrencies: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
|
|
5238
|
+
code: z$1.ZodString;
|
|
5239
|
+
label: z$1.ZodOptional<z$1.ZodString>;
|
|
5240
|
+
usdToCurrencyRate: z$1.ZodNumber;
|
|
5241
|
+
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
5242
|
+
}, z$1.core.$strip>>>;
|
|
5198
5243
|
metrics: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
|
|
5199
5244
|
label: z$1.ZodString;
|
|
5200
5245
|
tooltip: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -5214,8 +5259,8 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
|
|
|
5214
5259
|
}, z$1.core.$strip>>>;
|
|
5215
5260
|
}, z$1.core.$strip>>;
|
|
5216
5261
|
removeDefaultConfig: z$1.ZodOptional<z$1.ZodUnion<readonly [z$1.ZodLiteral<true>, z$1.ZodArray<z$1.ZodEnum<{
|
|
5217
|
-
apiCalls: "apiCalls";
|
|
5218
5262
|
costUsd: "costUsd";
|
|
5263
|
+
apiCalls: "apiCalls";
|
|
5219
5264
|
llmTurns: "llmTurns";
|
|
5220
5265
|
inputTokens: "inputTokens";
|
|
5221
5266
|
outputTokens: "outputTokens";
|
|
@@ -5326,6 +5371,84 @@ type LlmCallEntry = {
|
|
|
5326
5371
|
warnings: EvalTraceSpanWarning[];
|
|
5327
5372
|
error: EvalTraceSpanError | null;
|
|
5328
5373
|
};
|
|
5374
|
+
/**
|
|
5375
|
+
* Cost-simulation scenarios available in the LLM calls breakdown table.
|
|
5376
|
+
*
|
|
5377
|
+
* - `actual` — Real billed cost recorded on the span.
|
|
5378
|
+
* - `noCache` — Bill every input token at the base input rate, ignoring all
|
|
5379
|
+
* cache reads and cache writes. Worst case for any prompt that could be
|
|
5380
|
+
* cached.
|
|
5381
|
+
* - `withBaseCaching` — Steady-state cost on a fully warmed cache: cache
|
|
5382
|
+
* writes are treated as already paid (free), cache reads keep the cache-read
|
|
5383
|
+
* discount, and base input keeps the base rate. When the call has no
|
|
5384
|
+
* caching at all, every input token is billed at the cache-read rate, as if
|
|
5385
|
+
* the prompt had been warmed by an earlier run. Cache-read pricing is the
|
|
5386
|
+
* same on the base (5-minute) and extended (1-hour) tiers, so this scenario
|
|
5387
|
+
* covers the warmed case for both TTLs.
|
|
5388
|
+
* - `withBaseCachingWrite` — First-call cost paying the 5-minute cache write
|
|
5389
|
+
* premium. When the call already uses caching, every cache write token is
|
|
5390
|
+
* billed at the 5-minute rate (any extended-cache split is folded into the
|
|
5391
|
+
* 5-minute rate). When the call has no caching at all, every input token is
|
|
5392
|
+
* billed at the 5-minute cache write rate, as if this were the first call
|
|
5393
|
+
* warming up the base cache.
|
|
5394
|
+
* - `withExtendedCachingWrite` — First-call cost paying the extended (e.g.
|
|
5395
|
+
* 1-hour) cache write premium. When the call already uses caching, every
|
|
5396
|
+
* cache write token is billed at the extended rate. When the call has no
|
|
5397
|
+
* caching at all, every input token is billed at the extended cache write
|
|
5398
|
+
* rate, as if this were the first call warming up the extended cache.
|
|
5399
|
+
*/
|
|
5400
|
+
type LlmCostScenario = 'actual' | 'noCache' | 'withBaseCaching' | 'withBaseCachingWrite' | 'withExtendedCachingWrite';
|
|
5401
|
+
/** Per-row cost values returned by {@link simulateLlmCallCost}. */
|
|
5402
|
+
type LlmCallCostBreakdown = {
|
|
5403
|
+
inputCostUsd: number | null;
|
|
5404
|
+
outputCostUsd: number | null;
|
|
5405
|
+
cachedInputCostUsd: number | null;
|
|
5406
|
+
cacheCreationInputCostUsd: number | null;
|
|
5407
|
+
reasoningCostUsd: number | null;
|
|
5408
|
+
totalCostUsd: number | null;
|
|
5409
|
+
};
|
|
5410
|
+
/**
|
|
5411
|
+
* Recompute the LLM-call cost breakdown for a hypothetical billing scenario,
|
|
5412
|
+
* using the call's recorded token counts and the resolved pricing registry.
|
|
5413
|
+
*
|
|
5414
|
+
* The `actual` scenario returns the costs already stored on `entry`. Other
|
|
5415
|
+
* scenarios re-derive each cost component from `pricing` so users can compare
|
|
5416
|
+
* what the same usage would have cost under different cache strategies. When
|
|
5417
|
+
* pricing is missing for the model/provider, simulated cost components fall
|
|
5418
|
+
* back to `null` exactly like the original extractor.
|
|
5419
|
+
*/
|
|
5420
|
+
declare function simulateLlmCallCost({
|
|
5421
|
+
entry,
|
|
5422
|
+
pricing,
|
|
5423
|
+
scenario
|
|
5424
|
+
}: {
|
|
5425
|
+
entry: LlmCallEntry;
|
|
5426
|
+
pricing: ResolvedLlmCallPricing[];
|
|
5427
|
+
scenario: LlmCostScenario;
|
|
5428
|
+
}): LlmCallCostBreakdown;
|
|
5429
|
+
/** Per-row simulated token counts shown in the LLM call breakdown table. */
|
|
5430
|
+
type LlmCallSimulatedTokens = {
|
|
5431
|
+
/** Tokens shown on the `Input` row — base input only (cached + creation are subtracted). */baseInputTokens: number | null; /** Tokens shown on the `Cache read` row. */
|
|
5432
|
+
cachedInputTokens: number | null; /** Tokens shown on the `Cache write` row. */
|
|
5433
|
+
cacheCreationInputTokens: number | null;
|
|
5434
|
+
};
|
|
5435
|
+
/**
|
|
5436
|
+
* Project the call's recorded token allocation onto a hypothetical billing
|
|
5437
|
+
* scenario. Cacheable tokens shift between rows so the breakdown reflects the
|
|
5438
|
+
* simulated billing model: `noCache` folds reads/writes into base input,
|
|
5439
|
+
* `withBaseCaching` (warmed) treats every cacheable token as a cache read, and
|
|
5440
|
+
* the first-call write scenarios treat every cacheable token as a cache write.
|
|
5441
|
+
*
|
|
5442
|
+
* The returned counts are what the UI renders on each row and what
|
|
5443
|
+
* {@link simulateLlmCallCost} prices, so display and totals never drift.
|
|
5444
|
+
*/
|
|
5445
|
+
declare function simulateTokenAllocation({
|
|
5446
|
+
entry,
|
|
5447
|
+
scenario
|
|
5448
|
+
}: {
|
|
5449
|
+
entry: LlmCallEntry;
|
|
5450
|
+
scenario: LlmCostScenario;
|
|
5451
|
+
}): LlmCallSimulatedTokens;
|
|
5329
5452
|
/**
|
|
5330
5453
|
* Filter `spans` down to LLM calls and project each one to the structured
|
|
5331
5454
|
* shape consumed by the LLM calls tab.
|
|
@@ -5404,9 +5527,9 @@ declare function extractApiCalls(spans: EvalTraceSpan[], config: ResolvedApiCall
|
|
|
5404
5527
|
* - `refresh`: never read, always write (forces re-execution and overwrites).
|
|
5405
5528
|
*/
|
|
5406
5529
|
declare const cacheModeSchema: z$1.ZodEnum<{
|
|
5407
|
-
use: "use";
|
|
5408
|
-
bypass: "bypass";
|
|
5409
5530
|
refresh: "refresh";
|
|
5531
|
+
bypass: "bypass";
|
|
5532
|
+
use: "use";
|
|
5410
5533
|
}>;
|
|
5411
5534
|
/** Mode controlling how cached spans behave during a run. */
|
|
5412
5535
|
type CacheMode = z$1.infer<typeof cacheModeSchema>;
|
|
@@ -5427,10 +5550,10 @@ declare const cacheOperationTypeSchema: z$1.ZodEnum<{
|
|
|
5427
5550
|
type CacheOperationType = z$1.infer<typeof cacheOperationTypeSchema>;
|
|
5428
5551
|
/** Status of a cache lookup recorded on a span or case scope. */
|
|
5429
5552
|
declare const cacheStatusSchema: z$1.ZodEnum<{
|
|
5430
|
-
bypass: "bypass";
|
|
5431
|
-
refresh: "refresh";
|
|
5432
5553
|
hit: "hit";
|
|
5433
5554
|
miss: "miss";
|
|
5555
|
+
refresh: "refresh";
|
|
5556
|
+
bypass: "bypass";
|
|
5434
5557
|
}>;
|
|
5435
5558
|
/** Status of a cache lookup recorded on a span or case scope. */
|
|
5436
5559
|
type CacheStatus = z$1.infer<typeof cacheStatusSchema>;
|
|
@@ -5447,10 +5570,10 @@ declare const traceCacheRefSchema: z$1.ZodObject<{
|
|
|
5447
5570
|
namespace: z$1.ZodString;
|
|
5448
5571
|
key: z$1.ZodString;
|
|
5449
5572
|
status: z$1.ZodEnum<{
|
|
5450
|
-
bypass: "bypass";
|
|
5451
|
-
refresh: "refresh";
|
|
5452
5573
|
hit: "hit";
|
|
5453
5574
|
miss: "miss";
|
|
5575
|
+
refresh: "refresh";
|
|
5576
|
+
bypass: "bypass";
|
|
5454
5577
|
}>;
|
|
5455
5578
|
read: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
5456
5579
|
stored: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
@@ -5528,8 +5651,8 @@ declare const cacheRecordingSchema: z$1.ZodObject<{
|
|
|
5528
5651
|
finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
|
|
5529
5652
|
error: "error";
|
|
5530
5653
|
running: "running";
|
|
5531
|
-
cancelled: "cancelled";
|
|
5532
5654
|
ok: "ok";
|
|
5655
|
+
cancelled: "cancelled";
|
|
5533
5656
|
}>>;
|
|
5534
5657
|
finalError: z$1.ZodOptional<z$1.ZodObject<{
|
|
5535
5658
|
name: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -5601,8 +5724,8 @@ declare const cacheEntrySchema: z$1.ZodObject<{
|
|
|
5601
5724
|
finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
|
|
5602
5725
|
error: "error";
|
|
5603
5726
|
running: "running";
|
|
5604
|
-
cancelled: "cancelled";
|
|
5605
5727
|
ok: "ok";
|
|
5728
|
+
cancelled: "cancelled";
|
|
5606
5729
|
}>>;
|
|
5607
5730
|
finalError: z$1.ZodOptional<z$1.ZodObject<{
|
|
5608
5731
|
name: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -5690,8 +5813,8 @@ declare const cacheEntryWithDebugKeySchema: z$1.ZodObject<{
|
|
|
5690
5813
|
finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
|
|
5691
5814
|
error: "error";
|
|
5692
5815
|
running: "running";
|
|
5693
|
-
cancelled: "cancelled";
|
|
5694
5816
|
ok: "ok";
|
|
5817
|
+
cancelled: "cancelled";
|
|
5695
5818
|
}>>;
|
|
5696
5819
|
finalError: z$1.ZodOptional<z$1.ZodObject<{
|
|
5697
5820
|
name: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -5779,8 +5902,8 @@ declare const cacheFileSchema: z$1.ZodObject<{
|
|
|
5779
5902
|
finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
|
|
5780
5903
|
error: "error";
|
|
5781
5904
|
running: "running";
|
|
5782
|
-
cancelled: "cancelled";
|
|
5783
5905
|
ok: "ok";
|
|
5906
|
+
cancelled: "cancelled";
|
|
5784
5907
|
}>>;
|
|
5785
5908
|
finalError: z$1.ZodOptional<z$1.ZodObject<{
|
|
5786
5909
|
name: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -5966,9 +6089,9 @@ declare const createRunRequestSchema: z$1.ZodObject<{
|
|
|
5966
6089
|
trials: z$1.ZodNumber;
|
|
5967
6090
|
cache: z$1.ZodOptional<z$1.ZodObject<{
|
|
5968
6091
|
mode: z$1.ZodDefault<z$1.ZodEnum<{
|
|
5969
|
-
use: "use";
|
|
5970
|
-
bypass: "bypass";
|
|
5971
6092
|
refresh: "refresh";
|
|
6093
|
+
bypass: "bypass";
|
|
6094
|
+
use: "use";
|
|
5972
6095
|
}>>;
|
|
5973
6096
|
}, z$1.core.$strip>>;
|
|
5974
6097
|
manualInputs: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
|
|
@@ -6238,4 +6361,4 @@ type ManualInputDescriptor = z$1.infer<typeof manualInputDescriptorSchema>; //#e
|
|
|
6238
6361
|
*/
|
|
6239
6362
|
declare function runCli(argv: string[]): Promise<void>;
|
|
6240
6363
|
//#endregion
|
|
6241
|
-
export { type AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheActivityEntry, type CacheAdapter, type CacheDebugKeyEntry, type CacheDebugKeyFile, type CacheDebugKeyWrite, type CacheEntry, type CacheEntryWithDebugKey, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CacheSerializationOptions, type CacheStatus, type CallDerivedAttribute, type CallDerivedAttributeContext, type CallDerivedAttributesConfig, type CallDerivedAttributesFn, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type ConfigReloadState, type ConfigReloadStatus, type CreateRunRequest, DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, type DefaultConfigKey, type DerivedStatus, type DiscoveryIssue, EvalAssertionError, type EvalCacheConfig, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveConfig, type EvalDeriveContext, type EvalDeriveFn, type EvalDeriveMap, type EvalDeriveValueFn, type EvalDisplayStatus, type EvalExecuteContext, type EvalExpectation, type EvalFreshnessStatus, type EvalManualInputConfig, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalRuntimeScope, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStartTime, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, type EvalTimeUnit, type EvalTraceSpan, type EvalTraceSpanError, type EvalTraceSpanWarning, type EvalTraceTree, type FileRef, type JsonCell, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallPricing, type LlmCallPricingRate, type LlmCallPricingRegistry, type LlmCallsConfigInput, type ManualInputDescriptor, type ManualInputFieldDescriptor, type ManualInputFieldKind, type ManualInputFieldOverride, type ManualInputFieldsConfig, type ManualInputFileValue, type ManualInputSelectOption, type MaterializeManualInputFilesResult, type NumberDisplayOptions, type ReadManualInputFileResult, type RemoveDefaultConfig, type RepoFileRef, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedCallDerivedAttribute, type ResolvedLlmCallMetric, type ResolvedLlmCallPricing, type ResolvedLlmCallsConfig, type RunArtifactRef, type RunInEvalScopeOptions, type RunLogEntry, type RunLogLevel, type RunLogLocation, type RunLogPhase, type RunLogsConfigInput, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SerializedCacheValue, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, advanceEvalTime, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, applyDerivedCallAttributes, assertionFailureSchema, buildCaseKey, buildEvalKey, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, cleanupStagedManualInputFiles, columnDefSchema, columnFormatSchema, columnKindSchema, configReloadStateSchema, configReloadStatusSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, discoveryIssueSchema, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalColumnOverrideSchema, evalColumnsSchema, evalDeriveConfigSchema, evalExpect, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCaseRowCaseKey, getCaseRowEvalKey, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalStartTime, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingRateSchema, llmCallPricingSchema, llmCallsConfigSchema, manualInputBooleanFieldSchema, manualInputDescriptorSchema, manualInputFieldDescriptorSchema, manualInputFileValueSchema, manualInputJsonFieldSchema, manualInputMultilineFieldSchema, manualInputNumberFieldSchema, manualInputSelectFieldSchema, manualInputSelectOptionSchema, manualInputTextFieldSchema, materializeManualInputFiles, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, readManualInputFile, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
|
|
6364
|
+
export { type AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheActivityEntry, type CacheAdapter, type CacheDebugKeyEntry, type CacheDebugKeyFile, type CacheDebugKeyWrite, type CacheEntry, type CacheEntryWithDebugKey, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CacheSerializationOptions, type CacheStatus, type CallDerivedAttribute, type CallDerivedAttributeContext, type CallDerivedAttributesConfig, type CallDerivedAttributesFn, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type ConfigReloadState, type ConfigReloadStatus, type CreateRunRequest, DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, type DefaultConfigKey, type DerivedStatus, type DiscoveryIssue, EvalAssertionError, type EvalCacheConfig, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveConfig, type EvalDeriveContext, type EvalDeriveFn, type EvalDeriveMap, type EvalDeriveValueFn, type EvalDisplayStatus, type EvalExecuteContext, type EvalExpectation, type EvalFreshnessStatus, type EvalManualInputConfig, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalRuntimeScope, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStartTime, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, type EvalTimeUnit, type EvalTraceSpan, type EvalTraceSpanError, type EvalTraceSpanWarning, type EvalTraceTree, type FileRef, type JsonCell, type LlmCallCostBreakdown, type LlmCallCostCurrency, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallPricing, type LlmCallPricingRate, type LlmCallPricingRegistry, type LlmCallSimulatedTokens, type LlmCallsConfigInput, type LlmCostScenario, type ManualInputDescriptor, type ManualInputFieldDescriptor, type ManualInputFieldKind, type ManualInputFieldOverride, type ManualInputFieldsConfig, type ManualInputFileValue, type ManualInputSelectOption, type MaterializeManualInputFilesResult, type NumberDisplayOptions, type ReadManualInputFileResult, type RemoveDefaultConfig, type RepoFileRef, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedCallDerivedAttribute, type ResolvedLlmCallCostCurrency, type ResolvedLlmCallMetric, type ResolvedLlmCallPricing, type ResolvedLlmCallsConfig, type RunArtifactRef, type RunInEvalScopeOptions, type RunLogEntry, type RunLogLevel, type RunLogLocation, type RunLogPhase, type RunLogsConfigInput, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SerializedCacheValue, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, advanceEvalTime, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, applyDerivedCallAttributes, assertionFailureSchema, buildCaseKey, buildEvalKey, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, cleanupStagedManualInputFiles, columnDefSchema, columnFormatSchema, columnKindSchema, configReloadStateSchema, configReloadStatusSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, discoveryIssueSchema, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalColumnOverrideSchema, evalColumnsSchema, evalDeriveConfigSchema, evalExpect, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCaseRowCaseKey, getCaseRowEvalKey, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalStartTime, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, jsonCellSchema, llmCallCostCurrencySchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingRateSchema, llmCallPricingSchema, llmCallsConfigSchema, manualInputBooleanFieldSchema, manualInputDescriptorSchema, manualInputFieldDescriptorSchema, manualInputFileValueSchema, manualInputJsonFieldSchema, manualInputMultilineFieldSchema, manualInputNumberFieldSchema, manualInputSelectFieldSchema, manualInputSelectOptionSchema, manualInputTextFieldSchema, materializeManualInputFiles, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, readManualInputFile, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, spanCacheOptionsSchema, sseEnvelopeSchema, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
|