@ls-stack/agent-eval 0.31.0 → 0.32.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-BrSMRTpy.mjs → app-Dc6vvHRL.mjs} +4 -4
- package/dist/apps/web/dist/assets/index-BNQnbfi0.js +118 -0
- package/dist/apps/web/dist/assets/index-BPMMRktE.css +1 -0
- package/dist/apps/web/dist/index.html +2 -2
- package/dist/bin.mjs +1 -1
- package/dist/{cli-CMPmuY7W.mjs → cli-huuJbDNb.mjs} +3 -3
- package/dist/index.d.mts +119 -41
- package/dist/index.mjs +4 -4
- package/dist/runChild.mjs +1 -1
- package/dist/{runOrchestration-CAyVXPFz.mjs → runOrchestration-ZpN7xty_.mjs} +94 -1
- package/dist/{runner-CmpWwCe1.mjs → runner-BPXPvinB.mjs} +1 -1
- package/dist/{runner-Bnm1nz0U.mjs → runner-Dkol2ukD.mjs} +2 -2
- package/dist/src-1Qvuh0NH.mjs +3 -0
- package/package.json +2 -2
- package/dist/apps/web/dist/assets/index-CPcVyFRP.js +0 -118
- package/dist/apps/web/dist/assets/index-ClPR-tfN.css +0 -1
- package/dist/src-gZm9nyTp.mjs +0 -3
package/dist/index.d.mts
CHANGED
|
@@ -2908,13 +2908,13 @@ type ColumnKind = z$1.infer<typeof columnKindSchema>;
|
|
|
2908
2908
|
declare const columnFormatSchema: z$1.ZodEnum<{
|
|
2909
2909
|
number: "number";
|
|
2910
2910
|
boolean: "boolean";
|
|
2911
|
-
duration: "duration";
|
|
2912
|
-
json: "json";
|
|
2913
2911
|
file: "file";
|
|
2914
2912
|
markdown: "markdown";
|
|
2913
|
+
json: "json";
|
|
2915
2914
|
image: "image";
|
|
2916
2915
|
audio: "audio";
|
|
2917
2916
|
video: "video";
|
|
2917
|
+
duration: "duration";
|
|
2918
2918
|
percent: "percent";
|
|
2919
2919
|
passFail: "passFail";
|
|
2920
2920
|
stars: "stars";
|
|
@@ -2933,13 +2933,13 @@ declare const columnDefSchema: z$1.ZodObject<{
|
|
|
2933
2933
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2934
2934
|
number: "number";
|
|
2935
2935
|
boolean: "boolean";
|
|
2936
|
-
duration: "duration";
|
|
2937
|
-
json: "json";
|
|
2938
2936
|
file: "file";
|
|
2939
2937
|
markdown: "markdown";
|
|
2938
|
+
json: "json";
|
|
2940
2939
|
image: "image";
|
|
2941
2940
|
audio: "audio";
|
|
2942
2941
|
video: "video";
|
|
2942
|
+
duration: "duration";
|
|
2943
2943
|
percent: "percent";
|
|
2944
2944
|
passFail: "passFail";
|
|
2945
2945
|
stars: "stars";
|
|
@@ -2984,8 +2984,8 @@ declare const traceSpanKindSchema: z$1.ZodString;
|
|
|
2984
2984
|
declare const traceAttributeDisplayFormatSchema: z$1.ZodEnum<{
|
|
2985
2985
|
string: "string";
|
|
2986
2986
|
number: "number";
|
|
2987
|
-
duration: "duration";
|
|
2988
2987
|
json: "json";
|
|
2988
|
+
duration: "duration";
|
|
2989
2989
|
}>;
|
|
2990
2990
|
/**
|
|
2991
2991
|
* Formatting hint for trace attribute values rendered by the UI.
|
|
@@ -3009,8 +3009,8 @@ declare const traceAttributeDisplaySchema: z$1.ZodObject<{
|
|
|
3009
3009
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3010
3010
|
string: "string";
|
|
3011
3011
|
number: "number";
|
|
3012
|
-
duration: "duration";
|
|
3013
3012
|
json: "json";
|
|
3013
|
+
duration: "duration";
|
|
3014
3014
|
}>>;
|
|
3015
3015
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
3016
3016
|
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
@@ -3045,8 +3045,8 @@ declare const traceDisplayConfigSchema: z$1.ZodObject<{
|
|
|
3045
3045
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3046
3046
|
string: "string";
|
|
3047
3047
|
number: "number";
|
|
3048
|
-
duration: "duration";
|
|
3049
3048
|
json: "json";
|
|
3049
|
+
duration: "duration";
|
|
3050
3050
|
}>>;
|
|
3051
3051
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
3052
3052
|
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
@@ -3085,8 +3085,8 @@ declare const traceAttributeDisplayInputSchema: z$1.ZodObject<{
|
|
|
3085
3085
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3086
3086
|
string: "string";
|
|
3087
3087
|
number: "number";
|
|
3088
|
-
duration: "duration";
|
|
3089
3088
|
json: "json";
|
|
3089
|
+
duration: "duration";
|
|
3090
3090
|
}>>;
|
|
3091
3091
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
3092
3092
|
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
@@ -3123,8 +3123,8 @@ declare const traceDisplayInputConfigSchema: z$1.ZodObject<{
|
|
|
3123
3123
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3124
3124
|
string: "string";
|
|
3125
3125
|
number: "number";
|
|
3126
|
-
duration: "duration";
|
|
3127
3126
|
json: "json";
|
|
3127
|
+
duration: "duration";
|
|
3128
3128
|
}>>;
|
|
3129
3129
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
3130
3130
|
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
@@ -3255,13 +3255,13 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
3255
3255
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3256
3256
|
number: "number";
|
|
3257
3257
|
boolean: "boolean";
|
|
3258
|
-
duration: "duration";
|
|
3259
|
-
json: "json";
|
|
3260
3258
|
file: "file";
|
|
3261
3259
|
markdown: "markdown";
|
|
3260
|
+
json: "json";
|
|
3262
3261
|
image: "image";
|
|
3263
3262
|
audio: "audio";
|
|
3264
3263
|
video: "video";
|
|
3264
|
+
duration: "duration";
|
|
3265
3265
|
percent: "percent";
|
|
3266
3266
|
passFail: "passFail";
|
|
3267
3267
|
stars: "stars";
|
|
@@ -3297,13 +3297,13 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
|
|
|
3297
3297
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3298
3298
|
number: "number";
|
|
3299
3299
|
boolean: "boolean";
|
|
3300
|
-
duration: "duration";
|
|
3301
|
-
json: "json";
|
|
3302
3300
|
file: "file";
|
|
3303
3301
|
markdown: "markdown";
|
|
3302
|
+
json: "json";
|
|
3304
3303
|
image: "image";
|
|
3305
3304
|
audio: "audio";
|
|
3306
3305
|
video: "video";
|
|
3306
|
+
duration: "duration";
|
|
3307
3307
|
percent: "percent";
|
|
3308
3308
|
passFail: "passFail";
|
|
3309
3309
|
stars: "stars";
|
|
@@ -3340,13 +3340,13 @@ declare const evalSummarySchema: z$1.ZodObject<{
|
|
|
3340
3340
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3341
3341
|
number: "number";
|
|
3342
3342
|
boolean: "boolean";
|
|
3343
|
-
duration: "duration";
|
|
3344
|
-
json: "json";
|
|
3345
3343
|
file: "file";
|
|
3346
3344
|
markdown: "markdown";
|
|
3345
|
+
json: "json";
|
|
3347
3346
|
image: "image";
|
|
3348
3347
|
audio: "audio";
|
|
3349
3348
|
video: "video";
|
|
3349
|
+
duration: "duration";
|
|
3350
3350
|
percent: "percent";
|
|
3351
3351
|
passFail: "passFail";
|
|
3352
3352
|
stars: "stars";
|
|
@@ -3398,13 +3398,13 @@ declare const evalSummarySchema: z$1.ZodObject<{
|
|
|
3398
3398
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3399
3399
|
number: "number";
|
|
3400
3400
|
boolean: "boolean";
|
|
3401
|
-
duration: "duration";
|
|
3402
|
-
json: "json";
|
|
3403
3401
|
file: "file";
|
|
3404
3402
|
markdown: "markdown";
|
|
3403
|
+
json: "json";
|
|
3405
3404
|
image: "image";
|
|
3406
3405
|
audio: "audio";
|
|
3407
3406
|
video: "video";
|
|
3407
|
+
duration: "duration";
|
|
3408
3408
|
percent: "percent";
|
|
3409
3409
|
passFail: "passFail";
|
|
3410
3410
|
stars: "stars";
|
|
@@ -3429,8 +3429,8 @@ declare const evalSummarySchema: z$1.ZodObject<{
|
|
|
3429
3429
|
}>;
|
|
3430
3430
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
3431
3431
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3432
|
-
error: "error";
|
|
3433
3432
|
success: "success";
|
|
3433
|
+
error: "error";
|
|
3434
3434
|
warning: "warning";
|
|
3435
3435
|
accent: "accent";
|
|
3436
3436
|
accentDim: "accentDim";
|
|
@@ -3453,8 +3453,8 @@ declare const evalSummarySchema: z$1.ZodObject<{
|
|
|
3453
3453
|
}>;
|
|
3454
3454
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
3455
3455
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3456
|
-
error: "error";
|
|
3457
3456
|
success: "success";
|
|
3457
|
+
error: "error";
|
|
3458
3458
|
warning: "warning";
|
|
3459
3459
|
accent: "accent";
|
|
3460
3460
|
accentDim: "accentDim";
|
|
@@ -3718,8 +3718,8 @@ declare const scoreTraceSchema: z$1.ZodObject<{
|
|
|
3718
3718
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3719
3719
|
string: "string";
|
|
3720
3720
|
number: "number";
|
|
3721
|
-
duration: "duration";
|
|
3722
3721
|
json: "json";
|
|
3722
|
+
duration: "duration";
|
|
3723
3723
|
}>>;
|
|
3724
3724
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
3725
3725
|
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
@@ -3804,8 +3804,8 @@ declare const caseDetailSchema: z$1.ZodObject<{
|
|
|
3804
3804
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3805
3805
|
string: "string";
|
|
3806
3806
|
number: "number";
|
|
3807
|
-
duration: "duration";
|
|
3808
3807
|
json: "json";
|
|
3808
|
+
duration: "duration";
|
|
3809
3809
|
}>>;
|
|
3810
3810
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
3811
3811
|
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
@@ -3873,8 +3873,8 @@ declare const caseDetailSchema: z$1.ZodObject<{
|
|
|
3873
3873
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3874
3874
|
string: "string";
|
|
3875
3875
|
number: "number";
|
|
3876
|
-
duration: "duration";
|
|
3877
3876
|
json: "json";
|
|
3877
|
+
duration: "duration";
|
|
3878
3878
|
}>>;
|
|
3879
3879
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
3880
3880
|
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
@@ -4037,8 +4037,8 @@ type EvalChartAggregate = z$1.infer<typeof evalChartAggregateSchema>;
|
|
|
4037
4037
|
* not emit raw hex so authored evals stay decoupled from the web theme.
|
|
4038
4038
|
*/
|
|
4039
4039
|
declare const evalChartColorSchema: z$1.ZodEnum<{
|
|
4040
|
-
error: "error";
|
|
4041
4040
|
success: "success";
|
|
4041
|
+
error: "error";
|
|
4042
4042
|
warning: "warning";
|
|
4043
4043
|
accent: "accent";
|
|
4044
4044
|
accentDim: "accentDim";
|
|
@@ -4066,8 +4066,8 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
4066
4066
|
}>;
|
|
4067
4067
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
4068
4068
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4069
|
-
error: "error";
|
|
4070
4069
|
success: "success";
|
|
4070
|
+
error: "error";
|
|
4071
4071
|
warning: "warning";
|
|
4072
4072
|
accent: "accent";
|
|
4073
4073
|
accentDim: "accentDim";
|
|
@@ -4090,8 +4090,8 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
4090
4090
|
}>;
|
|
4091
4091
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
4092
4092
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4093
|
-
error: "error";
|
|
4094
4093
|
success: "success";
|
|
4094
|
+
error: "error";
|
|
4095
4095
|
warning: "warning";
|
|
4096
4096
|
accent: "accent";
|
|
4097
4097
|
accentDim: "accentDim";
|
|
@@ -4149,8 +4149,8 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
|
|
|
4149
4149
|
}>;
|
|
4150
4150
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
4151
4151
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4152
|
-
error: "error";
|
|
4153
4152
|
success: "success";
|
|
4153
|
+
error: "error";
|
|
4154
4154
|
warning: "warning";
|
|
4155
4155
|
accent: "accent";
|
|
4156
4156
|
accentDim: "accentDim";
|
|
@@ -4173,8 +4173,8 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
|
|
|
4173
4173
|
}>;
|
|
4174
4174
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
4175
4175
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4176
|
-
error: "error";
|
|
4177
4176
|
success: "success";
|
|
4177
|
+
error: "error";
|
|
4178
4178
|
warning: "warning";
|
|
4179
4179
|
accent: "accent";
|
|
4180
4180
|
accentDim: "accentDim";
|
|
@@ -4239,8 +4239,8 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
|
|
|
4239
4239
|
}>;
|
|
4240
4240
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
4241
4241
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4242
|
-
error: "error";
|
|
4243
4242
|
success: "success";
|
|
4243
|
+
error: "error";
|
|
4244
4244
|
warning: "warning";
|
|
4245
4245
|
accent: "accent";
|
|
4246
4246
|
accentDim: "accentDim";
|
|
@@ -4263,8 +4263,8 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
|
|
|
4263
4263
|
}>;
|
|
4264
4264
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
4265
4265
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4266
|
-
error: "error";
|
|
4267
4266
|
success: "success";
|
|
4267
|
+
error: "error";
|
|
4268
4268
|
warning: "warning";
|
|
4269
4269
|
accent: "accent";
|
|
4270
4270
|
accentDim: "accentDim";
|
|
@@ -4574,8 +4574,8 @@ declare const llmCallMetricFormatSchema: z$1.ZodEnum<{
|
|
|
4574
4574
|
string: "string";
|
|
4575
4575
|
number: "number";
|
|
4576
4576
|
boolean: "boolean";
|
|
4577
|
-
duration: "duration";
|
|
4578
4577
|
json: "json";
|
|
4578
|
+
duration: "duration";
|
|
4579
4579
|
}>;
|
|
4580
4580
|
/** Render format applied to an LLM-call metric value. */
|
|
4581
4581
|
type LlmCallMetricFormat = z$1.infer<typeof llmCallMetricFormatSchema>;
|
|
@@ -4584,8 +4584,8 @@ declare const apiCallMetricFormatSchema: z$1.ZodEnum<{
|
|
|
4584
4584
|
string: "string";
|
|
4585
4585
|
number: "number";
|
|
4586
4586
|
boolean: "boolean";
|
|
4587
|
-
duration: "duration";
|
|
4588
4587
|
json: "json";
|
|
4588
|
+
duration: "duration";
|
|
4589
4589
|
}>;
|
|
4590
4590
|
/** Render format applied to an API-call metric value. */
|
|
4591
4591
|
type ApiCallMetricFormat = z$1.infer<typeof apiCallMetricFormatSchema>;
|
|
@@ -4654,8 +4654,8 @@ declare const llmCallMetricSchema: z$1.ZodObject<{
|
|
|
4654
4654
|
string: "string";
|
|
4655
4655
|
number: "number";
|
|
4656
4656
|
boolean: "boolean";
|
|
4657
|
-
duration: "duration";
|
|
4658
4657
|
json: "json";
|
|
4658
|
+
duration: "duration";
|
|
4659
4659
|
}>>;
|
|
4660
4660
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
4661
4661
|
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
@@ -4683,8 +4683,8 @@ declare const apiCallMetricSchema: z$1.ZodObject<{
|
|
|
4683
4683
|
string: "string";
|
|
4684
4684
|
number: "number";
|
|
4685
4685
|
boolean: "boolean";
|
|
4686
|
-
duration: "duration";
|
|
4687
4686
|
json: "json";
|
|
4687
|
+
duration: "duration";
|
|
4688
4688
|
}>>;
|
|
4689
4689
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
4690
4690
|
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
@@ -4797,8 +4797,8 @@ declare const llmCallsConfigSchema: z$1.ZodObject<{
|
|
|
4797
4797
|
string: "string";
|
|
4798
4798
|
number: "number";
|
|
4799
4799
|
boolean: "boolean";
|
|
4800
|
-
duration: "duration";
|
|
4801
4800
|
json: "json";
|
|
4801
|
+
duration: "duration";
|
|
4802
4802
|
}>>;
|
|
4803
4803
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
4804
4804
|
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
@@ -4833,8 +4833,8 @@ declare const apiCallsConfigSchema: z$1.ZodObject<{
|
|
|
4833
4833
|
string: "string";
|
|
4834
4834
|
number: "number";
|
|
4835
4835
|
boolean: "boolean";
|
|
4836
|
-
duration: "duration";
|
|
4837
4836
|
json: "json";
|
|
4837
|
+
duration: "duration";
|
|
4838
4838
|
}>>;
|
|
4839
4839
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
4840
4840
|
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
@@ -5135,8 +5135,8 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
|
|
|
5135
5135
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
5136
5136
|
string: "string";
|
|
5137
5137
|
number: "number";
|
|
5138
|
-
duration: "duration";
|
|
5139
5138
|
json: "json";
|
|
5139
|
+
duration: "duration";
|
|
5140
5140
|
}>>;
|
|
5141
5141
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
5142
5142
|
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
@@ -5183,13 +5183,13 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
|
|
|
5183
5183
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
5184
5184
|
number: "number";
|
|
5185
5185
|
boolean: "boolean";
|
|
5186
|
-
duration: "duration";
|
|
5187
|
-
json: "json";
|
|
5188
5186
|
file: "file";
|
|
5189
5187
|
markdown: "markdown";
|
|
5188
|
+
json: "json";
|
|
5190
5189
|
image: "image";
|
|
5191
5190
|
audio: "audio";
|
|
5192
5191
|
video: "video";
|
|
5192
|
+
duration: "duration";
|
|
5193
5193
|
percent: "percent";
|
|
5194
5194
|
passFail: "passFail";
|
|
5195
5195
|
stars: "stars";
|
|
@@ -5248,8 +5248,8 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
|
|
|
5248
5248
|
string: "string";
|
|
5249
5249
|
number: "number";
|
|
5250
5250
|
boolean: "boolean";
|
|
5251
|
-
duration: "duration";
|
|
5252
5251
|
json: "json";
|
|
5252
|
+
duration: "duration";
|
|
5253
5253
|
}>>;
|
|
5254
5254
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
5255
5255
|
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
@@ -5293,8 +5293,8 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
|
|
|
5293
5293
|
string: "string";
|
|
5294
5294
|
number: "number";
|
|
5295
5295
|
boolean: "boolean";
|
|
5296
|
-
duration: "duration";
|
|
5297
5296
|
json: "json";
|
|
5297
|
+
duration: "duration";
|
|
5298
5298
|
}>>;
|
|
5299
5299
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
5300
5300
|
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
@@ -5371,6 +5371,84 @@ type LlmCallEntry = {
|
|
|
5371
5371
|
warnings: EvalTraceSpanWarning[];
|
|
5372
5372
|
error: EvalTraceSpanError | null;
|
|
5373
5373
|
};
|
|
5374
|
+
/**
|
|
5375
|
+
* Cost-simulation scenarios available in the LLM calls breakdown table.
|
|
5376
|
+
*
|
|
5377
|
+
* - `actual` — Real billed cost recorded on the span.
|
|
5378
|
+
* - `noCache` — Bill every input token at the base input rate, ignoring all
|
|
5379
|
+
* cache reads and cache writes. Worst case for any prompt that could be
|
|
5380
|
+
* cached.
|
|
5381
|
+
* - `withBaseCaching` — Steady-state cost on a fully warmed cache: cache
|
|
5382
|
+
* writes are treated as already paid (free), cache reads keep the cache-read
|
|
5383
|
+
* discount, and base input keeps the base rate. When the call has no
|
|
5384
|
+
* caching at all, every input token is billed at the cache-read rate, as if
|
|
5385
|
+
* the prompt had been warmed by an earlier run. Cache-read pricing is the
|
|
5386
|
+
* same on the base (5-minute) and extended (1-hour) tiers, so this scenario
|
|
5387
|
+
* covers the warmed case for both TTLs.
|
|
5388
|
+
* - `withBaseCachingWrite` — First-call cost paying the 5-minute cache write
|
|
5389
|
+
* premium. When the call already uses caching, every cache write token is
|
|
5390
|
+
* billed at the 5-minute rate (any extended-cache split is folded into the
|
|
5391
|
+
* 5-minute rate). When the call has no caching at all, every input token is
|
|
5392
|
+
* billed at the 5-minute cache write rate, as if this were the first call
|
|
5393
|
+
* warming up the base cache.
|
|
5394
|
+
* - `withExtendedCachingWrite` — First-call cost paying the extended (e.g.
|
|
5395
|
+
* 1-hour) cache write premium. When the call already uses caching, every
|
|
5396
|
+
* cache write token is billed at the extended rate. When the call has no
|
|
5397
|
+
* caching at all, every input token is billed at the extended cache write
|
|
5398
|
+
* rate, as if this were the first call warming up the extended cache.
|
|
5399
|
+
*/
|
|
5400
|
+
type LlmCostScenario = 'actual' | 'noCache' | 'withBaseCaching' | 'withBaseCachingWrite' | 'withExtendedCachingWrite';
|
|
5401
|
+
/** Per-row cost values returned by {@link simulateLlmCallCost}. */
|
|
5402
|
+
type LlmCallCostBreakdown = {
|
|
5403
|
+
inputCostUsd: number | null;
|
|
5404
|
+
outputCostUsd: number | null;
|
|
5405
|
+
cachedInputCostUsd: number | null;
|
|
5406
|
+
cacheCreationInputCostUsd: number | null;
|
|
5407
|
+
reasoningCostUsd: number | null;
|
|
5408
|
+
totalCostUsd: number | null;
|
|
5409
|
+
};
|
|
5410
|
+
/**
|
|
5411
|
+
* Recompute the LLM-call cost breakdown for a hypothetical billing scenario,
|
|
5412
|
+
* using the call's recorded token counts and the resolved pricing registry.
|
|
5413
|
+
*
|
|
5414
|
+
* The `actual` scenario returns the costs already stored on `entry`. Other
|
|
5415
|
+
* scenarios re-derive each cost component from `pricing` so users can compare
|
|
5416
|
+
* what the same usage would have cost under different cache strategies. When
|
|
5417
|
+
* pricing is missing for the model/provider, simulated cost components fall
|
|
5418
|
+
* back to `null` exactly like the original extractor.
|
|
5419
|
+
*/
|
|
5420
|
+
declare function simulateLlmCallCost({
|
|
5421
|
+
entry,
|
|
5422
|
+
pricing,
|
|
5423
|
+
scenario
|
|
5424
|
+
}: {
|
|
5425
|
+
entry: LlmCallEntry;
|
|
5426
|
+
pricing: ResolvedLlmCallPricing[];
|
|
5427
|
+
scenario: LlmCostScenario;
|
|
5428
|
+
}): LlmCallCostBreakdown;
|
|
5429
|
+
/** Per-row simulated token counts shown in the LLM call breakdown table. */
|
|
5430
|
+
type LlmCallSimulatedTokens = {
|
|
5431
|
+
/** Tokens shown on the `Input` row — base input only (cached + creation are subtracted). */baseInputTokens: number | null; /** Tokens shown on the `Cache read` row. */
|
|
5432
|
+
cachedInputTokens: number | null; /** Tokens shown on the `Cache write` row. */
|
|
5433
|
+
cacheCreationInputTokens: number | null;
|
|
5434
|
+
};
|
|
5435
|
+
/**
|
|
5436
|
+
* Project the call's recorded token allocation onto a hypothetical billing
|
|
5437
|
+
* scenario. Cacheable tokens shift between rows so the breakdown reflects the
|
|
5438
|
+
* simulated billing model: `noCache` folds reads/writes into base input,
|
|
5439
|
+
* `withBaseCaching` (warmed) treats every cacheable token as a cache read, and
|
|
5440
|
+
* the first-call write scenarios treat every cacheable token as a cache write.
|
|
5441
|
+
*
|
|
5442
|
+
* The returned counts are what the UI renders on each row and what
|
|
5443
|
+
* {@link simulateLlmCallCost} prices, so display and totals never drift.
|
|
5444
|
+
*/
|
|
5445
|
+
declare function simulateTokenAllocation({
|
|
5446
|
+
entry,
|
|
5447
|
+
scenario
|
|
5448
|
+
}: {
|
|
5449
|
+
entry: LlmCallEntry;
|
|
5450
|
+
scenario: LlmCostScenario;
|
|
5451
|
+
}): LlmCallSimulatedTokens;
|
|
5374
5452
|
/**
|
|
5375
5453
|
* Filter `spans` down to LLM calls and project each one to the structured
|
|
5376
5454
|
* shape consumed by the LLM calls tab.
|
|
@@ -6283,4 +6361,4 @@ type ManualInputDescriptor = z$1.infer<typeof manualInputDescriptorSchema>; //#e
|
|
|
6283
6361
|
*/
|
|
6284
6362
|
declare function runCli(argv: string[]): Promise<void>;
|
|
6285
6363
|
//#endregion
|
|
6286
|
-
export { type AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheActivityEntry, type CacheAdapter, type CacheDebugKeyEntry, type CacheDebugKeyFile, type CacheDebugKeyWrite, type CacheEntry, type CacheEntryWithDebugKey, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CacheSerializationOptions, type CacheStatus, type CallDerivedAttribute, type CallDerivedAttributeContext, type CallDerivedAttributesConfig, type CallDerivedAttributesFn, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type ConfigReloadState, type ConfigReloadStatus, type CreateRunRequest, DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, type DefaultConfigKey, type DerivedStatus, type DiscoveryIssue, EvalAssertionError, type EvalCacheConfig, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveConfig, type EvalDeriveContext, type EvalDeriveFn, type EvalDeriveMap, type EvalDeriveValueFn, type EvalDisplayStatus, type EvalExecuteContext, type EvalExpectation, type EvalFreshnessStatus, type EvalManualInputConfig, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalRuntimeScope, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStartTime, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, type EvalTimeUnit, type EvalTraceSpan, type EvalTraceSpanError, type EvalTraceSpanWarning, type EvalTraceTree, type FileRef, type JsonCell, type LlmCallCostCurrency, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallPricing, type LlmCallPricingRate, type LlmCallPricingRegistry, type LlmCallsConfigInput, type ManualInputDescriptor, type ManualInputFieldDescriptor, type ManualInputFieldKind, type ManualInputFieldOverride, type ManualInputFieldsConfig, type ManualInputFileValue, type ManualInputSelectOption, type MaterializeManualInputFilesResult, type NumberDisplayOptions, type ReadManualInputFileResult, type RemoveDefaultConfig, type RepoFileRef, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedCallDerivedAttribute, type ResolvedLlmCallCostCurrency, type ResolvedLlmCallMetric, type ResolvedLlmCallPricing, type ResolvedLlmCallsConfig, type RunArtifactRef, type RunInEvalScopeOptions, type RunLogEntry, type RunLogLevel, type RunLogLocation, type RunLogPhase, type RunLogsConfigInput, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SerializedCacheValue, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, advanceEvalTime, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, applyDerivedCallAttributes, assertionFailureSchema, buildCaseKey, buildEvalKey, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, cleanupStagedManualInputFiles, columnDefSchema, columnFormatSchema, columnKindSchema, configReloadStateSchema, configReloadStatusSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, discoveryIssueSchema, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalColumnOverrideSchema, evalColumnsSchema, evalDeriveConfigSchema, evalExpect, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCaseRowCaseKey, getCaseRowEvalKey, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalStartTime, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, jsonCellSchema, llmCallCostCurrencySchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingRateSchema, llmCallPricingSchema, llmCallsConfigSchema, manualInputBooleanFieldSchema, manualInputDescriptorSchema, manualInputFieldDescriptorSchema, manualInputFileValueSchema, manualInputJsonFieldSchema, manualInputMultilineFieldSchema, manualInputNumberFieldSchema, manualInputSelectFieldSchema, manualInputSelectOptionSchema, manualInputTextFieldSchema, materializeManualInputFiles, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, readManualInputFile, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
|
|
6364
|
+
export { type AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheActivityEntry, type CacheAdapter, type CacheDebugKeyEntry, type CacheDebugKeyFile, type CacheDebugKeyWrite, type CacheEntry, type CacheEntryWithDebugKey, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CacheSerializationOptions, type CacheStatus, type CallDerivedAttribute, type CallDerivedAttributeContext, type CallDerivedAttributesConfig, type CallDerivedAttributesFn, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type ConfigReloadState, type ConfigReloadStatus, type CreateRunRequest, DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, type DefaultConfigKey, type DerivedStatus, type DiscoveryIssue, EvalAssertionError, type EvalCacheConfig, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveConfig, type EvalDeriveContext, type EvalDeriveFn, type EvalDeriveMap, type EvalDeriveValueFn, type EvalDisplayStatus, type EvalExecuteContext, type EvalExpectation, type EvalFreshnessStatus, type EvalManualInputConfig, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalRuntimeScope, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStartTime, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, type EvalTimeUnit, type EvalTraceSpan, type EvalTraceSpanError, type EvalTraceSpanWarning, type EvalTraceTree, type FileRef, type JsonCell, type LlmCallCostBreakdown, type LlmCallCostCurrency, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallPricing, type LlmCallPricingRate, type LlmCallPricingRegistry, type LlmCallSimulatedTokens, type LlmCallsConfigInput, type LlmCostScenario, type ManualInputDescriptor, type ManualInputFieldDescriptor, type ManualInputFieldKind, type ManualInputFieldOverride, type ManualInputFieldsConfig, type ManualInputFileValue, type ManualInputSelectOption, type MaterializeManualInputFilesResult, type NumberDisplayOptions, type ReadManualInputFileResult, type RemoveDefaultConfig, type RepoFileRef, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedCallDerivedAttribute, type ResolvedLlmCallCostCurrency, type ResolvedLlmCallMetric, type ResolvedLlmCallPricing, type ResolvedLlmCallsConfig, type RunArtifactRef, type RunInEvalScopeOptions, type RunLogEntry, type RunLogLevel, type RunLogLocation, type RunLogPhase, type RunLogsConfigInput, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SerializedCacheValue, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, advanceEvalTime, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, applyDerivedCallAttributes, assertionFailureSchema, buildCaseKey, buildEvalKey, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, cleanupStagedManualInputFiles, columnDefSchema, columnFormatSchema, columnKindSchema, configReloadStateSchema, configReloadStatusSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, discoveryIssueSchema, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalColumnOverrideSchema, evalColumnsSchema, evalDeriveConfigSchema, evalExpect, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCaseRowCaseKey, getCaseRowEvalKey, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalStartTime, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, jsonCellSchema, llmCallCostCurrencySchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingRateSchema, llmCallPricingSchema, llmCallsConfigSchema, manualInputBooleanFieldSchema, manualInputDescriptorSchema, manualInputFieldDescriptorSchema, manualInputFileValueSchema, manualInputJsonFieldSchema, manualInputMultilineFieldSchema, manualInputNumberFieldSchema, manualInputSelectFieldSchema, manualInputSelectOptionSchema, manualInputTextFieldSchema, materializeManualInputFiles, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, readManualInputFile, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, spanCacheOptionsSchema, sseEnvelopeSchema, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
|
package/dist/index.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { $ as
|
|
2
|
-
import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-
|
|
3
|
-
import "./src-
|
|
4
|
-
export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, advanceEvalTime, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, applyDerivedCallAttributes, assertionFailureSchema, buildCaseKey, buildEvalKey, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, cleanupStagedManualInputFiles, columnDefSchema, columnFormatSchema, columnKindSchema, configReloadStateSchema, configReloadStatusSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, discoveryIssueSchema, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalColumnOverrideSchema, evalColumnsSchema, evalDeriveConfigSchema, evalExpect, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCaseRowCaseKey, getCaseRowEvalKey, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalStartTime, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, jsonCellSchema, llmCallCostCurrencySchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingRateSchema, llmCallPricingSchema, llmCallsConfigSchema, manualInputBooleanFieldSchema, manualInputDescriptorSchema, manualInputFieldDescriptorSchema, manualInputFileValueSchema, manualInputJsonFieldSchema, manualInputMultilineFieldSchema, manualInputNumberFieldSchema, manualInputSelectFieldSchema, manualInputSelectOptionSchema, manualInputTextFieldSchema, materializeManualInputFiles, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, readManualInputFile, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
|
|
1
|
+
import { $ as apiCallMetricSchema, $n as getCurrentScope, $t as cacheDebugKeyEntrySchema, A as createRunRequestSchema, An as repoFileRefSchema, At as runLogEntrySchema, B as getNestedAttribute, Bn as deserializeCacheValue, Bt as manualInputNumberFieldSchema, Cn as cellValueSchema, Ct as caseRowSchema, Dn as fileRefSchema, Dt as evalStatItemSchema, En as columnKindSchema, Et as evalStatAggregateSchema, F as extractApiCalls, Fn as evalSpan, Ft as manualInputBooleanFieldSchema, G as deriveStatusFromChildStatuses, Gn as readManualInputFile, Gt as evalChartAxisSchema, H as getEvalDisplayStatus, Hn as serializeCacheValue, Ht as manualInputSelectOptionSchema, I as extractLlmCalls, In as evalTracer, It as manualInputDescriptorSchema, J as DEFAULT_API_CALLS_CONFIG, Jn as advanceEvalTime, Jt as evalChartConfigSchema, K as runManifestSchema, Kn as evalExpect, Kt as evalChartBuiltinMetricSchema, L as simulateLlmCallCost, Ln as hashCacheKey, Lt as manualInputFieldDescriptorSchema, M as sseEnvelopeSchema, Mn as z, Mt as runLogLocationSchema, N as extractCacheEntries, Nn as buildTraceTree, Nt as runLogPhaseSchema, O as configReloadStateSchema, On as jsonCellSchema, Ot as evalStatsConfigSchema, P as extractCacheHits, Pn as captureEvalSpanError, Pt as scoreTraceSchema, Q as apiCallMetricPlacementSchema, Qn as evalLog, Qt as evalChartsConfigSchema, R as simulateTokenAllocation, Rn as hashCacheKeySync, Rt as manualInputJsonFieldSchema, Sn as traceSpanWarningSchema, St as caseDetailSchema, Tn as columnFormatSchema, Tt as evalFreshnessStatusSchema, U as deriveScopedSummaryFromCases, Un as repoFile, Ut as manualInputTextFieldSchema, V as getEvalTitle, Vn as serializeCacheRecording, Vt as manualInputSelectFieldSchema, W as deriveStatusFromCaseRows, Wn as manualInputFileValueSchema, Wt as evalChartAggregateSchema, X as agentEvalsConfigSchema, Xt as evalChartTooltipExtraSchema, Y as DEFAULT_LLM_CALLS_CONFIG, Yn as appendToEvalOutput, Yt as evalChartMetricSchema, Z as apiCallMetricFormatSchema, Zn as evalAssert, Zt as evalChartTypeSchema, _n as traceDisplayConfigSchema, _t as buildCaseKey, an as cacheModeSchema, ar as nextEvalId, at as llmCallCostCurrencySchema, bn as traceSpanKindSchema, bt as getCaseRowEvalKey, cn as cacheRecordingSchema, cr as runInExistingEvalScope, ct as llmCallMetricSchema, dn as spanCacheOptionsSchema, dr as startEvalBackgroundJob, dt as llmCallsConfigSchema, en as cacheDebugKeyFileSchema, er as getEvalCaseInput, et as apiCallsConfigSchema, fn as traceCacheRefSchema, fr as defineEval, ft as removeDefaultConfigSchema, gn as traceAttributeDisplaySchema, gt as trialSelectionModeSchema, hn as traceAttributeDisplayPlacementSchema, ht as runLogsConfigSchema, in as cacheListItemSchema, ir as mergeEvalOutput, it as evalDeriveConfigSchema, j as updateManualScoreRequestSchema, jn as runArtifactRefSchema, jt as runLogLevelSchema, k as configReloadStatusSchema, kn as numberDisplayOptionsSchema, kt as evalSummarySchema, ln as cacheStatusSchema, lr as setEvalOutput, lt as llmCallPricingRateSchema, mn as traceAttributeDisplayInputSchema, mt as resolveLlmCallsConfig, nn as cacheEntryWithDebugKeySchema, nr as incrementEvalOutput, nt as evalColumnOverrideSchema, on as cacheOperationTypeSchema, or as runInEvalRuntimeScope, ot as llmCallMetricFormatSchema, pn as traceAttributeDisplayFormatSchema, pr as getEvalRegistry, pt as resolveApiCallsConfig, q as runSummarySchema, qn as EvalAssertionError, qt as evalChartColorSchema, rn as cacheFileSchema, rr as isInEvalScope, rt as evalColumnsSchema, sn as cacheRecordingOpSchema, sr as runInEvalScope, st as llmCallMetricPlacementSchema, tn as cacheEntrySchema, tr as getEvalStartTime, tt as defaultConfigKeySchema, un as serializedCacheSpanSchema, ur as setScopeCacheContext, ut as llmCallPricingSchema, vn as traceDisplayInputConfigSchema, vt as buildEvalKey, wn as columnDefSchema, wt as discoveryIssueSchema, xn as traceSpanSchema, xt as assertionFailureSchema, yn as traceSpanErrorSchema, yt as getCaseRowCaseKey, z as applyDerivedCallAttributes, zn as deserializeCacheRecording, zt as manualInputMultilineFieldSchema } from "./runOrchestration-ZpN7xty_.mjs";
|
|
2
|
+
import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-huuJbDNb.mjs";
|
|
3
|
+
import "./src-1Qvuh0NH.mjs";
|
|
4
|
+
export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, advanceEvalTime, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, applyDerivedCallAttributes, assertionFailureSchema, buildCaseKey, buildEvalKey, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, cleanupStagedManualInputFiles, columnDefSchema, columnFormatSchema, columnKindSchema, configReloadStateSchema, configReloadStatusSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, discoveryIssueSchema, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalColumnOverrideSchema, evalColumnsSchema, evalDeriveConfigSchema, evalExpect, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCaseRowCaseKey, getCaseRowEvalKey, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalStartTime, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, jsonCellSchema, llmCallCostCurrencySchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingRateSchema, llmCallPricingSchema, llmCallsConfigSchema, manualInputBooleanFieldSchema, manualInputDescriptorSchema, manualInputFieldDescriptorSchema, manualInputFileValueSchema, manualInputJsonFieldSchema, manualInputMultilineFieldSchema, manualInputNumberFieldSchema, manualInputSelectFieldSchema, manualInputSelectOptionSchema, manualInputTextFieldSchema, materializeManualInputFiles, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, readManualInputFile, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, spanCacheOptionsSchema, sseEnvelopeSchema, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
|
package/dist/runChild.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { A as createRunRequestSchema, C as loadConfig, D as createFsCacheStore,
|
|
1
|
+
import { A as createRunRequestSchema, C as loadConfig, D as createFsCacheStore, It as manualInputDescriptorSchema, K as runManifestSchema, Ot as evalStatsConfigSchema, Qt as evalChartsConfigSchema, Xn as configureEvalRunLogs, q as runSummarySchema, r as getTargetEvals$1, t as executeRun, vt as buildEvalKey, wn as columnDefSchema, x as parseEvalDiscovery } from "./runOrchestration-ZpN7xty_.mjs";
|
|
2
2
|
import { createHash } from "node:crypto";
|
|
3
3
|
import { readFile } from "node:fs/promises";
|
|
4
4
|
import { relative } from "node:path";
|