@ls-stack/agent-eval 0.60.0 → 0.60.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-opbcrpvt.mjs → app-DPCFFkyQ.mjs} +4 -4
- package/dist/apps/web/dist/assets/{index-Dowobz-z.js → index-CM6MDNqo.js} +73 -73
- package/dist/apps/web/dist/index.html +1 -1
- package/dist/bin.mjs +1 -1
- package/dist/caseChild.mjs +1 -1
- package/dist/{cli-FOyPC8UD.mjs → cli-CbePEEua.mjs} +72 -27
- package/dist/index.d.mts +50 -41
- package/dist/index.mjs +3 -3
- package/dist/runChild.mjs +2 -2
- package/dist/{runExecution-CjWJUUZ5.mjs → runExecution-Bq0Y3y_1.mjs} +2 -2
- package/dist/{runOrchestration-DE2TFAS6.mjs → runOrchestration-BpwW0AmB.mjs} +1 -1
- package/dist/runner-Kp0JqxrU.mjs +15 -0
- package/dist/{runner-CIxj7jYj.mjs → runner-XEP21_u9.mjs} +1 -1
- package/dist/{src-p-GRSVDb.mjs → src-CVM_FqPx.mjs} +2 -2
- package/package.json +1 -1
- package/skills/agent-eval/SKILL.md +6 -3
- package/dist/runner-Dv5cseOt.mjs +0 -15
package/dist/index.d.mts
CHANGED
|
@@ -1942,6 +1942,7 @@ declare const columnFormatSchema: z.ZodEnum<{
|
|
|
1942
1942
|
number: "number";
|
|
1943
1943
|
boolean: "boolean";
|
|
1944
1944
|
file: "file";
|
|
1945
|
+
duration: "duration";
|
|
1945
1946
|
markdown: "markdown";
|
|
1946
1947
|
json: "json";
|
|
1947
1948
|
image: "image";
|
|
@@ -1949,7 +1950,6 @@ declare const columnFormatSchema: z.ZodEnum<{
|
|
|
1949
1950
|
pdf: "pdf";
|
|
1950
1951
|
audio: "audio";
|
|
1951
1952
|
video: "video";
|
|
1952
|
-
duration: "duration";
|
|
1953
1953
|
percent: "percent";
|
|
1954
1954
|
passFail: "passFail";
|
|
1955
1955
|
stars: "stars";
|
|
@@ -1969,6 +1969,7 @@ declare const columnDefSchema: z.ZodObject<{
|
|
|
1969
1969
|
number: "number";
|
|
1970
1970
|
boolean: "boolean";
|
|
1971
1971
|
file: "file";
|
|
1972
|
+
duration: "duration";
|
|
1972
1973
|
markdown: "markdown";
|
|
1973
1974
|
json: "json";
|
|
1974
1975
|
image: "image";
|
|
@@ -1976,7 +1977,6 @@ declare const columnDefSchema: z.ZodObject<{
|
|
|
1976
1977
|
pdf: "pdf";
|
|
1977
1978
|
audio: "audio";
|
|
1978
1979
|
video: "video";
|
|
1979
|
-
duration: "duration";
|
|
1980
1980
|
percent: "percent";
|
|
1981
1981
|
passFail: "passFail";
|
|
1982
1982
|
stars: "stars";
|
|
@@ -2022,8 +2022,8 @@ type CellValue = z.infer<typeof cellValueSchema>; //#endregion
|
|
|
2022
2022
|
declare const traceAttributeDisplayFormatSchema: z.ZodEnum<{
|
|
2023
2023
|
string: "string";
|
|
2024
2024
|
number: "number";
|
|
2025
|
-
json: "json";
|
|
2026
2025
|
duration: "duration";
|
|
2026
|
+
json: "json";
|
|
2027
2027
|
}>;
|
|
2028
2028
|
/**
|
|
2029
2029
|
* Formatting hint for trace attribute values rendered by the UI.
|
|
@@ -2047,8 +2047,8 @@ declare const traceAttributeDisplaySchema: z.ZodObject<{
|
|
|
2047
2047
|
format: z.ZodOptional<z.ZodEnum<{
|
|
2048
2048
|
string: "string";
|
|
2049
2049
|
number: "number";
|
|
2050
|
-
json: "json";
|
|
2051
2050
|
duration: "duration";
|
|
2051
|
+
json: "json";
|
|
2052
2052
|
}>>;
|
|
2053
2053
|
numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
2054
2054
|
placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
|
|
@@ -2083,8 +2083,8 @@ declare const traceDisplayConfigSchema: z.ZodObject<{
|
|
|
2083
2083
|
format: z.ZodOptional<z.ZodEnum<{
|
|
2084
2084
|
string: "string";
|
|
2085
2085
|
number: "number";
|
|
2086
|
-
json: "json";
|
|
2087
2086
|
duration: "duration";
|
|
2087
|
+
json: "json";
|
|
2088
2088
|
}>>;
|
|
2089
2089
|
numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
2090
2090
|
placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
|
|
@@ -2123,8 +2123,8 @@ declare const traceAttributeDisplayInputSchema: z.ZodObject<{
|
|
|
2123
2123
|
format: z.ZodOptional<z.ZodEnum<{
|
|
2124
2124
|
string: "string";
|
|
2125
2125
|
number: "number";
|
|
2126
|
-
json: "json";
|
|
2127
2126
|
duration: "duration";
|
|
2127
|
+
json: "json";
|
|
2128
2128
|
}>>;
|
|
2129
2129
|
numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
2130
2130
|
placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
|
|
@@ -2161,8 +2161,8 @@ declare const traceDisplayInputConfigSchema: z.ZodObject<{
|
|
|
2161
2161
|
format: z.ZodOptional<z.ZodEnum<{
|
|
2162
2162
|
string: "string";
|
|
2163
2163
|
number: "number";
|
|
2164
|
-
json: "json";
|
|
2165
2164
|
duration: "duration";
|
|
2165
|
+
json: "json";
|
|
2166
2166
|
}>>;
|
|
2167
2167
|
numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
2168
2168
|
placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
|
|
@@ -2327,6 +2327,7 @@ declare const evalStatItemSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
2327
2327
|
number: "number";
|
|
2328
2328
|
boolean: "boolean";
|
|
2329
2329
|
file: "file";
|
|
2330
|
+
duration: "duration";
|
|
2330
2331
|
markdown: "markdown";
|
|
2331
2332
|
json: "json";
|
|
2332
2333
|
image: "image";
|
|
@@ -2334,7 +2335,6 @@ declare const evalStatItemSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
2334
2335
|
pdf: "pdf";
|
|
2335
2336
|
audio: "audio";
|
|
2336
2337
|
video: "video";
|
|
2337
|
-
duration: "duration";
|
|
2338
2338
|
percent: "percent";
|
|
2339
2339
|
passFail: "passFail";
|
|
2340
2340
|
stars: "stars";
|
|
@@ -2391,6 +2391,7 @@ declare const evalStatsConfigSchema: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodOb
|
|
|
2391
2391
|
number: "number";
|
|
2392
2392
|
boolean: "boolean";
|
|
2393
2393
|
file: "file";
|
|
2394
|
+
duration: "duration";
|
|
2394
2395
|
markdown: "markdown";
|
|
2395
2396
|
json: "json";
|
|
2396
2397
|
image: "image";
|
|
@@ -2398,7 +2399,6 @@ declare const evalStatsConfigSchema: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodOb
|
|
|
2398
2399
|
pdf: "pdf";
|
|
2399
2400
|
audio: "audio";
|
|
2400
2401
|
video: "video";
|
|
2401
|
-
duration: "duration";
|
|
2402
2402
|
percent: "percent";
|
|
2403
2403
|
passFail: "passFail";
|
|
2404
2404
|
stars: "stars";
|
|
@@ -2437,6 +2437,7 @@ declare const evalSummarySchema$1: z.ZodObject<{
|
|
|
2437
2437
|
number: "number";
|
|
2438
2438
|
boolean: "boolean";
|
|
2439
2439
|
file: "file";
|
|
2440
|
+
duration: "duration";
|
|
2440
2441
|
markdown: "markdown";
|
|
2441
2442
|
json: "json";
|
|
2442
2443
|
image: "image";
|
|
@@ -2444,7 +2445,6 @@ declare const evalSummarySchema$1: z.ZodObject<{
|
|
|
2444
2445
|
pdf: "pdf";
|
|
2445
2446
|
audio: "audio";
|
|
2446
2447
|
video: "video";
|
|
2447
|
-
duration: "duration";
|
|
2448
2448
|
percent: "percent";
|
|
2449
2449
|
passFail: "passFail";
|
|
2450
2450
|
stars: "stars";
|
|
@@ -2518,6 +2518,7 @@ declare const evalSummarySchema$1: z.ZodObject<{
|
|
|
2518
2518
|
number: "number";
|
|
2519
2519
|
boolean: "boolean";
|
|
2520
2520
|
file: "file";
|
|
2521
|
+
duration: "duration";
|
|
2521
2522
|
markdown: "markdown";
|
|
2522
2523
|
json: "json";
|
|
2523
2524
|
image: "image";
|
|
@@ -2525,7 +2526,6 @@ declare const evalSummarySchema$1: z.ZodObject<{
|
|
|
2525
2526
|
pdf: "pdf";
|
|
2526
2527
|
audio: "audio";
|
|
2527
2528
|
video: "video";
|
|
2528
|
-
duration: "duration";
|
|
2529
2529
|
percent: "percent";
|
|
2530
2530
|
passFail: "passFail";
|
|
2531
2531
|
stars: "stars";
|
|
@@ -2559,8 +2559,8 @@ declare const evalSummarySchema$1: z.ZodObject<{
|
|
|
2559
2559
|
label: z.ZodOptional<z.ZodString>;
|
|
2560
2560
|
color: z.ZodOptional<z.ZodEnum<{
|
|
2561
2561
|
success: "success";
|
|
2562
|
-
error: "error";
|
|
2563
2562
|
accent: "accent";
|
|
2563
|
+
error: "error";
|
|
2564
2564
|
accentDim: "accentDim";
|
|
2565
2565
|
warning: "warning";
|
|
2566
2566
|
textMuted: "textMuted";
|
|
@@ -2583,8 +2583,8 @@ declare const evalSummarySchema$1: z.ZodObject<{
|
|
|
2583
2583
|
label: z.ZodOptional<z.ZodString>;
|
|
2584
2584
|
color: z.ZodOptional<z.ZodEnum<{
|
|
2585
2585
|
success: "success";
|
|
2586
|
-
error: "error";
|
|
2587
2586
|
accent: "accent";
|
|
2587
|
+
error: "error";
|
|
2588
2588
|
accentDim: "accentDim";
|
|
2589
2589
|
warning: "warning";
|
|
2590
2590
|
textMuted: "textMuted";
|
|
@@ -2749,6 +2749,7 @@ declare const caseRowSchema$1: z.ZodObject<{
|
|
|
2749
2749
|
number: "number";
|
|
2750
2750
|
boolean: "boolean";
|
|
2751
2751
|
file: "file";
|
|
2752
|
+
duration: "duration";
|
|
2752
2753
|
markdown: "markdown";
|
|
2753
2754
|
json: "json";
|
|
2754
2755
|
image: "image";
|
|
@@ -2756,7 +2757,6 @@ declare const caseRowSchema$1: z.ZodObject<{
|
|
|
2756
2757
|
pdf: "pdf";
|
|
2757
2758
|
audio: "audio";
|
|
2758
2759
|
video: "video";
|
|
2759
|
-
duration: "duration";
|
|
2760
2760
|
percent: "percent";
|
|
2761
2761
|
passFail: "passFail";
|
|
2762
2762
|
stars: "stars";
|
|
@@ -2894,8 +2894,8 @@ declare const scoreTraceSchema: z.ZodObject<{
|
|
|
2894
2894
|
format: z.ZodOptional<z.ZodEnum<{
|
|
2895
2895
|
string: "string";
|
|
2896
2896
|
number: "number";
|
|
2897
|
-
json: "json";
|
|
2898
2897
|
duration: "duration";
|
|
2898
|
+
json: "json";
|
|
2899
2899
|
}>>;
|
|
2900
2900
|
numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
2901
2901
|
placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
|
|
@@ -2997,8 +2997,8 @@ declare const caseDetailSchema$1: z.ZodObject<{
|
|
|
2997
2997
|
format: z.ZodOptional<z.ZodEnum<{
|
|
2998
2998
|
string: "string";
|
|
2999
2999
|
number: "number";
|
|
3000
|
-
json: "json";
|
|
3001
3000
|
duration: "duration";
|
|
3001
|
+
json: "json";
|
|
3002
3002
|
}>>;
|
|
3003
3003
|
numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
3004
3004
|
placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
|
|
@@ -3066,8 +3066,8 @@ declare const caseDetailSchema$1: z.ZodObject<{
|
|
|
3066
3066
|
format: z.ZodOptional<z.ZodEnum<{
|
|
3067
3067
|
string: "string";
|
|
3068
3068
|
number: "number";
|
|
3069
|
-
json: "json";
|
|
3070
3069
|
duration: "duration";
|
|
3070
|
+
json: "json";
|
|
3071
3071
|
}>>;
|
|
3072
3072
|
numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
3073
3073
|
placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
|
|
@@ -3127,6 +3127,7 @@ declare const caseDetailSchema$1: z.ZodObject<{
|
|
|
3127
3127
|
number: "number";
|
|
3128
3128
|
boolean: "boolean";
|
|
3129
3129
|
file: "file";
|
|
3130
|
+
duration: "duration";
|
|
3130
3131
|
markdown: "markdown";
|
|
3131
3132
|
json: "json";
|
|
3132
3133
|
image: "image";
|
|
@@ -3134,7 +3135,6 @@ declare const caseDetailSchema$1: z.ZodObject<{
|
|
|
3134
3135
|
pdf: "pdf";
|
|
3135
3136
|
audio: "audio";
|
|
3136
3137
|
video: "video";
|
|
3137
|
-
duration: "duration";
|
|
3138
3138
|
percent: "percent";
|
|
3139
3139
|
passFail: "passFail";
|
|
3140
3140
|
stars: "stars";
|
|
@@ -3284,8 +3284,8 @@ type EvalChartAggregate = z.infer<typeof evalChartAggregateSchema>;
|
|
|
3284
3284
|
*/
|
|
3285
3285
|
declare const evalChartColorSchema: z.ZodEnum<{
|
|
3286
3286
|
success: "success";
|
|
3287
|
-
error: "error";
|
|
3288
3287
|
accent: "accent";
|
|
3288
|
+
error: "error";
|
|
3289
3289
|
accentDim: "accentDim";
|
|
3290
3290
|
warning: "warning";
|
|
3291
3291
|
textMuted: "textMuted";
|
|
@@ -3313,8 +3313,8 @@ declare const evalChartMetricSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
3313
3313
|
label: z.ZodOptional<z.ZodString>;
|
|
3314
3314
|
color: z.ZodOptional<z.ZodEnum<{
|
|
3315
3315
|
success: "success";
|
|
3316
|
-
error: "error";
|
|
3317
3316
|
accent: "accent";
|
|
3317
|
+
error: "error";
|
|
3318
3318
|
accentDim: "accentDim";
|
|
3319
3319
|
warning: "warning";
|
|
3320
3320
|
textMuted: "textMuted";
|
|
@@ -3337,8 +3337,8 @@ declare const evalChartMetricSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
3337
3337
|
label: z.ZodOptional<z.ZodString>;
|
|
3338
3338
|
color: z.ZodOptional<z.ZodEnum<{
|
|
3339
3339
|
success: "success";
|
|
3340
|
-
error: "error";
|
|
3341
3340
|
accent: "accent";
|
|
3341
|
+
error: "error";
|
|
3342
3342
|
accentDim: "accentDim";
|
|
3343
3343
|
warning: "warning";
|
|
3344
3344
|
textMuted: "textMuted";
|
|
@@ -3396,8 +3396,8 @@ declare const evalChartConfigSchema: z.ZodObject<{
|
|
|
3396
3396
|
label: z.ZodOptional<z.ZodString>;
|
|
3397
3397
|
color: z.ZodOptional<z.ZodEnum<{
|
|
3398
3398
|
success: "success";
|
|
3399
|
-
error: "error";
|
|
3400
3399
|
accent: "accent";
|
|
3400
|
+
error: "error";
|
|
3401
3401
|
accentDim: "accentDim";
|
|
3402
3402
|
warning: "warning";
|
|
3403
3403
|
textMuted: "textMuted";
|
|
@@ -3420,8 +3420,8 @@ declare const evalChartConfigSchema: z.ZodObject<{
|
|
|
3420
3420
|
label: z.ZodOptional<z.ZodString>;
|
|
3421
3421
|
color: z.ZodOptional<z.ZodEnum<{
|
|
3422
3422
|
success: "success";
|
|
3423
|
-
error: "error";
|
|
3424
3423
|
accent: "accent";
|
|
3424
|
+
error: "error";
|
|
3425
3425
|
accentDim: "accentDim";
|
|
3426
3426
|
warning: "warning";
|
|
3427
3427
|
textMuted: "textMuted";
|
|
@@ -3486,8 +3486,8 @@ declare const evalChartsConfigSchema: z.ZodArray<z.ZodObject<{
|
|
|
3486
3486
|
label: z.ZodOptional<z.ZodString>;
|
|
3487
3487
|
color: z.ZodOptional<z.ZodEnum<{
|
|
3488
3488
|
success: "success";
|
|
3489
|
-
error: "error";
|
|
3490
3489
|
accent: "accent";
|
|
3490
|
+
error: "error";
|
|
3491
3491
|
accentDim: "accentDim";
|
|
3492
3492
|
warning: "warning";
|
|
3493
3493
|
textMuted: "textMuted";
|
|
@@ -3510,8 +3510,8 @@ declare const evalChartsConfigSchema: z.ZodArray<z.ZodObject<{
|
|
|
3510
3510
|
label: z.ZodOptional<z.ZodString>;
|
|
3511
3511
|
color: z.ZodOptional<z.ZodEnum<{
|
|
3512
3512
|
success: "success";
|
|
3513
|
-
error: "error";
|
|
3514
3513
|
accent: "accent";
|
|
3514
|
+
error: "error";
|
|
3515
3515
|
accentDim: "accentDim";
|
|
3516
3516
|
warning: "warning";
|
|
3517
3517
|
textMuted: "textMuted";
|
|
@@ -3808,8 +3808,8 @@ declare const llmCallMetricFormatSchema$1: z.ZodEnum<{
|
|
|
3808
3808
|
string: "string";
|
|
3809
3809
|
number: "number";
|
|
3810
3810
|
boolean: "boolean";
|
|
3811
|
-
json: "json";
|
|
3812
3811
|
duration: "duration";
|
|
3812
|
+
json: "json";
|
|
3813
3813
|
}>;
|
|
3814
3814
|
/** Render format applied to an LLM-call metric value. */
|
|
3815
3815
|
type LlmCallMetricFormat = z.infer<typeof llmCallMetricFormatSchema$1>;
|
|
@@ -3818,8 +3818,8 @@ declare const apiCallMetricFormatSchema$1: z.ZodEnum<{
|
|
|
3818
3818
|
string: "string";
|
|
3819
3819
|
number: "number";
|
|
3820
3820
|
boolean: "boolean";
|
|
3821
|
-
json: "json";
|
|
3822
3821
|
duration: "duration";
|
|
3822
|
+
json: "json";
|
|
3823
3823
|
}>;
|
|
3824
3824
|
/** Render format applied to an API-call metric value. */
|
|
3825
3825
|
type ApiCallMetricFormat = z.infer<typeof apiCallMetricFormatSchema$1>;
|
|
@@ -3888,8 +3888,8 @@ declare const llmCallMetricSchema: z.ZodObject<{
|
|
|
3888
3888
|
string: "string";
|
|
3889
3889
|
number: "number";
|
|
3890
3890
|
boolean: "boolean";
|
|
3891
|
-
json: "json";
|
|
3892
3891
|
duration: "duration";
|
|
3892
|
+
json: "json";
|
|
3893
3893
|
}>>;
|
|
3894
3894
|
numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
3895
3895
|
placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
|
|
@@ -3917,8 +3917,8 @@ declare const apiCallMetricSchema: z.ZodObject<{
|
|
|
3917
3917
|
string: "string";
|
|
3918
3918
|
number: "number";
|
|
3919
3919
|
boolean: "boolean";
|
|
3920
|
-
json: "json";
|
|
3921
3920
|
duration: "duration";
|
|
3921
|
+
json: "json";
|
|
3922
3922
|
}>>;
|
|
3923
3923
|
numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
3924
3924
|
placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
|
|
@@ -4031,8 +4031,8 @@ declare const llmCallsConfigSchema: z.ZodObject<{
|
|
|
4031
4031
|
string: "string";
|
|
4032
4032
|
number: "number";
|
|
4033
4033
|
boolean: "boolean";
|
|
4034
|
-
json: "json";
|
|
4035
4034
|
duration: "duration";
|
|
4035
|
+
json: "json";
|
|
4036
4036
|
}>>;
|
|
4037
4037
|
numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
4038
4038
|
placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
|
|
@@ -4067,8 +4067,8 @@ declare const apiCallsConfigSchema: z.ZodObject<{
|
|
|
4067
4067
|
string: "string";
|
|
4068
4068
|
number: "number";
|
|
4069
4069
|
boolean: "boolean";
|
|
4070
|
-
json: "json";
|
|
4071
4070
|
duration: "duration";
|
|
4071
|
+
json: "json";
|
|
4072
4072
|
}>>;
|
|
4073
4073
|
numberFormat: z.ZodOptional<z.ZodType<NumberDisplayOptions, unknown, z.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
4074
4074
|
placements: z.ZodOptional<z.ZodArray<z.ZodEnum<{
|
|
@@ -4670,6 +4670,7 @@ declare const cacheRecordingOpSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
4670
4670
|
number: "number";
|
|
4671
4671
|
boolean: "boolean";
|
|
4672
4672
|
file: "file";
|
|
4673
|
+
duration: "duration";
|
|
4673
4674
|
markdown: "markdown";
|
|
4674
4675
|
json: "json";
|
|
4675
4676
|
image: "image";
|
|
@@ -4677,7 +4678,6 @@ declare const cacheRecordingOpSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
4677
4678
|
pdf: "pdf";
|
|
4678
4679
|
audio: "audio";
|
|
4679
4680
|
video: "video";
|
|
4680
|
-
duration: "duration";
|
|
4681
4681
|
percent: "percent";
|
|
4682
4682
|
passFail: "passFail";
|
|
4683
4683
|
stars: "stars";
|
|
@@ -4758,6 +4758,7 @@ declare const cacheRecordingSchema: z.ZodObject<{
|
|
|
4758
4758
|
number: "number";
|
|
4759
4759
|
boolean: "boolean";
|
|
4760
4760
|
file: "file";
|
|
4761
|
+
duration: "duration";
|
|
4761
4762
|
markdown: "markdown";
|
|
4762
4763
|
json: "json";
|
|
4763
4764
|
image: "image";
|
|
@@ -4765,7 +4766,6 @@ declare const cacheRecordingSchema: z.ZodObject<{
|
|
|
4765
4766
|
pdf: "pdf";
|
|
4766
4767
|
audio: "audio";
|
|
4767
4768
|
video: "video";
|
|
4768
|
-
duration: "duration";
|
|
4769
4769
|
percent: "percent";
|
|
4770
4770
|
passFail: "passFail";
|
|
4771
4771
|
stars: "stars";
|
|
@@ -4859,6 +4859,7 @@ declare const cacheEntrySchema: z.ZodObject<{
|
|
|
4859
4859
|
number: "number";
|
|
4860
4860
|
boolean: "boolean";
|
|
4861
4861
|
file: "file";
|
|
4862
|
+
duration: "duration";
|
|
4862
4863
|
markdown: "markdown";
|
|
4863
4864
|
json: "json";
|
|
4864
4865
|
image: "image";
|
|
@@ -4866,7 +4867,6 @@ declare const cacheEntrySchema: z.ZodObject<{
|
|
|
4866
4867
|
pdf: "pdf";
|
|
4867
4868
|
audio: "audio";
|
|
4868
4869
|
video: "video";
|
|
4869
|
-
duration: "duration";
|
|
4870
4870
|
percent: "percent";
|
|
4871
4871
|
passFail: "passFail";
|
|
4872
4872
|
stars: "stars";
|
|
@@ -4977,6 +4977,7 @@ declare const cacheDebugKeyEntrySchema: z.ZodObject<{
|
|
|
4977
4977
|
number: "number";
|
|
4978
4978
|
boolean: "boolean";
|
|
4979
4979
|
file: "file";
|
|
4980
|
+
duration: "duration";
|
|
4980
4981
|
markdown: "markdown";
|
|
4981
4982
|
json: "json";
|
|
4982
4983
|
image: "image";
|
|
@@ -4984,7 +4985,6 @@ declare const cacheDebugKeyEntrySchema: z.ZodObject<{
|
|
|
4984
4985
|
pdf: "pdf";
|
|
4985
4986
|
audio: "audio";
|
|
4986
4987
|
video: "video";
|
|
4987
|
-
duration: "duration";
|
|
4988
4988
|
percent: "percent";
|
|
4989
4989
|
passFail: "passFail";
|
|
4990
4990
|
stars: "stars";
|
|
@@ -5084,6 +5084,7 @@ declare const cacheEntryWithDebugKeySchema$1: z.ZodObject<{
|
|
|
5084
5084
|
number: "number";
|
|
5085
5085
|
boolean: "boolean";
|
|
5086
5086
|
file: "file";
|
|
5087
|
+
duration: "duration";
|
|
5087
5088
|
markdown: "markdown";
|
|
5088
5089
|
json: "json";
|
|
5089
5090
|
image: "image";
|
|
@@ -5091,7 +5092,6 @@ declare const cacheEntryWithDebugKeySchema$1: z.ZodObject<{
|
|
|
5091
5092
|
pdf: "pdf";
|
|
5092
5093
|
audio: "audio";
|
|
5093
5094
|
video: "video";
|
|
5094
|
-
duration: "duration";
|
|
5095
5095
|
percent: "percent";
|
|
5096
5096
|
passFail: "passFail";
|
|
5097
5097
|
stars: "stars";
|
|
@@ -5193,6 +5193,7 @@ declare const cacheEntryWithDebugKeySchema$1: z.ZodObject<{
|
|
|
5193
5193
|
number: "number";
|
|
5194
5194
|
boolean: "boolean";
|
|
5195
5195
|
file: "file";
|
|
5196
|
+
duration: "duration";
|
|
5196
5197
|
markdown: "markdown";
|
|
5197
5198
|
json: "json";
|
|
5198
5199
|
image: "image";
|
|
@@ -5200,7 +5201,6 @@ declare const cacheEntryWithDebugKeySchema$1: z.ZodObject<{
|
|
|
5200
5201
|
pdf: "pdf";
|
|
5201
5202
|
audio: "audio";
|
|
5202
5203
|
video: "video";
|
|
5203
|
-
duration: "duration";
|
|
5204
5204
|
percent: "percent";
|
|
5205
5205
|
passFail: "passFail";
|
|
5206
5206
|
stars: "stars";
|
|
@@ -5300,6 +5300,7 @@ declare const cacheFileSchema: z.ZodObject<{
|
|
|
5300
5300
|
number: "number";
|
|
5301
5301
|
boolean: "boolean";
|
|
5302
5302
|
file: "file";
|
|
5303
|
+
duration: "duration";
|
|
5303
5304
|
markdown: "markdown";
|
|
5304
5305
|
json: "json";
|
|
5305
5306
|
image: "image";
|
|
@@ -5307,7 +5308,6 @@ declare const cacheFileSchema: z.ZodObject<{
|
|
|
5307
5308
|
pdf: "pdf";
|
|
5308
5309
|
audio: "audio";
|
|
5309
5310
|
video: "video";
|
|
5310
|
-
duration: "duration";
|
|
5311
5311
|
percent: "percent";
|
|
5312
5312
|
passFail: "passFail";
|
|
5313
5313
|
stars: "stars";
|
|
@@ -5417,6 +5417,7 @@ declare const cacheDebugKeyFileSchema: z.ZodObject<{
|
|
|
5417
5417
|
number: "number";
|
|
5418
5418
|
boolean: "boolean";
|
|
5419
5419
|
file: "file";
|
|
5420
|
+
duration: "duration";
|
|
5420
5421
|
markdown: "markdown";
|
|
5421
5422
|
json: "json";
|
|
5422
5423
|
image: "image";
|
|
@@ -5424,7 +5425,6 @@ declare const cacheDebugKeyFileSchema: z.ZodObject<{
|
|
|
5424
5425
|
pdf: "pdf";
|
|
5425
5426
|
audio: "audio";
|
|
5426
5427
|
video: "video";
|
|
5427
|
-
duration: "duration";
|
|
5428
5428
|
percent: "percent";
|
|
5429
5429
|
passFail: "passFail";
|
|
5430
5430
|
stars: "stars";
|
|
@@ -7219,11 +7219,20 @@ type EvalRunner = {
|
|
|
7219
7219
|
validateManualInputs(request: CreateRunRequest$1): ManualInputValidationResult;
|
|
7220
7220
|
}; //#endregion
|
|
7221
7221
|
//#region src/runner.d.ts
|
|
7222
|
-
/**
|
|
7222
|
+
/**
|
|
7223
|
+
* Create an in-memory eval runner bound to the current workspace config.
|
|
7224
|
+
*
|
|
7225
|
+
* @param options.watchForChanges Watch eval files, run history, config, and
|
|
7226
|
+
* workspace `.env` for live reloads.
|
|
7227
|
+
* @param options.loadEnv Load `.env` from the current workspace before config,
|
|
7228
|
+
* discovery, and runs. Shell-provided values keep precedence.
|
|
7229
|
+
*/
|
|
7223
7230
|
declare function createRunner({
|
|
7224
|
-
watchForChanges
|
|
7231
|
+
watchForChanges,
|
|
7232
|
+
loadEnv
|
|
7225
7233
|
}?: {
|
|
7226
7234
|
watchForChanges?: boolean;
|
|
7235
|
+
loadEnv?: boolean;
|
|
7227
7236
|
}): EvalRunner; //#endregion
|
|
7228
7237
|
//#region src/manualInput/files.d.ts
|
|
7229
7238
|
type StageManualInputFileParams = {
|
package/dist/index.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { $ as setScopeCacheContext, A as repoFile, B as evalTime, C as evalTracer, D as deserializeCacheValue, E as deserializeCacheRecording, F as EvalRuntimeUsageError, Ft as getEvalRegistry, H as getEvalCaseInput, I as appendToEvalOutput, J as runInEvalRuntimeScope, K as mergeEvalOutput, M as readManualInputFile, N as evalExpect, O as serializeCacheRecording, P as EvalAssertionError, Q as setEvalOutput, R as evalAssert, S as evalSpan, T as hashCacheKeySync, U as incrementEvalOutput, V as getCurrentScope, W as isInEvalScope, X as runInExistingEvalScope, Y as runInEvalScope, at as extractApiCalls, b as buildTraceTree, ct as simulateTokenAllocation, et as startEvalBackgroundJob, it as extractCacheHits, j as manualInputFileValueSchema, k as serializeCacheValue, ot as extractLlmCalls, q as nextEvalId, rt as extractCacheEntries, st as simulateLlmCallCost, ut as getNestedAttribute, w as hashCacheKey, x as captureEvalSpanError, z as evalLog } from "./runExecution-CjWJUUZ5.mjs";
|
|
2
|
-
import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-FOyPC8UD.mjs";
|
|
3
|
-
import { n as matchesEvalTags, t as defineEval } from "./src-p-GRSVDb.mjs";
|
|
1
|
+
import { $ as setScopeCacheContext, A as repoFile, B as evalTime, C as evalTracer, D as deserializeCacheValue, E as deserializeCacheRecording, F as EvalRuntimeUsageError, Ft as getEvalRegistry, H as getEvalCaseInput, I as appendToEvalOutput, J as runInEvalRuntimeScope, K as mergeEvalOutput, M as readManualInputFile, N as evalExpect, O as serializeCacheRecording, P as EvalAssertionError, Q as setEvalOutput, R as evalAssert, S as evalSpan, T as hashCacheKeySync, U as incrementEvalOutput, V as getCurrentScope, W as isInEvalScope, X as runInExistingEvalScope, Y as runInEvalScope, at as extractApiCalls, b as buildTraceTree, ct as simulateTokenAllocation, et as startEvalBackgroundJob, it as extractCacheHits, j as manualInputFileValueSchema, k as serializeCacheValue, ot as extractLlmCalls, q as nextEvalId, rt as extractCacheEntries, st as simulateLlmCallCost, ut as getNestedAttribute, w as hashCacheKey, x as captureEvalSpanError, z as evalLog } from "./runExecution-Bq0Y3y_1.mjs";
|
|
2
|
+
import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-CbePEEua.mjs";
|
|
3
|
+
import { n as matchesEvalTags, t as defineEval } from "./src-CVM_FqPx.mjs";
|
|
4
4
|
export { EvalAssertionError, EvalRuntimeUsageError, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob };
|
package/dist/runChild.mjs
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { At as evalStatsConfigSchema, L as configureEvalRunLogs, Mt as evalChartsConfigSchema, Nt as columnDefSchema, Tt as buildEvalKey, bt as runManifestSchema, jt as manualInputDescriptorSchema, kt as evalStatAggregateSchema, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, tt as createRunRequestSchema, v as createFsCacheStore, xt as runSummarySchema, y as getCacheRetentionOptions } from "./runExecution-CjWJUUZ5.mjs";
|
|
2
|
-
import { S as parseEvalDiscovery, m as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-DE2TFAS6.mjs";
|
|
1
|
+
import { At as evalStatsConfigSchema, L as configureEvalRunLogs, Mt as evalChartsConfigSchema, Nt as columnDefSchema, Tt as buildEvalKey, bt as runManifestSchema, jt as manualInputDescriptorSchema, kt as evalStatAggregateSchema, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, tt as createRunRequestSchema, v as createFsCacheStore, xt as runSummarySchema, y as getCacheRetentionOptions } from "./runExecution-Bq0Y3y_1.mjs";
|
|
2
|
+
import { S as parseEvalDiscovery, m as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-BpwW0AmB.mjs";
|
|
3
3
|
import { z } from "zod";
|
|
4
4
|
import { readFile } from "node:fs/promises";
|
|
5
5
|
import { relative } from "node:path";
|
|
@@ -289,7 +289,7 @@ z.object({
|
|
|
289
289
|
key: z.string(),
|
|
290
290
|
namespace: z.string(),
|
|
291
291
|
storedAt: z.string(),
|
|
292
|
-
/** Last successful cache
|
|
292
|
+
/** Last successful cache read or write time. Legacy entries may be `null`. */
|
|
293
293
|
lastAccessedAt: z.string().nullable()
|
|
294
294
|
});
|
|
295
295
|
z.object({
|
|
@@ -5395,7 +5395,7 @@ function createFsCacheStore(options) {
|
|
|
5395
5395
|
const index = await readNamespaceIndex(cacheDir, entry.namespace);
|
|
5396
5396
|
index.entries[entry.key] = {
|
|
5397
5397
|
storedAt: entry.storedAt,
|
|
5398
|
-
lastAccessedAt:
|
|
5398
|
+
lastAccessedAt: entry.storedAt,
|
|
5399
5399
|
blobRefs: await collectExternalJsonBlobRefs(entry, blobDirs)
|
|
5400
5400
|
};
|
|
5401
5401
|
await writeNamespaceIndex(cacheDir, index);
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { Dt as caseDetailSchema, Et as getCaseRowCaseKey, It as runWithEvalRegistry, J as runInEvalRuntimeScope, Ot as caseRowSchema, Z as runWithEvalClock, _t as matchesTagsFilter, bt as runManifestSchema, d as loadEvalModule, f as resolveEvalDefaultConfig, g as commitPendingCacheWrites, gt as dedupeEvalTags, ht as deriveStatusFromChildStatuses, i as isCaseChildMessage, m as buildDeclaredColumnDefs, mt as deriveStatusFromCaseRows, n as resolveRunnableEvalCases, o as stripTerminalControlCodes, pt as deriveScopedSummaryFromCases, t as filterEvalCases, u as runWithModuleIsolation, vt as validateEvalTagName, wt as buildCaseKey, xt as runSummarySchema, yt as validateTagsFilterExpression } from "./runExecution-CjWJUUZ5.mjs";
|
|
1
|
+
import { Dt as caseDetailSchema, Et as getCaseRowCaseKey, It as runWithEvalRegistry, J as runInEvalRuntimeScope, Ot as caseRowSchema, Z as runWithEvalClock, _t as matchesTagsFilter, bt as runManifestSchema, d as loadEvalModule, f as resolveEvalDefaultConfig, g as commitPendingCacheWrites, gt as dedupeEvalTags, ht as deriveStatusFromChildStatuses, i as isCaseChildMessage, m as buildDeclaredColumnDefs, mt as deriveStatusFromCaseRows, n as resolveRunnableEvalCases, o as stripTerminalControlCodes, pt as deriveScopedSummaryFromCases, t as filterEvalCases, u as runWithModuleIsolation, vt as validateEvalTagName, wt as buildCaseKey, xt as runSummarySchema, yt as validateTagsFilterExpression } from "./runExecution-Bq0Y3y_1.mjs";
|
|
2
2
|
import { Result, resultify } from "t-result";
|
|
3
3
|
import { readFile, readdir, rm, writeFile } from "node:fs/promises";
|
|
4
4
|
import { dirname, join } from "node:path";
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import { n as createRunner } from "./cli-CbePEEua.mjs";
|
|
2
|
+
import "./src-CVM_FqPx.mjs";
|
|
3
|
+
//#region ../../apps/server/src/runner.ts
|
|
4
|
+
let runnerInstance = null;
|
|
5
|
+
function getRunnerInstance({ loadEnv = true } = {}) {
|
|
6
|
+
if (!runnerInstance) runnerInstance = createRunner({ loadEnv });
|
|
7
|
+
return runnerInstance;
|
|
8
|
+
}
|
|
9
|
+
async function initRunner(options = {}) {
|
|
10
|
+
const runner = getRunnerInstance(options);
|
|
11
|
+
await runner.init();
|
|
12
|
+
return runner;
|
|
13
|
+
}
|
|
14
|
+
//#endregion
|
|
15
|
+
export { initRunner as n, getRunnerInstance as t };
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import { n as initRunner, t as getRunnerInstance } from "./runner-Dv5cseOt.mjs";
|
|
1
|
+
import { n as initRunner, t as getRunnerInstance } from "./runner-Kp0JqxrU.mjs";
|
|
2
2
|
export { getRunnerInstance, initRunner };
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { G as matchesEvalTags$1, Pt as defineEval$1 } from "./runExecution-CjWJUUZ5.mjs";
|
|
2
|
-
import "./cli-FOyPC8UD.mjs";
|
|
1
|
+
import { G as matchesEvalTags$1, Pt as defineEval$1 } from "./runExecution-Bq0Y3y_1.mjs";
|
|
2
|
+
import "./cli-CbePEEua.mjs";
|
|
3
3
|
//#region src/index.ts
|
|
4
4
|
/** Register an eval definition with typed tag support. */
|
|
5
5
|
function defineEval(definition) {
|
package/package.json
CHANGED
|
@@ -34,9 +34,10 @@ display rules), read the TypeScript declarations shipped with the package:
|
|
|
34
34
|
it before the next run starts. Temporary runs appear in `show-runs` while
|
|
35
35
|
present; normal runs are never deleted by temporary-run cleanup. In the app,
|
|
36
36
|
the run drawer can promote a temporary run to durable history.
|
|
37
|
-
- `agent-evals app` watches `agent-evals.config.ts` and
|
|
38
|
-
place when the runner is idle. If config
|
|
39
|
-
reload applies after the current run
|
|
37
|
+
- `agent-evals app` watches `agent-evals.config.ts` and the workspace `.env`
|
|
38
|
+
and reloads them in place when the runner is idle. If config or `.env`
|
|
39
|
+
changes during an active run, the reload applies after the current run
|
|
40
|
+
reaches a terminal state.
|
|
40
41
|
- App-triggered runs log the queued target evals, resolved case concurrency,
|
|
41
42
|
each case start for evals that are actually running, and the terminal run
|
|
42
43
|
summary in the server terminal.
|
|
@@ -567,6 +568,8 @@ Mental model:
|
|
|
567
568
|
runner stays idle for `cache.pruneIdleDelayMs ?? 5000` milliseconds. Configure
|
|
568
569
|
`cache.maxEntries` as a number for the default cap, or as
|
|
569
570
|
`{ default, namespaces }` for exact namespace-specific caps.
|
|
571
|
+
Writes initialize the row's last access time to the stored time; later cache
|
|
572
|
+
hits refresh that timestamp at the configured access-time update interval.
|
|
570
573
|
- Unindexed legacy cache files are ignored by normal lookup/listing. Use
|
|
571
574
|
`agent-evals cache repair` to remove unindexed cache files, stale index rows,
|
|
572
575
|
debug sidecars, and unreferenced blob files.
|
package/dist/runner-Dv5cseOt.mjs
DELETED
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
import { n as createRunner } from "./cli-FOyPC8UD.mjs";
|
|
2
|
-
import "./src-p-GRSVDb.mjs";
|
|
3
|
-
//#region ../../apps/server/src/runner.ts
|
|
4
|
-
let runnerInstance = null;
|
|
5
|
-
function getRunnerInstance() {
|
|
6
|
-
if (!runnerInstance) runnerInstance = createRunner();
|
|
7
|
-
return runnerInstance;
|
|
8
|
-
}
|
|
9
|
-
async function initRunner() {
|
|
10
|
-
const runner = getRunnerInstance();
|
|
11
|
-
await runner.init();
|
|
12
|
-
return runner;
|
|
13
|
-
}
|
|
14
|
-
//#endregion
|
|
15
|
-
export { initRunner as n, getRunnerInstance as t };
|