@ls-stack/agent-eval 0.58.5 → 0.59.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-DLNmRUqH.mjs → app-B3PEtWqH.mjs} +4 -4
- package/dist/apps/web/dist/assets/index-BIEuCK_8.js +377 -0
- package/dist/apps/web/dist/assets/index-CWoKLKTt.css +1 -0
- package/dist/apps/web/dist/index.html +2 -2
- package/dist/bin.mjs +1 -1
- package/dist/caseChild.mjs +1 -1
- package/dist/{cli-ClAkjTvo.mjs → cli-Dkp2-rBm.mjs} +4 -4
- package/dist/index.d.mts +100 -64
- package/dist/index.mjs +3 -3
- package/dist/runChild.mjs +2 -2
- package/dist/{runExecution-BMnJXWhN.mjs → runExecution-C3XVZHRC.mjs} +94 -6
- package/dist/{runOrchestration-CvbTAoEb.mjs → runOrchestration-B5An-AEi.mjs} +1 -1
- package/dist/{runner-DJJekv9f.mjs → runner-BJXz_V_V.mjs} +1 -1
- package/dist/{runner-BfHgVhGS.mjs → runner-C9J-1fkp.mjs} +2 -2
- package/dist/{src-DfzidkYr.mjs → src-8dGXUULC.mjs} +2 -2
- package/package.json +3 -3
- package/skills/agent-eval/SKILL.md +10 -2
- package/dist/apps/web/dist/assets/index-BD6FXk5p.js +0 -377
- package/dist/apps/web/dist/assets/index-C2fbGEsB.css +0 -1
package/dist/index.d.mts
CHANGED
|
@@ -398,6 +398,18 @@ type EvalCase$1$1<TInput = unknown> = {
|
|
|
398
398
|
input: TInput;
|
|
399
399
|
tags?: string[];
|
|
400
400
|
};
|
|
401
|
+
/** Normalized view of one tool-call span and its common tool metadata. */
|
|
402
|
+
type EvalToolCallSpan = {
|
|
403
|
+
/** Preferred tool name, using GenAI/Mastra identity metadata when present. */name: string; /** Original trace span display name. */
|
|
404
|
+
spanName: string; /** Original trace span kind. */
|
|
405
|
+
kind: string; /** Parsed tool-call arguments, or the raw value when parsing is not possible. */
|
|
406
|
+
arguments: unknown; /** Parsed tool-call result, or the raw value when parsing is not possible. */
|
|
407
|
+
result: unknown; /** Tool description from GenAI/Mastra metadata when present. */
|
|
408
|
+
description: string | undefined; /** Tool type from GenAI/Mastra metadata when present. */
|
|
409
|
+
toolType: string | undefined; /** Original span attributes. */
|
|
410
|
+
attributes: Record<string, unknown> | undefined; /** Original trace span for fields not normalized above. */
|
|
411
|
+
span: EvalTraceSpan$2;
|
|
412
|
+
};
|
|
401
413
|
/** Query helpers built from the flattened trace recorded for one eval case. */
|
|
402
414
|
type EvalTraceTree = {
|
|
403
415
|
/** Flat span list in creation order. */spans: EvalTraceSpan$2[]; /** Top-level spans whose `parentId` is `null`. */
|
|
@@ -406,10 +418,16 @@ type EvalTraceTree = {
|
|
|
406
418
|
findSpans: (name: string) => EvalTraceSpan$2[]; /** Return whether any span name exactly matches `name`. */
|
|
407
419
|
hasSpan: (name: string) => boolean; /** Return every span whose kind exactly matches `kind`. */
|
|
408
420
|
findSpansByKind: (kind: string) => EvalTraceSpan$2[]; /** Return every span with `kind: 'tool'` or `kind: 'tool_call'`. */
|
|
409
|
-
findToolCallSpans: () => EvalTraceSpan$2[];
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
421
|
+
findToolCallSpans: () => EvalTraceSpan$2[];
|
|
422
|
+
/**
|
|
423
|
+
* Return tool-call names, preferring GenAI/Mastra tool identity attributes
|
|
424
|
+
* when available.
|
|
425
|
+
*/
|
|
426
|
+
listToolCallSpanNames: () => string[]; /** Return whether a tool-call span name or tool identity matches `name`. */
|
|
427
|
+
hasToolCallSpan: (name: string) => boolean; /** Return normalized tool-call spans whose name or tool identity matches `name`. */
|
|
428
|
+
getToolCallSpans: (name: string) => EvalToolCallSpan[]; /** Return how many tool-call spans have a name or tool identity matching `toolName`. */
|
|
429
|
+
getToolCallSpanCount: (toolName: string) => number; /** Return whether a tool-call span name or tool identity appears exactly `expectedCalls` times. */
|
|
430
|
+
hasToolCallSpanCount: (toolName: string, expectedCalls: number) => boolean; /** Return span names in creation order, optionally filtered by kind. */
|
|
413
431
|
listSpanNames: (kind?: string) => string[]; /** Return span names in depth-first tree order, optionally filtered by kind. */
|
|
414
432
|
listSpanNamesDfs: (kind?: string) => string[]; /** Return all spans in depth-first tree order. */
|
|
415
433
|
flattenDfs: () => EvalTraceSpan$2[];
|
|
@@ -1923,15 +1941,15 @@ type ColumnKind = z$1.infer<typeof columnKindSchema>;
|
|
|
1923
1941
|
declare const columnFormatSchema: z$1.ZodEnum<{
|
|
1924
1942
|
number: "number";
|
|
1925
1943
|
boolean: "boolean";
|
|
1944
|
+
duration: "duration";
|
|
1945
|
+
json: "json";
|
|
1926
1946
|
file: "file";
|
|
1927
1947
|
markdown: "markdown";
|
|
1928
|
-
json: "json";
|
|
1929
1948
|
image: "image";
|
|
1930
1949
|
html: "html";
|
|
1931
1950
|
pdf: "pdf";
|
|
1932
1951
|
audio: "audio";
|
|
1933
1952
|
video: "video";
|
|
1934
|
-
duration: "duration";
|
|
1935
1953
|
percent: "percent";
|
|
1936
1954
|
passFail: "passFail";
|
|
1937
1955
|
stars: "stars";
|
|
@@ -1950,15 +1968,15 @@ declare const columnDefSchema: z$1.ZodObject<{
|
|
|
1950
1968
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
1951
1969
|
number: "number";
|
|
1952
1970
|
boolean: "boolean";
|
|
1971
|
+
duration: "duration";
|
|
1972
|
+
json: "json";
|
|
1953
1973
|
file: "file";
|
|
1954
1974
|
markdown: "markdown";
|
|
1955
|
-
json: "json";
|
|
1956
1975
|
image: "image";
|
|
1957
1976
|
html: "html";
|
|
1958
1977
|
pdf: "pdf";
|
|
1959
1978
|
audio: "audio";
|
|
1960
1979
|
video: "video";
|
|
1961
|
-
duration: "duration";
|
|
1962
1980
|
percent: "percent";
|
|
1963
1981
|
passFail: "passFail";
|
|
1964
1982
|
stars: "stars";
|
|
@@ -2004,8 +2022,8 @@ type CellValue = z$1.infer<typeof cellValueSchema>; //#endregion
|
|
|
2004
2022
|
declare const traceAttributeDisplayFormatSchema: z$1.ZodEnum<{
|
|
2005
2023
|
string: "string";
|
|
2006
2024
|
number: "number";
|
|
2007
|
-
json: "json";
|
|
2008
2025
|
duration: "duration";
|
|
2026
|
+
json: "json";
|
|
2009
2027
|
}>;
|
|
2010
2028
|
/**
|
|
2011
2029
|
* Formatting hint for trace attribute values rendered by the UI.
|
|
@@ -2029,8 +2047,8 @@ declare const traceAttributeDisplaySchema: z$1.ZodObject<{
|
|
|
2029
2047
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2030
2048
|
string: "string";
|
|
2031
2049
|
number: "number";
|
|
2032
|
-
json: "json";
|
|
2033
2050
|
duration: "duration";
|
|
2051
|
+
json: "json";
|
|
2034
2052
|
}>>;
|
|
2035
2053
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
2036
2054
|
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
@@ -2065,8 +2083,8 @@ declare const traceDisplayConfigSchema: z$1.ZodObject<{
|
|
|
2065
2083
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2066
2084
|
string: "string";
|
|
2067
2085
|
number: "number";
|
|
2068
|
-
json: "json";
|
|
2069
2086
|
duration: "duration";
|
|
2087
|
+
json: "json";
|
|
2070
2088
|
}>>;
|
|
2071
2089
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
2072
2090
|
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
@@ -2105,8 +2123,8 @@ declare const traceAttributeDisplayInputSchema: z$1.ZodObject<{
|
|
|
2105
2123
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2106
2124
|
string: "string";
|
|
2107
2125
|
number: "number";
|
|
2108
|
-
json: "json";
|
|
2109
2126
|
duration: "duration";
|
|
2127
|
+
json: "json";
|
|
2110
2128
|
}>>;
|
|
2111
2129
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
2112
2130
|
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
@@ -2143,8 +2161,8 @@ declare const traceDisplayInputConfigSchema: z$1.ZodObject<{
|
|
|
2143
2161
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2144
2162
|
string: "string";
|
|
2145
2163
|
number: "number";
|
|
2146
|
-
json: "json";
|
|
2147
2164
|
duration: "duration";
|
|
2165
|
+
json: "json";
|
|
2148
2166
|
}>>;
|
|
2149
2167
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
2150
2168
|
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
@@ -2308,15 +2326,15 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
2308
2326
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2309
2327
|
number: "number";
|
|
2310
2328
|
boolean: "boolean";
|
|
2329
|
+
duration: "duration";
|
|
2330
|
+
json: "json";
|
|
2311
2331
|
file: "file";
|
|
2312
2332
|
markdown: "markdown";
|
|
2313
|
-
json: "json";
|
|
2314
2333
|
image: "image";
|
|
2315
2334
|
html: "html";
|
|
2316
2335
|
pdf: "pdf";
|
|
2317
2336
|
audio: "audio";
|
|
2318
2337
|
video: "video";
|
|
2319
|
-
duration: "duration";
|
|
2320
2338
|
percent: "percent";
|
|
2321
2339
|
passFail: "passFail";
|
|
2322
2340
|
stars: "stars";
|
|
@@ -2372,15 +2390,15 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
|
|
|
2372
2390
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2373
2391
|
number: "number";
|
|
2374
2392
|
boolean: "boolean";
|
|
2393
|
+
duration: "duration";
|
|
2394
|
+
json: "json";
|
|
2375
2395
|
file: "file";
|
|
2376
2396
|
markdown: "markdown";
|
|
2377
|
-
json: "json";
|
|
2378
2397
|
image: "image";
|
|
2379
2398
|
html: "html";
|
|
2380
2399
|
pdf: "pdf";
|
|
2381
2400
|
audio: "audio";
|
|
2382
2401
|
video: "video";
|
|
2383
|
-
duration: "duration";
|
|
2384
2402
|
percent: "percent";
|
|
2385
2403
|
passFail: "passFail";
|
|
2386
2404
|
stars: "stars";
|
|
@@ -2418,15 +2436,15 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2418
2436
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2419
2437
|
number: "number";
|
|
2420
2438
|
boolean: "boolean";
|
|
2439
|
+
duration: "duration";
|
|
2440
|
+
json: "json";
|
|
2421
2441
|
file: "file";
|
|
2422
2442
|
markdown: "markdown";
|
|
2423
|
-
json: "json";
|
|
2424
2443
|
image: "image";
|
|
2425
2444
|
html: "html";
|
|
2426
2445
|
pdf: "pdf";
|
|
2427
2446
|
audio: "audio";
|
|
2428
2447
|
video: "video";
|
|
2429
|
-
duration: "duration";
|
|
2430
2448
|
percent: "percent";
|
|
2431
2449
|
passFail: "passFail";
|
|
2432
2450
|
stars: "stars";
|
|
@@ -2499,15 +2517,15 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2499
2517
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2500
2518
|
number: "number";
|
|
2501
2519
|
boolean: "boolean";
|
|
2520
|
+
duration: "duration";
|
|
2521
|
+
json: "json";
|
|
2502
2522
|
file: "file";
|
|
2503
2523
|
markdown: "markdown";
|
|
2504
|
-
json: "json";
|
|
2505
2524
|
image: "image";
|
|
2506
2525
|
html: "html";
|
|
2507
2526
|
pdf: "pdf";
|
|
2508
2527
|
audio: "audio";
|
|
2509
2528
|
video: "video";
|
|
2510
|
-
duration: "duration";
|
|
2511
2529
|
percent: "percent";
|
|
2512
2530
|
passFail: "passFail";
|
|
2513
2531
|
stars: "stars";
|
|
@@ -2540,8 +2558,8 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2540
2558
|
}>;
|
|
2541
2559
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2542
2560
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2543
|
-
success: "success";
|
|
2544
2561
|
error: "error";
|
|
2562
|
+
success: "success";
|
|
2545
2563
|
warning: "warning";
|
|
2546
2564
|
accent: "accent";
|
|
2547
2565
|
accentDim: "accentDim";
|
|
@@ -2564,8 +2582,8 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2564
2582
|
}>;
|
|
2565
2583
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2566
2584
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2567
|
-
success: "success";
|
|
2568
2585
|
error: "error";
|
|
2586
|
+
success: "success";
|
|
2569
2587
|
warning: "warning";
|
|
2570
2588
|
accent: "accent";
|
|
2571
2589
|
accentDim: "accentDim";
|
|
@@ -2730,15 +2748,15 @@ declare const caseRowSchema$1: z$1.ZodObject<{
|
|
|
2730
2748
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2731
2749
|
number: "number";
|
|
2732
2750
|
boolean: "boolean";
|
|
2751
|
+
duration: "duration";
|
|
2752
|
+
json: "json";
|
|
2733
2753
|
file: "file";
|
|
2734
2754
|
markdown: "markdown";
|
|
2735
|
-
json: "json";
|
|
2736
2755
|
image: "image";
|
|
2737
2756
|
html: "html";
|
|
2738
2757
|
pdf: "pdf";
|
|
2739
2758
|
audio: "audio";
|
|
2740
2759
|
video: "video";
|
|
2741
|
-
duration: "duration";
|
|
2742
2760
|
percent: "percent";
|
|
2743
2761
|
passFail: "passFail";
|
|
2744
2762
|
stars: "stars";
|
|
@@ -2876,8 +2894,8 @@ declare const scoreTraceSchema: z$1.ZodObject<{
|
|
|
2876
2894
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2877
2895
|
string: "string";
|
|
2878
2896
|
number: "number";
|
|
2879
|
-
json: "json";
|
|
2880
2897
|
duration: "duration";
|
|
2898
|
+
json: "json";
|
|
2881
2899
|
}>>;
|
|
2882
2900
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
2883
2901
|
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
@@ -2979,8 +2997,8 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
|
|
|
2979
2997
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2980
2998
|
string: "string";
|
|
2981
2999
|
number: "number";
|
|
2982
|
-
json: "json";
|
|
2983
3000
|
duration: "duration";
|
|
3001
|
+
json: "json";
|
|
2984
3002
|
}>>;
|
|
2985
3003
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
2986
3004
|
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
@@ -3048,8 +3066,8 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
|
|
|
3048
3066
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3049
3067
|
string: "string";
|
|
3050
3068
|
number: "number";
|
|
3051
|
-
json: "json";
|
|
3052
3069
|
duration: "duration";
|
|
3070
|
+
json: "json";
|
|
3053
3071
|
}>>;
|
|
3054
3072
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
3055
3073
|
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
@@ -3108,15 +3126,15 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
|
|
|
3108
3126
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3109
3127
|
number: "number";
|
|
3110
3128
|
boolean: "boolean";
|
|
3129
|
+
duration: "duration";
|
|
3130
|
+
json: "json";
|
|
3111
3131
|
file: "file";
|
|
3112
3132
|
markdown: "markdown";
|
|
3113
|
-
json: "json";
|
|
3114
3133
|
image: "image";
|
|
3115
3134
|
html: "html";
|
|
3116
3135
|
pdf: "pdf";
|
|
3117
3136
|
audio: "audio";
|
|
3118
3137
|
video: "video";
|
|
3119
|
-
duration: "duration";
|
|
3120
3138
|
percent: "percent";
|
|
3121
3139
|
passFail: "passFail";
|
|
3122
3140
|
stars: "stars";
|
|
@@ -3265,8 +3283,8 @@ type EvalChartAggregate = z$1.infer<typeof evalChartAggregateSchema>;
|
|
|
3265
3283
|
* not emit raw hex so authored evals stay decoupled from the web theme.
|
|
3266
3284
|
*/
|
|
3267
3285
|
declare const evalChartColorSchema: z$1.ZodEnum<{
|
|
3268
|
-
success: "success";
|
|
3269
3286
|
error: "error";
|
|
3287
|
+
success: "success";
|
|
3270
3288
|
warning: "warning";
|
|
3271
3289
|
accent: "accent";
|
|
3272
3290
|
accentDim: "accentDim";
|
|
@@ -3294,8 +3312,8 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
3294
3312
|
}>;
|
|
3295
3313
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
3296
3314
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3297
|
-
success: "success";
|
|
3298
3315
|
error: "error";
|
|
3316
|
+
success: "success";
|
|
3299
3317
|
warning: "warning";
|
|
3300
3318
|
accent: "accent";
|
|
3301
3319
|
accentDim: "accentDim";
|
|
@@ -3318,8 +3336,8 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
3318
3336
|
}>;
|
|
3319
3337
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
3320
3338
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3321
|
-
success: "success";
|
|
3322
3339
|
error: "error";
|
|
3340
|
+
success: "success";
|
|
3323
3341
|
warning: "warning";
|
|
3324
3342
|
accent: "accent";
|
|
3325
3343
|
accentDim: "accentDim";
|
|
@@ -3377,8 +3395,8 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
|
|
|
3377
3395
|
}>;
|
|
3378
3396
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
3379
3397
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3380
|
-
success: "success";
|
|
3381
3398
|
error: "error";
|
|
3399
|
+
success: "success";
|
|
3382
3400
|
warning: "warning";
|
|
3383
3401
|
accent: "accent";
|
|
3384
3402
|
accentDim: "accentDim";
|
|
@@ -3401,8 +3419,8 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
|
|
|
3401
3419
|
}>;
|
|
3402
3420
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
3403
3421
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3404
|
-
success: "success";
|
|
3405
3422
|
error: "error";
|
|
3423
|
+
success: "success";
|
|
3406
3424
|
warning: "warning";
|
|
3407
3425
|
accent: "accent";
|
|
3408
3426
|
accentDim: "accentDim";
|
|
@@ -3467,8 +3485,8 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
|
|
|
3467
3485
|
}>;
|
|
3468
3486
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
3469
3487
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3470
|
-
success: "success";
|
|
3471
3488
|
error: "error";
|
|
3489
|
+
success: "success";
|
|
3472
3490
|
warning: "warning";
|
|
3473
3491
|
accent: "accent";
|
|
3474
3492
|
accentDim: "accentDim";
|
|
@@ -3491,8 +3509,8 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
|
|
|
3491
3509
|
}>;
|
|
3492
3510
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
3493
3511
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3494
|
-
success: "success";
|
|
3495
3512
|
error: "error";
|
|
3513
|
+
success: "success";
|
|
3496
3514
|
warning: "warning";
|
|
3497
3515
|
accent: "accent";
|
|
3498
3516
|
accentDim: "accentDim";
|
|
@@ -3688,6 +3706,18 @@ type EvalCase$1<TInput = unknown> = {
|
|
|
3688
3706
|
input: TInput;
|
|
3689
3707
|
tags?: string[];
|
|
3690
3708
|
};
|
|
3709
|
+
/** Normalized view of one tool-call span and its common tool metadata. */
|
|
3710
|
+
type EvalToolCallSpan$1 = {
|
|
3711
|
+
/** Preferred tool name, using GenAI/Mastra identity metadata when present. */name: string; /** Original trace span display name. */
|
|
3712
|
+
spanName: string; /** Original trace span kind. */
|
|
3713
|
+
kind: string; /** Parsed tool-call arguments, or the raw value when parsing is not possible. */
|
|
3714
|
+
arguments: unknown; /** Parsed tool-call result, or the raw value when parsing is not possible. */
|
|
3715
|
+
result: unknown; /** Tool description from GenAI/Mastra metadata when present. */
|
|
3716
|
+
description: string | undefined; /** Tool type from GenAI/Mastra metadata when present. */
|
|
3717
|
+
toolType: string | undefined; /** Original span attributes. */
|
|
3718
|
+
attributes: Record<string, unknown> | undefined; /** Original trace span for fields not normalized above. */
|
|
3719
|
+
span: EvalTraceSpan$1;
|
|
3720
|
+
};
|
|
3691
3721
|
/** Query helpers built from the flattened trace recorded for one eval case. */
|
|
3692
3722
|
type EvalTraceTree$1 = {
|
|
3693
3723
|
/** Flat span list in creation order. */spans: EvalTraceSpan$1[]; /** Top-level spans whose `parentId` is `null`. */
|
|
@@ -3696,10 +3726,16 @@ type EvalTraceTree$1 = {
|
|
|
3696
3726
|
findSpans: (name: string) => EvalTraceSpan$1[]; /** Return whether any span name exactly matches `name`. */
|
|
3697
3727
|
hasSpan: (name: string) => boolean; /** Return every span whose kind exactly matches `kind`. */
|
|
3698
3728
|
findSpansByKind: (kind: string) => EvalTraceSpan$1[]; /** Return every span with `kind: 'tool'` or `kind: 'tool_call'`. */
|
|
3699
|
-
findToolCallSpans: () => EvalTraceSpan$1[];
|
|
3700
|
-
|
|
3701
|
-
|
|
3702
|
-
|
|
3729
|
+
findToolCallSpans: () => EvalTraceSpan$1[];
|
|
3730
|
+
/**
|
|
3731
|
+
* Return tool-call names, preferring GenAI/Mastra tool identity attributes
|
|
3732
|
+
* when available.
|
|
3733
|
+
*/
|
|
3734
|
+
listToolCallSpanNames: () => string[]; /** Return whether a tool-call span name or tool identity matches `name`. */
|
|
3735
|
+
hasToolCallSpan: (name: string) => boolean; /** Return normalized tool-call spans whose name or tool identity matches `name`. */
|
|
3736
|
+
getToolCallSpans: (name: string) => EvalToolCallSpan$1[]; /** Return how many tool-call spans have a name or tool identity matching `toolName`. */
|
|
3737
|
+
getToolCallSpanCount: (toolName: string) => number; /** Return whether a tool-call span name or tool identity appears exactly `expectedCalls` times. */
|
|
3738
|
+
hasToolCallSpanCount: (toolName: string, expectedCalls: number) => boolean; /** Return span names in creation order, optionally filtered by kind. */
|
|
3703
3739
|
listSpanNames: (kind?: string) => string[]; /** Return span names in depth-first tree order, optionally filtered by kind. */
|
|
3704
3740
|
listSpanNamesDfs: (kind?: string) => string[]; /** Return all spans in depth-first tree order. */
|
|
3705
3741
|
flattenDfs: () => EvalTraceSpan$1[];
|
|
@@ -3772,8 +3808,8 @@ declare const llmCallMetricFormatSchema$1: z$1.ZodEnum<{
|
|
|
3772
3808
|
string: "string";
|
|
3773
3809
|
number: "number";
|
|
3774
3810
|
boolean: "boolean";
|
|
3775
|
-
json: "json";
|
|
3776
3811
|
duration: "duration";
|
|
3812
|
+
json: "json";
|
|
3777
3813
|
}>;
|
|
3778
3814
|
/** Render format applied to an LLM-call metric value. */
|
|
3779
3815
|
type LlmCallMetricFormat = z$1.infer<typeof llmCallMetricFormatSchema$1>;
|
|
@@ -3782,8 +3818,8 @@ declare const apiCallMetricFormatSchema$1: z$1.ZodEnum<{
|
|
|
3782
3818
|
string: "string";
|
|
3783
3819
|
number: "number";
|
|
3784
3820
|
boolean: "boolean";
|
|
3785
|
-
json: "json";
|
|
3786
3821
|
duration: "duration";
|
|
3822
|
+
json: "json";
|
|
3787
3823
|
}>;
|
|
3788
3824
|
/** Render format applied to an API-call metric value. */
|
|
3789
3825
|
type ApiCallMetricFormat = z$1.infer<typeof apiCallMetricFormatSchema$1>;
|
|
@@ -3852,8 +3888,8 @@ declare const llmCallMetricSchema: z$1.ZodObject<{
|
|
|
3852
3888
|
string: "string";
|
|
3853
3889
|
number: "number";
|
|
3854
3890
|
boolean: "boolean";
|
|
3855
|
-
json: "json";
|
|
3856
3891
|
duration: "duration";
|
|
3892
|
+
json: "json";
|
|
3857
3893
|
}>>;
|
|
3858
3894
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
3859
3895
|
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
@@ -3881,8 +3917,8 @@ declare const apiCallMetricSchema: z$1.ZodObject<{
|
|
|
3881
3917
|
string: "string";
|
|
3882
3918
|
number: "number";
|
|
3883
3919
|
boolean: "boolean";
|
|
3884
|
-
json: "json";
|
|
3885
3920
|
duration: "duration";
|
|
3921
|
+
json: "json";
|
|
3886
3922
|
}>>;
|
|
3887
3923
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
3888
3924
|
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
@@ -3995,8 +4031,8 @@ declare const llmCallsConfigSchema: z$1.ZodObject<{
|
|
|
3995
4031
|
string: "string";
|
|
3996
4032
|
number: "number";
|
|
3997
4033
|
boolean: "boolean";
|
|
3998
|
-
json: "json";
|
|
3999
4034
|
duration: "duration";
|
|
4035
|
+
json: "json";
|
|
4000
4036
|
}>>;
|
|
4001
4037
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
4002
4038
|
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
@@ -4031,8 +4067,8 @@ declare const apiCallsConfigSchema: z$1.ZodObject<{
|
|
|
4031
4067
|
string: "string";
|
|
4032
4068
|
number: "number";
|
|
4033
4069
|
boolean: "boolean";
|
|
4034
|
-
json: "json";
|
|
4035
4070
|
duration: "duration";
|
|
4071
|
+
json: "json";
|
|
4036
4072
|
}>>;
|
|
4037
4073
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
4038
4074
|
placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
|
|
@@ -4633,15 +4669,15 @@ declare const cacheRecordingOpSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
4633
4669
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4634
4670
|
number: "number";
|
|
4635
4671
|
boolean: "boolean";
|
|
4672
|
+
duration: "duration";
|
|
4673
|
+
json: "json";
|
|
4636
4674
|
file: "file";
|
|
4637
4675
|
markdown: "markdown";
|
|
4638
|
-
json: "json";
|
|
4639
4676
|
image: "image";
|
|
4640
4677
|
html: "html";
|
|
4641
4678
|
pdf: "pdf";
|
|
4642
4679
|
audio: "audio";
|
|
4643
4680
|
video: "video";
|
|
4644
|
-
duration: "duration";
|
|
4645
4681
|
percent: "percent";
|
|
4646
4682
|
passFail: "passFail";
|
|
4647
4683
|
stars: "stars";
|
|
@@ -4721,15 +4757,15 @@ declare const cacheRecordingSchema: z$1.ZodObject<{
|
|
|
4721
4757
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4722
4758
|
number: "number";
|
|
4723
4759
|
boolean: "boolean";
|
|
4760
|
+
duration: "duration";
|
|
4761
|
+
json: "json";
|
|
4724
4762
|
file: "file";
|
|
4725
4763
|
markdown: "markdown";
|
|
4726
|
-
json: "json";
|
|
4727
4764
|
image: "image";
|
|
4728
4765
|
html: "html";
|
|
4729
4766
|
pdf: "pdf";
|
|
4730
4767
|
audio: "audio";
|
|
4731
4768
|
video: "video";
|
|
4732
|
-
duration: "duration";
|
|
4733
4769
|
percent: "percent";
|
|
4734
4770
|
passFail: "passFail";
|
|
4735
4771
|
stars: "stars";
|
|
@@ -4822,15 +4858,15 @@ declare const cacheEntrySchema: z$1.ZodObject<{
|
|
|
4822
4858
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4823
4859
|
number: "number";
|
|
4824
4860
|
boolean: "boolean";
|
|
4861
|
+
duration: "duration";
|
|
4862
|
+
json: "json";
|
|
4825
4863
|
file: "file";
|
|
4826
4864
|
markdown: "markdown";
|
|
4827
|
-
json: "json";
|
|
4828
4865
|
image: "image";
|
|
4829
4866
|
html: "html";
|
|
4830
4867
|
pdf: "pdf";
|
|
4831
4868
|
audio: "audio";
|
|
4832
4869
|
video: "video";
|
|
4833
|
-
duration: "duration";
|
|
4834
4870
|
percent: "percent";
|
|
4835
4871
|
passFail: "passFail";
|
|
4836
4872
|
stars: "stars";
|
|
@@ -4940,15 +4976,15 @@ declare const cacheDebugKeyEntrySchema: z$1.ZodObject<{
|
|
|
4940
4976
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4941
4977
|
number: "number";
|
|
4942
4978
|
boolean: "boolean";
|
|
4979
|
+
duration: "duration";
|
|
4980
|
+
json: "json";
|
|
4943
4981
|
file: "file";
|
|
4944
4982
|
markdown: "markdown";
|
|
4945
|
-
json: "json";
|
|
4946
4983
|
image: "image";
|
|
4947
4984
|
html: "html";
|
|
4948
4985
|
pdf: "pdf";
|
|
4949
4986
|
audio: "audio";
|
|
4950
4987
|
video: "video";
|
|
4951
|
-
duration: "duration";
|
|
4952
4988
|
percent: "percent";
|
|
4953
4989
|
passFail: "passFail";
|
|
4954
4990
|
stars: "stars";
|
|
@@ -5047,15 +5083,15 @@ declare const cacheEntryWithDebugKeySchema$1: z$1.ZodObject<{
|
|
|
5047
5083
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
5048
5084
|
number: "number";
|
|
5049
5085
|
boolean: "boolean";
|
|
5086
|
+
duration: "duration";
|
|
5087
|
+
json: "json";
|
|
5050
5088
|
file: "file";
|
|
5051
5089
|
markdown: "markdown";
|
|
5052
|
-
json: "json";
|
|
5053
5090
|
image: "image";
|
|
5054
5091
|
html: "html";
|
|
5055
5092
|
pdf: "pdf";
|
|
5056
5093
|
audio: "audio";
|
|
5057
5094
|
video: "video";
|
|
5058
|
-
duration: "duration";
|
|
5059
5095
|
percent: "percent";
|
|
5060
5096
|
passFail: "passFail";
|
|
5061
5097
|
stars: "stars";
|
|
@@ -5156,15 +5192,15 @@ declare const cacheEntryWithDebugKeySchema$1: z$1.ZodObject<{
|
|
|
5156
5192
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
5157
5193
|
number: "number";
|
|
5158
5194
|
boolean: "boolean";
|
|
5195
|
+
duration: "duration";
|
|
5196
|
+
json: "json";
|
|
5159
5197
|
file: "file";
|
|
5160
5198
|
markdown: "markdown";
|
|
5161
|
-
json: "json";
|
|
5162
5199
|
image: "image";
|
|
5163
5200
|
html: "html";
|
|
5164
5201
|
pdf: "pdf";
|
|
5165
5202
|
audio: "audio";
|
|
5166
5203
|
video: "video";
|
|
5167
|
-
duration: "duration";
|
|
5168
5204
|
percent: "percent";
|
|
5169
5205
|
passFail: "passFail";
|
|
5170
5206
|
stars: "stars";
|
|
@@ -5263,15 +5299,15 @@ declare const cacheFileSchema: z$1.ZodObject<{
|
|
|
5263
5299
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
5264
5300
|
number: "number";
|
|
5265
5301
|
boolean: "boolean";
|
|
5302
|
+
duration: "duration";
|
|
5303
|
+
json: "json";
|
|
5266
5304
|
file: "file";
|
|
5267
5305
|
markdown: "markdown";
|
|
5268
|
-
json: "json";
|
|
5269
5306
|
image: "image";
|
|
5270
5307
|
html: "html";
|
|
5271
5308
|
pdf: "pdf";
|
|
5272
5309
|
audio: "audio";
|
|
5273
5310
|
video: "video";
|
|
5274
|
-
duration: "duration";
|
|
5275
5311
|
percent: "percent";
|
|
5276
5312
|
passFail: "passFail";
|
|
5277
5313
|
stars: "stars";
|
|
@@ -5380,15 +5416,15 @@ declare const cacheDebugKeyFileSchema: z$1.ZodObject<{
|
|
|
5380
5416
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
5381
5417
|
number: "number";
|
|
5382
5418
|
boolean: "boolean";
|
|
5419
|
+
duration: "duration";
|
|
5420
|
+
json: "json";
|
|
5383
5421
|
file: "file";
|
|
5384
5422
|
markdown: "markdown";
|
|
5385
|
-
json: "json";
|
|
5386
5423
|
image: "image";
|
|
5387
5424
|
html: "html";
|
|
5388
5425
|
pdf: "pdf";
|
|
5389
5426
|
audio: "audio";
|
|
5390
5427
|
video: "video";
|
|
5391
|
-
duration: "duration";
|
|
5392
5428
|
percent: "percent";
|
|
5393
5429
|
passFail: "passFail";
|
|
5394
5430
|
stars: "stars";
|
|
@@ -7294,4 +7330,4 @@ declare function defineEval<TInput = unknown, TOutputs extends EvalOutputs = Eva
|
|
|
7294
7330
|
/** Return whether the active eval case has tags matching the typed input. */
|
|
7295
7331
|
declare function matchesEvalTags(input: EvalTagMatchInput): boolean;
|
|
7296
7332
|
//#endregion
|
|
7297
|
-
export { AgentEvalTagRegistry, AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheActivityEntry, type CacheAdapter, type CacheDebugKeyEntry, type CacheDebugKeyFile, type CacheDebugKeyWrite, type CacheEntry, type CacheEntryWithDebugKey, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheRepairSummary, type CacheScopeContext, type CacheSerializationOptions, type CacheStatus, type CallDerivedAttribute, type CallDerivedAttributeContext, type CallDerivedAttributesConfig, type CallDerivedAttributesFn, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type ConfigReloadState, type ConfigReloadStatus, type CreateRunRequest, type DefaultConfigKey, type DerivedStatus, type DiscoveryIssue, EvalAssertionError, type EvalCacheConfig, EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, EvalDefinition, type EvalDeriveConfig, type EvalDeriveContext, type EvalDeriveFn, type EvalDeriveMap, type EvalDeriveValueFn, type EvalDisplayStatus, type EvalExecuteContext, type EvalExpectation, type EvalFreshnessStatus, type EvalManualInputConfig, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalRuntimeScope, EvalRuntimeUsageError, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStartTime, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type 
EvalSummary, EvalTag, EvalTagMatchInput, type EvalTraceTree, type EvalTracingAssertionsConfig, type EvalTracingAssertionsFn, type JsonCell, type LlmCallCostBreakdown, type LlmCallCostCurrency, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallPricing, type LlmCallPricingRate, type LlmCallPricingRegistry, type LlmCallSimulatedTokens, type LlmCallsConfigInput, type LlmCostScenario, type ManualInputDescriptor, type ManualInputFieldDescriptor, type ManualInputFieldKind, type ManualInputFieldOverride, type ManualInputFieldsConfig, type ManualInputFileValue, type ManualInputSelectOption, type MaterializeManualInputFilesResult, type NumberDisplayOptions, type ReadManualInputFileResult, type RemoveDefaultConfig, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedCallDerivedAttribute, type ResolvedLlmCallCostCurrency, type ResolvedLlmCallMetric, type ResolvedLlmCallPricing, type ResolvedLlmCallsConfig, type RunInEvalScopeOptions, type RunLogEntry, type RunLogLevel, type RunLogLocation, type RunLogPhase, type RunLogsConfigInput, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SerializedCacheValue, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, 
extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, z };
|
|
7333
|
+
export { AgentEvalTagRegistry, AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheActivityEntry, type CacheAdapter, type CacheDebugKeyEntry, type CacheDebugKeyFile, type CacheDebugKeyWrite, type CacheEntry, type CacheEntryWithDebugKey, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheRepairSummary, type CacheScopeContext, type CacheSerializationOptions, type CacheStatus, type CallDerivedAttribute, type CallDerivedAttributeContext, type CallDerivedAttributesConfig, type CallDerivedAttributesFn, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type ConfigReloadState, type ConfigReloadStatus, type CreateRunRequest, type DefaultConfigKey, type DerivedStatus, type DiscoveryIssue, EvalAssertionError, type EvalCacheConfig, EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, EvalDefinition, type EvalDeriveConfig, type EvalDeriveContext, type EvalDeriveFn, type EvalDeriveMap, type EvalDeriveValueFn, type EvalDisplayStatus, type EvalExecuteContext, type EvalExpectation, type EvalFreshnessStatus, type EvalManualInputConfig, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalRuntimeScope, EvalRuntimeUsageError, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStartTime, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type 
EvalSummary, EvalTag, EvalTagMatchInput, type EvalToolCallSpan, type EvalTraceTree, type EvalTracingAssertionsConfig, type EvalTracingAssertionsFn, type JsonCell, type LlmCallCostBreakdown, type LlmCallCostCurrency, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallPricing, type LlmCallPricingRate, type LlmCallPricingRegistry, type LlmCallSimulatedTokens, type LlmCallsConfigInput, type LlmCostScenario, type ManualInputDescriptor, type ManualInputFieldDescriptor, type ManualInputFieldKind, type ManualInputFieldOverride, type ManualInputFieldsConfig, type ManualInputFileValue, type ManualInputSelectOption, type MaterializeManualInputFilesResult, type NumberDisplayOptions, type ReadManualInputFileResult, type RemoveDefaultConfig, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedCallDerivedAttribute, type ResolvedLlmCallCostCurrency, type ResolvedLlmCallMetric, type ResolvedLlmCallPricing, type ResolvedLlmCallsConfig, type RunInEvalScopeOptions, type RunLogEntry, type RunLogLevel, type RunLogLocation, type RunLogPhase, type RunLogsConfigInput, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SerializedCacheValue, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, 
extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, z };
|
package/dist/index.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { $ as setEvalOutput, A as serializeCacheValue, B as evalLog, C as evalSpan, D as deserializeCacheRecording, E as hashCacheKeySync, F as EvalAssertionError, G as isInEvalScope, H as getCurrentScope, I as EvalRuntimeUsageError, It as getEvalRegistry, J as nextEvalId, L as appendToEvalOutput, M as manualInputFileValueSchema, N as readManualInputFile, O as deserializeCacheValue, P as evalExpect, S as captureEvalSpanError, T as hashCacheKey, U as getEvalCaseInput, V as evalTime, W as incrementEvalOutput, X as runInEvalScope, Y as runInEvalRuntimeScope, Z as runInExistingEvalScope, at as extractCacheHits, b as z, ct as simulateLlmCallCost, dt as getNestedAttribute, et as setScopeCacheContext, it as extractCacheEntries, j as repoFile, k as serializeCacheRecording, lt as simulateTokenAllocation, ot as extractApiCalls, q as mergeEvalOutput, st as extractLlmCalls, tt as startEvalBackgroundJob, w as evalTracer, x as buildTraceTree, z as evalAssert } from "./runExecution-BMnJXWhN.mjs";
|
|
2
|
-
import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-ClAkjTvo.mjs";
|
|
3
|
-
import { n as matchesEvalTags, t as defineEval } from "./src-DfzidkYr.mjs";
|
|
1
|
+
import { $ as setEvalOutput, A as serializeCacheValue, B as evalLog, C as evalSpan, D as deserializeCacheRecording, E as hashCacheKeySync, F as EvalAssertionError, G as isInEvalScope, H as getCurrentScope, I as EvalRuntimeUsageError, It as getEvalRegistry, J as nextEvalId, L as appendToEvalOutput, M as manualInputFileValueSchema, N as readManualInputFile, O as deserializeCacheValue, P as evalExpect, S as captureEvalSpanError, T as hashCacheKey, U as getEvalCaseInput, V as evalTime, W as incrementEvalOutput, X as runInEvalScope, Y as runInEvalRuntimeScope, Z as runInExistingEvalScope, at as extractCacheHits, b as z, ct as simulateLlmCallCost, dt as getNestedAttribute, et as setScopeCacheContext, it as extractCacheEntries, j as repoFile, k as serializeCacheRecording, lt as simulateTokenAllocation, ot as extractApiCalls, q as mergeEvalOutput, st as extractLlmCalls, tt as startEvalBackgroundJob, w as evalTracer, x as buildTraceTree, z as evalAssert } from "./runExecution-C3XVZHRC.mjs";
|
|
2
|
+
import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-Dkp2-rBm.mjs";
|
|
3
|
+
import { n as matchesEvalTags, t as defineEval } from "./src-8dGXUULC.mjs";
|
|
4
4
|
export { EvalAssertionError, EvalRuntimeUsageError, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, z };
|
package/dist/runChild.mjs
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { At as evalStatAggregateSchema, Et as buildEvalKey, Mt as manualInputDescriptorSchema, Nt as evalChartsConfigSchema, Pt as columnDefSchema, R as configureEvalRunLogs, St as runSummarySchema, jt as evalStatsConfigSchema, l as registerAgentEvalsPackageResolutionHooks, nt as createRunRequestSchema, p as loadConfig, v as createFsCacheStore, xt as runManifestSchema, y as getCacheRetentionOptions } from "./runExecution-BMnJXWhN.mjs";
|
|
2
|
-
import { S as parseEvalDiscovery, m as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-CvbTAoEb.mjs";
|
|
1
|
+
import { At as evalStatAggregateSchema, Et as buildEvalKey, Mt as manualInputDescriptorSchema, Nt as evalChartsConfigSchema, Pt as columnDefSchema, R as configureEvalRunLogs, St as runSummarySchema, jt as evalStatsConfigSchema, l as registerAgentEvalsPackageResolutionHooks, nt as createRunRequestSchema, p as loadConfig, v as createFsCacheStore, xt as runManifestSchema, y as getCacheRetentionOptions } from "./runExecution-C3XVZHRC.mjs";
|
|
2
|
+
import { S as parseEvalDiscovery, m as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-B5An-AEi.mjs";
|
|
3
3
|
import { z } from "zod/v4";
|
|
4
4
|
import { readFile } from "node:fs/promises";
|
|
5
5
|
import { relative } from "node:path";
|