@ls-stack/agent-eval 0.58.5 → 0.59.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -398,6 +398,18 @@ type EvalCase$1$1<TInput = unknown> = {
398
398
  input: TInput;
399
399
  tags?: string[];
400
400
  };
401
+ /** Normalized view of one tool-call span and its common tool metadata. */
402
+ type EvalToolCallSpan = {
403
+ /** Preferred tool name, using GenAI/Mastra identity metadata when present. */name: string; /** Original trace span display name. */
404
+ spanName: string; /** Original trace span kind. */
405
+ kind: string; /** Parsed tool-call arguments, or the raw value when parsing is not possible. */
406
+ arguments: unknown; /** Parsed tool-call result, or the raw value when parsing is not possible. */
407
+ result: unknown; /** Tool description from GenAI/Mastra metadata when present. */
408
+ description: string | undefined; /** Tool type from GenAI/Mastra metadata when present. */
409
+ toolType: string | undefined; /** Original span attributes. */
410
+ attributes: Record<string, unknown> | undefined; /** Original trace span for fields not normalized above. */
411
+ span: EvalTraceSpan$2;
412
+ };
401
413
  /** Query helpers built from the flattened trace recorded for one eval case. */
402
414
  type EvalTraceTree = {
403
415
  /** Flat span list in creation order. */spans: EvalTraceSpan$2[]; /** Top-level spans whose `parentId` is `null`. */
@@ -406,10 +418,16 @@ type EvalTraceTree = {
406
418
  findSpans: (name: string) => EvalTraceSpan$2[]; /** Return whether any span name exactly matches `name`. */
407
419
  hasSpan: (name: string) => boolean; /** Return every span whose kind exactly matches `kind`. */
408
420
  findSpansByKind: (kind: string) => EvalTraceSpan$2[]; /** Return every span with `kind: 'tool'` or `kind: 'tool_call'`. */
409
- findToolCallSpans: () => EvalTraceSpan$2[]; /** Return the names of every span with `kind: 'tool'` or `kind: 'tool_call'`. */
410
- listToolCallSpanNames: () => string[]; /** Return whether a tool-call span has a name exactly matching `name`. */
411
- hasToolCallSpan: (name: string) => boolean; /** Return whether a tool-call span name appears exactly `expectedCalls` times. */
412
- hasNToolCallSpans: (toolName: string, expectedCalls: number) => boolean; /** Return span names in creation order, optionally filtered by kind. */
421
+ findToolCallSpans: () => EvalTraceSpan$2[];
422
+ /**
423
+ * Return tool-call names, preferring GenAI/Mastra tool identity attributes
424
+ * when available.
425
+ */
426
+ listToolCallSpanNames: () => string[]; /** Return whether a tool-call span name or tool identity matches `name`. */
427
+ hasToolCallSpan: (name: string) => boolean; /** Return normalized tool-call spans whose name or tool identity matches `name`. */
428
+ getToolCallSpans: (name: string) => EvalToolCallSpan[]; /** Return how many tool-call spans have a name or tool identity matching `toolName`. */
429
+ getToolCallSpanCount: (toolName: string) => number; /** Return whether a tool-call span name or tool identity appears exactly `expectedCalls` times. */
430
+ hasToolCallSpanCount: (toolName: string, expectedCalls: number) => boolean; /** Return span names in creation order, optionally filtered by kind. */
413
431
  listSpanNames: (kind?: string) => string[]; /** Return span names in depth-first tree order, optionally filtered by kind. */
414
432
  listSpanNamesDfs: (kind?: string) => string[]; /** Return all spans in depth-first tree order. */
415
433
  flattenDfs: () => EvalTraceSpan$2[];
@@ -1923,15 +1941,15 @@ type ColumnKind = z$1.infer<typeof columnKindSchema>;
1923
1941
  declare const columnFormatSchema: z$1.ZodEnum<{
1924
1942
  number: "number";
1925
1943
  boolean: "boolean";
1944
+ duration: "duration";
1945
+ json: "json";
1926
1946
  file: "file";
1927
1947
  markdown: "markdown";
1928
- json: "json";
1929
1948
  image: "image";
1930
1949
  html: "html";
1931
1950
  pdf: "pdf";
1932
1951
  audio: "audio";
1933
1952
  video: "video";
1934
- duration: "duration";
1935
1953
  percent: "percent";
1936
1954
  passFail: "passFail";
1937
1955
  stars: "stars";
@@ -1950,15 +1968,15 @@ declare const columnDefSchema: z$1.ZodObject<{
1950
1968
  format: z$1.ZodOptional<z$1.ZodEnum<{
1951
1969
  number: "number";
1952
1970
  boolean: "boolean";
1971
+ duration: "duration";
1972
+ json: "json";
1953
1973
  file: "file";
1954
1974
  markdown: "markdown";
1955
- json: "json";
1956
1975
  image: "image";
1957
1976
  html: "html";
1958
1977
  pdf: "pdf";
1959
1978
  audio: "audio";
1960
1979
  video: "video";
1961
- duration: "duration";
1962
1980
  percent: "percent";
1963
1981
  passFail: "passFail";
1964
1982
  stars: "stars";
@@ -2004,8 +2022,8 @@ type CellValue = z$1.infer<typeof cellValueSchema>; //#endregion
2004
2022
  declare const traceAttributeDisplayFormatSchema: z$1.ZodEnum<{
2005
2023
  string: "string";
2006
2024
  number: "number";
2007
- json: "json";
2008
2025
  duration: "duration";
2026
+ json: "json";
2009
2027
  }>;
2010
2028
  /**
2011
2029
  * Formatting hint for trace attribute values rendered by the UI.
@@ -2029,8 +2047,8 @@ declare const traceAttributeDisplaySchema: z$1.ZodObject<{
2029
2047
  format: z$1.ZodOptional<z$1.ZodEnum<{
2030
2048
  string: "string";
2031
2049
  number: "number";
2032
- json: "json";
2033
2050
  duration: "duration";
2051
+ json: "json";
2034
2052
  }>>;
2035
2053
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
2036
2054
  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
@@ -2065,8 +2083,8 @@ declare const traceDisplayConfigSchema: z$1.ZodObject<{
2065
2083
  format: z$1.ZodOptional<z$1.ZodEnum<{
2066
2084
  string: "string";
2067
2085
  number: "number";
2068
- json: "json";
2069
2086
  duration: "duration";
2087
+ json: "json";
2070
2088
  }>>;
2071
2089
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
2072
2090
  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
@@ -2105,8 +2123,8 @@ declare const traceAttributeDisplayInputSchema: z$1.ZodObject<{
2105
2123
  format: z$1.ZodOptional<z$1.ZodEnum<{
2106
2124
  string: "string";
2107
2125
  number: "number";
2108
- json: "json";
2109
2126
  duration: "duration";
2127
+ json: "json";
2110
2128
  }>>;
2111
2129
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
2112
2130
  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
@@ -2143,8 +2161,8 @@ declare const traceDisplayInputConfigSchema: z$1.ZodObject<{
2143
2161
  format: z$1.ZodOptional<z$1.ZodEnum<{
2144
2162
  string: "string";
2145
2163
  number: "number";
2146
- json: "json";
2147
2164
  duration: "duration";
2165
+ json: "json";
2148
2166
  }>>;
2149
2167
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
2150
2168
  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
@@ -2308,15 +2326,15 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
2308
2326
  format: z$1.ZodOptional<z$1.ZodEnum<{
2309
2327
  number: "number";
2310
2328
  boolean: "boolean";
2329
+ duration: "duration";
2330
+ json: "json";
2311
2331
  file: "file";
2312
2332
  markdown: "markdown";
2313
- json: "json";
2314
2333
  image: "image";
2315
2334
  html: "html";
2316
2335
  pdf: "pdf";
2317
2336
  audio: "audio";
2318
2337
  video: "video";
2319
- duration: "duration";
2320
2338
  percent: "percent";
2321
2339
  passFail: "passFail";
2322
2340
  stars: "stars";
@@ -2372,15 +2390,15 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
2372
2390
  format: z$1.ZodOptional<z$1.ZodEnum<{
2373
2391
  number: "number";
2374
2392
  boolean: "boolean";
2393
+ duration: "duration";
2394
+ json: "json";
2375
2395
  file: "file";
2376
2396
  markdown: "markdown";
2377
- json: "json";
2378
2397
  image: "image";
2379
2398
  html: "html";
2380
2399
  pdf: "pdf";
2381
2400
  audio: "audio";
2382
2401
  video: "video";
2383
- duration: "duration";
2384
2402
  percent: "percent";
2385
2403
  passFail: "passFail";
2386
2404
  stars: "stars";
@@ -2418,15 +2436,15 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2418
2436
  format: z$1.ZodOptional<z$1.ZodEnum<{
2419
2437
  number: "number";
2420
2438
  boolean: "boolean";
2439
+ duration: "duration";
2440
+ json: "json";
2421
2441
  file: "file";
2422
2442
  markdown: "markdown";
2423
- json: "json";
2424
2443
  image: "image";
2425
2444
  html: "html";
2426
2445
  pdf: "pdf";
2427
2446
  audio: "audio";
2428
2447
  video: "video";
2429
- duration: "duration";
2430
2448
  percent: "percent";
2431
2449
  passFail: "passFail";
2432
2450
  stars: "stars";
@@ -2499,15 +2517,15 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2499
2517
  format: z$1.ZodOptional<z$1.ZodEnum<{
2500
2518
  number: "number";
2501
2519
  boolean: "boolean";
2520
+ duration: "duration";
2521
+ json: "json";
2502
2522
  file: "file";
2503
2523
  markdown: "markdown";
2504
- json: "json";
2505
2524
  image: "image";
2506
2525
  html: "html";
2507
2526
  pdf: "pdf";
2508
2527
  audio: "audio";
2509
2528
  video: "video";
2510
- duration: "duration";
2511
2529
  percent: "percent";
2512
2530
  passFail: "passFail";
2513
2531
  stars: "stars";
@@ -2540,8 +2558,8 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2540
2558
  }>;
2541
2559
  label: z$1.ZodOptional<z$1.ZodString>;
2542
2560
  color: z$1.ZodOptional<z$1.ZodEnum<{
2543
- success: "success";
2544
2561
  error: "error";
2562
+ success: "success";
2545
2563
  warning: "warning";
2546
2564
  accent: "accent";
2547
2565
  accentDim: "accentDim";
@@ -2564,8 +2582,8 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2564
2582
  }>;
2565
2583
  label: z$1.ZodOptional<z$1.ZodString>;
2566
2584
  color: z$1.ZodOptional<z$1.ZodEnum<{
2567
- success: "success";
2568
2585
  error: "error";
2586
+ success: "success";
2569
2587
  warning: "warning";
2570
2588
  accent: "accent";
2571
2589
  accentDim: "accentDim";
@@ -2730,15 +2748,15 @@ declare const caseRowSchema$1: z$1.ZodObject<{
2730
2748
  format: z$1.ZodOptional<z$1.ZodEnum<{
2731
2749
  number: "number";
2732
2750
  boolean: "boolean";
2751
+ duration: "duration";
2752
+ json: "json";
2733
2753
  file: "file";
2734
2754
  markdown: "markdown";
2735
- json: "json";
2736
2755
  image: "image";
2737
2756
  html: "html";
2738
2757
  pdf: "pdf";
2739
2758
  audio: "audio";
2740
2759
  video: "video";
2741
- duration: "duration";
2742
2760
  percent: "percent";
2743
2761
  passFail: "passFail";
2744
2762
  stars: "stars";
@@ -2876,8 +2894,8 @@ declare const scoreTraceSchema: z$1.ZodObject<{
2876
2894
  format: z$1.ZodOptional<z$1.ZodEnum<{
2877
2895
  string: "string";
2878
2896
  number: "number";
2879
- json: "json";
2880
2897
  duration: "duration";
2898
+ json: "json";
2881
2899
  }>>;
2882
2900
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
2883
2901
  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
@@ -2979,8 +2997,8 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
2979
2997
  format: z$1.ZodOptional<z$1.ZodEnum<{
2980
2998
  string: "string";
2981
2999
  number: "number";
2982
- json: "json";
2983
3000
  duration: "duration";
3001
+ json: "json";
2984
3002
  }>>;
2985
3003
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
2986
3004
  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
@@ -3048,8 +3066,8 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
3048
3066
  format: z$1.ZodOptional<z$1.ZodEnum<{
3049
3067
  string: "string";
3050
3068
  number: "number";
3051
- json: "json";
3052
3069
  duration: "duration";
3070
+ json: "json";
3053
3071
  }>>;
3054
3072
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
3055
3073
  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
@@ -3108,15 +3126,15 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
3108
3126
  format: z$1.ZodOptional<z$1.ZodEnum<{
3109
3127
  number: "number";
3110
3128
  boolean: "boolean";
3129
+ duration: "duration";
3130
+ json: "json";
3111
3131
  file: "file";
3112
3132
  markdown: "markdown";
3113
- json: "json";
3114
3133
  image: "image";
3115
3134
  html: "html";
3116
3135
  pdf: "pdf";
3117
3136
  audio: "audio";
3118
3137
  video: "video";
3119
- duration: "duration";
3120
3138
  percent: "percent";
3121
3139
  passFail: "passFail";
3122
3140
  stars: "stars";
@@ -3265,8 +3283,8 @@ type EvalChartAggregate = z$1.infer<typeof evalChartAggregateSchema>;
3265
3283
  * not emit raw hex so authored evals stay decoupled from the web theme.
3266
3284
  */
3267
3285
  declare const evalChartColorSchema: z$1.ZodEnum<{
3268
- success: "success";
3269
3286
  error: "error";
3287
+ success: "success";
3270
3288
  warning: "warning";
3271
3289
  accent: "accent";
3272
3290
  accentDim: "accentDim";
@@ -3294,8 +3312,8 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
3294
3312
  }>;
3295
3313
  label: z$1.ZodOptional<z$1.ZodString>;
3296
3314
  color: z$1.ZodOptional<z$1.ZodEnum<{
3297
- success: "success";
3298
3315
  error: "error";
3316
+ success: "success";
3299
3317
  warning: "warning";
3300
3318
  accent: "accent";
3301
3319
  accentDim: "accentDim";
@@ -3318,8 +3336,8 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
3318
3336
  }>;
3319
3337
  label: z$1.ZodOptional<z$1.ZodString>;
3320
3338
  color: z$1.ZodOptional<z$1.ZodEnum<{
3321
- success: "success";
3322
3339
  error: "error";
3340
+ success: "success";
3323
3341
  warning: "warning";
3324
3342
  accent: "accent";
3325
3343
  accentDim: "accentDim";
@@ -3377,8 +3395,8 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
3377
3395
  }>;
3378
3396
  label: z$1.ZodOptional<z$1.ZodString>;
3379
3397
  color: z$1.ZodOptional<z$1.ZodEnum<{
3380
- success: "success";
3381
3398
  error: "error";
3399
+ success: "success";
3382
3400
  warning: "warning";
3383
3401
  accent: "accent";
3384
3402
  accentDim: "accentDim";
@@ -3401,8 +3419,8 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
3401
3419
  }>;
3402
3420
  label: z$1.ZodOptional<z$1.ZodString>;
3403
3421
  color: z$1.ZodOptional<z$1.ZodEnum<{
3404
- success: "success";
3405
3422
  error: "error";
3423
+ success: "success";
3406
3424
  warning: "warning";
3407
3425
  accent: "accent";
3408
3426
  accentDim: "accentDim";
@@ -3467,8 +3485,8 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
3467
3485
  }>;
3468
3486
  label: z$1.ZodOptional<z$1.ZodString>;
3469
3487
  color: z$1.ZodOptional<z$1.ZodEnum<{
3470
- success: "success";
3471
3488
  error: "error";
3489
+ success: "success";
3472
3490
  warning: "warning";
3473
3491
  accent: "accent";
3474
3492
  accentDim: "accentDim";
@@ -3491,8 +3509,8 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
3491
3509
  }>;
3492
3510
  label: z$1.ZodOptional<z$1.ZodString>;
3493
3511
  color: z$1.ZodOptional<z$1.ZodEnum<{
3494
- success: "success";
3495
3512
  error: "error";
3513
+ success: "success";
3496
3514
  warning: "warning";
3497
3515
  accent: "accent";
3498
3516
  accentDim: "accentDim";
@@ -3688,6 +3706,18 @@ type EvalCase$1<TInput = unknown> = {
3688
3706
  input: TInput;
3689
3707
  tags?: string[];
3690
3708
  };
3709
+ /** Normalized view of one tool-call span and its common tool metadata. */
3710
+ type EvalToolCallSpan$1 = {
3711
+ /** Preferred tool name, using GenAI/Mastra identity metadata when present. */name: string; /** Original trace span display name. */
3712
+ spanName: string; /** Original trace span kind. */
3713
+ kind: string; /** Parsed tool-call arguments, or the raw value when parsing is not possible. */
3714
+ arguments: unknown; /** Parsed tool-call result, or the raw value when parsing is not possible. */
3715
+ result: unknown; /** Tool description from GenAI/Mastra metadata when present. */
3716
+ description: string | undefined; /** Tool type from GenAI/Mastra metadata when present. */
3717
+ toolType: string | undefined; /** Original span attributes. */
3718
+ attributes: Record<string, unknown> | undefined; /** Original trace span for fields not normalized above. */
3719
+ span: EvalTraceSpan$1;
3720
+ };
3691
3721
  /** Query helpers built from the flattened trace recorded for one eval case. */
3692
3722
  type EvalTraceTree$1 = {
3693
3723
  /** Flat span list in creation order. */spans: EvalTraceSpan$1[]; /** Top-level spans whose `parentId` is `null`. */
@@ -3696,10 +3726,16 @@ type EvalTraceTree$1 = {
3696
3726
  findSpans: (name: string) => EvalTraceSpan$1[]; /** Return whether any span name exactly matches `name`. */
3697
3727
  hasSpan: (name: string) => boolean; /** Return every span whose kind exactly matches `kind`. */
3698
3728
  findSpansByKind: (kind: string) => EvalTraceSpan$1[]; /** Return every span with `kind: 'tool'` or `kind: 'tool_call'`. */
3699
- findToolCallSpans: () => EvalTraceSpan$1[]; /** Return the names of every span with `kind: 'tool'` or `kind: 'tool_call'`. */
3700
- listToolCallSpanNames: () => string[]; /** Return whether a tool-call span has a name exactly matching `name`. */
3701
- hasToolCallSpan: (name: string) => boolean; /** Return whether a tool-call span name appears exactly `expectedCalls` times. */
3702
- hasNToolCallSpans: (toolName: string, expectedCalls: number) => boolean; /** Return span names in creation order, optionally filtered by kind. */
3729
+ findToolCallSpans: () => EvalTraceSpan$1[];
3730
+ /**
3731
+ * Return tool-call names, preferring GenAI/Mastra tool identity attributes
3732
+ * when available.
3733
+ */
3734
+ listToolCallSpanNames: () => string[]; /** Return whether a tool-call span name or tool identity matches `name`. */
3735
+ hasToolCallSpan: (name: string) => boolean; /** Return normalized tool-call spans whose name or tool identity matches `name`. */
3736
+ getToolCallSpans: (name: string) => EvalToolCallSpan$1[]; /** Return how many tool-call spans have a name or tool identity matching `toolName`. */
3737
+ getToolCallSpanCount: (toolName: string) => number; /** Return whether a tool-call span name or tool identity appears exactly `expectedCalls` times. */
3738
+ hasToolCallSpanCount: (toolName: string, expectedCalls: number) => boolean; /** Return span names in creation order, optionally filtered by kind. */
3703
3739
  listSpanNames: (kind?: string) => string[]; /** Return span names in depth-first tree order, optionally filtered by kind. */
3704
3740
  listSpanNamesDfs: (kind?: string) => string[]; /** Return all spans in depth-first tree order. */
3705
3741
  flattenDfs: () => EvalTraceSpan$1[];
@@ -3772,8 +3808,8 @@ declare const llmCallMetricFormatSchema$1: z$1.ZodEnum<{
3772
3808
  string: "string";
3773
3809
  number: "number";
3774
3810
  boolean: "boolean";
3775
- json: "json";
3776
3811
  duration: "duration";
3812
+ json: "json";
3777
3813
  }>;
3778
3814
  /** Render format applied to an LLM-call metric value. */
3779
3815
  type LlmCallMetricFormat = z$1.infer<typeof llmCallMetricFormatSchema$1>;
@@ -3782,8 +3818,8 @@ declare const apiCallMetricFormatSchema$1: z$1.ZodEnum<{
3782
3818
  string: "string";
3783
3819
  number: "number";
3784
3820
  boolean: "boolean";
3785
- json: "json";
3786
3821
  duration: "duration";
3822
+ json: "json";
3787
3823
  }>;
3788
3824
  /** Render format applied to an API-call metric value. */
3789
3825
  type ApiCallMetricFormat = z$1.infer<typeof apiCallMetricFormatSchema$1>;
@@ -3852,8 +3888,8 @@ declare const llmCallMetricSchema: z$1.ZodObject<{
3852
3888
  string: "string";
3853
3889
  number: "number";
3854
3890
  boolean: "boolean";
3855
- json: "json";
3856
3891
  duration: "duration";
3892
+ json: "json";
3857
3893
  }>>;
3858
3894
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
3859
3895
  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
@@ -3881,8 +3917,8 @@ declare const apiCallMetricSchema: z$1.ZodObject<{
3881
3917
  string: "string";
3882
3918
  number: "number";
3883
3919
  boolean: "boolean";
3884
- json: "json";
3885
3920
  duration: "duration";
3921
+ json: "json";
3886
3922
  }>>;
3887
3923
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
3888
3924
  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
@@ -3995,8 +4031,8 @@ declare const llmCallsConfigSchema: z$1.ZodObject<{
3995
4031
  string: "string";
3996
4032
  number: "number";
3997
4033
  boolean: "boolean";
3998
- json: "json";
3999
4034
  duration: "duration";
4035
+ json: "json";
4000
4036
  }>>;
4001
4037
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
4002
4038
  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
@@ -4031,8 +4067,8 @@ declare const apiCallsConfigSchema: z$1.ZodObject<{
4031
4067
  string: "string";
4032
4068
  number: "number";
4033
4069
  boolean: "boolean";
4034
- json: "json";
4035
4070
  duration: "duration";
4071
+ json: "json";
4036
4072
  }>>;
4037
4073
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
4038
4074
  placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
@@ -4633,15 +4669,15 @@ declare const cacheRecordingOpSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
4633
4669
  format: z$1.ZodOptional<z$1.ZodEnum<{
4634
4670
  number: "number";
4635
4671
  boolean: "boolean";
4672
+ duration: "duration";
4673
+ json: "json";
4636
4674
  file: "file";
4637
4675
  markdown: "markdown";
4638
- json: "json";
4639
4676
  image: "image";
4640
4677
  html: "html";
4641
4678
  pdf: "pdf";
4642
4679
  audio: "audio";
4643
4680
  video: "video";
4644
- duration: "duration";
4645
4681
  percent: "percent";
4646
4682
  passFail: "passFail";
4647
4683
  stars: "stars";
@@ -4721,15 +4757,15 @@ declare const cacheRecordingSchema: z$1.ZodObject<{
4721
4757
  format: z$1.ZodOptional<z$1.ZodEnum<{
4722
4758
  number: "number";
4723
4759
  boolean: "boolean";
4760
+ duration: "duration";
4761
+ json: "json";
4724
4762
  file: "file";
4725
4763
  markdown: "markdown";
4726
- json: "json";
4727
4764
  image: "image";
4728
4765
  html: "html";
4729
4766
  pdf: "pdf";
4730
4767
  audio: "audio";
4731
4768
  video: "video";
4732
- duration: "duration";
4733
4769
  percent: "percent";
4734
4770
  passFail: "passFail";
4735
4771
  stars: "stars";
@@ -4822,15 +4858,15 @@ declare const cacheEntrySchema: z$1.ZodObject<{
4822
4858
  format: z$1.ZodOptional<z$1.ZodEnum<{
4823
4859
  number: "number";
4824
4860
  boolean: "boolean";
4861
+ duration: "duration";
4862
+ json: "json";
4825
4863
  file: "file";
4826
4864
  markdown: "markdown";
4827
- json: "json";
4828
4865
  image: "image";
4829
4866
  html: "html";
4830
4867
  pdf: "pdf";
4831
4868
  audio: "audio";
4832
4869
  video: "video";
4833
- duration: "duration";
4834
4870
  percent: "percent";
4835
4871
  passFail: "passFail";
4836
4872
  stars: "stars";
@@ -4940,15 +4976,15 @@ declare const cacheDebugKeyEntrySchema: z$1.ZodObject<{
4940
4976
  format: z$1.ZodOptional<z$1.ZodEnum<{
4941
4977
  number: "number";
4942
4978
  boolean: "boolean";
4979
+ duration: "duration";
4980
+ json: "json";
4943
4981
  file: "file";
4944
4982
  markdown: "markdown";
4945
- json: "json";
4946
4983
  image: "image";
4947
4984
  html: "html";
4948
4985
  pdf: "pdf";
4949
4986
  audio: "audio";
4950
4987
  video: "video";
4951
- duration: "duration";
4952
4988
  percent: "percent";
4953
4989
  passFail: "passFail";
4954
4990
  stars: "stars";
@@ -5047,15 +5083,15 @@ declare const cacheEntryWithDebugKeySchema$1: z$1.ZodObject<{
5047
5083
  format: z$1.ZodOptional<z$1.ZodEnum<{
5048
5084
  number: "number";
5049
5085
  boolean: "boolean";
5086
+ duration: "duration";
5087
+ json: "json";
5050
5088
  file: "file";
5051
5089
  markdown: "markdown";
5052
- json: "json";
5053
5090
  image: "image";
5054
5091
  html: "html";
5055
5092
  pdf: "pdf";
5056
5093
  audio: "audio";
5057
5094
  video: "video";
5058
- duration: "duration";
5059
5095
  percent: "percent";
5060
5096
  passFail: "passFail";
5061
5097
  stars: "stars";
@@ -5156,15 +5192,15 @@ declare const cacheEntryWithDebugKeySchema$1: z$1.ZodObject<{
5156
5192
  format: z$1.ZodOptional<z$1.ZodEnum<{
5157
5193
  number: "number";
5158
5194
  boolean: "boolean";
5195
+ duration: "duration";
5196
+ json: "json";
5159
5197
  file: "file";
5160
5198
  markdown: "markdown";
5161
- json: "json";
5162
5199
  image: "image";
5163
5200
  html: "html";
5164
5201
  pdf: "pdf";
5165
5202
  audio: "audio";
5166
5203
  video: "video";
5167
- duration: "duration";
5168
5204
  percent: "percent";
5169
5205
  passFail: "passFail";
5170
5206
  stars: "stars";
@@ -5263,15 +5299,15 @@ declare const cacheFileSchema: z$1.ZodObject<{
5263
5299
  format: z$1.ZodOptional<z$1.ZodEnum<{
5264
5300
  number: "number";
5265
5301
  boolean: "boolean";
5302
+ duration: "duration";
5303
+ json: "json";
5266
5304
  file: "file";
5267
5305
  markdown: "markdown";
5268
- json: "json";
5269
5306
  image: "image";
5270
5307
  html: "html";
5271
5308
  pdf: "pdf";
5272
5309
  audio: "audio";
5273
5310
  video: "video";
5274
- duration: "duration";
5275
5311
  percent: "percent";
5276
5312
  passFail: "passFail";
5277
5313
  stars: "stars";
@@ -5380,15 +5416,15 @@ declare const cacheDebugKeyFileSchema: z$1.ZodObject<{
5380
5416
  format: z$1.ZodOptional<z$1.ZodEnum<{
5381
5417
  number: "number";
5382
5418
  boolean: "boolean";
5419
+ duration: "duration";
5420
+ json: "json";
5383
5421
  file: "file";
5384
5422
  markdown: "markdown";
5385
- json: "json";
5386
5423
  image: "image";
5387
5424
  html: "html";
5388
5425
  pdf: "pdf";
5389
5426
  audio: "audio";
5390
5427
  video: "video";
5391
- duration: "duration";
5392
5428
  percent: "percent";
5393
5429
  passFail: "passFail";
5394
5430
  stars: "stars";
@@ -7294,4 +7330,4 @@ declare function defineEval<TInput = unknown, TOutputs extends EvalOutputs = Eva
7294
7330
  /** Return whether the active eval case has tags matching the typed input. */
7295
7331
  declare function matchesEvalTags(input: EvalTagMatchInput): boolean;
7296
7332
  //#endregion
7297
- export { AgentEvalTagRegistry, AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheActivityEntry, type CacheAdapter, type CacheDebugKeyEntry, type CacheDebugKeyFile, type CacheDebugKeyWrite, type CacheEntry, type CacheEntryWithDebugKey, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheRepairSummary, type CacheScopeContext, type CacheSerializationOptions, type CacheStatus, type CallDerivedAttribute, type CallDerivedAttributeContext, type CallDerivedAttributesConfig, type CallDerivedAttributesFn, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type ConfigReloadState, type ConfigReloadStatus, type CreateRunRequest, type DefaultConfigKey, type DerivedStatus, type DiscoveryIssue, EvalAssertionError, type EvalCacheConfig, EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, EvalDefinition, type EvalDeriveConfig, type EvalDeriveContext, type EvalDeriveFn, type EvalDeriveMap, type EvalDeriveValueFn, type EvalDisplayStatus, type EvalExecuteContext, type EvalExpectation, type EvalFreshnessStatus, type EvalManualInputConfig, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalRuntimeScope, EvalRuntimeUsageError, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStartTime, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, EvalTag, EvalTagMatchInput, type EvalTraceTree, type EvalTracingAssertionsConfig, type EvalTracingAssertionsFn, type JsonCell, type LlmCallCostBreakdown, type LlmCallCostCurrency, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallPricing, type LlmCallPricingRate, type LlmCallPricingRegistry, type LlmCallSimulatedTokens, type LlmCallsConfigInput, type LlmCostScenario, type ManualInputDescriptor, type ManualInputFieldDescriptor, type ManualInputFieldKind, type ManualInputFieldOverride, type ManualInputFieldsConfig, type ManualInputFileValue, type ManualInputSelectOption, type MaterializeManualInputFilesResult, type NumberDisplayOptions, type ReadManualInputFileResult, type RemoveDefaultConfig, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedCallDerivedAttribute, type ResolvedLlmCallCostCurrency, type ResolvedLlmCallMetric, type ResolvedLlmCallPricing, type ResolvedLlmCallsConfig, type RunInEvalScopeOptions, type RunLogEntry, type RunLogLevel, type RunLogLocation, type RunLogPhase, type RunLogsConfigInput, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SerializedCacheValue, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, z };
7333
+ export { AgentEvalTagRegistry, AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheActivityEntry, type CacheAdapter, type CacheDebugKeyEntry, type CacheDebugKeyFile, type CacheDebugKeyWrite, type CacheEntry, type CacheEntryWithDebugKey, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheRepairSummary, type CacheScopeContext, type CacheSerializationOptions, type CacheStatus, type CallDerivedAttribute, type CallDerivedAttributeContext, type CallDerivedAttributesConfig, type CallDerivedAttributesFn, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type ConfigReloadState, type ConfigReloadStatus, type CreateRunRequest, type DefaultConfigKey, type DerivedStatus, type DiscoveryIssue, EvalAssertionError, type EvalCacheConfig, EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, EvalDefinition, type EvalDeriveConfig, type EvalDeriveContext, type EvalDeriveFn, type EvalDeriveMap, type EvalDeriveValueFn, type EvalDisplayStatus, type EvalExecuteContext, type EvalExpectation, type EvalFreshnessStatus, type EvalManualInputConfig, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalRuntimeScope, EvalRuntimeUsageError, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStartTime, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, EvalTag, EvalTagMatchInput, type EvalToolCallSpan, type EvalTraceTree, type EvalTracingAssertionsConfig, type EvalTracingAssertionsFn, type JsonCell, type LlmCallCostBreakdown, type LlmCallCostCurrency, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallPricing, type LlmCallPricingRate, type LlmCallPricingRegistry, type LlmCallSimulatedTokens, type LlmCallsConfigInput, type LlmCostScenario, type ManualInputDescriptor, type ManualInputFieldDescriptor, type ManualInputFieldKind, type ManualInputFieldOverride, type ManualInputFieldsConfig, type ManualInputFileValue, type ManualInputSelectOption, type MaterializeManualInputFilesResult, type NumberDisplayOptions, type ReadManualInputFileResult, type RemoveDefaultConfig, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedCallDerivedAttribute, type ResolvedLlmCallCostCurrency, type ResolvedLlmCallMetric, type ResolvedLlmCallPricing, type ResolvedLlmCallsConfig, type RunInEvalScopeOptions, type RunLogEntry, type RunLogLevel, type RunLogLocation, type RunLogPhase, type RunLogsConfigInput, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SerializedCacheValue, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, z };
package/dist/index.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { $ as setEvalOutput, A as serializeCacheValue, B as evalLog, C as evalSpan, D as deserializeCacheRecording, E as hashCacheKeySync, F as EvalAssertionError, G as isInEvalScope, H as getCurrentScope, I as EvalRuntimeUsageError, It as getEvalRegistry, J as nextEvalId, L as appendToEvalOutput, M as manualInputFileValueSchema, N as readManualInputFile, O as deserializeCacheValue, P as evalExpect, S as captureEvalSpanError, T as hashCacheKey, U as getEvalCaseInput, V as evalTime, W as incrementEvalOutput, X as runInEvalScope, Y as runInEvalRuntimeScope, Z as runInExistingEvalScope, at as extractCacheHits, b as z, ct as simulateLlmCallCost, dt as getNestedAttribute, et as setScopeCacheContext, it as extractCacheEntries, j as repoFile, k as serializeCacheRecording, lt as simulateTokenAllocation, ot as extractApiCalls, q as mergeEvalOutput, st as extractLlmCalls, tt as startEvalBackgroundJob, w as evalTracer, x as buildTraceTree, z as evalAssert } from "./runExecution-BMnJXWhN.mjs";
2
- import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-ClAkjTvo.mjs";
3
- import { n as matchesEvalTags, t as defineEval } from "./src-DfzidkYr.mjs";
1
+ import { $ as setEvalOutput, A as serializeCacheValue, B as evalLog, C as evalSpan, D as deserializeCacheRecording, E as hashCacheKeySync, F as EvalAssertionError, G as isInEvalScope, H as getCurrentScope, I as EvalRuntimeUsageError, It as getEvalRegistry, J as nextEvalId, L as appendToEvalOutput, M as manualInputFileValueSchema, N as readManualInputFile, O as deserializeCacheValue, P as evalExpect, S as captureEvalSpanError, T as hashCacheKey, U as getEvalCaseInput, V as evalTime, W as incrementEvalOutput, X as runInEvalScope, Y as runInEvalRuntimeScope, Z as runInExistingEvalScope, at as extractCacheHits, b as z, ct as simulateLlmCallCost, dt as getNestedAttribute, et as setScopeCacheContext, it as extractCacheEntries, j as repoFile, k as serializeCacheRecording, lt as simulateTokenAllocation, ot as extractApiCalls, q as mergeEvalOutput, st as extractLlmCalls, tt as startEvalBackgroundJob, w as evalTracer, x as buildTraceTree, z as evalAssert } from "./runExecution-C3XVZHRC.mjs";
2
+ import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-Dkp2-rBm.mjs";
3
+ import { n as matchesEvalTags, t as defineEval } from "./src-8dGXUULC.mjs";
4
4
  export { EvalAssertionError, EvalRuntimeUsageError, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, z };
package/dist/runChild.mjs CHANGED
@@ -1,5 +1,5 @@
1
- import { At as evalStatAggregateSchema, Et as buildEvalKey, Mt as manualInputDescriptorSchema, Nt as evalChartsConfigSchema, Pt as columnDefSchema, R as configureEvalRunLogs, St as runSummarySchema, jt as evalStatsConfigSchema, l as registerAgentEvalsPackageResolutionHooks, nt as createRunRequestSchema, p as loadConfig, v as createFsCacheStore, xt as runManifestSchema, y as getCacheRetentionOptions } from "./runExecution-BMnJXWhN.mjs";
2
- import { S as parseEvalDiscovery, m as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-CvbTAoEb.mjs";
1
+ import { At as evalStatAggregateSchema, Et as buildEvalKey, Mt as manualInputDescriptorSchema, Nt as evalChartsConfigSchema, Pt as columnDefSchema, R as configureEvalRunLogs, St as runSummarySchema, jt as evalStatsConfigSchema, l as registerAgentEvalsPackageResolutionHooks, nt as createRunRequestSchema, p as loadConfig, v as createFsCacheStore, xt as runManifestSchema, y as getCacheRetentionOptions } from "./runExecution-C3XVZHRC.mjs";
2
+ import { S as parseEvalDiscovery, m as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-B5An-AEi.mjs";
3
3
  import { z } from "zod/v4";
4
4
  import { readFile } from "node:fs/promises";
5
5
  import { relative } from "node:path";