@ai-sdk-tool/eval 0.1.8 → 1.0.0-canary.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -3,7 +3,7 @@ import { LanguageModel } from 'ai';
 /**
  * The result of a single benchmark run.
  */
-interface BenchmarkResult {
+type BenchmarkResult = {
     /**
      * A numeric score for the benchmark.
      * The interpretation of this score is up to the benchmark author.
@@ -27,11 +27,11 @@ interface BenchmarkResult {
      * An error object if the benchmark failed unexpectedly.
      */
     error?: Error;
-}
+};
 /**
  * The interface for defining a language model benchmark.
  */
-interface LanguageModelV2Benchmark {
+type LanguageModelV3Benchmark = {
     /**
      * A unique name for the benchmark.
      */
@@ -51,7 +51,7 @@ interface LanguageModelV2Benchmark {
      * @returns A promise that resolves to a BenchmarkResult.
      */
     run(model: LanguageModel, config?: Record<string, unknown>): Promise<BenchmarkResult>;
-}
+};
 /**
  * The supported reporter types.
  */
@@ -60,17 +60,17 @@ type ReporterType = "console" | "json" | "console.debug";
  * The full result object for an evaluation run,
  * containing results for all model-benchmark combinations.
  */
-interface EvaluationResult {
+type EvaluationResult = {
     model: string;
     /** Optional user-provided key when models are passed as a keyed map */
     modelKey?: string;
     benchmark: string;
     result: BenchmarkResult;
-}
+};
 /**
  * Options for the `evaluate` function.
  */
-interface EvaluateOptions {
+type EvaluateOptions = {
     /**
      * The language model or models to evaluate.
      */
@@ -78,7 +78,7 @@ interface EvaluateOptions {
     /**
      * An array of benchmarks to run against the models.
      */
-    benchmarks: LanguageModelV2Benchmark[];
+    benchmarks: LanguageModelV3Benchmark[];
     /**
      * The reporter to use for displaying results.
      * Defaults to 'console'.
@@ -92,16 +92,16 @@ interface EvaluateOptions {
      * Optional maximum number of tokens to generate during evaluation.
      */
     maxTokens?: number;
-}
+};
 
-declare function evaluate(options: EvaluateOptions): Promise<EvaluationResult[]>;
+declare const bfclSimpleBenchmark: LanguageModelV3Benchmark;
+declare const bfclParallelBenchmark: LanguageModelV3Benchmark;
+declare const bfclMultipleBenchmark: LanguageModelV3Benchmark;
+declare const bfclParallelMultipleBenchmark: LanguageModelV3Benchmark;
 
-declare const bfclSimpleBenchmark: LanguageModelV2Benchmark;
-declare const bfclParallelBenchmark: LanguageModelV2Benchmark;
-declare const bfclMultipleBenchmark: LanguageModelV2Benchmark;
-declare const bfclParallelMultipleBenchmark: LanguageModelV2Benchmark;
+declare const jsonGenerationBenchmark: LanguageModelV3Benchmark;
+declare const jsonGenerationSchemaOnlyBenchmark: LanguageModelV3Benchmark;
 
-declare const jsonGenerationBenchmark: LanguageModelV2Benchmark;
-declare const jsonGenerationSchemaOnlyBenchmark: LanguageModelV2Benchmark;
+declare function evaluate(options: EvaluateOptions): Promise<EvaluationResult[]>;
 
-export { type BenchmarkResult, type EvaluateOptions, type LanguageModelV2Benchmark, type ReporterType, bfclMultipleBenchmark, bfclParallelBenchmark, bfclParallelMultipleBenchmark, bfclSimpleBenchmark, evaluate, jsonGenerationBenchmark, jsonGenerationSchemaOnlyBenchmark };
+export { type BenchmarkResult, type EvaluateOptions, type LanguageModelV3Benchmark, type ReporterType, bfclMultipleBenchmark, bfclParallelBenchmark, bfclParallelMultipleBenchmark, bfclSimpleBenchmark, evaluate, jsonGenerationBenchmark, jsonGenerationSchemaOnlyBenchmark };
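
For consumers, the substantive changes in this diff are the rename of LanguageModelV2Benchmark to LanguageModelV3Benchmark and the move from interface declarations to type aliases, which also rules out declaration merging (augment via intersection types instead). Below is a minimal sketch of a custom benchmark written against the new declarations. It assumes the members visible in the hunks above (name and run on the benchmark; score and error on the result) are sufficient, since the diff elides the middle of both types, and the prompt and scoring logic are purely illustrative.

import { generateText, type LanguageModel } from 'ai';
import type { BenchmarkResult, LanguageModelV3Benchmark } from '@ai-sdk-tool/eval';

// Hypothetical custom benchmark. Only members visible in the diff hunks are
// used; the full type may declare additional fields elided above.
const echoBenchmark: LanguageModelV3Benchmark = {
  name: 'echo-compliance',
  async run(model: LanguageModel, config?: Record<string, unknown>): Promise<BenchmarkResult> {
    try {
      const { text } = await generateText({
        model,
        prompt: 'Reply with exactly the word "pong".',
      });
      // The interpretation of the score is up to the benchmark author, per the JSDoc.
      return { score: text.trim().toLowerCase() === 'pong' ? 1 : 0 };
    } catch (error) {
      // `error` carries unexpected failures, matching the optional field above.
      return { score: 0, error: error as Error };
    }
  },
};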
package/dist/index.d.ts CHANGED
Identical changes to package/dist/index.d.cts above (the ESM declaration file mirrors the CJS one).
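
Call sites of evaluate are unaffected by this version bump: only the order of the declarations changed, not the signature. A hedged usage sketch follows; the models and reporter option names are assumptions (the diff shows their JSDoc but elides the property lines themselves), and the model id is a placeholder.

import { openai } from '@ai-sdk/openai';
import { evaluate, bfclSimpleBenchmark, jsonGenerationBenchmark } from '@ai-sdk-tool/eval';

const results = await evaluate({
  // Option name assumed from the JSDoc ("the language model or models to
  // evaluate"); a keyed map populates `modelKey` on each result.
  models: { 'gpt-4o-mini': openai('gpt-4o-mini') },
  benchmarks: [bfclSimpleBenchmark, jsonGenerationBenchmark],
  // Option name assumed; ReporterType is "console" | "json" | "console.debug".
  reporter: 'console',
  maxTokens: 1024,
});

for (const { modelKey, model, benchmark, result } of results) {
  console.log(`${modelKey ?? model} / ${benchmark}: score=${result.score}`);
}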