agent-duelist 0.2.0 → 0.3.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -25,9 +25,11 @@ interface TaskInput {
25
25
  schema?: ZodSchema;
26
26
  tools?: ToolDefinition[];
27
27
  signal?: AbortSignal;
28
+ /** Per-request timeout in ms, forwarded to SDK HTTP clients. */
29
+ timeout?: number;
28
30
  }
29
31
  interface TaskResult {
30
- output: string | Record<string, unknown>;
32
+ output: string | Record<string, unknown> | unknown[];
31
33
  usage?: {
32
34
  promptTokens?: number;
33
35
  completionTokens?: number;
@@ -62,7 +64,7 @@ interface BenchmarkResult {
62
64
  scores: ScoreResult[];
63
65
  error?: string;
64
66
  raw: {
65
- output: string | Record<string, unknown>;
67
+ output: string | Record<string, unknown> | unknown[];
66
68
  latencyMs: number;
67
69
  usage?: {
68
70
  promptTokens?: number;
@@ -135,6 +137,11 @@ declare function openaiCompatible(options: OpenAICompatibleOptions): ArenaProvid
135
137
  * Used as the deployment name unless `options.deployment` overrides it.
136
138
  */
137
139
  declare function azureOpenai(model: string, options?: AzureOpenAIProviderOptions): ArenaProvider;
140
+ interface GeminiProviderOptions {
141
+ apiKey?: string;
142
+ timeoutMs?: number;
143
+ }
144
+ declare function gemini(model: string, options?: GeminiProviderOptions): ArenaProvider;
138
145
 
139
146
  interface AnthropicProviderOptions {
140
147
  apiKey?: string;
@@ -142,12 +149,6 @@ interface AnthropicProviderOptions {
142
149
  }
143
150
  declare function anthropic(model: string, options?: AnthropicProviderOptions): ArenaProvider;
144
151
 
145
- interface GeminiProviderOptions {
146
- apiKey?: string;
147
- timeoutMs?: number;
148
- }
149
- declare function gemini(model: string, options?: GeminiProviderOptions): ArenaProvider;
150
-
151
152
  interface ModelPricing {
152
153
  inputPerToken: number;
153
154
  outputPerToken: number;
@@ -197,6 +198,31 @@ declare function saveBaseline(path: string, results: BenchmarkResult[]): void;
197
198
 
198
199
  declare function markdownReporter(report: CiReport, _current: BenchmarkResult[]): string;
199
200
 
201
+ declare function htmlReporter(results: BenchmarkResult[]): string;
202
+
203
+ interface TaskPack {
204
+ /** Short identifier, e.g. 'structured-output' */
205
+ name: string;
206
+ /** Human-readable label for console output */
207
+ label: string;
208
+ /** One-sentence description shown in --pack list */
209
+ description: string;
210
+ /** The tasks in this pack */
211
+ tasks: ArenaTask[];
212
+ /** Recommended scorers for this pack */
213
+ scorers: BuiltInScorerName[];
214
+ }
215
+
216
+ /** Get a pack by name. Throws if not found. */
217
+ declare function loadPack(name: string): TaskPack;
218
+ /** List all available packs as summaries (name, label, description, task count). */
219
+ declare function listPacks(): Array<{
220
+ name: string;
221
+ label: string;
222
+ description: string;
223
+ taskCount: number;
224
+ }>;
225
+
200
226
  interface GitHubContext {
201
227
  token: string;
202
228
  owner: string;
@@ -206,4 +232,4 @@ interface GitHubContext {
206
232
  declare function detectGitHubContext(): GitHubContext | null;
207
233
  declare function upsertPrComment(ctx: GitHubContext, body: string, marker: string): Promise<void>;
208
234
 
209
- export { type Arena, type ArenaConfig, type ArenaProvider, type ArenaTask, type BenchmarkResult, type BuiltInScorerName, type CiReport, type CostSummary, type ScoreResult, type ScorerComparison, type ScorerFn, type ScorerStats, type TaskInput, type TaskResult, type ToolCall, type ToolDefinition, anthropic, azureOpenai, compareResults, computeStats, consoleReporter, defineArena, detectGitHubContext, gemini, jsonReporter, loadBaseline, markdownReporter, openai, openaiCompatible, registerPricing, saveBaseline, upsertPrComment };
235
+ export { type Arena, type ArenaConfig, type ArenaProvider, type ArenaTask, type BenchmarkResult, type BuiltInScorerName, type CiReport, type CostSummary, type ScoreResult, type ScorerComparison, type ScorerFn, type ScorerStats, type TaskInput, type TaskPack, type TaskResult, type ToolCall, type ToolDefinition, anthropic, azureOpenai, compareResults, computeStats, consoleReporter, defineArena, detectGitHubContext, gemini, htmlReporter, jsonReporter, listPacks, loadBaseline, loadPack, markdownReporter, openai, openaiCompatible, registerPricing, saveBaseline, upsertPrComment };
package/dist/index.d.ts CHANGED
@@ -25,9 +25,11 @@ interface TaskInput {
25
25
  schema?: ZodSchema;
26
26
  tools?: ToolDefinition[];
27
27
  signal?: AbortSignal;
28
+ /** Per-request timeout in ms, forwarded to SDK HTTP clients. */
29
+ timeout?: number;
28
30
  }
29
31
  interface TaskResult {
30
- output: string | Record<string, unknown>;
32
+ output: string | Record<string, unknown> | unknown[];
31
33
  usage?: {
32
34
  promptTokens?: number;
33
35
  completionTokens?: number;
@@ -62,7 +64,7 @@ interface BenchmarkResult {
62
64
  scores: ScoreResult[];
63
65
  error?: string;
64
66
  raw: {
65
- output: string | Record<string, unknown>;
67
+ output: string | Record<string, unknown> | unknown[];
66
68
  latencyMs: number;
67
69
  usage?: {
68
70
  promptTokens?: number;
@@ -135,6 +137,11 @@ declare function openaiCompatible(options: OpenAICompatibleOptions): ArenaProvid
135
137
  * Used as the deployment name unless `options.deployment` overrides it.
136
138
  */
137
139
  declare function azureOpenai(model: string, options?: AzureOpenAIProviderOptions): ArenaProvider;
140
+ interface GeminiProviderOptions {
141
+ apiKey?: string;
142
+ timeoutMs?: number;
143
+ }
144
+ declare function gemini(model: string, options?: GeminiProviderOptions): ArenaProvider;
138
145
 
139
146
  interface AnthropicProviderOptions {
140
147
  apiKey?: string;
@@ -142,12 +149,6 @@ interface AnthropicProviderOptions {
142
149
  }
143
150
  declare function anthropic(model: string, options?: AnthropicProviderOptions): ArenaProvider;
144
151
 
145
- interface GeminiProviderOptions {
146
- apiKey?: string;
147
- timeoutMs?: number;
148
- }
149
- declare function gemini(model: string, options?: GeminiProviderOptions): ArenaProvider;
150
-
151
152
  interface ModelPricing {
152
153
  inputPerToken: number;
153
154
  outputPerToken: number;
@@ -197,6 +198,31 @@ declare function saveBaseline(path: string, results: BenchmarkResult[]): void;
197
198
 
198
199
  declare function markdownReporter(report: CiReport, _current: BenchmarkResult[]): string;
199
200
 
201
+ declare function htmlReporter(results: BenchmarkResult[]): string;
202
+
203
+ interface TaskPack {
204
+ /** Short identifier, e.g. 'structured-output' */
205
+ name: string;
206
+ /** Human-readable label for console output */
207
+ label: string;
208
+ /** One-sentence description shown in --pack list */
209
+ description: string;
210
+ /** The tasks in this pack */
211
+ tasks: ArenaTask[];
212
+ /** Recommended scorers for this pack */
213
+ scorers: BuiltInScorerName[];
214
+ }
215
+
216
+ /** Get a pack by name. Throws if not found. */
217
+ declare function loadPack(name: string): TaskPack;
218
+ /** List all available packs as summaries (name, label, description, task count). */
219
+ declare function listPacks(): Array<{
220
+ name: string;
221
+ label: string;
222
+ description: string;
223
+ taskCount: number;
224
+ }>;
225
+
200
226
  interface GitHubContext {
201
227
  token: string;
202
228
  owner: string;
@@ -206,4 +232,4 @@ interface GitHubContext {
206
232
  declare function detectGitHubContext(): GitHubContext | null;
207
233
  declare function upsertPrComment(ctx: GitHubContext, body: string, marker: string): Promise<void>;
208
234
 
209
- export { type Arena, type ArenaConfig, type ArenaProvider, type ArenaTask, type BenchmarkResult, type BuiltInScorerName, type CiReport, type CostSummary, type ScoreResult, type ScorerComparison, type ScorerFn, type ScorerStats, type TaskInput, type TaskResult, type ToolCall, type ToolDefinition, anthropic, azureOpenai, compareResults, computeStats, consoleReporter, defineArena, detectGitHubContext, gemini, jsonReporter, loadBaseline, markdownReporter, openai, openaiCompatible, registerPricing, saveBaseline, upsertPrComment };
235
+ export { type Arena, type ArenaConfig, type ArenaProvider, type ArenaTask, type BenchmarkResult, type BuiltInScorerName, type CiReport, type CostSummary, type ScoreResult, type ScorerComparison, type ScorerFn, type ScorerStats, type TaskInput, type TaskPack, type TaskResult, type ToolCall, type ToolDefinition, anthropic, azureOpenai, compareResults, computeStats, consoleReporter, defineArena, detectGitHubContext, gemini, htmlReporter, jsonReporter, listPacks, loadBaseline, loadPack, markdownReporter, openai, openaiCompatible, registerPricing, saveBaseline, upsertPrComment };