agent-duelist 0.2.1 → 0.3.1

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -25,9 +25,11 @@ interface TaskInput {
25
25
  schema?: ZodSchema;
26
26
  tools?: ToolDefinition[];
27
27
  signal?: AbortSignal;
28
+ /** Per-request timeout in ms, forwarded to SDK HTTP clients. */
29
+ timeout?: number;
28
30
  }
29
31
  interface TaskResult {
30
- output: string | Record<string, unknown>;
32
+ output: string | Record<string, unknown> | unknown[];
31
33
  usage?: {
32
34
  promptTokens?: number;
33
35
  completionTokens?: number;
@@ -62,7 +64,7 @@ interface BenchmarkResult {
62
64
  scores: ScoreResult[];
63
65
  error?: string;
64
66
  raw: {
65
- output: string | Record<string, unknown>;
67
+ output: string | Record<string, unknown> | unknown[];
66
68
  latencyMs: number;
67
69
  usage?: {
68
70
  promptTokens?: number;
@@ -198,6 +200,29 @@ declare function markdownReporter(report: CiReport, _current: BenchmarkResult[])
198
200
 
199
201
  declare function htmlReporter(results: BenchmarkResult[]): string;
200
202
 
203
+ interface TaskPack {
204
+ /** Short identifier, e.g. 'structured-output' */
205
+ name: string;
206
+ /** Human-readable label for console output */
207
+ label: string;
208
+ /** One-sentence description shown in --pack list */
209
+ description: string;
210
+ /** The tasks in this pack */
211
+ tasks: ArenaTask[];
212
+ /** Recommended scorers for this pack */
213
+ scorers: BuiltInScorerName[];
214
+ }
215
+
216
+ /** Get a pack by name. Throws if not found. */
217
+ declare function loadPack(name: string): TaskPack;
218
+ /** Get all available pack names */
219
+ declare function listPacks(): Array<{
220
+ name: string;
221
+ label: string;
222
+ description: string;
223
+ taskCount: number;
224
+ }>;
225
+
201
226
  interface GitHubContext {
202
227
  token: string;
203
228
  owner: string;
@@ -207,4 +232,4 @@ interface GitHubContext {
207
232
  declare function detectGitHubContext(): GitHubContext | null;
208
233
  declare function upsertPrComment(ctx: GitHubContext, body: string, marker: string): Promise<void>;
209
234
 
210
- export { type Arena, type ArenaConfig, type ArenaProvider, type ArenaTask, type BenchmarkResult, type BuiltInScorerName, type CiReport, type CostSummary, type ScoreResult, type ScorerComparison, type ScorerFn, type ScorerStats, type TaskInput, type TaskResult, type ToolCall, type ToolDefinition, anthropic, azureOpenai, compareResults, computeStats, consoleReporter, defineArena, detectGitHubContext, gemini, htmlReporter, jsonReporter, loadBaseline, markdownReporter, openai, openaiCompatible, registerPricing, saveBaseline, upsertPrComment };
235
+ export { type Arena, type ArenaConfig, type ArenaProvider, type ArenaTask, type BenchmarkResult, type BuiltInScorerName, type CiReport, type CostSummary, type ScoreResult, type ScorerComparison, type ScorerFn, type ScorerStats, type TaskInput, type TaskPack, type TaskResult, type ToolCall, type ToolDefinition, anthropic, azureOpenai, compareResults, computeStats, consoleReporter, defineArena, detectGitHubContext, gemini, htmlReporter, jsonReporter, listPacks, loadBaseline, loadPack, markdownReporter, openai, openaiCompatible, registerPricing, saveBaseline, upsertPrComment };
package/dist/index.d.ts CHANGED
@@ -25,9 +25,11 @@ interface TaskInput {
25
25
  schema?: ZodSchema;
26
26
  tools?: ToolDefinition[];
27
27
  signal?: AbortSignal;
28
+ /** Per-request timeout in ms, forwarded to SDK HTTP clients. */
29
+ timeout?: number;
28
30
  }
29
31
  interface TaskResult {
30
- output: string | Record<string, unknown>;
32
+ output: string | Record<string, unknown> | unknown[];
31
33
  usage?: {
32
34
  promptTokens?: number;
33
35
  completionTokens?: number;
@@ -62,7 +64,7 @@ interface BenchmarkResult {
62
64
  scores: ScoreResult[];
63
65
  error?: string;
64
66
  raw: {
65
- output: string | Record<string, unknown>;
67
+ output: string | Record<string, unknown> | unknown[];
66
68
  latencyMs: number;
67
69
  usage?: {
68
70
  promptTokens?: number;
@@ -198,6 +200,29 @@ declare function markdownReporter(report: CiReport, _current: BenchmarkResult[])
198
200
 
199
201
  declare function htmlReporter(results: BenchmarkResult[]): string;
200
202
 
203
+ interface TaskPack {
204
+ /** Short identifier, e.g. 'structured-output' */
205
+ name: string;
206
+ /** Human-readable label for console output */
207
+ label: string;
208
+ /** One-sentence description shown in --pack list */
209
+ description: string;
210
+ /** The tasks in this pack */
211
+ tasks: ArenaTask[];
212
+ /** Recommended scorers for this pack */
213
+ scorers: BuiltInScorerName[];
214
+ }
215
+
216
+ /** Get a pack by name. Throws if not found. */
217
+ declare function loadPack(name: string): TaskPack;
218
+ /** Get all available pack names */
219
+ declare function listPacks(): Array<{
220
+ name: string;
221
+ label: string;
222
+ description: string;
223
+ taskCount: number;
224
+ }>;
225
+
201
226
  interface GitHubContext {
202
227
  token: string;
203
228
  owner: string;
@@ -207,4 +232,4 @@ interface GitHubContext {
207
232
  declare function detectGitHubContext(): GitHubContext | null;
208
233
  declare function upsertPrComment(ctx: GitHubContext, body: string, marker: string): Promise<void>;
209
234
 
210
- export { type Arena, type ArenaConfig, type ArenaProvider, type ArenaTask, type BenchmarkResult, type BuiltInScorerName, type CiReport, type CostSummary, type ScoreResult, type ScorerComparison, type ScorerFn, type ScorerStats, type TaskInput, type TaskResult, type ToolCall, type ToolDefinition, anthropic, azureOpenai, compareResults, computeStats, consoleReporter, defineArena, detectGitHubContext, gemini, htmlReporter, jsonReporter, loadBaseline, markdownReporter, openai, openaiCompatible, registerPricing, saveBaseline, upsertPrComment };
235
+ export { type Arena, type ArenaConfig, type ArenaProvider, type ArenaTask, type BenchmarkResult, type BuiltInScorerName, type CiReport, type CostSummary, type ScoreResult, type ScorerComparison, type ScorerFn, type ScorerStats, type TaskInput, type TaskPack, type TaskResult, type ToolCall, type ToolDefinition, anthropic, azureOpenai, compareResults, computeStats, consoleReporter, defineArena, detectGitHubContext, gemini, htmlReporter, jsonReporter, listPacks, loadBaseline, loadPack, markdownReporter, openai, openaiCompatible, registerPricing, saveBaseline, upsertPrComment };