npm - agent-duelist - Versions diffs - 0.2.0 → 0.3.0 - Mend

agent-duelist 0.2.0 → 0.3.0

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (10)

package/dist/index.d.cts CHANGED Viewed

@@ -25,9 +25,11 @@ interface TaskInput {
     schema?: ZodSchema;
     tools?: ToolDefinition[];
     signal?: AbortSignal;
+    /** Per-request timeout in ms, forwarded to SDK HTTP clients. */
+    timeout?: number;
 }
 interface TaskResult {
-    output: string | Record<string, unknown>;
+    output: string | Record<string, unknown> | unknown[];
     usage?: {
         promptTokens?: number;
         completionTokens?: number;
@@ -62,7 +64,7 @@ interface BenchmarkResult {
     scores: ScoreResult[];
     error?: string;
     raw: {
-        output: string | Record<string, unknown>;
+        output: string | Record<string, unknown> | unknown[];
         latencyMs: number;
         usage?: {
             promptTokens?: number;
@@ -135,6 +137,11 @@ declare function openaiCompatible(options: OpenAICompatibleOptions): ArenaProvid
  *   Used as the deployment name unless `options.deployment` overrides it.
  */
 declare function azureOpenai(model: string, options?: AzureOpenAIProviderOptions): ArenaProvider;
+interface GeminiProviderOptions {
+    apiKey?: string;
+    timeoutMs?: number;
+}
+declare function gemini(model: string, options?: GeminiProviderOptions): ArenaProvider;
 interface AnthropicProviderOptions {
     apiKey?: string;
@@ -142,12 +149,6 @@ interface AnthropicProviderOptions {
 }
 declare function anthropic(model: string, options?: AnthropicProviderOptions): ArenaProvider;
-interface GeminiProviderOptions {
-    apiKey?: string;
-    timeoutMs?: number;
-}
-declare function gemini(model: string, options?: GeminiProviderOptions): ArenaProvider;
 interface ModelPricing {
     inputPerToken: number;
     outputPerToken: number;
@@ -197,6 +198,31 @@ declare function saveBaseline(path: string, results: BenchmarkResult[]): void;
 declare function markdownReporter(report: CiReport, _current: BenchmarkResult[]): string;
+declare function htmlReporter(results: BenchmarkResult[]): string;
+interface TaskPack {
+    /** Short identifier, e.g. 'structured-output' */
+    name: string;
+    /** Human-readable label for console output */
+    label: string;
+    /** One-sentence description shown in --pack list */
+    description: string;
+    /** The tasks in this pack */
+    tasks: ArenaTask[];
+    /** Recommended scorers for this pack */
+    scorers: BuiltInScorerName[];
+}
+/** Get a pack by name. Throws if not found. */
+declare function loadPack(name: string): TaskPack;
+/** Get all available pack names */
+declare function listPacks(): Array<{
+    name: string;
+    label: string;
+    description: string;
+    taskCount: number;
+}>;
 interface GitHubContext {
     token: string;
     owner: string;
@@ -206,4 +232,4 @@ interface GitHubContext {
 declare function detectGitHubContext(): GitHubContext | null;
 declare function upsertPrComment(ctx: GitHubContext, body: string, marker: string): Promise<void>;
-export { type Arena, type ArenaConfig, type ArenaProvider, type ArenaTask, type BenchmarkResult, type BuiltInScorerName, type CiReport, type CostSummary, type ScoreResult, type ScorerComparison, type ScorerFn, type ScorerStats, type TaskInput, type TaskResult, type ToolCall, type ToolDefinition, anthropic, azureOpenai, compareResults, computeStats, consoleReporter, defineArena, detectGitHubContext, gemini, jsonReporter, loadBaseline, markdownReporter, openai, openaiCompatible, registerPricing, saveBaseline, upsertPrComment };
+export { type Arena, type ArenaConfig, type ArenaProvider, type ArenaTask, type BenchmarkResult, type BuiltInScorerName, type CiReport, type CostSummary, type ScoreResult, type ScorerComparison, type ScorerFn, type ScorerStats, type TaskInput, type TaskPack, type TaskResult, type ToolCall, type ToolDefinition, anthropic, azureOpenai, compareResults, computeStats, consoleReporter, defineArena, detectGitHubContext, gemini, htmlReporter, jsonReporter, listPacks, loadBaseline, loadPack, markdownReporter, openai, openaiCompatible, registerPricing, saveBaseline, upsertPrComment };

package/dist/index.d.ts CHANGED Viewed

@@ -25,9 +25,11 @@ interface TaskInput {
     schema?: ZodSchema;
     tools?: ToolDefinition[];
     signal?: AbortSignal;
+    /** Per-request timeout in ms, forwarded to SDK HTTP clients. */
+    timeout?: number;
 }
 interface TaskResult {
-    output: string | Record<string, unknown>;
+    output: string | Record<string, unknown> | unknown[];
     usage?: {
         promptTokens?: number;
         completionTokens?: number;
@@ -62,7 +64,7 @@ interface BenchmarkResult {
     scores: ScoreResult[];
     error?: string;
     raw: {
-        output: string | Record<string, unknown>;
+        output: string | Record<string, unknown> | unknown[];
         latencyMs: number;
         usage?: {
             promptTokens?: number;
@@ -135,6 +137,11 @@ declare function openaiCompatible(options: OpenAICompatibleOptions): ArenaProvid
  *   Used as the deployment name unless `options.deployment` overrides it.
  */
 declare function azureOpenai(model: string, options?: AzureOpenAIProviderOptions): ArenaProvider;
+interface GeminiProviderOptions {
+    apiKey?: string;
+    timeoutMs?: number;
+}
+declare function gemini(model: string, options?: GeminiProviderOptions): ArenaProvider;
 interface AnthropicProviderOptions {
     apiKey?: string;
@@ -142,12 +149,6 @@ interface AnthropicProviderOptions {
 }
 declare function anthropic(model: string, options?: AnthropicProviderOptions): ArenaProvider;
-interface GeminiProviderOptions {
-    apiKey?: string;
-    timeoutMs?: number;
-}
-declare function gemini(model: string, options?: GeminiProviderOptions): ArenaProvider;
 interface ModelPricing {
     inputPerToken: number;
     outputPerToken: number;
@@ -197,6 +198,31 @@ declare function saveBaseline(path: string, results: BenchmarkResult[]): void;
 declare function markdownReporter(report: CiReport, _current: BenchmarkResult[]): string;
+declare function htmlReporter(results: BenchmarkResult[]): string;
+interface TaskPack {
+    /** Short identifier, e.g. 'structured-output' */
+    name: string;
+    /** Human-readable label for console output */
+    label: string;
+    /** One-sentence description shown in --pack list */
+    description: string;
+    /** The tasks in this pack */
+    tasks: ArenaTask[];
+    /** Recommended scorers for this pack */
+    scorers: BuiltInScorerName[];
+}
+/** Get a pack by name. Throws if not found. */
+declare function loadPack(name: string): TaskPack;
+/** Get all available pack names */
+declare function listPacks(): Array<{
+    name: string;
+    label: string;
+    description: string;
+    taskCount: number;
+}>;
 interface GitHubContext {
     token: string;
     owner: string;
@@ -206,4 +232,4 @@ interface GitHubContext {
 declare function detectGitHubContext(): GitHubContext | null;
 declare function upsertPrComment(ctx: GitHubContext, body: string, marker: string): Promise<void>;
-export { type Arena, type ArenaConfig, type ArenaProvider, type ArenaTask, type BenchmarkResult, type BuiltInScorerName, type CiReport, type CostSummary, type ScoreResult, type ScorerComparison, type ScorerFn, type ScorerStats, type TaskInput, type TaskResult, type ToolCall, type ToolDefinition, anthropic, azureOpenai, compareResults, computeStats, consoleReporter, defineArena, detectGitHubContext, gemini, jsonReporter, loadBaseline, markdownReporter, openai, openaiCompatible, registerPricing, saveBaseline, upsertPrComment };
+export { type Arena, type ArenaConfig, type ArenaProvider, type ArenaTask, type BenchmarkResult, type BuiltInScorerName, type CiReport, type CostSummary, type ScoreResult, type ScorerComparison, type ScorerFn, type ScorerStats, type TaskInput, type TaskPack, type TaskResult, type ToolCall, type ToolDefinition, anthropic, azureOpenai, compareResults, computeStats, consoleReporter, defineArena, detectGitHubContext, gemini, htmlReporter, jsonReporter, listPacks, loadBaseline, loadPack, markdownReporter, openai, openaiCompatible, registerPricing, saveBaseline, upsertPrComment };