npm - outcome-cli - Versions diffs - 1.0.0 - Mend

outcome-cli 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (113) hide show

package/README.md +261 -0
package/package.json +95 -0
package/src/agents/README.md +139 -0
package/src/agents/adapters/anthropic.adapter.ts +166 -0
package/src/agents/adapters/dalle.adapter.ts +145 -0
package/src/agents/adapters/gemini.adapter.ts +134 -0
package/src/agents/adapters/imagen.adapter.ts +106 -0
package/src/agents/adapters/nano-banana.adapter.ts +129 -0
package/src/agents/adapters/openai.adapter.ts +165 -0
package/src/agents/adapters/veo.adapter.ts +130 -0
package/src/agents/agent.schema.property.test.ts +379 -0
package/src/agents/agent.schema.test.ts +148 -0
package/src/agents/agent.schema.ts +263 -0
package/src/agents/index.ts +60 -0
package/src/agents/registered-agent.schema.ts +356 -0
package/src/agents/registry.ts +97 -0
package/src/agents/tournament-configs.property.test.ts +266 -0
package/src/cli/README.md +145 -0
package/src/cli/commands/define.ts +79 -0
package/src/cli/commands/list.ts +46 -0
package/src/cli/commands/logs.ts +83 -0
package/src/cli/commands/run.ts +416 -0
package/src/cli/commands/verify.ts +110 -0
package/src/cli/index.ts +81 -0
package/src/config/README.md +128 -0
package/src/config/env.ts +262 -0
package/src/config/index.ts +19 -0
package/src/eval/README.md +318 -0
package/src/eval/ai-judge.test.ts +435 -0
package/src/eval/ai-judge.ts +368 -0
package/src/eval/code-validators.ts +414 -0
package/src/eval/evaluateOutcome.property.test.ts +1174 -0
package/src/eval/evaluateOutcome.ts +591 -0
package/src/eval/immigration-validators.ts +122 -0
package/src/eval/index.ts +90 -0
package/src/eval/judge-cache.ts +402 -0
package/src/eval/tournament-validators.property.test.ts +439 -0
package/src/eval/validators.property.test.ts +1118 -0
package/src/eval/validators.ts +1199 -0
package/src/eval/weighted-scorer.ts +285 -0
package/src/index.ts +17 -0
package/src/league/README.md +188 -0
package/src/league/health-check.ts +353 -0
package/src/league/index.ts +93 -0
package/src/league/killAgent.ts +151 -0
package/src/league/league.test.ts +1151 -0
package/src/league/runLeague.ts +843 -0
package/src/league/scoreAgent.ts +175 -0
package/src/modules/omnibridge/__tests__/.gitkeep +1 -0
package/src/modules/omnibridge/__tests__/auth-tunnel.property.test.ts +524 -0
package/src/modules/omnibridge/__tests__/deterministic-logger.property.test.ts +965 -0
package/src/modules/omnibridge/__tests__/ghost-api.property.test.ts +461 -0
package/src/modules/omnibridge/__tests__/omnibridge-integration.test.ts +542 -0
package/src/modules/omnibridge/__tests__/parallel-executor.property.test.ts +671 -0
package/src/modules/omnibridge/__tests__/semantic-normalizer.property.test.ts +521 -0
package/src/modules/omnibridge/__tests__/semantic-normalizer.test.ts +254 -0
package/src/modules/omnibridge/__tests__/session-vault.property.test.ts +367 -0
package/src/modules/omnibridge/__tests__/shadow-session.property.test.ts +523 -0
package/src/modules/omnibridge/__tests__/triangulation-engine.property.test.ts +292 -0
package/src/modules/omnibridge/__tests__/verification-engine.property.test.ts +769 -0
package/src/modules/omnibridge/api/.gitkeep +1 -0
package/src/modules/omnibridge/api/ghost-api.ts +1087 -0
package/src/modules/omnibridge/auth/.gitkeep +1 -0
package/src/modules/omnibridge/auth/auth-tunnel.ts +843 -0
package/src/modules/omnibridge/auth/session-vault.ts +577 -0
package/src/modules/omnibridge/core/.gitkeep +1 -0
package/src/modules/omnibridge/core/semantic-normalizer.ts +702 -0
package/src/modules/omnibridge/core/triangulation-engine.ts +530 -0
package/src/modules/omnibridge/core/types.ts +610 -0
package/src/modules/omnibridge/execution/.gitkeep +1 -0
package/src/modules/omnibridge/execution/deterministic-logger.ts +629 -0
package/src/modules/omnibridge/execution/parallel-executor.ts +542 -0
package/src/modules/omnibridge/execution/shadow-session.ts +794 -0
package/src/modules/omnibridge/index.ts +212 -0
package/src/modules/omnibridge/omnibridge.ts +510 -0
package/src/modules/omnibridge/verification/.gitkeep +1 -0
package/src/modules/omnibridge/verification/verification-engine.ts +783 -0
package/src/outcomes/README.md +75 -0
package/src/outcomes/acquire-pilot-customer.ts +297 -0
package/src/outcomes/code-delivery-outcomes.ts +89 -0
package/src/outcomes/code-outcomes.ts +256 -0
package/src/outcomes/code_review_battle.test.ts +135 -0
package/src/outcomes/code_review_battle.ts +135 -0
package/src/outcomes/cold_email_battle.ts +97 -0
package/src/outcomes/content_creation_battle.ts +160 -0
package/src/outcomes/f1_stem_opt_compliance.ts +61 -0
package/src/outcomes/index.ts +107 -0
package/src/outcomes/lead_gen_battle.test.ts +113 -0
package/src/outcomes/lead_gen_battle.ts +99 -0
package/src/outcomes/outcome.schema.property.test.ts +229 -0
package/src/outcomes/outcome.schema.ts +187 -0
package/src/outcomes/qualified_sales_interest.ts +118 -0
package/src/outcomes/swarm_planner.property.test.ts +370 -0
package/src/outcomes/swarm_planner.ts +96 -0
package/src/outcomes/web_extraction.ts +234 -0
package/src/runtime/README.md +220 -0
package/src/runtime/agentRunner.test.ts +341 -0
package/src/runtime/agentRunner.ts +746 -0
package/src/runtime/claudeAdapter.ts +232 -0
package/src/runtime/costTracker.ts +123 -0
package/src/runtime/index.ts +34 -0
package/src/runtime/modelAdapter.property.test.ts +305 -0
package/src/runtime/modelAdapter.ts +144 -0
package/src/runtime/openaiAdapter.ts +235 -0
package/src/utils/README.md +122 -0
package/src/utils/command-runner.ts +134 -0
package/src/utils/cost-guard.ts +379 -0
package/src/utils/errors.test.ts +290 -0
package/src/utils/errors.ts +442 -0
package/src/utils/index.ts +37 -0
package/src/utils/logger.test.ts +361 -0
package/src/utils/logger.ts +419 -0
package/src/utils/output-parsers.ts +216 -0

package/src/league/runLeague.ts ADDED Viewed

@@ -0,0 +1,843 @@
+/**
+ * League Runner - Parallel agent competition orchestration
+ *
+ * Implements the league execution model where N agents compete in parallel
+ * to achieve an outcome. First agent to succeed wins the bounty.
+ *
+ * @module league/runLeague
+ * @see Requirements 4.1, 4.5, 10.3
+ */
+import { runCommand, CommandResult } from '../utils/command-runner';
+import {
+  parseTestOutput,
+  parseLintOutput,
+  parseBenchmarkOutput,
+  parseSecurityScanOutput,
+  TestResult,
+  LintResult,
+  BenchmarkResult,
+  SecurityScanResult,
+} from '../utils/output-parsers';
+import type { AgentConfig } from '../agents/agent.schema.js';
+import type { Outcome } from '../outcomes/outcome.schema.js';
+import type { Lead } from '../jobs/job.interface.js';
+import {
+  type AgentRun,
+  type AgentRunConfig,
+  runAgent,
+  runAgentMock,
+} from '../runtime/agentRunner.js';
+import { evaluateOutcome, type EvaluationResult } from '../eval/evaluateOutcome.js';
+import { calculateDawsCodeScore } from '../eval/weighted-scorer.js';
+import { CODE_DELIVERY_OUTCOMES } from '../outcomes/code-delivery-outcomes.js';
+import { logSuccess, logFailure } from '../utils/logger.js';
+import { analyticsCollector } from '../analytics/collector.js';
+import { dataCollector } from '../data/collector.js';
+import { launchMonitor } from '../monitoring/launch-monitor.js';
+/**
+ * Configuration for one league run: the contested outcome, how many agents compete, and the shared token budget.
+ *
+ * @see design.md - League System
+ */
+export interface LeagueConfig {
+  /** ID of the outcome to compete for; also embedded in the battle IDs and log entries emitted for the run */
+  outcomeId: string;
+  /** Number of agents to run in parallel; runLeague throws unless it is positive and at most agentConfigs.length */
+  agentCount: number;
+  /** Maximum total tokens across all agents; once the running total reaches it, agents' kill signals report true */
+  globalSpendCeiling: number;
+  /** Agent configurations to use; the first agentCount entries are assigned to agents in order */
+  agentConfigs: AgentConfig[];
+  /** Outcome definition that each completed agent's last artifact is evaluated against */
+  outcome: Outcome;
+  /** Lead data passed to every agent run */
+  lead: Lead;
+  /** Whether to launch agents with runAgentMock instead of runAgent (runLeague defaults this to false) */
+  mockMode?: boolean;
+  /** Optional API key forwarded to each agent run */
+  apiKey?: string;
+}
+/**
+ * Result of a single agent's participation in the league, as reported in LeagueResult.agents.
+ */
+export interface AgentResult {
+  /** League-scoped agent ID, built by runLeague as `<config id>-<slot index>` */
+  agentId: string;
+  /** Final status: 'winner' for the first agent evaluated as SUCCESS, 'killed' if its run was killed or a competitor had already won, otherwise 'failed' */
+  status: 'winner' | 'killed' | 'failed';
+  /** Reason for termination if killed: the run's own kill reason ('unknown' when it reported none) or 'competitor_won' */
+  killReason?: string;
+  /** Number of attempts reported by the agent run */
+  attempts: number;
+  /** Tokens spent by this agent's run */
+  tokensSpent: number;
+  /** Duration of the agent's run in milliseconds, as reported by the run */
+  durationMs: number;
+  /** Evaluation result; present only when the run completed with at least one artifact */
+  evaluationResult?: EvaluationResult;
+  /** DAWS score; computed only when the outcome name is listed in CODE_DELIVERY_OUTCOMES */
+  dawsScore?: number;
+}
+/**
+ * Tournament metrics for analyzing agent performance and profitability, as produced by calculateTournamentMetrics.
+ *
+ * @see Requirements 5.1, 5.2, 5.3, 5.4
+ */
+export interface TournamentMetrics {
+  /** Net profitability calculated as (payoutAmount - totalCost) / totalCost; 0 when totalCost is 0 */
+  netProfitability: number;
+  /** Cost per success calculated as totalSpend / successCount; Infinity when successCount is 0 */
+  costPerSuccess: number;
+  /** Number of successful agents in the tournament (runLeague counts results with status 'winner') */
+  successCount: number;
+}
+/**
+ * Result of a league run, returned by runLeague once every launched agent has settled.
+ *
+ * @see design.md - League System
+ */
+export interface LeagueResult {
+  /** ID of the winning agent (league-scoped, `<config id>-<slot index>`), null if no winner */
+  winnerId: string | null;
+  /** Results for all agents, in the order their runs were processed */
+  agents: AgentResult[];
+  /** Total tokens spent across all agents (sum of each run's tokensSpent) */
+  totalCost: number;
+  /** Total wall-clock duration of the league run in milliseconds */
+  duration: number;
+  /** Whether the summed token spend reached or exceeded the global spend ceiling */
+  globalCeilingHit: boolean;
+  /** Tournament metrics for profitability analysis */
+  tournamentMetrics: TournamentMetrics;
+}
+/**
+ * Internal bookkeeping record that runLeague creates for each agent it launches.
+ */
+interface RunningAgent {
+  agentId: string; // league-scoped ID: `<config id>-<slot index>`
+  config: AgentConfig; // the original agent config, with its un-suffixed id
+  promise: Promise<AgentRun>; // the in-flight agent run
+  startTime: number; // Date.now() captured right after the agent was launched
+  killed: boolean; // initialised to false; NOTE(review): runLeague never updates it
+  killReason?: string; // NOTE(review): never assigned by runLeague, possibly vestigial
+}
+/**
+ * Derives the five normalized component scores (GCR, QAS, CEF, DTT, RES) that
+ * are fed to the DAWS code scorer for a single agent run.
+ *
+ * - GCR: 1 when the evaluation status is 'SUCCESS', otherwise 0.
+ * - QAS: fraction of criteria that passed (0 when there are no criteria).
+ * - CEF: 1 - tokensSpent / spend ceiling, clamped to [0, 1].
+ * - DTT: 1 - runDurationMs / time limit, clamped to [0, 1].
+ * - RES: 1 - (attempts - 1) / max(1, attempts), clamped to [0, 1].
+ *
+ * @param params.evalResult - Evaluation result providing the status and per-criterion results
+ * @param params.runDurationMs - Duration of the run in milliseconds
+ * @param params.tokensSpent - Tokens spent by the run
+ * @param params.attempts - Number of attempts made
+ * @param params.outcomeTimeLimitMs - Time limit; falls back to 30 minutes when missing or not positive
+ * @param params.globalSpendCeiling - Spend ceiling; falls back to 50000 when missing or not positive
+ * @returns The five component scores
+ */
+function normalizeDawsScores(params: {
+  evalResult: EvaluationResult;
+  runDurationMs: number;
+  tokensSpent: number;
+  attempts: number;
+  outcomeTimeLimitMs?: number;
+  globalSpendCeiling?: number;
+}): { GCR: number; QAS: number; CEF: number; DTT: number; RES: number } {
+  const clampToUnit = (value: number): number => Math.max(0, Math.min(1, value));
+
+  // Count passed criteria; an empty list uses a denominator of 1 so QAS is 0 rather than NaN.
+  const criteria = params.evalResult.criteriaResults;
+  const denominator = criteria.length || 1;
+  let passed = 0;
+  for (const criterion of criteria) {
+    if (criterion.passed) {
+      passed += 1;
+    }
+  }
+
+  // Fall back to defaults when the limits are absent, zero or negative.
+  let tokenBudget = 50000;
+  if (params.globalSpendCeiling && params.globalSpendCeiling > 0) {
+    tokenBudget = params.globalSpendCeiling;
+  }
+  let timeBudgetMs = 30 * 60 * 1000;
+  if (params.outcomeTimeLimitMs && params.outcomeTimeLimitMs > 0) {
+    timeBudgetMs = params.outcomeTimeLimitMs;
+  }
+
+  return {
+    GCR: params.evalResult.status === 'SUCCESS' ? 1 : 0,
+    QAS: Math.min(1, passed / denominator),
+    CEF: clampToUnit(1 - params.tokensSpent / tokenBudget),
+    DTT: clampToUnit(1 - params.runDurationMs / timeBudgetMs),
+    RES: clampToUnit(1 - (params.attempts - 1) / Math.max(1, params.attempts)),
+  };
+}
+/**
+ * Runs `command` in `cwd` via runCommand and converts any thrown failure into
+ * a failed CommandResult instead of propagating it.
+ *
+ * The command is invoked with an empty argument list, `timeoutMs: 30000` and
+ * `maxOutputSize: 100000`.
+ *
+ * @param command - Command to run; when missing or empty nothing is executed
+ * @param cwd - Working directory; when missing or empty nothing is executed
+ * @returns The command result, `undefined` when either argument is missing, or
+ *   `{ exitCode: 1, stdout: '', stderr: <message> }` when runCommand throws
+ */
+async function runCommandAsync(command?: string, cwd?: string): Promise<CommandResult | undefined> {
+  if (!command || !cwd) return undefined;
+  try {
+    return await runCommand(command, [], { cwd, timeoutMs: 30000, maxOutputSize: 100000 });
+  } catch (err: unknown) {
+    // A thrown value is not guaranteed to be an Error; narrow it so stderr is
+    // always a string rather than `undefined`.
+    return {
+      exitCode: 1,
+      stdout: '',
+      stderr: err instanceof Error ? err.message : String(err),
+    };
+  }
+}
+/**
+ * Returns a copy of a code artifact whose content is augmented with the
+ * results of running its test, build, lint, benchmark and security-scan
+ * commands, in that order and one at a time.
+ *
+ * Commands run in `content.worktreePath`, falling back to `content.repoPath`.
+ * A step whose command or working directory is missing is skipped and leaves
+ * its result field untouched. The input artifact is not mutated.
+ *
+ * NOTE(review): the commands come from the artifact content itself; confirm
+ * upstream that this content is trusted before it reaches this function.
+ *
+ * @param artifact - Artifact produced by an agent run
+ * @returns A new artifact with the enriched content
+ */
+async function enrichCodeExecution(artifact: AgentRun['artifacts'][number]): Promise<AgentRun['artifacts'][number]> {
+  const enriched: any = { ...(artifact.content as any) };
+  const workDir = (enriched.worktreePath || enriched.repoPath) as string | undefined;
+
+  // [content key holding the command, content key receiving the result, result builder]
+  const steps: ReadonlyArray<readonly [string, string, (exec: CommandResult) => unknown]> = [
+    ['testCommand', 'testResult', (exec) => parseTestOutput(exec)],
+    [
+      'buildCommand',
+      'buildResult',
+      (exec) => ({ success: exec.exitCode === 0, exitCode: exec.exitCode }),
+    ],
+    ['lintCommand', 'lintResult', (exec) => parseLintOutput(exec)],
+    // The parsed benchmark metrics are stored for later validation.
+    ['benchmarkCommand', 'benchmarkResult', (exec) => parseBenchmarkOutput(exec)],
+    ['securityScanCommand', 'securityScanResult', (exec) => parseSecurityScanOutput(exec)],
+  ];
+
+  // Run strictly in sequence: each step is awaited before the next one starts.
+  for (const [commandKey, resultKey, buildResult] of steps) {
+    const exec = await runCommandAsync(enriched[commandKey], workDir);
+    if (exec) {
+      enriched[resultKey] = buildResult(exec);
+    }
+  }
+
+  return { ...artifact, content: enriched };
+}
+/**
+ * Computes the profitability metrics of a tournament.
+ *
+ * - netProfitability = (payoutAmount - totalCost) / totalCost (Requirement 5.1)
+ * - costPerSuccess = totalCost / successCount (Requirement 5.2)
+ * - Edge cases (Requirement 5.4): a total cost of zero yields a net
+ *   profitability of 0; zero successes yields a cost per success of Infinity.
+ *
+ * @param payoutAmount - The payout amount for the outcome
+ * @param totalCost - Total tokens spent across all agents
+ * @param successCount - Number of successful agents
+ * @returns TournamentMetrics holding both ratios and the success count
+ *
+ * @example
+ * calculateTournamentMetrics(250, 100, 1);
+ * // => { netProfitability: 1.5, costPerSuccess: 100, successCount: 1 }
+ *
+ * @see Requirements 5.1, 5.2, 5.4
+ */
+export function calculateTournamentMetrics(
+  payoutAmount: number,
+  totalCost: number,
+  successCount: number
+): TournamentMetrics {
+  // Start from the edge-case values and overwrite them when the divisor is non-zero.
+  let netProfitability = 0;
+  if (totalCost !== 0) {
+    netProfitability = (payoutAmount - totalCost) / totalCost;
+  }
+
+  let costPerSuccess = Infinity;
+  if (successCount !== 0) {
+    costPerSuccess = totalCost / successCount;
+  }
+
+  return { netProfitability, costPerSuccess, successCount };
+}
+/**
+ * Runs a league with N agents competing in parallel.
+ *
+ * Flow:
+ * 1. Validate the configuration, before any tracking side effects are emitted.
+ * 2. Record the league start with the data collector, analytics collector and
+ *    launch monitor (Requirement 12.1).
+ * 3. Launch `agentCount` agents without awaiting them (Requirement 4.1). Each
+ *    agent gets a kill-signal callback that reports true once another agent
+ *    has won or the global token total has reached the ceiling
+ *    (Requirements 4.5, 10.3).
+ * 4. Process runs as they settle: add their tokens to the global total,
+ *    evaluate the last artifact of completed runs (code-delivery outcomes are
+ *    first enriched with command results and given a DAWS score), and promote
+ *    the first agent evaluated as SUCCESS to winner.
+ * 5. Compute tournament metrics and record league completion.
+ *
+ * @param config - League configuration
+ * @returns LeagueResult with winner and all agent results
+ * @throws Error when `agentCount` is not a positive number or fewer than
+ *   `agentCount` agent configs are supplied
+ * @throws Any rejection from an agent run, the evaluation, or an awaited
+ *   tracking call is propagated unchanged
+ *
+ * @example
+ * const result = await runLeague({
+ *   outcomeId: 'qualified_sales_interest',
+ *   agentCount: 3,
+ *   globalSpendCeiling: 50000,
+ *   agentConfigs: [agent1, agent2, agent3],
+ *   outcome: qualifiedSalesInterest,
+ *   lead: leadData,
+ * });
+ *
+ * @see Requirements 4.1, 4.5, 10.3, 12.1
+ */
+export async function runLeague(config: LeagueConfig): Promise<LeagueResult> {
+  const startTime = Date.now();
+  const {
+    outcomeId,
+    agentCount,
+    globalSpendCeiling,
+    agentConfigs,
+    outcome,
+    lead,
+    mockMode = false,
+    apiKey,
+  } = config;
+  // Validate configuration first, so an invalid config never leaves a started
+  // battle behind in the analytics and monitoring systems. The negated
+  // comparison also rejects NaN.
+  if (!(agentCount > 0)) {
+    throw new Error('Agent count must be positive');
+  }
+  if (agentConfigs.length < agentCount) {
+    throw new Error(`Not enough agent configs: need ${agentCount}, got ${agentConfigs.length}`);
+  }
+  // One ID identifies the league in every tracking call below.
+  const leagueBattleId = `league_${outcomeId}_${startTime}`;
+  type BattleExecution = NonNullable<Parameters<typeof dataCollector.trackBattleExecution>[0]>;
+  // Describes the competing agent configs for league-level tracking records.
+  const describeLeagueAgents = () =>
+    agentConfigs.slice(0, agentCount).map(a => ({
+      agentId: a.id,
+      developerId: 'system',
+      modelType: a.modelProvider,
+      version: '1.0.0'
+    }));
+  // Records the outcome of a single agent's run; only the fields that differ
+  // between the winner / killed / failed cases are supplied by the caller.
+  const trackAgentBattle = (
+    agentId: string,
+    run: AgentRun,
+    details: {
+      payoutAmount: number;
+      successRate: number;
+      results: BattleExecution['results'];
+      metadata: BattleExecution['metadata'];
+    }
+  ) =>
+    dataCollector.trackBattleExecution({
+      battleId: `agent_${agentId}_${startTime}`,
+      outcomeId,
+      agents: [{
+        agentId,
+        developerId: 'system',
+        modelType: 'unknown',
+        version: '1.0.0'
+      }],
+      config: {
+        maxAttempts: 3,
+        timeLimit: 300000,
+        payoutAmount: details.payoutAmount
+      },
+      results: details.results,
+      metrics: {
+        tokensUsed: run.tokensSpent,
+        averageResponseTime: 0,
+        successRate: details.successRate,
+        costPerAttempt: run.tokensSpent / Math.max(run.attempts, 1)
+      },
+      metadata: details.metadata
+    });
+  // Track league start in analytics and monitoring systems
+  await dataCollector.trackBattleExecution({
+    battleId: leagueBattleId,
+    outcomeId,
+    agents: describeLeagueAgents(),
+    config: {
+      maxAttempts: 3,
+      timeLimit: 300000,
+      payoutAmount: 0
+    },
+    results: {
+      attempts: 0,
+      duration: 0,
+      totalCost: 0,
+      success: false
+    },
+    metrics: {
+      tokensUsed: 0,
+      averageResponseTime: 0,
+      successRate: 0,
+      costPerAttempt: 0
+    },
+    metadata: {
+      agentCount,
+      globalSpendCeiling,
+      mockMode
+    }
+  });
+  // Start battle tracking in analytics collector (extends MetricsCollector)
+  analyticsCollector.startBattle(leagueBattleId, agentCount);
+  launchMonitor.recordEvent({
+    type: 'business',
+    metric: 'league_started',
+    value: 1,
+    timestamp: new Date(startTime),
+    metadata: { outcomeId, agentCount }
+  });
+  // Track running agents and their state
+  const runningAgents: RunningAgent[] = [];
+  let winnerId: string | null = null;
+  let globalTokensSpent = 0;
+  let globalCeilingHit = false;
+  // Start N agents in parallel (Requirement 4.1)
+  const runFn = mockMode ? runAgentMock : runAgent;
+  for (let i = 0; i < agentCount; i++) {
+    const agentConfig = agentConfigs[i];
+    // Suffix the slot index so the same config can compete more than once.
+    const agentId = `${agentConfig.id}-${i}`;
+    // Kill signal checker handed to the agent run: reads the live league state.
+    const shouldKill = (): boolean => {
+      // Kill if another agent already won
+      if (winnerId !== null && winnerId !== agentId) {
+        return true;
+      }
+      // Kill if global ceiling exceeded. The total only grows when a run is
+      // processed below, so it reflects finished runs only.
+      return globalTokensSpent >= globalSpendCeiling;
+    };
+    // Create agent run config
+    const runConfig: AgentRunConfig = {
+      agent: { ...agentConfig, id: agentId },
+      outcome,
+      lead,
+      apiKey,
+      onKillSignal: shouldKill,
+    };
+    // Start agent (mock or real) without awaiting it
+    const promise = runFn(runConfig);
+    runningAgents.push({
+      agentId,
+      config: agentConfig,
+      promise,
+      startTime: Date.now(),
+      killed: false,
+    });
+    // Track agent start
+    await dataCollector.trackUserInteraction({
+      userId: agentId,
+      sessionId: leagueBattleId,
+      type: 'form_submit', // Agent submission is a form submit action
+      action: 'agent_started',
+      page: 'league',
+      success: true,
+      metadata: {
+        outcomeId,
+        agentConfig: agentConfig.id,
+        leagueStartTime: startTime
+      }
+    });
+    // Track agent submission in analytics
+    analyticsCollector.recordUserActivity({
+      userId: agentId,
+      userRole: 'developer',
+      action: 'submit_agent',
+      timestamp: new Date(),
+      metadata: {
+        outcomeId,
+        agentConfig: agentConfig.id
+      }
+    });
+  }
+  // Tag every run with its agent ID once, so the race below can tell which
+  // agent settled without re-wrapping all pending promises on each iteration.
+  const agentResults: AgentResult[] = [];
+  const pendingRuns = new Map<string, Promise<{ agentId: string; run: AgentRun }>>();
+  for (const { agentId, promise } of runningAgents) {
+    pendingRuns.set(agentId, promise.then((run) => ({ agentId, run })));
+  }
+  // Process agents as they complete
+  while (pendingRuns.size > 0) {
+    // Wait for any agent to complete
+    const { agentId, run } = await Promise.race(Array.from(pendingRuns.values()));
+    pendingRuns.delete(agentId);
+    // Update global token count
+    globalTokensSpent += run.tokensSpent;
+    // Track token usage in analytics
+    analyticsCollector.recordFinancialEvent({
+      type: 'cost',
+      category: 'ai_model',
+      amount: run.tokensSpent * 0.001, // Convert tokens to cost estimate
+      currency: 'USD',
+      timestamp: new Date(),
+      metadata: {
+        agentId,
+        outcomeId,
+        tokensSpent: run.tokensSpent
+      }
+    });
+    // Record tokens in analytics collector (extends MetricsCollector)
+    analyticsCollector.recordTokens(leagueBattleId, run.tokensSpent, run.tokensSpent * 0.001);
+    // Check global ceiling; the event is recorded only the first time it is hit
+    if (globalTokensSpent >= globalSpendCeiling && !globalCeilingHit) {
+      globalCeilingHit = true;
+      launchMonitor.recordEvent({
+        type: 'system',
+        metric: 'global_ceiling_hit',
+        value: globalTokensSpent,
+        timestamp: new Date(),
+        metadata: { outcomeId, ceiling: globalSpendCeiling }
+      });
+    }
+    // Determine agent result
+    let agentResult: AgentResult;
+    if (run.status === 'killed') {
+      // Agent was killed (cost exceeded, timeout, or competitor won)
+      const killReason = run.killReason ?? 'unknown';
+      agentResult = {
+        agentId,
+        status: 'killed',
+        killReason,
+        attempts: run.attempts,
+        tokensSpent: run.tokensSpent,
+        durationMs: run.durationMs,
+      };
+      // Log the same fallback reason as the result, never "Killed: undefined".
+      logFailure(
+        agentId,
+        outcomeId,
+        'v1.0.0',
+        run.tokensSpent,
+        `Killed: ${killReason}`
+      );
+      // Track agent failure
+      // NOTE(review): this record and the winner's report league-elapsed time
+      // as duration, the other two branches report run.durationMs — confirm
+      // which one is intended.
+      await trackAgentBattle(agentId, run, {
+        payoutAmount: 0,
+        successRate: 0,
+        results: {
+          attempts: run.attempts,
+          duration: Date.now() - startTime,
+          totalCost: run.tokensSpent,
+          success: false,
+          errorMessage: run.killReason
+        },
+        metadata: {
+          killReason: run.killReason,
+          attempts: run.attempts
+        }
+      });
+    } else if (run.status === 'completed' && run.artifacts.length > 0) {
+      // Agent completed - evaluate the most recent artifact
+      const artifact = run.artifacts[run.artifacts.length - 1];
+      const isCodeOutcome = Boolean(CODE_DELIVERY_OUTCOMES[outcome.name]);
+      const enrichedArtifact = isCodeOutcome ? await enrichCodeExecution(artifact) : artifact;
+      const evalResult = await evaluateOutcome(outcome, enrichedArtifact);
+      const dawsScore = isCodeOutcome
+        ? calculateDawsCodeScore(
+            normalizeDawsScores({
+              evalResult,
+              runDurationMs: run.durationMs,
+              tokensSpent: run.tokensSpent,
+              attempts: run.attempts,
+              outcomeTimeLimitMs: outcome.timeLimitMs,
+              globalSpendCeiling,
+            })
+          ).finalScore
+        : undefined;
+      if (evalResult.status === 'SUCCESS' && winnerId === null) {
+        // First successful agent wins! (Requirement 4.5)
+        // Setting winnerId flips the kill signal of every other agent.
+        winnerId = agentId;
+        agentResult = {
+          agentId,
+          status: 'winner',
+          attempts: run.attempts,
+          tokensSpent: run.tokensSpent,
+          durationMs: run.durationMs,
+          evaluationResult: evalResult,
+          dawsScore,
+        };
+        logSuccess(agentId, outcomeId, 'v1.0.0', run.tokensSpent, {
+          winner: true,
+          payoutAmount: outcome.payoutAmount,
+        });
+        // Track winner in analytics
+        analyticsCollector.recordDeveloperEarnings({
+          developerId: agentId,
+          agentId,
+          battleId: leagueBattleId,
+          earnings: outcome.payoutAmount,
+          timestamp: new Date(),
+          winRate: 1.0
+        });
+        analyticsCollector.recordFinancialEvent({
+          type: 'cost',
+          category: 'payout',
+          amount: outcome.payoutAmount,
+          currency: 'USD',
+          timestamp: new Date(),
+          metadata: {
+            agentId,
+            outcomeId,
+            winnerId: agentId
+          }
+        });
+        // Track successful battle
+        await trackAgentBattle(agentId, run, {
+          payoutAmount: outcome.payoutAmount,
+          successRate: 1.0,
+          results: {
+            winner: agentId,
+            attempts: run.attempts,
+            duration: Date.now() - startTime,
+            totalCost: run.tokensSpent,
+            success: true
+          },
+          metadata: {
+            payoutAmount: outcome.payoutAmount,
+            attempts: run.attempts,
+            evaluationResult: evalResult.status
+          }
+        });
+        // Complete battle in analytics collector
+        analyticsCollector.completeBattle(leagueBattleId, agentId);
+      } else {
+        // Evaluation failed or another agent already won
+        const isCompetitorWon = winnerId !== null;
+        const failureReason = isCompetitorWon ? 'Competitor won first' : evalResult.reason;
+        agentResult = {
+          agentId,
+          status: isCompetitorWon ? 'killed' : 'failed',
+          killReason: isCompetitorWon ? 'competitor_won' : undefined,
+          attempts: run.attempts,
+          tokensSpent: run.tokensSpent,
+          durationMs: run.durationMs,
+          evaluationResult: evalResult,
+          dawsScore,
+        };
+        logFailure(
+          agentId,
+          outcomeId,
+          'v1.0.0',
+          run.tokensSpent,
+          failureReason
+        );
+        // Track agent failure
+        await trackAgentBattle(agentId, run, {
+          payoutAmount: 0,
+          successRate: 0,
+          results: {
+            attempts: run.attempts,
+            duration: run.durationMs,
+            totalCost: run.tokensSpent,
+            success: false,
+            errorMessage: failureReason
+          },
+          metadata: {
+            evaluationStatus: evalResult.status,
+            evaluationReason: evalResult.reason,
+            competitorWon: isCompetitorWon
+          }
+        });
+        // Mark battle as failed in analytics collector if no winner yet
+        // NOTE(review): other agents may still be running and win later —
+        // confirm failBattle followed by completeBattle is acceptable.
+        if (!winnerId) {
+          analyticsCollector.failBattle(leagueBattleId, evalResult.reason);
+        }
+      }
+    } else {
+      // Agent failed without producing artifacts
+      const failureReason = run.error ?? 'No artifacts produced';
+      agentResult = {
+        agentId,
+        status: 'failed',
+        attempts: run.attempts,
+        tokensSpent: run.tokensSpent,
+        durationMs: run.durationMs,
+      };
+      logFailure(
+        agentId,
+        outcomeId,
+        'v1.0.0',
+        run.tokensSpent,
+        failureReason
+      );
+      // Track agent failure
+      await trackAgentBattle(agentId, run, {
+        payoutAmount: 0,
+        successRate: 0,
+        results: {
+          attempts: run.attempts,
+          duration: run.durationMs,
+          totalCost: run.tokensSpent,
+          success: false,
+          errorMessage: failureReason
+        },
+        metadata: {
+          error: run.error,
+          noArtifacts: true
+        }
+      });
+      // Mark battle as failed in analytics collector
+      analyticsCollector.failBattle(leagueBattleId, failureReason);
+    }
+    agentResults.push(agentResult);
+  }
+  // Calculate tournament metrics (Requirements 5.1, 5.2, 5.3)
+  const successCount = agentResults.filter(agent => agent.status === 'winner').length;
+  const tournamentMetrics = calculateTournamentMetrics(
+    outcome.payoutAmount,
+    globalTokensSpent,
+    successCount
+  );
+  const endTime = Date.now();
+  const duration = endTime - startTime;
+  // Track league completion
+  await dataCollector.trackBattleExecution({
+    battleId: leagueBattleId,
+    outcomeId,
+    agents: describeLeagueAgents(),
+    config: {
+      maxAttempts: 3,
+      timeLimit: 300000,
+      payoutAmount: winnerId ? (outcome?.payoutAmount || 0) : 0
+    },
+    results: {
+      winner: winnerId || undefined,
+      attempts: agentCount,
+      duration: duration,
+      totalCost: globalTokensSpent,
+      success: winnerId !== null
+    },
+    metrics: {
+      tokensUsed: globalTokensSpent,
+      averageResponseTime: duration / agentCount,
+      successRate: winnerId ? 1.0 : 0.0,
+      costPerAttempt: globalTokensSpent / agentCount
+    },
+    metadata: {
+      agentCount,
+      globalSpendCeiling,
+      globalCeilingHit,
+      tournamentMetrics,
+      duration
+    }
+  });
+  // Track business metrics
+  launchMonitor.recordEvent({
+    type: 'business',
+    metric: 'league_completed',
+    value: 1,
+    timestamp: new Date(endTime),
+    metadata: {
+      outcomeId,
+      winnerId,
+      totalCost: globalTokensSpent,
+      duration,
+      success: winnerId !== null
+    }
+  });
+  // Track system performance
+  launchMonitor.recordEvent({
+    type: 'system',
+    metric: 'league_duration',
+    value: duration,
+    timestamp: new Date(endTime),
+    metadata: { outcomeId, agentCount }
+  });
+  return {
+    winnerId,
+    agents: agentResults,
+    totalCost: globalTokensSpent,
+    duration,
+    globalCeilingHit,
+    tournamentMetrics,
+  };
+}
+/**
+ * Convenience wrapper for tests: runs a league with `mockMode` forced on.
+ *
+ * @param config - League configuration without the `mockMode` flag (it is always set to true here)
+ * @returns The LeagueResult produced by runLeague
+ */
+export async function runLeagueMock(
+  config: Omit<LeagueConfig, 'mockMode'>
+): Promise<LeagueResult> {
+  const mockConfig: LeagueConfig = { ...config, mockMode: true };
+  return runLeague(mockConfig);
+}