npm - outcome-cli - Versions diffs - 1.0.0 - Mend

outcome-cli 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (113) hide show

package/README.md +261 -0
package/package.json +95 -0
package/src/agents/README.md +139 -0
package/src/agents/adapters/anthropic.adapter.ts +166 -0
package/src/agents/adapters/dalle.adapter.ts +145 -0
package/src/agents/adapters/gemini.adapter.ts +134 -0
package/src/agents/adapters/imagen.adapter.ts +106 -0
package/src/agents/adapters/nano-banana.adapter.ts +129 -0
package/src/agents/adapters/openai.adapter.ts +165 -0
package/src/agents/adapters/veo.adapter.ts +130 -0
package/src/agents/agent.schema.property.test.ts +379 -0
package/src/agents/agent.schema.test.ts +148 -0
package/src/agents/agent.schema.ts +263 -0
package/src/agents/index.ts +60 -0
package/src/agents/registered-agent.schema.ts +356 -0
package/src/agents/registry.ts +97 -0
package/src/agents/tournament-configs.property.test.ts +266 -0
package/src/cli/README.md +145 -0
package/src/cli/commands/define.ts +79 -0
package/src/cli/commands/list.ts +46 -0
package/src/cli/commands/logs.ts +83 -0
package/src/cli/commands/run.ts +416 -0
package/src/cli/commands/verify.ts +110 -0
package/src/cli/index.ts +81 -0
package/src/config/README.md +128 -0
package/src/config/env.ts +262 -0
package/src/config/index.ts +19 -0
package/src/eval/README.md +318 -0
package/src/eval/ai-judge.test.ts +435 -0
package/src/eval/ai-judge.ts +368 -0
package/src/eval/code-validators.ts +414 -0
package/src/eval/evaluateOutcome.property.test.ts +1174 -0
package/src/eval/evaluateOutcome.ts +591 -0
package/src/eval/immigration-validators.ts +122 -0
package/src/eval/index.ts +90 -0
package/src/eval/judge-cache.ts +402 -0
package/src/eval/tournament-validators.property.test.ts +439 -0
package/src/eval/validators.property.test.ts +1118 -0
package/src/eval/validators.ts +1199 -0
package/src/eval/weighted-scorer.ts +285 -0
package/src/index.ts +17 -0
package/src/league/README.md +188 -0
package/src/league/health-check.ts +353 -0
package/src/league/index.ts +93 -0
package/src/league/killAgent.ts +151 -0
package/src/league/league.test.ts +1151 -0
package/src/league/runLeague.ts +843 -0
package/src/league/scoreAgent.ts +175 -0
package/src/modules/omnibridge/__tests__/.gitkeep +1 -0
package/src/modules/omnibridge/__tests__/auth-tunnel.property.test.ts +524 -0
package/src/modules/omnibridge/__tests__/deterministic-logger.property.test.ts +965 -0
package/src/modules/omnibridge/__tests__/ghost-api.property.test.ts +461 -0
package/src/modules/omnibridge/__tests__/omnibridge-integration.test.ts +542 -0
package/src/modules/omnibridge/__tests__/parallel-executor.property.test.ts +671 -0
package/src/modules/omnibridge/__tests__/semantic-normalizer.property.test.ts +521 -0
package/src/modules/omnibridge/__tests__/semantic-normalizer.test.ts +254 -0
package/src/modules/omnibridge/__tests__/session-vault.property.test.ts +367 -0
package/src/modules/omnibridge/__tests__/shadow-session.property.test.ts +523 -0
package/src/modules/omnibridge/__tests__/triangulation-engine.property.test.ts +292 -0
package/src/modules/omnibridge/__tests__/verification-engine.property.test.ts +769 -0
package/src/modules/omnibridge/api/.gitkeep +1 -0
package/src/modules/omnibridge/api/ghost-api.ts +1087 -0
package/src/modules/omnibridge/auth/.gitkeep +1 -0
package/src/modules/omnibridge/auth/auth-tunnel.ts +843 -0
package/src/modules/omnibridge/auth/session-vault.ts +577 -0
package/src/modules/omnibridge/core/.gitkeep +1 -0
package/src/modules/omnibridge/core/semantic-normalizer.ts +702 -0
package/src/modules/omnibridge/core/triangulation-engine.ts +530 -0
package/src/modules/omnibridge/core/types.ts +610 -0
package/src/modules/omnibridge/execution/.gitkeep +1 -0
package/src/modules/omnibridge/execution/deterministic-logger.ts +629 -0
package/src/modules/omnibridge/execution/parallel-executor.ts +542 -0
package/src/modules/omnibridge/execution/shadow-session.ts +794 -0
package/src/modules/omnibridge/index.ts +212 -0
package/src/modules/omnibridge/omnibridge.ts +510 -0
package/src/modules/omnibridge/verification/.gitkeep +1 -0
package/src/modules/omnibridge/verification/verification-engine.ts +783 -0
package/src/outcomes/README.md +75 -0
package/src/outcomes/acquire-pilot-customer.ts +297 -0
package/src/outcomes/code-delivery-outcomes.ts +89 -0
package/src/outcomes/code-outcomes.ts +256 -0
package/src/outcomes/code_review_battle.test.ts +135 -0
package/src/outcomes/code_review_battle.ts +135 -0
package/src/outcomes/cold_email_battle.ts +97 -0
package/src/outcomes/content_creation_battle.ts +160 -0
package/src/outcomes/f1_stem_opt_compliance.ts +61 -0
package/src/outcomes/index.ts +107 -0
package/src/outcomes/lead_gen_battle.test.ts +113 -0
package/src/outcomes/lead_gen_battle.ts +99 -0
package/src/outcomes/outcome.schema.property.test.ts +229 -0
package/src/outcomes/outcome.schema.ts +187 -0
package/src/outcomes/qualified_sales_interest.ts +118 -0
package/src/outcomes/swarm_planner.property.test.ts +370 -0
package/src/outcomes/swarm_planner.ts +96 -0
package/src/outcomes/web_extraction.ts +234 -0
package/src/runtime/README.md +220 -0
package/src/runtime/agentRunner.test.ts +341 -0
package/src/runtime/agentRunner.ts +746 -0
package/src/runtime/claudeAdapter.ts +232 -0
package/src/runtime/costTracker.ts +123 -0
package/src/runtime/index.ts +34 -0
package/src/runtime/modelAdapter.property.test.ts +305 -0
package/src/runtime/modelAdapter.ts +144 -0
package/src/runtime/openaiAdapter.ts +235 -0
package/src/utils/README.md +122 -0
package/src/utils/command-runner.ts +134 -0
package/src/utils/cost-guard.ts +379 -0
package/src/utils/errors.test.ts +290 -0
package/src/utils/errors.ts +442 -0
package/src/utils/index.ts +37 -0
package/src/utils/logger.test.ts +361 -0
package/src/utils/logger.ts +419 -0
package/src/utils/output-parsers.ts +216 -0

package/src/runtime/agentRunner.ts ADDED Viewed

@@ -0,0 +1,746 @@
+/**
+ * Agent Runner - Agent execution engine with attempt loop
+ *
+ * Implements the agent execution loop with:
+ * - Attempt tracking and limits
+ * - Cost tracking integration
+ * - Model adapter integration
+ * - Exponential backoff for retries
+ * - Runtime limit enforcement
+ * - Skill (tool) execution support
+ *
+ * @module runtime/agentRunner
+ * @see Requirements 4.2, 4.3, 10.1, 10.2, 11.2
+ */
+import type { AgentConfig } from '../agents/agent.schema.js';
+import type { Outcome } from '../outcomes/outcome.schema.js';
+import type { Lead } from '../jobs/job.interface.js';
+import type { ArtifactContent, AgentArtifact } from '../eval/evaluateOutcome.js';
+import {
+  type CostTracker,
+  createCostTracker,
+  recordUsage,
+  isOverBudget,
+} from './costTracker.js';
+import {
+  type ModelAdapter,
+  type ModelResponse,
+  type ConversationMessage,
+  type ToolDefinition,
+  createAdapter,
+} from './modelAdapter.js';
+import {
+  type Skill,
+  type SkillRegistry,
+  allSkills,
+  createFilteredRegistry,
+} from '../skills/index.js';
+/**
+ * Reason an agent run was terminated.
+ *
+ * Within this module, `checkKillConditions` produces `'competitor_won'`,
+ * `'cost_exceeded'`, `'attempts_exceeded'` and `'timeout'`. `'success'` is
+ * declared here but is not assigned by any code visible in this file.
+ *
+ * @see GLOSSARY.md - Kill Reason
+ */
+export type KillReason =
+  | 'cost_exceeded'
+  | 'attempts_exceeded'
+  | 'timeout'
+  | 'competitor_won'
+  | 'success';
+/**
+ * Lifecycle status of an agent run.
+ *
+ * - `'running'`: initial state while the attempt loop is active
+ * - `'completed'`: an attempt produced an artifact
+ * - `'killed'`: a kill condition fired, or the model adapter could not be created
+ */
+export type AgentRunStatus = 'running' | 'completed' | 'killed';
+/**
+ * Represents the state and results of an agent execution.
+ *
+ * `runAgent` creates one of these at the start of a run, mutates it as
+ * attempts are made, and returns it when the run ends.
+ *
+ * @see design.md - Runtime System
+ */
+export interface AgentRun {
+  /** ID of the agent being run */
+  agentId: string;
+  /** ID of the outcome being attempted (`runAgent` stores `outcome.name` here) */
+  outcomeId: string;
+  /** Current status of the run */
+  status: AgentRunStatus;
+  /** Number of attempts started so far */
+  attempts: number;
+  /** Total tokens spent across all attempts, as reported by the cost tracker */
+  tokensSpent: number;
+  /** All artifacts produced during the run */
+  artifacts: AgentArtifact[];
+  /** Reason for termination; set when a kill condition ended the run */
+  killReason?: KillReason;
+  /** Duration of the run in milliseconds */
+  durationMs: number;
+  /** Message from adapter setup or from the most recent failed attempt, if any */
+  error?: string;
+}
+/**
+ * Configuration for running an agent.
+ *
+ * This is the single argument accepted by both `runAgent` and `runAgentMock`.
+ */
+export interface AgentRunConfig {
+  /** Agent configuration */
+  agent: AgentConfig;
+  /** Outcome to achieve */
+  outcome: Outcome;
+  /** Lead data to process */
+  lead: Lead;
+  /** Optional API key override (defaults to ANTHROPIC_API_KEY for the 'claude' provider, OPENAI_API_KEY otherwise) */
+  apiKey?: string;
+  /** Optional predicate polled around attempts; returning true stops the run with kill reason 'competitor_won' */
+  onKillSignal?: () => boolean;
+}
+/**
+ * Result of a single attempt, including any internal retries made by
+ * `executeAttemptWithRetry`.
+ */
+interface AttemptResult {
+  /** Whether the attempt was successful (model call succeeded) */
+  success: boolean;
+  /** The generated artifact if successful */
+  artifact?: AgentArtifact;
+  /** Tokens counted for this attempt, including estimates for tries that threw */
+  tokensUsed: number;
+  /** Error message if attempt failed */
+  error?: string;
+}
+/**
+ * Default retry configuration.
+ *
+ * - DEFAULT_MAX_RETRIES: total number of tries per attempt (the loop bound in
+ *   executeAttemptWithRetry, so it counts the first try as well)
+ * - DEFAULT_BASE_DELAY_MS / DEFAULT_MAX_DELAY_MS: default base delay and cap
+ *   used by calculateBackoffDelay
+ * - MAX_TOOL_ITERATIONS: maximum number of tool-call rounds within one try
+ */
+const DEFAULT_MAX_RETRIES = 3;
+const DEFAULT_BASE_DELAY_MS = 1000;
+const DEFAULT_MAX_DELAY_MS = 10000;
+const MAX_TOOL_ITERATIONS = 10;
+/**
+ * Computes how long to wait before the next retry.
+ *
+ * The delay doubles with every retry (base * 2^attempt), is limited to
+ * `maxDelayMs`, and is then shifted by a random amount of up to 25% in either
+ * direction. Because the random shift is applied after the limit, the returned
+ * value can be up to 25% above `maxDelayMs`.
+ *
+ * @param attempt - Current retry attempt (0-indexed)
+ * @param baseDelayMs - Base delay in milliseconds
+ * @param maxDelayMs - Limit applied to the exponential delay before jitter
+ * @returns Delay in milliseconds, rounded down to an integer
+ */
+function calculateBackoffDelay(
+  attempt: number,
+  baseDelayMs: number = DEFAULT_BASE_DELAY_MS,
+  maxDelayMs: number = DEFAULT_MAX_DELAY_MS
+): number {
+  // Exponential growth, limited to the configured maximum.
+  const limited = Math.min(baseDelayMs * 2 ** attempt, maxDelayMs);
+  // Random offset within ±25% of the limited delay so that many clients
+  // retrying at once do not all fire at the same instant.
+  const offset = limited * 0.25 * (Math.random() * 2 - 1);
+  return Math.floor(limited + offset);
+}
+/**
+ * Pauses the calling async function for roughly the given time.
+ *
+ * @param ms - How long to wait, in milliseconds
+ * @returns Promise that resolves with no value once the timer fires
+ */
+async function sleep(ms: number): Promise<void> {
+  await new Promise<void>((done) => {
+    setTimeout(done, ms);
+  });
+}
+/**
+ * Builds the skill registry an agent is allowed to use.
+ *
+ * Delegates to `createFilteredRegistry`, passing the full `allSkills`
+ * collection together with the agent's `toolAccess` list.
+ *
+ * @param toolAccess - List of skill names the agent can access
+ * @returns SkillRegistry produced by `createFilteredRegistry`
+ */
+function loadSkillsForAgent(toolAccess: string[]): SkillRegistry {
+  const permitted: SkillRegistry = createFilteredRegistry(allSkills, toolAccess);
+  return permitted;
+}
+/**
+ * Asks a skill registry for the tool definitions to advertise to the model.
+ *
+ * @param registry - The skill registry
+ * @returns Whatever `registry.getToolDefinitions()` returns, unchanged
+ */
+function getToolDefinitions(registry: SkillRegistry): ToolDefinition[] {
+  const definitions: ToolDefinition[] = registry.getToolDefinitions();
+  return definitions;
+}
+/**
+ * Executes a skill and returns the result as a JSON string.
+ *
+ * A successful result is serialized from `result.data`; a failed result is
+ * serialized as `{ "error": <message> }`. Exceptions thrown by
+ * `skill.execute` are not caught here and propagate to the caller.
+ *
+ * @param skill - The skill to execute
+ * @param args - Arguments to pass to the skill
+ * @returns Result string for the model; always a string, never undefined
+ */
+async function executeSkill(skill: Skill, args: Record<string, unknown>): Promise<string> {
+  const result = await skill.execute(args);
+  if (!result.success) {
+    return JSON.stringify({ error: (result as { success: false; error: string }).error });
+  }
+  // JSON.stringify yields undefined (not a string) when the value is undefined,
+  // a function or a symbol. Fall back to the JSON literal null so the tool
+  // message handed back to the model always has string content.
+  const serialized: string | undefined = JSON.stringify(result.data);
+  return serialized ?? 'null';
+}
+/**
+ * Assembles the full prompt sent to the model for one outcome.
+ *
+ * The agent's own prompt always comes first. What follows depends on
+ * `outcome.name`:
+ * - `code_review_battle`: a fixed vulnerable code sample plus the JSON answer format
+ * - `lead_gen_battle`: the JSON lead format and its validation rules
+ * - anything else (e.g. qualified_sales_interest): the details of `lead`
+ *
+ * @param agentPrompt - The agent's base prompt
+ * @param outcome - The outcome being attempted; only its name is inspected
+ * @param lead - The lead data, used only by the default (sales) prompt
+ * @returns Complete prompt string
+ */
+function buildPrompt(agentPrompt: string, outcome: Outcome, lead: Lead): string {
+  switch (outcome.name) {
+    case 'code_review_battle':
+      return `${agentPrompt}
+## Code Review Challenge
+You are performing a code review battle. Your task is to analyze the following vulnerable code and provide a comprehensive review.
+**Source Code to Review:**
+\`\`\`javascript
+function authenticateUser(username, password) {
+  const query = "SELECT * FROM users WHERE username = '" + username + "' AND password = '" + password + "'";
+  const result = db.query(query);
+  if (result.length > 0) {
+    for (let i = 0; i < result.length; i++) {
+      const user = result[i];
+      const permissions = db.query("SELECT * FROM permissions WHERE user_id = " + user.id);
+      if (permissions.length > 0) {
+        return { success: true, user: user, permissions: permissions };
+      }
+    }
+  }
+  return { success: false };
+}
+\`\`\`
+**Your Response Must Be Valid JSON** with this exact structure:
+\`\`\`json
+{
+  "issues": [
+    {
+      "type": "security" | "performance" | "style" | "logic",
+      "severity": "CRITICAL" | "HIGH" | "MEDIUM" | "LOW",
+      "description": "Description of the issue",
+      "lineNumber": 1
+    }
+  ],
+  "comments": [
+    {
+      "lineContent": "exact line from code above",
+      "comment": "your comment about this line",
+      "lineNumber": 1
+    }
+  ],
+  "refactorSuggestion": {
+    "originalComplexity": 8,
+    "suggestedComplexity": 4,
+    "description": "How to refactor to reduce complexity"
+  }
+}
+\`\`\`
+**Requirements:**
+- Identify at least one CRITICAL security vulnerability (SQL injection, XSS)
+- Identify at least one performance bottleneck (N+1 queries)
+- All comments must reference actual lines from the source code above
+- Suggest refactoring that reduces cyclomatic complexity by at least 2 points
+- Respond ONLY with valid JSON, no additional text`;
+    case 'lead_gen_battle':
+      return `${agentPrompt}
+## Lead Generation Challenge
+You are competing in a lead generation precision battle. Your task is to generate a qualified lead that meets all validation criteria.
+**Your Response Must Be Valid JSON** with this exact structure:
+\`\`\`json
+{
+  "email": "valid.email@company.com",
+  "companySize": 100,
+  "role": "Decision Maker Role",
+  "linkedIn": "https://www.linkedin.com/in/profile"
+}
+\`\`\`
+**Requirements:**
+- Email must have valid format (user@domain.com)
+- Company size must be at least 50 employees
+- Role must NOT be "intern" or "student" (case-insensitive)
+- LinkedIn URL must start with "https://www.linkedin.com/in/"
+- Respond ONLY with valid JSON, no additional text`;
+    default: {
+      // Fallback used for qualified_sales_interest and every other outcome.
+      const history = lead.previousInteractions.join(', ') || 'None';
+      // Optional fields collapse to an empty line when absent.
+      const sourceLine = lead.leadSource ? `- Lead Source: ${lead.leadSource}` : '';
+      const originLine = lead.leadOrigin ? `- Lead Origin: ${lead.leadOrigin}` : '';
+      return `${agentPrompt}
+## Current Lead Information
+- Email: ${lead.email}
+- Company: ${lead.company}
+- Company Size: ${lead.companySize} employees
+- Role: ${lead.role}
+- Previous Interactions: ${history}
+${sourceLine}
+${originLine}
+Please engage with this lead and work towards qualifying them based on the criteria outlined above.`;
+    }
+  }
+}
+/**
+ * Turns raw model output into the artifact content structure.
+ *
+ * If `content` is JSON for an object that already has both `message` and
+ * `targetEmail` keys, that object is returned as-is. Otherwise the raw text is
+ * stored as `message` next to the lead's email, company, company size and role.
+ * The content itself always comes from the agent; this function only supplies
+ * the structure.
+ *
+ * @param outcomeId - ID of the outcome; every outcome currently gets the same envelope
+ * @param content - Raw text produced by the model
+ * @param lead - Lead whose details fill the target fields
+ * @returns ArtifactContent for the artifact
+ */
+function createArtifactContent(outcomeId: string, content: string, lead: Lead): ArtifactContent {
+  // Structured outcomes may already deliver a complete ArtifactContent as JSON.
+  let decoded: unknown;
+  try {
+    decoded = JSON.parse(content);
+  } catch {
+    // Not JSON: fall through and treat the content as plain text.
+    decoded = undefined;
+  }
+  const isReadyMade =
+    typeof decoded === 'object' &&
+    decoded !== null &&
+    'message' in decoded &&
+    'targetEmail' in decoded;
+  if (isReadyMade) {
+    return decoded as ArtifactContent;
+  }
+  const envelope: ArtifactContent = {
+    message: content,
+    targetEmail: lead.email,
+    targetCompany: lead.company,
+    targetCompanySize: lead.companySize,
+    targetRole: lead.role,
+  };
+  switch (outcomeId) {
+    case 'code_review_battle':
+    case 'lead_gen_battle':
+    default:
+      // Code review battles, lead gen battles, qualified_sales_interest and
+      // all other outcomes share the same envelope.
+      return envelope;
+  }
+}
+/**
+ * Wraps one model response in an AgentArtifact record.
+ *
+ * The response text is converted with `createArtifactContent`, and the
+ * artifact is stamped with the current time as an ISO-8601 string.
+ *
+ * @param agentId - ID of the agent
+ * @param outcomeId - ID of the outcome
+ * @param attemptNumber - Current attempt number
+ * @param content - Generated content
+ * @param lead - Lead data for artifact
+ * @returns AgentArtifact
+ */
+function createArtifact(
+  agentId: string,
+  outcomeId: string,
+  attemptNumber: number,
+  content: string,
+  lead: Lead
+): AgentArtifact {
+  const artifactContent = createArtifactContent(outcomeId, content, lead);
+  const createdAt = new Date().toISOString();
+  const artifact: AgentArtifact = {
+    agentId,
+    outcomeId,
+    attemptNumber,
+    content: artifactContent,
+    timestamp: createdAt,
+  };
+  return artifact;
+}
+/**
+ * Executes a single attempt with retry logic and tool use support.
+ *
+ * Makes up to DEFAULT_MAX_RETRIES tries. Each try sends `prompt` to the
+ * adapter and, when the model requests tools, runs the matching skills and
+ * feeds their results back for at most MAX_TOOL_ITERATIONS rounds. The content
+ * of the last model response becomes the artifact, even if the round limit was
+ * reached while the model was still requesting tools.
+ *
+ * Errors thrown inside a try (by the adapter or by a skill) are caught and
+ * lead to a backoff sleep and another try; after the last try the failure is
+ * reported in the returned result. `tokensUsed` accumulates across all tries:
+ * the usage reported by every response received, plus
+ * `adapter.countTokens(prompt)` as an estimate for every try that threw.
+ *
+ * @param adapter - Model adapter to use
+ * @param prompt - Prompt to send
+ * @param agentId - Agent ID for artifact
+ * @param outcomeId - Outcome ID for artifact
+ * @param attemptNumber - Current attempt number
+ * @param lead - Lead data
+ * @param skillRegistry - Registry of available skills; tool use is disabled
+ *   when it is omitted or exposes no tool definitions
+ * @returns AttemptResult with the artifact on success, or the last error
+ *   message on failure
+ */
+async function executeAttemptWithRetry(
+  adapter: ModelAdapter,
+  prompt: string,
+  agentId: string,
+  outcomeId: string,
+  attemptNumber: number,
+  lead: Lead,
+  skillRegistry?: SkillRegistry
+): Promise<AttemptResult> {
+  let lastError: string | undefined;
+  let totalTokensUsed = 0;
+  // Tool definitions are fetched once and reused by every try
+  const tools = skillRegistry ? getToolDefinitions(skillRegistry) : undefined;
+  const hasTools = tools && tools.length > 0;
+  for (let retry = 0; retry < DEFAULT_MAX_RETRIES; retry++) {
+    try {
+      let response: ModelResponse = await adapter.complete(prompt, {
+        maxTokens: 1024,
+        temperature: 0.7,
+        tools: hasTools ? tools : undefined,
+      });
+      totalTokensUsed += response.tokensUsed;
+      // Tool use loop: entered only if the first response requests tools
+      if (hasTools && response.toolCalls && response.toolCalls.length > 0) {
+        const conversation: ConversationMessage[] = [
+          { role: 'user', content: prompt },
+        ];
+        let toolIterations = 0;
+        // Continue until model stops requesting tools or we hit the limit
+        while (
+          response.toolCalls &&
+          response.toolCalls.length > 0 &&
+          toolIterations < MAX_TOOL_ITERATIONS
+        ) {
+          toolIterations++;
+          // Add assistant message with tool calls
+          conversation.push({
+            role: 'assistant',
+            content: response.content,
+            toolCalls: response.toolCalls,
+          });
+          // Execute each tool call one after another, in the order requested
+          for (const toolCall of response.toolCalls) {
+            const skill = skillRegistry!.get(toolCall.name);
+            let resultContent: string;
+            if (skill) {
+              resultContent = await executeSkill(skill, toolCall.arguments);
+            } else {
+              resultContent = JSON.stringify({
+                error: `Unknown tool: ${toolCall.name}`,
+              });
+            }
+            // Add tool result to conversation, linked to its call by ID
+            conversation.push({
+              role: 'tool',
+              content: resultContent,
+              toolCallId: toolCall.id,
+            });
+          }
+          // Continue conversation with tool results
+          response = await adapter.continueWithToolResults(conversation, {
+            maxTokens: 1024,
+            temperature: 0.7,
+            tools,
+          });
+          totalTokensUsed += response.tokensUsed;
+        }
+      }
+      const artifact = createArtifact(
+        agentId,
+        outcomeId,
+        attemptNumber,
+        response.content,
+        lead
+      );
+      return {
+        success: true,
+        artifact,
+        tokensUsed: totalTokensUsed,
+      };
+    } catch (error) {
+      lastError = error instanceof Error ? error.message : String(error);
+      totalTokensUsed += adapter.countTokens(prompt); // Estimate tokens for failed attempt
+      // No backoff sleep after the final try
+      if (retry < DEFAULT_MAX_RETRIES - 1) {
+        const delay = calculateBackoffDelay(retry);
+        await sleep(delay);
+      }
+    }
+  }
+  return {
+    success: false,
+    tokensUsed: totalTokensUsed,
+    error: `Failed after ${DEFAULT_MAX_RETRIES} retries: ${lastError}`,
+  };
+}
+/**
+ * Decides whether the agent must stop, and why.
+ *
+ * The checks run in a fixed order and the first one that applies wins:
+ * external kill signal, cost ceiling, attempt limit, runtime limit.
+ *
+ * @param attempts - Current attempt count
+ * @param maxAttempts - Maximum allowed attempts
+ * @param costTracker - Cost tracker instance
+ * @param startTime - Run start time (epoch milliseconds, compared against Date.now())
+ * @param timeLimitMs - Time limit in milliseconds
+ * @param onKillSignal - Optional external kill signal callback
+ * @returns KillReason if agent should be killed, null otherwise
+ */
+function checkKillConditions(
+  attempts: number,
+  maxAttempts: number,
+  costTracker: CostTracker,
+  startTime: number,
+  timeLimitMs: number,
+  onKillSignal?: () => boolean
+): KillReason | null {
+  // 1. External kill signal (competitor won)
+  const stoppedExternally = onKillSignal ? onKillSignal() : false;
+  if (stoppedExternally) {
+    return 'competitor_won';
+  }
+  // 2. Cost ceiling (Requirement 10.1)
+  if (isOverBudget(costTracker)) {
+    return 'cost_exceeded';
+  }
+  // 3. Attempt limit (Requirement 4.3)
+  if (attempts >= maxAttempts) {
+    return 'attempts_exceeded';
+  }
+  // 4. Runtime limit (Requirement 10.2)
+  return Date.now() - startTime >= timeLimitMs ? 'timeout' : null;
+}
+/**
+ * Runs an agent to achieve an outcome.
+ *
+ * Implements the agent execution loop with:
+ * - Attempt tracking and limits
+ * - Cost tracking (Requirement 4.2)
+ * - Attempt limit enforcement (Requirement 4.3)
+ * - Token cap enforcement (Requirement 10.1)
+ * - Runtime limit enforcement (Requirement 10.2)
+ * - Skill (tool) execution (Requirement 11.2)
+ * - Exponential backoff for retries
+ *
+ * The loop ends as soon as one attempt yields an artifact (status
+ * 'completed') or a kill condition fires (status 'killed' with a killReason).
+ * Kill conditions are checked before every attempt and again after a failed
+ * one, so `onKillSignal` may be called twice per loop iteration.
+ *
+ * API key resolution: `config.apiKey` if given, otherwise ANTHROPIC_API_KEY
+ * when the provider is 'claude' and OPENAI_API_KEY for every other provider.
+ * The 'ollama' provider is handed to `createAdapter` as 'openai'. If the key
+ * is missing or adapter creation throws, the run is returned with status
+ * 'killed', `error` set and no killReason.
+ *
+ * NOTE(review): `run.error` written by a failed attempt is not cleared when a
+ * later attempt succeeds, so a 'completed' run can still carry an error
+ * message - confirm whether callers rely on this.
+ * NOTE(review): 'ollama' also requires OPENAI_API_KEY to be set here - confirm
+ * this is intended for that provider.
+ *
+ * @param config - Agent run configuration
+ * @returns AgentRun result
+ *
+ * @example
+ * const result = await runAgent({
+ *   agent: salesAgent,
+ *   outcome: qualifiedSalesInterest,
+ *   lead: leadData,
+ * });
+ *
+ * @see Requirements 4.2, 4.3, 10.1, 10.2, 11.2
+ */
+export async function runAgent(config: AgentRunConfig): Promise<AgentRun> {
+  const { agent, outcome, lead, apiKey, onKillSignal } = config;
+  const startTime = Date.now();
+  // Initialize cost tracker (Requirement 4.2)
+  const costTracker = createCostTracker(agent.id, agent.costCeiling);
+  // Initialize run state
+  const run: AgentRun = {
+    agentId: agent.id,
+    outcomeId: outcome.name,
+    status: 'running',
+    attempts: 0,
+    tokensSpent: 0,
+    artifacts: [],
+    durationMs: 0,
+  };
+  // Load skills based on agent's toolAccess configuration (Requirement 11.2)
+  const skillRegistry = agent.toolAccess.length > 0
+    ? loadSkillsForAgent(agent.toolAccess)
+    : undefined;
+  // Create model adapter; any failure here ends the run before the first attempt
+  let adapter: ModelAdapter;
+  try {
+    const resolvedApiKey =
+      apiKey ??
+      (agent.modelProvider === 'claude'
+        ? process.env.ANTHROPIC_API_KEY
+        : process.env.OPENAI_API_KEY);
+    if (!resolvedApiKey) {
+      throw new Error(
+        `Missing API key for ${agent.modelProvider}. Set ${agent.modelProvider === 'claude' ? 'ANTHROPIC_API_KEY' : 'OPENAI_API_KEY'
+        } environment variable.`
+      );
+    }
+    adapter = await createAdapter({
+      provider: agent.modelProvider === 'ollama' ? 'openai' : agent.modelProvider,
+      modelId: agent.modelId,
+      apiKey: resolvedApiKey,
+    });
+  } catch (error) {
+    run.status = 'killed';
+    run.error = error instanceof Error ? error.message : String(error);
+    run.durationMs = Date.now() - startTime;
+    return run;
+  }
+  // Build prompt with outcome-specific context (built once, reused by every attempt)
+  const prompt = buildPrompt(agent.prompt, outcome, lead);
+  // Main attempt loop
+  while (run.status === 'running') {
+    // Check kill conditions before each attempt
+    const killReason = checkKillConditions(
+      run.attempts,
+      outcome.maxAttempts,
+      costTracker,
+      startTime,
+      outcome.timeLimitMs,
+      onKillSignal
+    );
+    if (killReason) {
+      run.status = 'killed';
+      run.killReason = killReason;
+      break;
+    }
+    // Execute attempt with retry logic and skill support
+    run.attempts += 1;
+    const attemptResult = await executeAttemptWithRetry(
+      adapter,
+      prompt,
+      agent.id,
+      outcome.name,
+      run.attempts,
+      lead,
+      skillRegistry
+    );
+    // Record token usage (Requirement 10.5)
+    recordUsage(costTracker, attemptResult.tokensUsed);
+    run.tokensSpent = costTracker.tokensSpent;
+    if (attemptResult.success && attemptResult.artifact) {
+      run.artifacts.push(attemptResult.artifact);
+      // Mark as completed - evaluation happens in league system
+      run.status = 'completed';
+    } else if (attemptResult.error) {
+      // Remember the error; the loop tries again unless a limit has been hit
+      run.error = attemptResult.error;
+      // Check if we should stop due to limits after failed attempt
+      const postAttemptKillReason = checkKillConditions(
+        run.attempts,
+        outcome.maxAttempts,
+        costTracker,
+        startTime,
+        outcome.timeLimitMs,
+        onKillSignal
+      );
+      if (postAttemptKillReason) {
+        run.status = 'killed';
+        run.killReason = postAttemptKillReason;
+      }
+    }
+  }
+  run.durationMs = Date.now() - startTime;
+  return run;
+}
+/**
+ * Produces a canned agent run for testing, without calling any model API.
+ *
+ * Waits between 100 and 299 ms (the exact value is derived from the agent ID,
+ * so it is the same on every call for a given agent), then returns a
+ * completed run with exactly one artifact, one attempt and 500 tokens spent.
+ * The artifact text is a two-task plan for the `swarm_planner` outcome and a
+ * fixed sales message for every other outcome.
+ *
+ * @param config - Agent run configuration
+ * @returns Mock AgentRun result
+ */
+export async function runAgentMock(config: AgentRunConfig): Promise<AgentRun> {
+  const { agent, outcome, lead } = config;
+  const startedAt = Date.now();
+  // Fold the agent ID's UTF-16 code units into a 32-bit hash so that the
+  // simulated processing time is stable for a given agent.
+  const idHash = agent.id
+    .split('')
+    .reduce((acc, unit) => ((acc << 5) - acc + unit.charCodeAt(0)) & 0xffffffff, 0);
+  await sleep(100 + (Math.abs(idHash) % 200));
+  // Pick the mock artifact text for this outcome.
+  const plannerTask = (position: number, description: string) => ({
+    id: `task-${position}`,
+    description,
+    input: { data: lead },
+    priority: position,
+  });
+  const mockContent: string =
+    outcome.name === 'swarm_planner'
+      ? JSON.stringify({
+          tasks: [
+            plannerTask(1, 'Mock task 1: Analyze input data'),
+            plannerTask(2, 'Mock task 2: Generate response'),
+          ],
+        })
+      : `Hello! I've analyzed ${lead.company} and their mission.
+Score: 9/10. Fit: Excellent. Evidence: They are a high-growth product company.
+Summary: Strong ICP match with high revenue potential.
+Recommendation: Prioritize for enterprise outreach.
+Based on our conversation, I understand that you're looking for a solution to improve your sales process. With your team of ${lead.companySize} employees, you're at the perfect size to benefit from our platform.
+I'd love to schedule a demo to show you how we can help. The pricing starts at $99/month for teams your size, and we can discuss next steps during a call.
+Could you confirm your email at ${lead.email} so I can send over the meeting invite?`;
+  const artifact = createArtifact(agent.id, outcome.name, 1, mockContent, lead);
+  const mockRun: AgentRun = {
+    agentId: agent.id,
+    outcomeId: outcome.name,
+    status: 'completed',
+    attempts: 1,
+    tokensSpent: 500,
+    artifacts: [artifact],
+    durationMs: Date.now() - startedAt,
+  };
+  return mockRun;
+}