npm - outcome-cli - Versions diffs - 1.0.0 - Mend

outcome-cli 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (113) hide show

package/README.md +261 -0
package/package.json +95 -0
package/src/agents/README.md +139 -0
package/src/agents/adapters/anthropic.adapter.ts +166 -0
package/src/agents/adapters/dalle.adapter.ts +145 -0
package/src/agents/adapters/gemini.adapter.ts +134 -0
package/src/agents/adapters/imagen.adapter.ts +106 -0
package/src/agents/adapters/nano-banana.adapter.ts +129 -0
package/src/agents/adapters/openai.adapter.ts +165 -0
package/src/agents/adapters/veo.adapter.ts +130 -0
package/src/agents/agent.schema.property.test.ts +379 -0
package/src/agents/agent.schema.test.ts +148 -0
package/src/agents/agent.schema.ts +263 -0
package/src/agents/index.ts +60 -0
package/src/agents/registered-agent.schema.ts +356 -0
package/src/agents/registry.ts +97 -0
package/src/agents/tournament-configs.property.test.ts +266 -0
package/src/cli/README.md +145 -0
package/src/cli/commands/define.ts +79 -0
package/src/cli/commands/list.ts +46 -0
package/src/cli/commands/logs.ts +83 -0
package/src/cli/commands/run.ts +416 -0
package/src/cli/commands/verify.ts +110 -0
package/src/cli/index.ts +81 -0
package/src/config/README.md +128 -0
package/src/config/env.ts +262 -0
package/src/config/index.ts +19 -0
package/src/eval/README.md +318 -0
package/src/eval/ai-judge.test.ts +435 -0
package/src/eval/ai-judge.ts +368 -0
package/src/eval/code-validators.ts +414 -0
package/src/eval/evaluateOutcome.property.test.ts +1174 -0
package/src/eval/evaluateOutcome.ts +591 -0
package/src/eval/immigration-validators.ts +122 -0
package/src/eval/index.ts +90 -0
package/src/eval/judge-cache.ts +402 -0
package/src/eval/tournament-validators.property.test.ts +439 -0
package/src/eval/validators.property.test.ts +1118 -0
package/src/eval/validators.ts +1199 -0
package/src/eval/weighted-scorer.ts +285 -0
package/src/index.ts +17 -0
package/src/league/README.md +188 -0
package/src/league/health-check.ts +353 -0
package/src/league/index.ts +93 -0
package/src/league/killAgent.ts +151 -0
package/src/league/league.test.ts +1151 -0
package/src/league/runLeague.ts +843 -0
package/src/league/scoreAgent.ts +175 -0
package/src/modules/omnibridge/__tests__/.gitkeep +1 -0
package/src/modules/omnibridge/__tests__/auth-tunnel.property.test.ts +524 -0
package/src/modules/omnibridge/__tests__/deterministic-logger.property.test.ts +965 -0
package/src/modules/omnibridge/__tests__/ghost-api.property.test.ts +461 -0
package/src/modules/omnibridge/__tests__/omnibridge-integration.test.ts +542 -0
package/src/modules/omnibridge/__tests__/parallel-executor.property.test.ts +671 -0
package/src/modules/omnibridge/__tests__/semantic-normalizer.property.test.ts +521 -0
package/src/modules/omnibridge/__tests__/semantic-normalizer.test.ts +254 -0
package/src/modules/omnibridge/__tests__/session-vault.property.test.ts +367 -0
package/src/modules/omnibridge/__tests__/shadow-session.property.test.ts +523 -0
package/src/modules/omnibridge/__tests__/triangulation-engine.property.test.ts +292 -0
package/src/modules/omnibridge/__tests__/verification-engine.property.test.ts +769 -0
package/src/modules/omnibridge/api/.gitkeep +1 -0
package/src/modules/omnibridge/api/ghost-api.ts +1087 -0
package/src/modules/omnibridge/auth/.gitkeep +1 -0
package/src/modules/omnibridge/auth/auth-tunnel.ts +843 -0
package/src/modules/omnibridge/auth/session-vault.ts +577 -0
package/src/modules/omnibridge/core/.gitkeep +1 -0
package/src/modules/omnibridge/core/semantic-normalizer.ts +702 -0
package/src/modules/omnibridge/core/triangulation-engine.ts +530 -0
package/src/modules/omnibridge/core/types.ts +610 -0
package/src/modules/omnibridge/execution/.gitkeep +1 -0
package/src/modules/omnibridge/execution/deterministic-logger.ts +629 -0
package/src/modules/omnibridge/execution/parallel-executor.ts +542 -0
package/src/modules/omnibridge/execution/shadow-session.ts +794 -0
package/src/modules/omnibridge/index.ts +212 -0
package/src/modules/omnibridge/omnibridge.ts +510 -0
package/src/modules/omnibridge/verification/.gitkeep +1 -0
package/src/modules/omnibridge/verification/verification-engine.ts +783 -0
package/src/outcomes/README.md +75 -0
package/src/outcomes/acquire-pilot-customer.ts +297 -0
package/src/outcomes/code-delivery-outcomes.ts +89 -0
package/src/outcomes/code-outcomes.ts +256 -0
package/src/outcomes/code_review_battle.test.ts +135 -0
package/src/outcomes/code_review_battle.ts +135 -0
package/src/outcomes/cold_email_battle.ts +97 -0
package/src/outcomes/content_creation_battle.ts +160 -0
package/src/outcomes/f1_stem_opt_compliance.ts +61 -0
package/src/outcomes/index.ts +107 -0
package/src/outcomes/lead_gen_battle.test.ts +113 -0
package/src/outcomes/lead_gen_battle.ts +99 -0
package/src/outcomes/outcome.schema.property.test.ts +229 -0
package/src/outcomes/outcome.schema.ts +187 -0
package/src/outcomes/qualified_sales_interest.ts +118 -0
package/src/outcomes/swarm_planner.property.test.ts +370 -0
package/src/outcomes/swarm_planner.ts +96 -0
package/src/outcomes/web_extraction.ts +234 -0
package/src/runtime/README.md +220 -0
package/src/runtime/agentRunner.test.ts +341 -0
package/src/runtime/agentRunner.ts +746 -0
package/src/runtime/claudeAdapter.ts +232 -0
package/src/runtime/costTracker.ts +123 -0
package/src/runtime/index.ts +34 -0
package/src/runtime/modelAdapter.property.test.ts +305 -0
package/src/runtime/modelAdapter.ts +144 -0
package/src/runtime/openaiAdapter.ts +235 -0
package/src/utils/README.md +122 -0
package/src/utils/command-runner.ts +134 -0
package/src/utils/cost-guard.ts +379 -0
package/src/utils/errors.test.ts +290 -0
package/src/utils/errors.ts +442 -0
package/src/utils/index.ts +37 -0
package/src/utils/logger.test.ts +361 -0
package/src/utils/logger.ts +419 -0
package/src/utils/output-parsers.ts +216 -0

package/src/outcomes/code-outcomes.ts ADDED Viewed

@@ -0,0 +1,256 @@
+/**
+ * Code Outcomes - SWE-bench style outcome definitions
+ *
+ * Defines outcome schemas for code generation tasks that integrate
+ * with our existing bounty and evaluation system.
+ *
+ * @module outcomes/code-outcomes
+ */
+import type { TestCase } from '../eval/code-validators.js';
+/**
+ * Code-specific outcome for SWE-bench style tasks.
+ * Extends our standard outcome pattern for code generation.
+ *
+ * Declared as a standalone interface (no `extends` clause). Only `starterCode`,
+ * `filePaths`, `requiredSyntax`, `structureRequirements` and
+ * `qualityRequirements` are optional; every other field must be supplied.
+ */
+export interface CodeOutcome {
+  /** Unique identifier for this outcome */
+  name: string;
+  /** Human-readable description */
+  description: string;
+  /** Category: bug-fix, feature, refactor, optimization, or test */
+  category: 'bug-fix' | 'feature' | 'refactor' | 'optimization' | 'test';
+  /** Programming language the solution is expected to be written in */
+  language: 'python' | 'typescript' | 'javascript' | 'go' | 'rust';
+  /** Difficulty level from 1 to 5 (maps to training levels) */
+  difficulty: 1 | 2 | 3 | 4 | 5;
+  /** Payout amount in USD for successful completion */
+  payoutAmount: number;
+  /** The issue/problem description (SWE-bench style), given as free-form text */
+  issueDescription: string;
+  /** Keywords that solution should address */
+  issueKeywords: string[];
+  /** Optional: Starting code/context */
+  starterCode?: string;
+  /** Optional: File paths involved */
+  filePaths?: string[];
+  /** Test cases for validation (shape defined by `TestCase` in eval/code-validators) */
+  testCases: TestCase[];
+  /** Optional: Required syntax elements, given as plain strings */
+  requiredSyntax?: string[];
+  /** Optional: Structure requirements; every individual constraint is itself optional */
+  structureRequirements?: {
+    mustHaveFunction?: boolean;
+    mustHaveClass?: boolean;
+    mustHaveImports?: boolean;
+    minLines?: number; // NOTE(review): inclusive vs. exclusive bound is not stated here - confirm against the validator
+    maxLines?: number;
+  };
+  /** Optional: Quality requirements; every individual constraint is itself optional */
+  qualityRequirements?: {
+    mustHaveErrorHandling?: boolean;
+    mustHaveComments?: boolean;
+    maxComplexity?: number; // NOTE(review): complexity metric is not named here - confirm against the validator
+    noConsoleLog?: boolean;
+  };
+}
+/**
+ * Pre-defined SWE-bench style outcomes for WAIESL battles.
+ * These map to different training levels and agent specializations.
+ *
+ * The record is keyed by outcome name, and each entry's `name` repeats its key.
+ * It currently holds one TypeScript outcome per difficulty level (1 through 5),
+ * with payouts rising from 50 to 500.
+ */
+export const CODE_OUTCOMES: Record<string, CodeOutcome> = {
+  // Level 1: Basic code tasks
+  'fix-null-check': {
+    name: 'fix-null-check',
+    description: 'Add proper null/undefined checking to prevent runtime errors',
+    category: 'bug-fix',
+    language: 'typescript',
+    difficulty: 1,
+    payoutAmount: 50,
+    issueDescription: `
+      Bug Report: Application crashes with "Cannot read property of undefined"
+      When users submit a form with optional fields left empty, the application crashes.
+      The processUserData function doesn't handle cases where optional fields are undefined.
+      Expected: Function should gracefully handle undefined values
+      Actual: TypeError: Cannot read property 'trim' of undefined
+    `,
+    issueKeywords: ['null', 'undefined', 'optional', 'check', 'trim'],
+    testCases: [
+      { name: 'handles undefined', input: undefined, expectedOutput: '', type: 'edge-case' },
+      { name: 'handles null', input: null, expectedOutput: '', type: 'edge-case' },
+      { name: 'handles valid string', input: '  hello  ', expectedOutput: 'hello', type: 'unit' },
+    ],
+    requiredSyntax: ['if', '===', 'undefined'], // NOTE(review): if matched literally, `== null` or `?.` based fixes would not qualify - confirm against the validator
+    structureRequirements: { mustHaveFunction: true, minLines: 3 },
+  },
+  // Level 2: Intermediate bug fixes
+  'fix-async-race-condition': {
+    name: 'fix-async-race-condition',
+    description: 'Fix race condition in async data fetching',
+    category: 'bug-fix',
+    language: 'typescript',
+    difficulty: 2,
+    payoutAmount: 100,
+    issueDescription: `
+      Bug Report: Stale data displayed after rapid navigation
+      When users quickly navigate between pages, sometimes stale data from a previous
+      request is displayed instead of the current page's data.
+      Root cause: Multiple concurrent fetch requests without cancellation.
+      Expected: Only show data from the most recent request
+      Actual: Random stale data appears
+    `,
+    issueKeywords: ['async', 'race', 'abort', 'cancel', 'fetch', 'stale'],
+    testCases: [
+      { name: 'cancels previous request', input: { requests: 3 }, expectedOutput: { completed: 1 }, type: 'unit' },
+      { name: 'returns latest data', input: { sequence: [1, 2, 3] }, expectedOutput: 3, type: 'integration' },
+    ],
+    requiredSyntax: ['async', 'await', 'AbortController'],
+    structureRequirements: { mustHaveFunction: true, mustHaveImports: false, minLines: 10 },
+    qualityRequirements: { mustHaveErrorHandling: true },
+  },
+  // Level 3: Feature implementation
+  'implement-retry-logic': {
+    name: 'implement-retry-logic',
+    description: 'Implement exponential backoff retry logic for API calls',
+    category: 'feature',
+    language: 'typescript',
+    difficulty: 3,
+    payoutAmount: 200,
+    issueDescription: `
+      Feature Request: Add retry logic with exponential backoff
+      API calls sometimes fail due to transient network issues. We need a retry
+      mechanism that:
+      1. Retries failed requests up to 3 times
+      2. Uses exponential backoff (1s, 2s, 4s delays)
+      3. Only retries on specific error codes (500, 502, 503, 504)
+      4. Throws after all retries exhausted
+      Should be a reusable utility function.
+    `,
+    issueKeywords: ['retry', 'exponential', 'backoff', 'delay', 'attempts', 'error'],
+    testCases: [
+      { name: 'succeeds on first try', input: { failCount: 0 }, expectedOutput: { attempts: 1 }, type: 'unit' },
+      { name: 'retries on failure', input: { failCount: 2 }, expectedOutput: { attempts: 3 }, type: 'unit' },
+      { name: 'throws after max retries', input: { failCount: 5 }, expectedOutput: { error: true }, type: 'edge-case' },
+    ],
+    requiredSyntax: ['async', 'await', 'for', 'throw', 'delay'], // NOTE(review): 'delay' is an identifier rather than a language keyword - confirm this is intended
+    structureRequirements: { mustHaveFunction: true, minLines: 15, maxLines: 50 },
+    qualityRequirements: { mustHaveErrorHandling: true, mustHaveComments: true },
+  },
+  // Level 4: Complex refactoring
+  'refactor-to-strategy-pattern': {
+    name: 'refactor-to-strategy-pattern',
+    description: 'Refactor switch statement to Strategy pattern',
+    category: 'refactor',
+    language: 'typescript',
+    difficulty: 4,
+    payoutAmount: 350,
+    issueDescription: `
+      Refactoring Request: Replace payment switch with Strategy pattern
+      Current code has a massive switch statement for payment processing:
+      - CreditCard
+      - PayPal
+      - Crypto
+      - BankTransfer
+      Each case has 50+ lines. Adding new payment methods requires modifying
+      the switch. This violates Open/Closed principle.
+      Refactor to use Strategy pattern:
+      1. Create PaymentStrategy interface
+      2. Implement concrete strategies for each payment type
+      3. Create PaymentContext that uses strategies
+      4. Ensure existing tests still pass
+    `,
+    issueKeywords: ['strategy', 'pattern', 'interface', 'class', 'payment', 'refactor'],
+    testCases: [
+      { name: 'processes credit card', input: { type: 'credit', amount: 100 }, expectedOutput: { success: true }, type: 'integration' },
+      { name: 'processes paypal', input: { type: 'paypal', amount: 50 }, expectedOutput: { success: true }, type: 'integration' },
+      { name: 'extensible for new types', input: { type: 'new' }, expectedOutput: { extensible: true }, type: 'unit' },
+    ],
+    requiredSyntax: ['interface', 'class', 'implements', 'constructor'],
+    structureRequirements: { mustHaveClass: true, mustHaveFunction: true, minLines: 40 },
+    qualityRequirements: { mustHaveComments: true, maxComplexity: 10 },
+  },
+  // Level 5: Elite challenge (Codeon battle)
+  'optimize-algorithm-complexity': {
+    name: 'optimize-algorithm-complexity',
+    description: 'Optimize O(n²) algorithm to O(n log n) or better',
+    category: 'optimization',
+    language: 'typescript',
+    difficulty: 5,
+    payoutAmount: 500,
+    issueDescription: `
+      Performance Critical: findDuplicates is too slow on large datasets
+      Current implementation uses nested loops - O(n²) complexity.
+      With 100k items, it takes 30+ seconds.
+      Requirements:
+      1. Maintain same function signature
+      2. Achieve O(n log n) or O(n) complexity
+      3. Handle edge cases (empty array, single element, all duplicates)
+      4. Memory usage should be reasonable (not O(n²) space)
+      Current (slow) implementation:
+      function findDuplicates(arr: number[]): number[] {
+        const duplicates = [];
+        for (let i = 0; i < arr.length; i++) {
+          for (let j = i + 1; j < arr.length; j++) {
+            if (arr[i] === arr[j] && !duplicates.includes(arr[i])) {
+              duplicates.push(arr[i]);
+            }
+          }
+        }
+        return duplicates;
+      }
+    `,
+    issueKeywords: ['optimize', 'complexity', 'O(n)', 'performance', 'Set', 'Map', 'hash'],
+    testCases: [
+      { name: 'finds duplicates', input: [1, 2, 2, 3, 3, 3], expectedOutput: [2, 3], type: 'unit' },
+      { name: 'handles empty', input: [], expectedOutput: [], type: 'edge-case' },
+      { name: 'handles no duplicates', input: [1, 2, 3], expectedOutput: [], type: 'edge-case' },
+      { name: 'handles large array efficiently', input: { size: 100000 }, expectedOutput: { timeMs: '<1000' }, type: 'integration' }, // NOTE(review): threshold is the string '<1000', not a number - confirm the validator interprets it
+    ],
+    requiredSyntax: ['Set', 'Map', 'function'], // NOTE(review): if every entry is mandatory, a Set-only or Map-only solution would not qualify - confirm against the validator
+    structureRequirements: { mustHaveFunction: true, minLines: 5, maxLines: 30 },
+    qualityRequirements: { mustHaveComments: true, noConsoleLog: true },
+  },
+};
+/**
+ * Collects every pre-defined code outcome whose difficulty equals `level`.
+ *
+ * @param level - Difficulty tier to select (1 through 5).
+ * @returns The matching entries of `CODE_OUTCOMES`, in that record's
+ *          enumeration order; an empty array when nothing matches.
+ */
+export function getCodeOutcomesByLevel(level: 1 | 2 | 3 | 4 | 5): CodeOutcome[] {
+  const atLevel: CodeOutcome[] = [];
+  for (const candidate of Object.values(CODE_OUTCOMES)) {
+    if (candidate.difficulty === level) {
+      atLevel.push(candidate);
+    }
+  }
+  return atLevel;
+}
+/**
+ * Collects every pre-defined code outcome that belongs to `category`.
+ *
+ * @param category - One of the category literals accepted by `CodeOutcome`.
+ * @returns The matching entries of `CODE_OUTCOMES`, in that record's
+ *          enumeration order; an empty array when nothing matches.
+ */
+export function getCodeOutcomesByCategory(
+  category: CodeOutcome['category']
+): CodeOutcome[] {
+  // Named predicate keeps the selection rule separate from the traversal.
+  const inCategory = (candidate: CodeOutcome): boolean => candidate.category === category;
+  const everyOutcome = Object.values(CODE_OUTCOMES);
+  return everyOutcome.filter(inCategory);
+}
+/**
+ * Collects every pre-defined code outcome targeting `language`.
+ *
+ * @param language - One of the language literals accepted by `CodeOutcome`.
+ * @returns The matching entries of `CODE_OUTCOMES`, in that record's
+ *          enumeration order; an empty array when nothing matches.
+ */
+export function getCodeOutcomesByLanguage(
+  language: CodeOutcome['language']
+): CodeOutcome[] {
+  return Object.values(CODE_OUTCOMES).reduce<CodeOutcome[]>((selected, candidate) => {
+    if (candidate.language === language) {
+      selected.push(candidate);
+    }
+    return selected;
+  }, []);
+}

package/src/outcomes/code_review_battle.test.ts ADDED Viewed

@@ -0,0 +1,135 @@
+/**
+ * Code Review Battle Outcome Tests
+ *
+ * Verifies the code review battle outcome configuration.
+ *
+ * @module outcomes/code_review_battle.test
+ */
+import { describe, it, expect } from 'vitest';
+import { validateOutcome } from './outcome.schema.js';
+import {
+  codeReviewBattle,
+  CODE_REVIEW_PAYOUT,
+  CODE_REVIEW_MAX_ATTEMPTS,
+  CODE_REVIEW_TIME_LIMIT_MS,
+} from './code_review_battle.js';
+// Checks the exported constants and the shape of the `codeReviewBattle` outcome definition.
+describe('Code Review Battle Outcome', () => {
+  // Finds a success criterion by name; resolves to undefined when no entry matches.
+  const criterionNamed = (wanted: string) =>
+    codeReviewBattle.successCriteria.find((entry) => entry.name === wanted);
+  describe('Constants', () => {
+    it('should have payout amount of $50', () => {
+      const payout = CODE_REVIEW_PAYOUT;
+      expect(payout).toBe(50);
+    });
+    it('should have max attempts of 3', () => {
+      const attempts = CODE_REVIEW_MAX_ATTEMPTS;
+      expect(attempts).toBe(3);
+    });
+    it('should have time limit of 300000ms (5 minutes)', () => {
+      const limitMs = CODE_REVIEW_TIME_LIMIT_MS;
+      expect(limitMs).toBe(300000);
+    });
+  });
+  describe('Outcome Configuration', () => {
+    it('should have correct payout amount', () => {
+      const { payoutAmount } = codeReviewBattle;
+      expect(payoutAmount).toBe(50);
+    });
+    it('should have correct max attempts', () => {
+      const { maxAttempts } = codeReviewBattle;
+      expect(maxAttempts).toBe(3);
+    });
+    it('should have correct time limit', () => {
+      const { timeLimitMs } = codeReviewBattle;
+      expect(timeLimitMs).toBe(300000);
+    });
+    it('should have name "code_review_battle"', () => {
+      const { name } = codeReviewBattle;
+      expect(name).toBe('code_review_battle');
+    });
+    it('should have a description', () => {
+      const { description } = codeReviewBattle;
+      expect(description).toBeTruthy();
+      expect(typeof description).toBe('string');
+    });
+  });
+  describe('Schema Validation', () => {
+    it('should validate against OutcomeSchema', () => {
+      const verdict = validateOutcome(codeReviewBattle);
+      expect(verdict.valid).toBe(true);
+      expect(verdict.errors).toHaveLength(0);
+    });
+  });
+  describe('Success Criteria', () => {
+    it('should have 4 success criteria', () => {
+      const { successCriteria } = codeReviewBattle;
+      expect(successCriteria).toHaveLength(4);
+    });
+    it('should include security_vulnerability criterion', () => {
+      const found = criterionNamed('security_vulnerability');
+      expect(found).toBeDefined();
+      expect(found?.validator).toBe('validateSecurityIssue');
+      expect(found?.params).toEqual({ requiredSeverity: 'CRITICAL' });
+    });
+    it('should include performance_bottleneck criterion', () => {
+      const found = criterionNamed('performance_bottleneck');
+      expect(found).toBeDefined();
+      expect(found?.validator).toBe('validatePerformanceIssue');
+      expect(found?.params).toEqual({});
+    });
+    it('should include zero_noise criterion', () => {
+      const found = criterionNamed('zero_noise');
+      expect(found).toBeDefined();
+      expect(found?.validator).toBe('validateNoiseFreeness');
+      expect(found?.params).toEqual({});
+    });
+    it('should include complexity_reduction criterion', () => {
+      const found = criterionNamed('complexity_reduction');
+      expect(found).toBeDefined();
+      expect(found?.validator).toBe('validateComplexityReduction');
+      expect(found?.params).toEqual({ minReduction: 2 });
+    });
+  });
+  describe('Failure Reasons', () => {
+    it('should have 4 failure reasons', () => {
+      const reasons = codeReviewBattle.failureReasons;
+      expect(reasons).toHaveLength(4);
+    });
+    it('should include security vulnerability failure reason', () => {
+      const reasons = codeReviewBattle.failureReasons;
+      expect(reasons).toContain('No critical security vulnerability identified (SQLi/XSS)');
+    });
+    it('should include performance bottleneck failure reason', () => {
+      const reasons = codeReviewBattle.failureReasons;
+      expect(reasons).toContain('No performance bottleneck identified (N+1 queries)');
+    });
+    it('should include noise failure reason', () => {
+      const reasons = codeReviewBattle.failureReasons;
+      expect(reasons).toContain('Comments found outside source diff boundaries (noise)');
+    });
+    it('should include complexity reduction failure reason', () => {
+      const reasons = codeReviewBattle.failureReasons;
+      expect(reasons).toContain(
+        'Refactor suggestion does not reduce complexity by at least 2 points'
+      );
+    });
+  });
+});

package/src/outcomes/code_review_battle.ts ADDED Viewed

@@ -0,0 +1,135 @@
+/**
+ * Code Review Battle Outcome
+ *
+ * Tournament Seed bounty for the WAI Championship Q1 Tournament.
+ * Agents compete to perform expert code review identifying security
+ * vulnerabilities, performance bottlenecks, and complexity-reducing refactors.
+ *
+ * @module outcomes/code_review_battle
+ * @see Requirements 1.1, 1.2, 1.3, 8.1
+ */
+import type { Outcome } from './outcome.schema.js';
+/**
+ * Payout amount for successfully completing a code review battle.
+ * Set to $50 as per Requirements 1.1.
+ * Used as `payoutAmount` of the `codeReviewBattle` outcome in this module.
+ */
+export const CODE_REVIEW_PAYOUT = 50;
+/**
+ * Maximum number of attempts an agent can make to achieve this outcome.
+ * Set to 3 as per Requirements 1.2.
+ * Used as `maxAttempts` of the `codeReviewBattle` outcome in this module.
+ */
+export const CODE_REVIEW_MAX_ATTEMPTS = 3;
+/**
+ * Time limit for achieving the outcome (5 minutes in milliseconds).
+ * Set to 300000ms (5 * 60 * 1000) as per Requirements 1.3.
+ * Used as `timeLimitMs` of the `codeReviewBattle` outcome in this module.
+ */
+export const CODE_REVIEW_TIME_LIMIT_MS = 300000;
+/**
+ * Represents a code review issue identified by an agent.
+ *
+ * NOTE(review): the `security_vulnerability` criterion in this module passes
+ * `requiredSeverity: 'CRITICAL'` to its validator; presumably that is compared
+ * against `severity` on issues of type 'security' - confirm against
+ * validateSecurityIssue.
+ */
+export interface CodeReviewIssue {
+  /** Type of issue: security, performance, style, or logic */
+  type: 'security' | 'performance' | 'style' | 'logic';
+  /** Severity level of the issue, listed here from most to least severe */
+  severity: 'CRITICAL' | 'HIGH' | 'MEDIUM' | 'LOW';
+  /** Description of the issue */
+  description: string;
+  /** Optional line number where the issue was found (numbering base is not specified here) */
+  lineNumber?: number;
+}
+/**
+ * Represents a code review comment on a specific line.
+ *
+ * NOTE(review): presumably these are the comments inspected by the
+ * `zero_noise` criterion in this module - confirm against validateNoiseFreeness.
+ */
+export interface CodeReviewComment {
+  /** The exact content of the line being commented on (must be in source diff) */
+  lineContent: string;
+  /** The review comment text */
+  comment: string;
+  /** Line number in the source diff (required, unlike `CodeReviewIssue.lineNumber`) */
+  lineNumber: number;
+}
+/**
+ * Represents a refactoring suggestion that reduces cyclomatic complexity.
+ *
+ * NOTE(review): the `complexity_reduction` criterion in this module passes
+ * `minReduction: 2`; presumably the reduction is computed as
+ * `originalComplexity - suggestedComplexity` - confirm against
+ * validateComplexityReduction.
+ */
+export interface RefactorSuggestion {
+  /** Original cyclomatic complexity of the code */
+  originalComplexity: number;
+  /** Suggested cyclomatic complexity after refactoring */
+  suggestedComplexity: number;
+  /** Description of the refactoring suggestion */
+  description: string;
+}
+/**
+ * Represents the complete artifact produced by an agent for code review.
+ *
+ * NOTE(review): `refactorSuggestion` is optional in this type, yet the
+ * `codeReviewBattle` outcome lists a complexity-reducing refactor among its
+ * success criteria, so an artifact without one presumably cannot succeed -
+ * confirm against the validators.
+ */
+export interface CodeReviewArtifact {
+  /** List of issues identified in the code */
+  issues: CodeReviewIssue[];
+  /** List of comments on specific lines */
+  comments: CodeReviewComment[];
+  /** Optional refactoring suggestion (at most one per artifact) */
+  refactorSuggestion?: RefactorSuggestion;
+}
+/**
+ * Code Review Battle Outcome Definition
+ *
+ * Success requires meeting ALL 4 criteria:
+ * 1. Identify at least one CRITICAL security vulnerability (SQLi/XSS)
+ * 2. Identify at least one performance bottleneck (N+1 queries)
+ * 3. All comments must reside within source diff boundaries (Zero-Noise Standard)
+ * 4. Suggest a refactor that reduces cyclomatic complexity by at least 2 points
+ *
+ * Validators are referenced by name (string) and receive the `params` object
+ * given alongside them. `failureReasons` holds one message per criterion,
+ * listed in the same order as `successCriteria`.
+ * NOTE(review): whether the evaluator relies on that index alignment is not
+ * visible here - confirm before reordering either list.
+ *
+ * Also exported as this module's default export.
+ *
+ * @see Requirements 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 8.1
+ */
+export const codeReviewBattle: Outcome = {
+  name: 'code_review_battle',
+  description:
+    'Expert Code Review Challenge - Identify security vulnerabilities, performance bottlenecks, and suggest complexity-reducing refactors with zero noise.',
+  payoutAmount: CODE_REVIEW_PAYOUT,
+  maxAttempts: CODE_REVIEW_MAX_ATTEMPTS,
+  timeLimitMs: CODE_REVIEW_TIME_LIMIT_MS,
+  successCriteria: [
+    {
+      // Requirement 1.4: Verify at least one security vulnerability with CRITICAL severity
+      name: 'security_vulnerability',
+      validator: 'validateSecurityIssue',
+      params: { requiredSeverity: 'CRITICAL' },
+    },
+    {
+      // Requirement 1.5: Verify at least one performance bottleneck (no parameters needed)
+      name: 'performance_bottleneck',
+      validator: 'validatePerformanceIssue',
+      params: {},
+    },
+    {
+      // Requirement 1.6: Verify all comments reside within source diff boundaries (no parameters needed)
+      name: 'zero_noise',
+      validator: 'validateNoiseFreeness',
+      params: {},
+    },
+    {
+      // Requirement 1.7: Verify refactor suggestion reduces complexity by at least 2
+      name: 'complexity_reduction',
+      validator: 'validateComplexityReduction',
+      params: { minReduction: 2 },
+    },
+  ],
+  failureReasons: [
+    'No critical security vulnerability identified (SQLi/XSS)',
+    'No performance bottleneck identified (N+1 queries)',
+    'Comments found outside source diff boundaries (noise)',
+    'Refactor suggestion does not reduce complexity by at least 2 points',
+  ],
+};
+export default codeReviewBattle;

package/src/outcomes/cold_email_battle.ts ADDED Viewed

@@ -0,0 +1,97 @@
+/**
+ * Cold Email Battle Outcome
+ *
+ * Viral content battle: Claude vs GPT-4 vs Gemini
+ * Agents compete to write the best cold email to a YC founder.
+ *
+ * @module outcomes/cold_email_battle
+ */
+import type { Outcome } from './outcome.schema.js';
+/**
+ * Target prospect for the cold email battle
+ *
+ * Fields:
+ * - `name`: the prospect's name
+ * - `company`: the company the prospect works at
+ * - `role`: the prospect's role or title at that company
+ * - `companyDescription`: short free-text description of the company
+ * - `recentNews`: optional free-text note about recent company news
+ * - `painPoints`: optional list of free-text pain points
+ */
+export interface ColdEmailTarget {
+  name: string;
+  company: string;
+  role: string;
+  companyDescription: string;
+  recentNews?: string;
+  painPoints?: string[];
+}
+/**
+ * Cold email artifact produced by an agent
+ *
+ * Fields:
+ * - `subject`: the email subject line
+ * - `body`: the email body text
+ * - `wordCount`: a word count carried with the artifact
+ *   (NOTE(review): presumably the word count of `body`, and whether validators
+ *   trust it or recount is not visible here - confirm)
+ * - `personalizationScore`: numeric personalization score
+ *   (NOTE(review): scale and who computes it are not defined here - confirm)
+ * - `callToAction`: the call-to-action text
+ */
+export interface ColdEmailArtifact {
+  subject: string;
+  body: string;
+  wordCount: number;
+  personalizationScore: number;
+  callToAction: string;
+}
+/**
+ * Default target for the viral battle
+ *
+ * Fills in every `ColdEmailTarget` field, including the optional
+ * `recentNews` and `painPoints`.
+ */
+export const DEFAULT_TARGET: ColdEmailTarget = {
+  name: 'Sarah Chen',
+  company: 'TechFlow AI',
+  role: 'CEO & Co-founder',
+  companyDescription: 'YC W24 startup building AI-powered workflow automation for enterprise teams',
+  recentNews: 'Just raised $4M seed round, hiring aggressively',
+  painPoints: [
+    'Scaling engineering team while maintaining quality',
+    'Evaluating AI tools for internal use',
+    'Building developer community around their product'
+  ]
+};
+/**
+ * Cold Email Battle Outcome Definition
+ *
+ * Success criteria configured below (4):
+ * 1. Subject line of at most 50 characters (`maxLength: 50`)
+ * 2. Body between 50-150 words (`min: 50`, `max: 150`)
+ * 3. Contains personalization (`requiredElements` lists company and role;
+ *    recent news is not among the configured elements)
+ * 4. Has clear call-to-action
+ *
+ * Professional tone (no spam triggers) is a stated goal of the battle but has
+ * no entry in `successCriteria`, so this definition does not check it.
+ *
+ * NOTE(review): the personalization failure reason says "company or role"
+ * while both are listed as required elements - confirm whether
+ * validatePersonalization needs all of them or any one.
+ * NOTE(review): whether the 50-character and 50/150-word bounds are inclusive
+ * is decided by the validators, which are not visible here.
+ *
+ * Also exported as this module's default export.
+ */
+export const coldEmailBattle: Outcome = {
+  name: 'cold_email_battle',
+  description: 'Cold Email Showdown - Write the most compelling cold email to a YC founder. Judged on personalization, clarity, and likelihood to get a response.',
+  payoutAmount: 0, // Sandbox mode: no payout
+  maxAttempts: 1,
+  timeLimitMs: 60000, // 60 seconds
+  successCriteria: [
+    {
+      name: 'subject_length',
+      validator: 'validateSubjectLength',
+      params: { maxLength: 50 }
+    },
+    {
+      name: 'body_word_count',
+      validator: 'validateWordCount',
+      params: { min: 50, max: 150 }
+    },
+    {
+      name: 'has_personalization',
+      validator: 'validatePersonalization',
+      params: { requiredElements: ['company', 'role'] }
+    },
+    {
+      name: 'has_cta',
+      validator: 'validateCallToAction',
+      params: {}
+    }
+  ],
+  failureReasons: [
+    'Subject line too long (max 50 characters)',
+    'Body too short or too long (50-150 words required)',
+    'Missing personalization - must mention company or role',
+    'No clear call-to-action'
+  ]
+};
+export default coldEmailBattle;