npm - outcome-cli - Versions diffs - 1.0.0 - Mend

outcome-cli 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (113) hide show

package/README.md +261 -0
package/package.json +95 -0
package/src/agents/README.md +139 -0
package/src/agents/adapters/anthropic.adapter.ts +166 -0
package/src/agents/adapters/dalle.adapter.ts +145 -0
package/src/agents/adapters/gemini.adapter.ts +134 -0
package/src/agents/adapters/imagen.adapter.ts +106 -0
package/src/agents/adapters/nano-banana.adapter.ts +129 -0
package/src/agents/adapters/openai.adapter.ts +165 -0
package/src/agents/adapters/veo.adapter.ts +130 -0
package/src/agents/agent.schema.property.test.ts +379 -0
package/src/agents/agent.schema.test.ts +148 -0
package/src/agents/agent.schema.ts +263 -0
package/src/agents/index.ts +60 -0
package/src/agents/registered-agent.schema.ts +356 -0
package/src/agents/registry.ts +97 -0
package/src/agents/tournament-configs.property.test.ts +266 -0
package/src/cli/README.md +145 -0
package/src/cli/commands/define.ts +79 -0
package/src/cli/commands/list.ts +46 -0
package/src/cli/commands/logs.ts +83 -0
package/src/cli/commands/run.ts +416 -0
package/src/cli/commands/verify.ts +110 -0
package/src/cli/index.ts +81 -0
package/src/config/README.md +128 -0
package/src/config/env.ts +262 -0
package/src/config/index.ts +19 -0
package/src/eval/README.md +318 -0
package/src/eval/ai-judge.test.ts +435 -0
package/src/eval/ai-judge.ts +368 -0
package/src/eval/code-validators.ts +414 -0
package/src/eval/evaluateOutcome.property.test.ts +1174 -0
package/src/eval/evaluateOutcome.ts +591 -0
package/src/eval/immigration-validators.ts +122 -0
package/src/eval/index.ts +90 -0
package/src/eval/judge-cache.ts +402 -0
package/src/eval/tournament-validators.property.test.ts +439 -0
package/src/eval/validators.property.test.ts +1118 -0
package/src/eval/validators.ts +1199 -0
package/src/eval/weighted-scorer.ts +285 -0
package/src/index.ts +17 -0
package/src/league/README.md +188 -0
package/src/league/health-check.ts +353 -0
package/src/league/index.ts +93 -0
package/src/league/killAgent.ts +151 -0
package/src/league/league.test.ts +1151 -0
package/src/league/runLeague.ts +843 -0
package/src/league/scoreAgent.ts +175 -0
package/src/modules/omnibridge/__tests__/.gitkeep +1 -0
package/src/modules/omnibridge/__tests__/auth-tunnel.property.test.ts +524 -0
package/src/modules/omnibridge/__tests__/deterministic-logger.property.test.ts +965 -0
package/src/modules/omnibridge/__tests__/ghost-api.property.test.ts +461 -0
package/src/modules/omnibridge/__tests__/omnibridge-integration.test.ts +542 -0
package/src/modules/omnibridge/__tests__/parallel-executor.property.test.ts +671 -0
package/src/modules/omnibridge/__tests__/semantic-normalizer.property.test.ts +521 -0
package/src/modules/omnibridge/__tests__/semantic-normalizer.test.ts +254 -0
package/src/modules/omnibridge/__tests__/session-vault.property.test.ts +367 -0
package/src/modules/omnibridge/__tests__/shadow-session.property.test.ts +523 -0
package/src/modules/omnibridge/__tests__/triangulation-engine.property.test.ts +292 -0
package/src/modules/omnibridge/__tests__/verification-engine.property.test.ts +769 -0
package/src/modules/omnibridge/api/.gitkeep +1 -0
package/src/modules/omnibridge/api/ghost-api.ts +1087 -0
package/src/modules/omnibridge/auth/.gitkeep +1 -0
package/src/modules/omnibridge/auth/auth-tunnel.ts +843 -0
package/src/modules/omnibridge/auth/session-vault.ts +577 -0
package/src/modules/omnibridge/core/.gitkeep +1 -0
package/src/modules/omnibridge/core/semantic-normalizer.ts +702 -0
package/src/modules/omnibridge/core/triangulation-engine.ts +530 -0
package/src/modules/omnibridge/core/types.ts +610 -0
package/src/modules/omnibridge/execution/.gitkeep +1 -0
package/src/modules/omnibridge/execution/deterministic-logger.ts +629 -0
package/src/modules/omnibridge/execution/parallel-executor.ts +542 -0
package/src/modules/omnibridge/execution/shadow-session.ts +794 -0
package/src/modules/omnibridge/index.ts +212 -0
package/src/modules/omnibridge/omnibridge.ts +510 -0
package/src/modules/omnibridge/verification/.gitkeep +1 -0
package/src/modules/omnibridge/verification/verification-engine.ts +783 -0
package/src/outcomes/README.md +75 -0
package/src/outcomes/acquire-pilot-customer.ts +297 -0
package/src/outcomes/code-delivery-outcomes.ts +89 -0
package/src/outcomes/code-outcomes.ts +256 -0
package/src/outcomes/code_review_battle.test.ts +135 -0
package/src/outcomes/code_review_battle.ts +135 -0
package/src/outcomes/cold_email_battle.ts +97 -0
package/src/outcomes/content_creation_battle.ts +160 -0
package/src/outcomes/f1_stem_opt_compliance.ts +61 -0
package/src/outcomes/index.ts +107 -0
package/src/outcomes/lead_gen_battle.test.ts +113 -0
package/src/outcomes/lead_gen_battle.ts +99 -0
package/src/outcomes/outcome.schema.property.test.ts +229 -0
package/src/outcomes/outcome.schema.ts +187 -0
package/src/outcomes/qualified_sales_interest.ts +118 -0
package/src/outcomes/swarm_planner.property.test.ts +370 -0
package/src/outcomes/swarm_planner.ts +96 -0
package/src/outcomes/web_extraction.ts +234 -0
package/src/runtime/README.md +220 -0
package/src/runtime/agentRunner.test.ts +341 -0
package/src/runtime/agentRunner.ts +746 -0
package/src/runtime/claudeAdapter.ts +232 -0
package/src/runtime/costTracker.ts +123 -0
package/src/runtime/index.ts +34 -0
package/src/runtime/modelAdapter.property.test.ts +305 -0
package/src/runtime/modelAdapter.ts +144 -0
package/src/runtime/openaiAdapter.ts +235 -0
package/src/utils/README.md +122 -0
package/src/utils/command-runner.ts +134 -0
package/src/utils/cost-guard.ts +379 -0
package/src/utils/errors.test.ts +290 -0
package/src/utils/errors.ts +442 -0
package/src/utils/index.ts +37 -0
package/src/utils/logger.test.ts +361 -0
package/src/utils/logger.ts +419 -0
package/src/utils/output-parsers.ts +216 -0

package/src/eval/evaluateOutcome.ts ADDED Viewed

@@ -0,0 +1,591 @@
+/**
+ * Evaluation Orchestration - Binary evaluation of agent artifacts
+ *
+ * Evaluates whether an agent artifact meets all success criteria defined
+ * in an outcome. Returns exactly SUCCESS or FAILURE with structured reasons.
+ *
+ * @module eval/evaluateOutcome
+ */
+import type { Outcome, SuccessCriterion } from '../outcomes/outcome.schema.js';
+import {
+  type ValidationResult,
+  validateBuyingIntent,
+  validateCompanySize,
+  validateRole,
+  validateMessageLength,
+  validateEmail,
+  validateSecurityIssue,
+  validatePerformanceIssue,
+  validateNoiseFreeness,
+  validateComplexityReduction,
+  validateLinkedIn,
+  validateLeadGenPrecision,
+  validateCompanyDataset,
+  type CompanyDataset,
+  validateTestsPass,
+  validateBuilds,
+  validateLintClean,
+  validateBenchmark,
+  validateSecurityScan,
+} from './validators.js';
+import {
+  validateI983RequiredFields,
+  validateOPTDateRange,
+  validateEVerifyFormat,
+  validateTrainingDescriptionLength,
+} from './immigration-validators.js';
+/**
+ * Result of evaluating a single success criterion against an artifact's content.
+ *
+ * One entry is produced per criterion of the outcome and collected in
+ * `EvaluationResult.criteriaResults`.
+ */
+export interface CriterionResult {
+  /** Name of the criterion that was evaluated (copied from the criterion definition) */
+  name: string;
+  /** Whether the criterion passed; an unknown validator or a validator that throws counts as not passed */
+  passed: boolean;
+  /**
+   * Human-readable reason for the result: a `Criterion "<name>" passed` note on
+   * success, the validator's error messages joined with "; " on failure, or an
+   * "Unknown validator: ..." / "Validator error: ..." message when the validator
+   * could not be run to completion.
+   */
+  reason: string;
+}
+/**
+ * Content produced by an agent for evaluation.
+ *
+ * Every field is optional at the type level because this one shape is shared
+ * by all outcome kinds. Which fields are actually required is decided at
+ * runtime by the artifact schema check, based on the name of the outcome
+ * being evaluated.
+ */
+export interface ArtifactContent {
+  /** The message text generated by the agent (required by the default schema check) */
+  message?: string;
+  /** Target email address; older field name, used by the email validator when `email` is absent */
+  targetEmail?: string;
+  /** Target company name */
+  targetCompany?: string;
+  /** Target company size (number of employees); older field name, used when `companySize` is absent */
+  targetCompanySize?: number;
+  /** Target person's role; older field name, used when `role` is absent */
+  targetRole?: string;
+  // Image/video generation fields: for image/video/content outcomes the schema check requires a string "type", plus "imageUrl" when type is 'image' or "videoUrl" when type is 'video'
+  type?: 'image' | 'video' | 'text' | 'code';
+  imageUrl?: string;
+  videoUrl?: string;
+  originalPrompt?: string;
+  revisedPrompt?: string;
+  dimensions?: string;
+  model?: string;
+  style?: string;
+  target?: string;
+  generatedText?: string;
+  duration?: string;
+  // Lead gen fields: all four are required by the lead_gen_battle schema check; email, companySize and role take precedence over their target* counterparts in the validator adapters
+  email?: string;
+  companySize?: number;
+  role?: string;
+  linkedIn?: string;
+  // Code review fields: "issues" and "comments" are required arrays for code_review_battle; "refactorSuggestion" is optional but fully checked when present
+  issues?: Array<{
+    type: string;
+    severity: string;
+    description: string;
+  }>;
+  comments?: Array<{
+    lineContent: string;
+    comment: string;
+  }>;
+  refactorSuggestion?: {
+    originalComplexity: number;
+    suggestedComplexity: number;
+    description: string;
+  };
+  // Immigration compliance fields: "extractedFormData" must be an object for f1_stem_opt_compliance
+  extractedFormData?: Record<string, unknown>;
+  // Code delivery fields: testResult, buildResult, lintResult, benchmarkResult, code and language are required for code delivery outcomes; the remaining fields are optional but type-checked when present
+  repoPath?: string;
+  worktreePath?: string;
+  commitSha?: string;
+  testCommand?: string;
+  buildCommand?: string;
+  lintCommand?: string;
+  benchmarkCommand?: string;
+  securityScanCommand?: string;
+  testResult?: Record<string, unknown>;
+  buildResult?: Record<string, unknown>;
+  lintResult?: Record<string, unknown>;
+  benchmarkResult?: Record<string, unknown>;
+  securityScanResult?: Record<string, unknown>;
+  code?: string;
+  language?: string;
+}
+/**
+ * Agent artifact submitted for evaluation.
+ *
+ * The envelope fields below are checked at runtime before any criterion is
+ * evaluated; an artifact that fails those checks is rejected outright.
+ */
+export interface AgentArtifact {
+  /** ID of the agent that produced this artifact (must be a non-empty string) */
+  agentId: string;
+  /** ID of the outcome being attempted; must equal the evaluated outcome's `name` */
+  outcomeId: string;
+  /** Attempt number (1-indexed); must be a positive integer */
+  attemptNumber: number;
+  /** Content produced by the agent; required fields depend on the outcome */
+  content: ArtifactContent;
+  /** ISO timestamp when artifact was created (the schema check only requires a non-empty string) */
+  timestamp: string;
+}
+/**
+ * Result of evaluating an agent artifact against an outcome.
+ * Always binary: SUCCESS or FAILURE.
+ */
+export interface EvaluationResult {
+  /** Binary status - exactly SUCCESS or FAILURE */
+  status: 'SUCCESS' | 'FAILURE';
+  /** Human-readable reason for the result; on FAILURE it lists the schema errors, the outcome mismatch, or the failed criteria's reasons */
+  reason: string;
+  /** Results for each individual criterion; empty when the artifact was rejected before criteria were evaluated */
+  criteriaResults: CriterionResult[];
+  /**
+   * Verification details included only on SUCCESS: outcomeId, payoutAmount,
+   * agentId, attemptNumber, evaluatedAt (ISO string) and criteriaCount.
+   */
+  verificationDetails?: Record<string, unknown>;
+}
+/**
+ * Call signature shared by every registry entry: takes the artifact content and
+ * the criterion's params and returns the wrapped validator's ValidationResult.
+ */
+type ValidatorFn = (content: ArtifactContent, params: Record<string, unknown>) => ValidationResult;
+/**
+ * Map of validator names to their implementation functions.
+ * Used to dynamically call validators based on outcome configuration.
+ *
+ * Each entry adapts the generic (content, params) call to the argument list of
+ * the imported validator with the same name. Params are cast rather than
+ * validated in this map. Numeric defaults are applied with `??` so that an
+ * explicitly configured 0 is passed through instead of being replaced by the
+ * default.
+ */
+const validatorMap: Record<string, ValidatorFn> = {
+  validateBuyingIntent: (content, params) =>
+    validateBuyingIntent(content.message || '', params.keywords as string[]),
+  validateCompanySize: (content, params) => {
+    // Handle both old format (targetCompanySize) and new format (companySize); new wins
+    const companySize = (content as any).companySize ?? content.targetCompanySize;
+    return validateCompanySize(companySize, params.minimum as number);
+  },
+  validateRole: (content, params) => {
+    // Handle both old format (targetRole) and new format (role); new wins
+    const role = (content as any).role ?? content.targetRole;
+    return validateRole(role, params.excludedRoles as string[]);
+  },
+  validateMessageLength: (content, params) =>
+    validateMessageLength(content.message || '', params.minWords as number),
+  validateEmail: (content) => {
+    // Handle both old format (targetEmail) and new format (email); new wins
+    const email = (content as any).email ?? content.targetEmail;
+    return validateEmail(email);
+  },
+  // Code review battle validators
+  validateSecurityIssue: (content, params) => {
+    // `||` is deliberate here: an empty severity string also falls back to the default
+    const requiredSeverity = (params.requiredSeverity as string) || 'CRITICAL';
+    return validateSecurityIssue(content as any, requiredSeverity);
+  },
+  validatePerformanceIssue: (content) => validatePerformanceIssue(content as any),
+  validateNoiseFreeness: (content, _params) => {
+    // For mock mode, we'll assume the source diff contains our mock lines.
+    // The criterion params are ignored: the diff below is hard-coded.
+    const sourceDiff = `const query = "SELECT * FROM users WHERE username = '" + username + "' AND password = '" + password + "'";
+const permissions = db.query("SELECT * FROM permissions WHERE user_id = " + user.id);`;
+    return validateNoiseFreeness(content as any, sourceDiff);
+  },
+  validateComplexityReduction: (content, params) => {
+    // `??` keeps an explicit minReduction of 0 instead of silently turning it into 2
+    const minReduction = (params.minReduction as number | undefined) ?? 2;
+    return validateComplexityReduction(content as any, minReduction);
+  },
+  // Lead gen battle validators
+  validateLinkedIn: (content) => validateLinkedIn((content as any).linkedIn),
+  validateLeadGenPrecision: (content) => validateLeadGenPrecision(content as any),
+  // Code delivery validators: each reads the "<x>Result" summary, falls back to
+  // the shorter key of the same kind, and finally to an empty object.
+  validateTestsPass: (content, params) => {
+    const c = content as any;
+    return validateTestsPass(c.testResult ?? c.tests ?? {}, params as { minPassRate?: number });
+  },
+  validateBuilds: (content) => {
+    const c = content as any;
+    return validateBuilds(c.buildResult ?? c.build ?? {});
+  },
+  validateLintClean: (content, params) => {
+    const c = content as any;
+    return validateLintClean(c.lintResult ?? c.lint ?? {}, params as { allowWarnings?: boolean });
+  },
+  validateBenchmark: (content, params) => {
+    const c = content as any;
+    return validateBenchmark(c.benchmarkResult ?? c.benchmark ?? {}, params as { p95ThresholdMs: number });
+  },
+  validateSecurityScan: (content, params) => {
+    const c = content as any;
+    return validateSecurityScan(c.securityScanResult ?? c.security ?? {}, params as { maxSeverity?: 'critical' | 'high' | 'medium' | 'low' });
+  },
+  // Dataset validators - Outcome-Verified Marketplace
+  validateCompanyDataset: (content, params) => {
+    // `??` keeps an explicit minRows of 0 instead of silently turning it into 25
+    const minRows = (params.minRows as number | undefined) ?? 25;
+    return validateCompanyDataset(content as unknown as CompanyDataset, minRows);
+  },
+  // Immigration compliance validators (receive the whole content object)
+  validateI983RequiredFields: (content, params) =>
+    validateI983RequiredFields(content as Record<string, unknown>, params as { requiredFields: string[] }),
+  validateOPTDateRange: (content, params) =>
+    validateOPTDateRange(content as Record<string, unknown>, params as { field: string; minDaysFromNow: number; maxDaysFromNow: number }),
+  validateEVerifyFormat: (content, params) =>
+    validateEVerifyFormat(content as Record<string, unknown>, params as { field: string }),
+  validateTrainingDescriptionLength: (content, params) =>
+    validateTrainingDescriptionLength(content as Record<string, unknown>, params as { field: string; minWords: number }),
+};
+/** A (key, expected typeof, error message) rule used by the table-driven field checks. */
+type FieldRule = readonly [string, 'string' | 'number', string];
+/** Signature of the per-outcome content checks: appends any problems to `errors`. */
+type ContentCheck = (content: Record<string, unknown>, errors: string[]) => void;
+/** Returns true for any non-null value whose typeof is 'object' (arrays included). */
+function isObjectLike(value: unknown): value is Record<string, unknown> {
+  return typeof value === 'object' && value !== null;
+}
+/** Pushes a rule's message for every key whose typeof does not match, in rule order. */
+function checkFieldTypes(
+  source: Record<string, unknown>,
+  rules: ReadonlyArray<FieldRule>,
+  errors: string[]
+): void {
+  for (const [key, expected, message] of rules) {
+    if (typeof source[key] !== expected) {
+      errors.push(message);
+    }
+  }
+}
+/** Checks the envelope fields every artifact carries: agentId, outcomeId, attemptNumber, timestamp. */
+function checkArtifactEnvelope(a: Record<string, unknown>, errors: string[]): void {
+  const requireNonEmptyString = (field: string): void => {
+    const value = a[field];
+    if (typeof value !== 'string' || value.trim() === '') {
+      errors.push(`Artifact must have a non-empty string "${field}"`);
+    }
+  };
+  requireNonEmptyString('agentId');
+  requireNonEmptyString('outcomeId');
+  const attempt = a.attemptNumber;
+  if (typeof attempt !== 'number' || !Number.isInteger(attempt) || attempt < 1) {
+    errors.push('Artifact must have a positive integer "attemptNumber"');
+  }
+  requireNonEmptyString('timestamp');
+}
+/** code_review_battle: requires "issues" and "comments" arrays; "refactorSuggestion" is optional. */
+function checkCodeReviewContent(content: Record<string, unknown>, errors: string[]): void {
+  if (!Array.isArray(content.issues)) {
+    errors.push('Code review artifact content must have an array "issues"');
+  } else {
+    content.issues.forEach((issue: unknown, index: number) => {
+      if (!isObjectLike(issue)) {
+        errors.push(`issues[${index}] must be an object`);
+        return;
+      }
+      checkFieldTypes(
+        issue,
+        [
+          ['type', 'string', `issues[${index}] must have a string "type"`],
+          ['severity', 'string', `issues[${index}] must have a string "severity"`],
+          ['description', 'string', `issues[${index}] must have a string "description"`],
+        ],
+        errors
+      );
+    });
+  }
+  if (!Array.isArray(content.comments)) {
+    errors.push('Code review artifact content must have an array "comments"');
+  } else {
+    content.comments.forEach((comment: unknown, index: number) => {
+      if (!isObjectLike(comment)) {
+        errors.push(`comments[${index}] must be an object`);
+        return;
+      }
+      checkFieldTypes(
+        comment,
+        [
+          ['lineContent', 'string', `comments[${index}] must have a string "lineContent"`],
+          ['comment', 'string', `comments[${index}] must have a string "comment"`],
+        ],
+        errors
+      );
+    });
+  }
+  // Refactor suggestion is optional
+  const refactor = content.refactorSuggestion;
+  if (refactor === undefined) {
+    return;
+  }
+  if (!isObjectLike(refactor)) {
+    errors.push('refactorSuggestion must be an object if provided');
+    return;
+  }
+  checkFieldTypes(
+    refactor,
+    [
+      ['originalComplexity', 'number', 'refactorSuggestion must have a number "originalComplexity"'],
+      ['suggestedComplexity', 'number', 'refactorSuggestion must have a number "suggestedComplexity"'],
+      ['description', 'string', 'refactorSuggestion must have a string "description"'],
+    ],
+    errors
+  );
+}
+/** lead_gen_battle: requires email, companySize, role and linkedIn. */
+function checkLeadGenContent(content: Record<string, unknown>, errors: string[]): void {
+  checkFieldTypes(
+    content,
+    [
+      ['email', 'string', 'Lead gen artifact content must have a string "email"'],
+      ['companySize', 'number', 'Lead gen artifact content must have a number "companySize"'],
+      ['role', 'string', 'Lead gen artifact content must have a string "role"'],
+      ['linkedIn', 'string', 'Lead gen artifact content must have a string "linkedIn"'],
+    ],
+    errors
+  );
+}
+/** Code delivery outcomes: requires structured execution summaries plus "code" and "language". */
+function checkCodeDeliveryContent(content: Record<string, unknown>, errors: string[]): void {
+  for (const key of ['testResult', 'buildResult', 'lintResult', 'benchmarkResult']) {
+    if (!isObjectLike(content[key])) {
+      errors.push(`Code artifact must include "${key}" object`);
+    }
+  }
+  if (content.securityScanResult !== undefined && !isObjectLike(content.securityScanResult)) {
+    errors.push('If provided, "securityScanResult" must be an object');
+  }
+  const optionalStrings = [
+    'repoPath',
+    'worktreePath',
+    'commitSha',
+    'testCommand',
+    'buildCommand',
+    'lintCommand',
+    'benchmarkCommand',
+    'securityScanCommand',
+  ];
+  for (const key of optionalStrings) {
+    if (content[key] !== undefined && typeof content[key] !== 'string') {
+      errors.push(`If provided, "${key}" must be a string`);
+    }
+  }
+  checkFieldTypes(
+    content,
+    [
+      ['code', 'string', 'Code artifact must include "code" string'],
+      ['language', 'string', 'Code artifact must include "language" string'],
+    ],
+    errors
+  );
+}
+/** Dataset outcomes (Outcome-Verified Marketplace): requires "companies" array and "generatedAt". */
+function checkDatasetContent(content: Record<string, unknown>, errors: string[]): void {
+  if (!Array.isArray(content.companies)) {
+    errors.push('Dataset artifact content must have an array "companies"');
+  }
+  if (typeof content.generatedAt !== 'string') {
+    errors.push('Dataset artifact content must have a string "generatedAt"');
+  }
+}
+/** Content creation battles: requires "type", and the URL that matches an image/video type. */
+function checkMediaContent(content: Record<string, unknown>, errors: string[]): void {
+  if (typeof content.type !== 'string') {
+    errors.push('Content must have a string "type" ("image" | "video")');
+  }
+  if (content.type === 'image' && typeof content.imageUrl !== 'string') {
+    errors.push('Image artifact content must have a string "imageUrl"');
+  }
+  if (content.type === 'video' && typeof content.videoUrl !== 'string') {
+    errors.push('Video artifact content must have a string "videoUrl"');
+  }
+}
+/** f1_stem_opt_compliance: requires an "extractedFormData" object. */
+function checkComplianceContent(content: Record<string, unknown>, errors: string[]): void {
+  if (!isObjectLike(content.extractedFormData)) {
+    errors.push('Artifact content must have an object "extractedFormData"');
+  }
+}
+/** Default check for qualified_sales_interest and any outcome not matched above. */
+function checkSalesContent(content: Record<string, unknown>, errors: string[]): void {
+  checkFieldTypes(
+    content,
+    [
+      ['message', 'string', 'Artifact content must have a string "message"'],
+      ['targetEmail', 'string', 'Artifact content must have a string "targetEmail"'],
+      ['targetCompany', 'string', 'Artifact content must have a string "targetCompany"'],
+      ['targetCompanySize', 'number', 'Artifact content must have a number "targetCompanySize"'],
+      ['targetRole', 'string', 'Artifact content must have a string "targetRole"'],
+    ],
+    errors
+  );
+}
+/**
+ * Picks the content check for an outcome name.
+ * The order of the tests matters: exact names are tried before the substring
+ * matches, and "dataset" is tried before "image" / "video" / "content".
+ */
+function selectContentCheck(outcomeName: string): ContentCheck {
+  if (outcomeName === 'code_review_battle') {
+    return checkCodeReviewContent;
+  }
+  if (outcomeName === 'lead_gen_battle') {
+    return checkLeadGenContent;
+  }
+  if (
+    outcomeName === 'feature_implementation' ||
+    outcomeName === 'refactor_task' ||
+    outcomeName === 'test_generation'
+  ) {
+    return checkCodeDeliveryContent;
+  }
+  // Also covers 'ai_sales_tools_dataset_v1', which contains "dataset"
+  if (outcomeName.includes('dataset')) {
+    return checkDatasetContent;
+  }
+  if (outcomeName.includes('image') || outcomeName.includes('video') || outcomeName.includes('content')) {
+    return checkMediaContent;
+  }
+  if (outcomeName === 'f1_stem_opt_compliance') {
+    return checkComplianceContent;
+  }
+  return checkSalesContent;
+}
+/**
+ * Validates that an artifact has all required content fields for the given outcome.
+ *
+ * A non-object artifact is rejected immediately with a single error. Otherwise
+ * every problem found is collected: first the envelope fields, then the
+ * outcome-specific content fields (chosen by the outcome's name).
+ *
+ * @param artifact - The artifact to validate
+ * @param outcome - The outcome to validate against
+ * @returns ValidationResult indicating if artifact is valid
+ */
+function validateArtifactSchema(artifact: unknown, outcome: Outcome): ValidationResult {
+  if (typeof artifact !== 'object' || artifact === null) {
+    return { valid: false, errors: ['Artifact must be an object'] };
+  }
+  const errors: string[] = [];
+  const a = artifact as Record<string, unknown>;
+  checkArtifactEnvelope(a, errors);
+  // Validate content based on outcome type
+  const content = a.content;
+  if (!isObjectLike(content)) {
+    errors.push('Artifact must have an object "content"');
+  } else {
+    selectContentCheck(outcome.name)(content, errors);
+  }
+  return { valid: errors.length === 0, errors };
+}
+/**
+ * Evaluates a single success criterion against artifact content.
+ *
+ * Fails closed: a validator name that is not registered, or a validator that
+ * throws, produces a non-passing result instead of an exception.
+ *
+ * @param criterion - The criterion to evaluate
+ * @param content - The artifact content to evaluate against
+ * @returns CriterionResult with pass/fail status and reason
+ */
+function evaluateCriterion(
+  criterion: SuccessCriterion,
+  content: ArtifactContent
+): CriterionResult {
+  // Only accept validators registered directly on the map. A plain property
+  // lookup would also resolve inherited names such as "constructor" or
+  // "toString"; "constructor" in particular returns the agent-supplied content
+  // itself, letting a `valid` field inside the artifact decide the result.
+  const validator = Object.prototype.hasOwnProperty.call(validatorMap, criterion.validator)
+    ? validatorMap[criterion.validator]
+    : undefined;
+  if (!validator) {
+    // Unknown validator - fail closed
+    return {
+      name: criterion.name,
+      passed: false,
+      reason: `Unknown validator: ${criterion.validator}`,
+    };
+  }
+  try {
+    const result = validator(content, criterion.params);
+    return {
+      name: criterion.name,
+      passed: result.valid,
+      reason: result.valid
+        ? `Criterion "${criterion.name}" passed`
+        : result.errors.join('; '),
+    };
+  } catch (error) {
+    // Validator threw an error - fail closed
+    const errorMessage = error instanceof Error ? error.message : 'Unknown error';
+    return {
+      name: criterion.name,
+      passed: false,
+      reason: `Validator error: ${errorMessage}`,
+    };
+  }
+}
+/**
+ * Decides, in a strictly binary way, whether an agent artifact satisfies an outcome.
+ *
+ * Evaluation steps:
+ * - The artifact is schema-checked for the outcome; any problem yields FAILURE
+ * - The artifact's outcomeId must equal the outcome's name, otherwise FAILURE
+ * - Every success criterion is evaluated; SUCCESS requires that none failed
+ * - Validator problems are reported as failed criteria rather than thrown
+ *
+ * NOTE(review): an outcome whose criteria list is empty evaluates to SUCCESS,
+ * since no criterion fails. Confirm the outcome schema rules that case out.
+ *
+ * @param outcome - The outcome definition with success criteria
+ * @param artifact - The agent's produced artifact
+ * @returns EvaluationResult with binary status and detailed criteria results
+ *
+ * @example
+ * const result = await evaluateOutcome(qualifiedSalesInterest, artifact);
+ * if (result.status === 'SUCCESS') {
+ *   console.log('Payout triggered:', result.verificationDetails);
+ * } else {
+ *   console.log('Failed:', result.reason);
+ * }
+ *
+ * @see Requirements 5.1, 5.2, 5.3, 5.4
+ */
+export async function evaluateOutcome(
+  outcome: Outcome,
+  artifact: AgentArtifact
+): Promise<EvaluationResult> {
+  // Early rejections never carry criteria results
+  const reject = (reason: string): EvaluationResult => ({
+    status: 'FAILURE',
+    reason,
+    criteriaResults: [],
+  });
+  // Schema problems come first - fail closed on invalid data
+  const schemaCheck = validateArtifactSchema(artifact, outcome);
+  if (!schemaCheck.valid) {
+    return reject(`Invalid artifact: ${schemaCheck.errors.join('; ')}`);
+  }
+  // The artifact must have been produced for this very outcome
+  if (artifact.outcomeId !== outcome.name) {
+    return reject(
+      `Artifact outcome mismatch: expected "${outcome.name}", got "${artifact.outcomeId}"`
+    );
+  }
+  // Run every criterion, then split off the ones that did not pass
+  const criteriaResults: CriterionResult[] = outcome.successCriteria.map(
+    (criterion) => evaluateCriterion(criterion, artifact.content)
+  );
+  const total = criteriaResults.length;
+  const failures = criteriaResults.filter((entry) => !entry.passed);
+  if (failures.length > 0) {
+    // FAILURE - include structured reason
+    const joinedReasons = failures.map((entry) => entry.reason).join('; ');
+    return {
+      status: 'FAILURE',
+      reason: `Failed ${failures.length} of ${total} criteria: ${joinedReasons}`,
+      criteriaResults,
+    };
+  }
+  // SUCCESS - include verification details
+  return {
+    status: 'SUCCESS',
+    reason: `All ${total} criteria passed`,
+    criteriaResults,
+    verificationDetails: {
+      outcomeId: outcome.name,
+      payoutAmount: outcome.payoutAmount,
+      agentId: artifact.agentId,
+      attemptNumber: artifact.attemptNumber,
+      evaluatedAt: new Date().toISOString(),
+      criteriaCount: total,
+    },
+  };
+}