npm - outcome-cli - Versions diffs - 1.0.0 - Mend

outcome-cli 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (113) hide show

package/README.md +261 -0
package/package.json +95 -0
package/src/agents/README.md +139 -0
package/src/agents/adapters/anthropic.adapter.ts +166 -0
package/src/agents/adapters/dalle.adapter.ts +145 -0
package/src/agents/adapters/gemini.adapter.ts +134 -0
package/src/agents/adapters/imagen.adapter.ts +106 -0
package/src/agents/adapters/nano-banana.adapter.ts +129 -0
package/src/agents/adapters/openai.adapter.ts +165 -0
package/src/agents/adapters/veo.adapter.ts +130 -0
package/src/agents/agent.schema.property.test.ts +379 -0
package/src/agents/agent.schema.test.ts +148 -0
package/src/agents/agent.schema.ts +263 -0
package/src/agents/index.ts +60 -0
package/src/agents/registered-agent.schema.ts +356 -0
package/src/agents/registry.ts +97 -0
package/src/agents/tournament-configs.property.test.ts +266 -0
package/src/cli/README.md +145 -0
package/src/cli/commands/define.ts +79 -0
package/src/cli/commands/list.ts +46 -0
package/src/cli/commands/logs.ts +83 -0
package/src/cli/commands/run.ts +416 -0
package/src/cli/commands/verify.ts +110 -0
package/src/cli/index.ts +81 -0
package/src/config/README.md +128 -0
package/src/config/env.ts +262 -0
package/src/config/index.ts +19 -0
package/src/eval/README.md +318 -0
package/src/eval/ai-judge.test.ts +435 -0
package/src/eval/ai-judge.ts +368 -0
package/src/eval/code-validators.ts +414 -0
package/src/eval/evaluateOutcome.property.test.ts +1174 -0
package/src/eval/evaluateOutcome.ts +591 -0
package/src/eval/immigration-validators.ts +122 -0
package/src/eval/index.ts +90 -0
package/src/eval/judge-cache.ts +402 -0
package/src/eval/tournament-validators.property.test.ts +439 -0
package/src/eval/validators.property.test.ts +1118 -0
package/src/eval/validators.ts +1199 -0
package/src/eval/weighted-scorer.ts +285 -0
package/src/index.ts +17 -0
package/src/league/README.md +188 -0
package/src/league/health-check.ts +353 -0
package/src/league/index.ts +93 -0
package/src/league/killAgent.ts +151 -0
package/src/league/league.test.ts +1151 -0
package/src/league/runLeague.ts +843 -0
package/src/league/scoreAgent.ts +175 -0
package/src/modules/omnibridge/__tests__/.gitkeep +1 -0
package/src/modules/omnibridge/__tests__/auth-tunnel.property.test.ts +524 -0
package/src/modules/omnibridge/__tests__/deterministic-logger.property.test.ts +965 -0
package/src/modules/omnibridge/__tests__/ghost-api.property.test.ts +461 -0
package/src/modules/omnibridge/__tests__/omnibridge-integration.test.ts +542 -0
package/src/modules/omnibridge/__tests__/parallel-executor.property.test.ts +671 -0
package/src/modules/omnibridge/__tests__/semantic-normalizer.property.test.ts +521 -0
package/src/modules/omnibridge/__tests__/semantic-normalizer.test.ts +254 -0
package/src/modules/omnibridge/__tests__/session-vault.property.test.ts +367 -0
package/src/modules/omnibridge/__tests__/shadow-session.property.test.ts +523 -0
package/src/modules/omnibridge/__tests__/triangulation-engine.property.test.ts +292 -0
package/src/modules/omnibridge/__tests__/verification-engine.property.test.ts +769 -0
package/src/modules/omnibridge/api/.gitkeep +1 -0
package/src/modules/omnibridge/api/ghost-api.ts +1087 -0
package/src/modules/omnibridge/auth/.gitkeep +1 -0
package/src/modules/omnibridge/auth/auth-tunnel.ts +843 -0
package/src/modules/omnibridge/auth/session-vault.ts +577 -0
package/src/modules/omnibridge/core/.gitkeep +1 -0
package/src/modules/omnibridge/core/semantic-normalizer.ts +702 -0
package/src/modules/omnibridge/core/triangulation-engine.ts +530 -0
package/src/modules/omnibridge/core/types.ts +610 -0
package/src/modules/omnibridge/execution/.gitkeep +1 -0
package/src/modules/omnibridge/execution/deterministic-logger.ts +629 -0
package/src/modules/omnibridge/execution/parallel-executor.ts +542 -0
package/src/modules/omnibridge/execution/shadow-session.ts +794 -0
package/src/modules/omnibridge/index.ts +212 -0
package/src/modules/omnibridge/omnibridge.ts +510 -0
package/src/modules/omnibridge/verification/.gitkeep +1 -0
package/src/modules/omnibridge/verification/verification-engine.ts +783 -0
package/src/outcomes/README.md +75 -0
package/src/outcomes/acquire-pilot-customer.ts +297 -0
package/src/outcomes/code-delivery-outcomes.ts +89 -0
package/src/outcomes/code-outcomes.ts +256 -0
package/src/outcomes/code_review_battle.test.ts +135 -0
package/src/outcomes/code_review_battle.ts +135 -0
package/src/outcomes/cold_email_battle.ts +97 -0
package/src/outcomes/content_creation_battle.ts +160 -0
package/src/outcomes/f1_stem_opt_compliance.ts +61 -0
package/src/outcomes/index.ts +107 -0
package/src/outcomes/lead_gen_battle.test.ts +113 -0
package/src/outcomes/lead_gen_battle.ts +99 -0
package/src/outcomes/outcome.schema.property.test.ts +229 -0
package/src/outcomes/outcome.schema.ts +187 -0
package/src/outcomes/qualified_sales_interest.ts +118 -0
package/src/outcomes/swarm_planner.property.test.ts +370 -0
package/src/outcomes/swarm_planner.ts +96 -0
package/src/outcomes/web_extraction.ts +234 -0
package/src/runtime/README.md +220 -0
package/src/runtime/agentRunner.test.ts +341 -0
package/src/runtime/agentRunner.ts +746 -0
package/src/runtime/claudeAdapter.ts +232 -0
package/src/runtime/costTracker.ts +123 -0
package/src/runtime/index.ts +34 -0
package/src/runtime/modelAdapter.property.test.ts +305 -0
package/src/runtime/modelAdapter.ts +144 -0
package/src/runtime/openaiAdapter.ts +235 -0
package/src/utils/README.md +122 -0
package/src/utils/command-runner.ts +134 -0
package/src/utils/cost-guard.ts +379 -0
package/src/utils/errors.test.ts +290 -0
package/src/utils/errors.ts +442 -0
package/src/utils/index.ts +37 -0
package/src/utils/logger.test.ts +361 -0
package/src/utils/logger.ts +419 -0
package/src/utils/output-parsers.ts +216 -0

package/src/eval/tournament-validators.property.test.ts ADDED Viewed

@@ -0,0 +1,439 @@
+/**
+ * Property-based tests for Tournament Seed Bounty Validators
+ *
+ * Tests the validators used in code review battles and lead generation battles.
+ * Each property test validates universal correctness properties across many inputs.
+ *
+ * @module eval/tournament-validators.property.test
+ */
+import { describe, test, expect } from 'vitest';
+import * as fc from 'fast-check';
+import {
+  validateSecurityIssue,
+  validatePerformanceIssue,
+  validateNoiseFreeness,
+  validateComplexityReduction,
+  validateLinkedIn,
+  validateLeadGenPrecision,
+  validateExpertReview,
+  type ValidationResult,
+} from './validators.js';
+// ============================================================================
+// Code Review Battle Property Tests
+// ============================================================================
+/**
+ * Property-based tests for Security Issue Detection
+ *
+ * **Feature: tournament-seed-bounties, Property 1: Security Issue Detection**
+ * **Validates: Requirements 1.4, 3.3**
+ *
+ * Property 1: Security Issue Detection
+ * *For any* code review artifact, the security validator SHALL return valid: true
+ * if and only if the artifact contains at least one issue with type "security"
+ * and severity "CRITICAL".
+ */
+describe('Security Issue Detection - Property Tests', () => {
+  // Number of generated cases fast-check evaluates for each property below.
+  const RUN_COUNT = 100;
+  // **Feature: tournament-seed-bounties, Property 1: Security Issue Detection**
+  // Positive direction: once a CRITICAL security issue is appended, the test
+  // expects valid === true and an empty errors array.
+  test('artifacts with CRITICAL security issues are valid', () => {
+    const arbitraryIssues = fc.array(
+      fc.record({
+        type: fc.constantFrom('security', 'performance', 'style', 'logic'),
+        severity: fc.constantFrom('CRITICAL', 'HIGH', 'MEDIUM', 'LOW'),
+        description: fc.string(),
+        lineNumber: fc.integer({ min: 1, max: 100 }),
+      }),
+    );
+    const checkAccepted = (generated) => {
+      // A known qualifying issue is placed after the generated ones so the
+      // artifact satisfies the condition whatever fast-check produced.
+      const withCriticalSecurity = generated.concat({
+        type: 'security',
+        severity: 'CRITICAL',
+        description: 'SQL injection vulnerability',
+        lineNumber: 1,
+      });
+      const outcome = validateSecurityIssue({ issues: withCriticalSecurity }, 'CRITICAL');
+      expect(outcome.valid).toBe(true);
+      expect(outcome.errors).toHaveLength(0);
+    };
+    fc.assert(fc.property(arbitraryIssues, checkAccepted), { numRuns: RUN_COUNT });
+  });
+  // **Feature: tournament-seed-bounties, Property 1: Security Issue Detection**
+  // Negative direction: the generator never emits type 'security', so the test
+  // expects valid === false together with at least one error.
+  test('artifacts without CRITICAL security issues are invalid', () => {
+    const nonSecurityIssues = fc.array(
+      fc.record({
+        type: fc.constantFrom('performance', 'style', 'logic'),
+        severity: fc.constantFrom('CRITICAL', 'HIGH', 'MEDIUM', 'LOW'),
+        description: fc.string(),
+        lineNumber: fc.integer({ min: 1, max: 100 }),
+      }),
+    );
+    const checkRejected = (generated) => {
+      const outcome = validateSecurityIssue({ issues: generated }, 'CRITICAL');
+      expect(outcome.valid).toBe(false);
+      expect(outcome.errors.length).toBeGreaterThan(0);
+    };
+    fc.assert(fc.property(nonSecurityIssues, checkRejected), { numRuns: RUN_COUNT });
+  });
+  // **Feature: tournament-seed-bounties, Property 1: Security Issue Detection**
+  // Calling the validator twice on the very same artifact object must yield
+  // the same verdict and the same error list.
+  test('validation is deterministic', () => {
+    // Note: issues generated here carry no lineNumber field.
+    const arbitraryIssues = fc.array(
+      fc.record({
+        type: fc.constantFrom('security', 'performance', 'style', 'logic'),
+        severity: fc.constantFrom('CRITICAL', 'HIGH', 'MEDIUM', 'LOW'),
+        description: fc.string(),
+      }),
+    );
+    const checkRepeatable = (generated) => {
+      const sharedArtifact = { issues: generated };
+      const firstRun = validateSecurityIssue(sharedArtifact, 'CRITICAL');
+      const secondRun = validateSecurityIssue(sharedArtifact, 'CRITICAL');
+      expect(firstRun.valid).toBe(secondRun.valid);
+      expect(firstRun.errors).toEqual(secondRun.errors);
+    };
+    fc.assert(fc.property(arbitraryIssues, checkRepeatable), { numRuns: RUN_COUNT });
+  });
+});
+/**
+ * Property-based tests for Performance Issue Detection
+ *
+ * **Feature: tournament-seed-bounties, Property 2: Performance Issue Detection**
+ * **Validates: Requirements 1.5, 3.4**
+ *
+ * Property 2: Performance Issue Detection
+ * *For any* code review artifact, the performance validator SHALL return valid: true
+ * if and only if the artifact contains at least one issue with type "performance".
+ */
+describe('Performance Issue Detection - Property Tests', () => {
+  // Number of generated cases fast-check evaluates for each property below.
+  const RUN_COUNT = 100;
+  // **Feature: tournament-seed-bounties, Property 2: Performance Issue Detection**
+  // Positive direction: with a performance issue appended, the test expects
+  // valid === true and an empty errors array.
+  test('artifacts with performance issues are valid', () => {
+    const arbitraryIssues = fc.array(
+      fc.record({
+        type: fc.constantFrom('security', 'performance', 'style', 'logic'),
+        severity: fc.constantFrom('CRITICAL', 'HIGH', 'MEDIUM', 'LOW'),
+        description: fc.string(),
+      }),
+    );
+    const checkAccepted = (generated) => {
+      // Appending a fixed performance issue guarantees the artifact qualifies
+      // even when the generated list is empty or holds other types only.
+      const withPerformanceIssue = generated.concat({
+        type: 'performance',
+        severity: 'HIGH',
+        description: 'N+1 query detected',
+      });
+      const outcome = validatePerformanceIssue({ issues: withPerformanceIssue });
+      expect(outcome.valid).toBe(true);
+      expect(outcome.errors).toHaveLength(0);
+    };
+    fc.assert(fc.property(arbitraryIssues, checkAccepted), { numRuns: RUN_COUNT });
+  });
+  // **Feature: tournament-seed-bounties, Property 2: Performance Issue Detection**
+  // Negative direction: the generator never emits type 'performance', so the
+  // test expects valid === false together with at least one error.
+  test('artifacts without performance issues are invalid', () => {
+    const nonPerformanceIssues = fc.array(
+      fc.record({
+        type: fc.constantFrom('security', 'style', 'logic'),
+        severity: fc.constantFrom('CRITICAL', 'HIGH', 'MEDIUM', 'LOW'),
+        description: fc.string(),
+      }),
+    );
+    const checkRejected = (generated) => {
+      const outcome = validatePerformanceIssue({ issues: generated });
+      expect(outcome.valid).toBe(false);
+      expect(outcome.errors.length).toBeGreaterThan(0);
+    };
+    fc.assert(fc.property(nonPerformanceIssues, checkRejected), { numRuns: RUN_COUNT });
+  });
+});
+/**
+ * Property-based tests for Noise-Free Comment Validation
+ *
+ * **Feature: tournament-seed-bounties, Property 3: Noise-Free Comment Validation**
+ * **Validates: Requirements 1.6, 3.5**
+ *
+ * Property 3: Noise-Free Comment Validation
+ * *For any* code review artifact and source diff, the noise-freeness validator
+ * SHALL return valid: true if and only if every comment's lineContent is present
+ * in the source diff string.
+ */
+describe('Noise-Free Comment Validation - Property Tests', () => {
+  // **Feature: tournament-seed-bounties, Property 3: Noise-Free Comment Validation**
+  // Positive direction: every comment quotes a line that was used to build the
+  // diff, so the test expects valid === true and an empty errors array.
+  test('comments within source diff are valid', () => {
+    fc.assert(
+      fc.property(
+        fc.array(fc.string({ minLength: 1, maxLength: 100 })),
+        fc.array(fc.string({ minLength: 1, maxLength: 50 })),
+        (sourceLines, additionalLines) => {
+          // The diff is the commented lines followed by extra, uncommented
+          // lines, joined with newlines.
+          const sourceDiff = [...sourceLines, ...additionalLines].join('\n');
+          // One comment per source line; lineNumber is 1-based.
+          const comments = sourceLines.map((line, index) => ({
+            lineContent: line,
+            comment: `Comment about line ${index}`,
+            lineNumber: index + 1
+          }));
+          const artifact = { comments };
+          const result = validateNoiseFreeness(artifact, sourceDiff);
+          expect(result.valid).toBe(true);
+          expect(result.errors).toHaveLength(0);
+        }
+      ),
+      { numRuns: 100 }
+    );
+  });
+  // **Feature: tournament-seed-bounties, Property 3: Noise-Free Comment Validation**
+  // Negative direction: a single comment quotes text that is not a substring
+  // of the diff, so the test expects valid === false with at least one error.
+  test('comments outside source diff are invalid', () => {
+    fc.assert(
+      fc.property(
+        fc.string({ minLength: 1, maxLength: 100 }),
+        // minLength: 1 already rules out the empty string, so no additional
+        // non-empty filter is needed on this arbitrary.
+        fc.string({ minLength: 1, maxLength: 100 }),
+        (sourceDiff, outsideLine) => {
+          // Discard generated pairs where outsideLine happens to occur inside
+          // sourceDiff; fast-check draws a replacement case instead.
+          fc.pre(!sourceDiff.includes(outsideLine));
+          const comments = [{
+            lineContent: outsideLine,
+            comment: 'This comment is outside the diff',
+            lineNumber: 1
+          }];
+          const artifact = { comments };
+          const result = validateNoiseFreeness(artifact, sourceDiff);
+          expect(result.valid).toBe(false);
+          expect(result.errors.length).toBeGreaterThan(0);
+        }
+      ),
+      { numRuns: 100 }
+    );
+  });
+});
+/**
+ * Property-based tests for Complexity Reduction Validation
+ *
+ * **Feature: tournament-seed-bounties, Property 4: Complexity Reduction Validation**
+ * **Validates: Requirements 1.7, 3.7**
+ *
+ * Property 4: Complexity Reduction Validation
+ * *For any* code review artifact with a refactor suggestion, the complexity validator
+ * SHALL return valid: true if and only if (originalComplexity - suggestedComplexity)
+ * >= minReduction.
+ */
+describe('Complexity Reduction Validation - Property Tests', () => {
+  // **Feature: tournament-seed-bounties, Property 4: Complexity Reduction Validation**
+  // Positive direction: the achieved reduction equals minReduction exactly
+  // (the boundary of ">="), so the test expects valid === true and no errors.
+  test('sufficient complexity reduction is valid', () => {
+    fc.assert(
+      fc.property(
+        // The suggested value is generated first and the original is derived
+        // from it, so the suggested complexity can never be negative.
+        // Ranges are chosen so originalComplexity stays within 5..20.
+        fc.integer({ min: 3, max: 10 }), // suggestedComplexity
+        fc.integer({ min: 2, max: 10 }), // minReduction
+        (suggestedComplexity, minReduction) => {
+          const originalComplexity = suggestedComplexity + minReduction;
+          const artifact = {
+            refactorSuggestion: {
+              originalComplexity,
+              suggestedComplexity,
+              description: 'Refactor to reduce complexity'
+            }
+          };
+          const result = validateComplexityReduction(artifact, minReduction);
+          expect(result.valid).toBe(true);
+          expect(result.errors).toHaveLength(0);
+        }
+      ),
+      { numRuns: 100 }
+    );
+  });
+  // **Feature: tournament-seed-bounties, Property 4: Complexity Reduction Validation**
+  // Negative direction: the achieved reduction is one short of minReduction,
+  // so the test expects valid === false with at least one error.
+  test('insufficient complexity reduction is invalid', () => {
+    fc.assert(
+      fc.property(
+        // As above, derive the original from a positive suggested value.
+        // With reduction in 2..9 this keeps originalComplexity within 5..20.
+        fc.integer({ min: 3, max: 11 }), // suggestedComplexity
+        fc.integer({ min: 3, max: 10 }), // minReduction
+        (suggestedComplexity, minReduction) => {
+          // minReduction is at least 3, so this is always >= 2 and needs no clamp.
+          const reduction = minReduction - 1;
+          const originalComplexity = suggestedComplexity + reduction;
+          const artifact = {
+            refactorSuggestion: {
+              originalComplexity,
+              suggestedComplexity,
+              description: 'Insufficient refactor'
+            }
+          };
+          const result = validateComplexityReduction(artifact, minReduction);
+          expect(result.valid).toBe(false);
+          expect(result.errors.length).toBeGreaterThan(0);
+        }
+      ),
+      { numRuns: 100 }
+    );
+  });
+  // **Feature: tournament-seed-bounties, Property 4: Complexity Reduction Validation**
+  // Example-based case: an artifact without a refactorSuggestion key is
+  // expected to be rejected with a specific message as the first error.
+  test('missing refactor suggestion is invalid', () => {
+    const artifact = {}; // No refactorSuggestion
+    const result = validateComplexityReduction(artifact, 2);
+    expect(result.valid).toBe(false);
+    expect(result.errors.length).toBeGreaterThan(0);
+    expect(result.errors[0]).toContain('No refactor suggestion provided');
+  });
+});
+// ============================================================================
+// Lead Generation Battle Property Tests
+// ============================================================================
+/**
+ * Property-based tests for ValidationResult Structure Consistency
+ *
+ * **Feature: tournament-seed-bounties, Property 7: ValidationResult Structure Consistency**
+ * **Validates: Requirements 3.2, 4.2, 1.8, 2.8**
+ *
+ * Property 7: ValidationResult Structure Consistency
+ * *For any* validator function call, the result SHALL have a valid boolean and an
+ * errors array where: if valid is true, errors is empty; if valid is false,
+ * errors contains at least one descriptive message.
+ */
+describe('ValidationResult Structure Consistency - Property Tests', () => {
+  // Number of generated cases fast-check evaluates for each property below.
+  const RUN_COUNT = 100;
+  /**
+   * Asserts the shape both tests in this suite require of a validator result:
+   * `valid` is a boolean and `errors` is an array; a valid result has no
+   * errors, an invalid one has at least one and all of them are strings.
+   */
+  const assertConsistentShape = (outcome) => {
+    expect(typeof outcome.valid).toBe('boolean');
+    expect(Array.isArray(outcome.errors)).toBe(true);
+    if (!outcome.valid) {
+      expect(outcome.errors.length).toBeGreaterThan(0);
+      expect(outcome.errors.every((message) => typeof message === 'string')).toBe(true);
+      return;
+    }
+    expect(outcome.errors).toHaveLength(0);
+  };
+  // **Feature: tournament-seed-bounties, Property 7: ValidationResult Structure Consistency**
+  // Exercises validateLinkedIn with arbitrary strings.
+  test('all validators return consistent ValidationResult structure', () => {
+    const anyLinkedInValue = fc.string();
+    fc.assert(
+      fc.property(anyLinkedInValue, (linkedIn) => {
+        assertConsistentShape(validateLinkedIn(linkedIn));
+      }),
+      { numRuns: RUN_COUNT },
+    );
+  });
+  // **Feature: tournament-seed-bounties, Property 7: ValidationResult Structure Consistency**
+  // Exercises validateLeadGenPrecision with arbitrary lead records.
+  test('composite validators maintain structure consistency', () => {
+    const anyLead = fc.record({
+      email: fc.string(),
+      companySize: fc.integer({ min: 0, max: 10000 }),
+      role: fc.string(),
+      linkedIn: fc.string(),
+    });
+    fc.assert(
+      fc.property(anyLead, (artifact) => {
+        assertConsistentShape(validateLeadGenPrecision(artifact));
+      }),
+      { numRuns: RUN_COUNT },
+    );
+  });
+});
+// ============================================================================
+// Integration Property Tests
+// ============================================================================
+/**
+ * Property-based tests for Expert Review Composite Validation
+ *
+ * Tests the validateExpertReview function which combines all code review validations.
+ */
+describe('Expert Review Composite Validation - Property Tests', () => {
+  // The artifact built here has no security issue, no performance issue and no
+  // refactorSuggestion; the test expects the combined result to be invalid and
+  // its joined error text to mention all three.
+  test('expert review combines all validation errors', () => {
+    // Issues are limited to 'style'/'logic' and never carry CRITICAL severity.
+    const harmlessIssues = fc.array(
+      fc.record({
+        type: fc.constantFrom('style', 'logic'),
+        severity: fc.constantFrom('HIGH', 'MEDIUM', 'LOW'),
+        description: fc.string(),
+      }),
+    );
+    const arbitraryComments = fc.array(
+      fc.record({
+        lineContent: fc.string(),
+        comment: fc.string(),
+        lineNumber: fc.integer({ min: 1, max: 100 }),
+      }),
+    );
+    const arbitraryDiff = fc.string();
+    const checkAggregatedFailure = (issues, comments, sourceDiff) => {
+      // refactorSuggestion is deliberately left out of the artifact.
+      const outcome = validateExpertReview({ issues, comments }, sourceDiff);
+      expect(outcome.valid).toBe(false);
+      expect(outcome.errors.length).toBeGreaterThan(0);
+      // All messages are flattened into one string for substring checks.
+      const combinedMessages = outcome.errors.join(' ');
+      expect(combinedMessages).toContain('security');
+      expect(combinedMessages).toContain('performance');
+      expect(combinedMessages).toContain('refactor');
+    };
+    fc.assert(
+      fc.property(harmlessIssues, arbitraryComments, arbitraryDiff, checkAggregatedFailure),
+      { numRuns: 100 },
+    );
+  });
+});