outcome-cli 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +261 -0
- package/package.json +95 -0
- package/src/agents/README.md +139 -0
- package/src/agents/adapters/anthropic.adapter.ts +166 -0
- package/src/agents/adapters/dalle.adapter.ts +145 -0
- package/src/agents/adapters/gemini.adapter.ts +134 -0
- package/src/agents/adapters/imagen.adapter.ts +106 -0
- package/src/agents/adapters/nano-banana.adapter.ts +129 -0
- package/src/agents/adapters/openai.adapter.ts +165 -0
- package/src/agents/adapters/veo.adapter.ts +130 -0
- package/src/agents/agent.schema.property.test.ts +379 -0
- package/src/agents/agent.schema.test.ts +148 -0
- package/src/agents/agent.schema.ts +263 -0
- package/src/agents/index.ts +60 -0
- package/src/agents/registered-agent.schema.ts +356 -0
- package/src/agents/registry.ts +97 -0
- package/src/agents/tournament-configs.property.test.ts +266 -0
- package/src/cli/README.md +145 -0
- package/src/cli/commands/define.ts +79 -0
- package/src/cli/commands/list.ts +46 -0
- package/src/cli/commands/logs.ts +83 -0
- package/src/cli/commands/run.ts +416 -0
- package/src/cli/commands/verify.ts +110 -0
- package/src/cli/index.ts +81 -0
- package/src/config/README.md +128 -0
- package/src/config/env.ts +262 -0
- package/src/config/index.ts +19 -0
- package/src/eval/README.md +318 -0
- package/src/eval/ai-judge.test.ts +435 -0
- package/src/eval/ai-judge.ts +368 -0
- package/src/eval/code-validators.ts +414 -0
- package/src/eval/evaluateOutcome.property.test.ts +1174 -0
- package/src/eval/evaluateOutcome.ts +591 -0
- package/src/eval/immigration-validators.ts +122 -0
- package/src/eval/index.ts +90 -0
- package/src/eval/judge-cache.ts +402 -0
- package/src/eval/tournament-validators.property.test.ts +439 -0
- package/src/eval/validators.property.test.ts +1118 -0
- package/src/eval/validators.ts +1199 -0
- package/src/eval/weighted-scorer.ts +285 -0
- package/src/index.ts +17 -0
- package/src/league/README.md +188 -0
- package/src/league/health-check.ts +353 -0
- package/src/league/index.ts +93 -0
- package/src/league/killAgent.ts +151 -0
- package/src/league/league.test.ts +1151 -0
- package/src/league/runLeague.ts +843 -0
- package/src/league/scoreAgent.ts +175 -0
- package/src/modules/omnibridge/__tests__/.gitkeep +1 -0
- package/src/modules/omnibridge/__tests__/auth-tunnel.property.test.ts +524 -0
- package/src/modules/omnibridge/__tests__/deterministic-logger.property.test.ts +965 -0
- package/src/modules/omnibridge/__tests__/ghost-api.property.test.ts +461 -0
- package/src/modules/omnibridge/__tests__/omnibridge-integration.test.ts +542 -0
- package/src/modules/omnibridge/__tests__/parallel-executor.property.test.ts +671 -0
- package/src/modules/omnibridge/__tests__/semantic-normalizer.property.test.ts +521 -0
- package/src/modules/omnibridge/__tests__/semantic-normalizer.test.ts +254 -0
- package/src/modules/omnibridge/__tests__/session-vault.property.test.ts +367 -0
- package/src/modules/omnibridge/__tests__/shadow-session.property.test.ts +523 -0
- package/src/modules/omnibridge/__tests__/triangulation-engine.property.test.ts +292 -0
- package/src/modules/omnibridge/__tests__/verification-engine.property.test.ts +769 -0
- package/src/modules/omnibridge/api/.gitkeep +1 -0
- package/src/modules/omnibridge/api/ghost-api.ts +1087 -0
- package/src/modules/omnibridge/auth/.gitkeep +1 -0
- package/src/modules/omnibridge/auth/auth-tunnel.ts +843 -0
- package/src/modules/omnibridge/auth/session-vault.ts +577 -0
- package/src/modules/omnibridge/core/.gitkeep +1 -0
- package/src/modules/omnibridge/core/semantic-normalizer.ts +702 -0
- package/src/modules/omnibridge/core/triangulation-engine.ts +530 -0
- package/src/modules/omnibridge/core/types.ts +610 -0
- package/src/modules/omnibridge/execution/.gitkeep +1 -0
- package/src/modules/omnibridge/execution/deterministic-logger.ts +629 -0
- package/src/modules/omnibridge/execution/parallel-executor.ts +542 -0
- package/src/modules/omnibridge/execution/shadow-session.ts +794 -0
- package/src/modules/omnibridge/index.ts +212 -0
- package/src/modules/omnibridge/omnibridge.ts +510 -0
- package/src/modules/omnibridge/verification/.gitkeep +1 -0
- package/src/modules/omnibridge/verification/verification-engine.ts +783 -0
- package/src/outcomes/README.md +75 -0
- package/src/outcomes/acquire-pilot-customer.ts +297 -0
- package/src/outcomes/code-delivery-outcomes.ts +89 -0
- package/src/outcomes/code-outcomes.ts +256 -0
- package/src/outcomes/code_review_battle.test.ts +135 -0
- package/src/outcomes/code_review_battle.ts +135 -0
- package/src/outcomes/cold_email_battle.ts +97 -0
- package/src/outcomes/content_creation_battle.ts +160 -0
- package/src/outcomes/f1_stem_opt_compliance.ts +61 -0
- package/src/outcomes/index.ts +107 -0
- package/src/outcomes/lead_gen_battle.test.ts +113 -0
- package/src/outcomes/lead_gen_battle.ts +99 -0
- package/src/outcomes/outcome.schema.property.test.ts +229 -0
- package/src/outcomes/outcome.schema.ts +187 -0
- package/src/outcomes/qualified_sales_interest.ts +118 -0
- package/src/outcomes/swarm_planner.property.test.ts +370 -0
- package/src/outcomes/swarm_planner.ts +96 -0
- package/src/outcomes/web_extraction.ts +234 -0
- package/src/runtime/README.md +220 -0
- package/src/runtime/agentRunner.test.ts +341 -0
- package/src/runtime/agentRunner.ts +746 -0
- package/src/runtime/claudeAdapter.ts +232 -0
- package/src/runtime/costTracker.ts +123 -0
- package/src/runtime/index.ts +34 -0
- package/src/runtime/modelAdapter.property.test.ts +305 -0
- package/src/runtime/modelAdapter.ts +144 -0
- package/src/runtime/openaiAdapter.ts +235 -0
- package/src/utils/README.md +122 -0
- package/src/utils/command-runner.ts +134 -0
- package/src/utils/cost-guard.ts +379 -0
- package/src/utils/errors.test.ts +290 -0
- package/src/utils/errors.ts +442 -0
- package/src/utils/index.ts +37 -0
- package/src/utils/logger.test.ts +361 -0
- package/src/utils/logger.ts +419 -0
- package/src/utils/output-parsers.ts +216 -0
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Agent Registry - Loads and manages all available agents
|
|
3
|
+
*
|
|
4
|
+
* This module discovers all agent configurations in the configs directory
|
|
5
|
+
* and provides a centralized registry for accessing them.
|
|
6
|
+
*
|
|
7
|
+
* @module agents/registry
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { readFileSync, readdirSync } from 'fs';
|
|
11
|
+
import { join } from 'path';
|
|
12
|
+
import { fileURLToPath } from 'url';
|
|
13
|
+
import { dirname } from 'path';
|
|
14
|
+
import type { AgentConfig } from './agent.schema.js';
|
|
15
|
+
import { parseAgentConfigYaml } from './agent.schema.js';
|
|
16
|
+
|
|
17
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
18
|
+
const __dirname = dirname(__filename);
|
|
19
|
+
|
|
20
|
+
/**
 * In-memory store of the agent configurations loaded so far,
 * keyed by each agent's `id`.
 */
const agentRegistry = new Map<string, AgentConfig>();
|
|
24
|
+
|
|
25
|
+
/**
 * Loads every agent configuration found in the `configs` directory that sits
 * next to this module and registers the valid ones.
 *
 * Each `.yaml` / `.yml` file is processed independently: a file that cannot be
 * read, throws while parsing, or fails validation is reported on stderr and
 * skipped, so one bad file never prevents the remaining files from loading.
 * Valid configs are stored in the registry under their `id`.
 *
 * @returns The configs loaded by this call; an empty array when the configs
 *          directory itself cannot be listed.
 */
export async function loadAllAgents(): Promise<AgentConfig[]> {
  const configsDir = join(__dirname, 'configs');

  let yamlFiles: string[];
  try {
    yamlFiles = readdirSync(configsDir).filter(
      (f) => f.endsWith('.yaml') || f.endsWith('.yml')
    );
  } catch (error) {
    // Without a directory listing there is nothing to load.
    console.error('Error loading agents:', error);
    return [];
  }

  const agents: AgentConfig[] = [];

  for (const file of yamlFiles) {
    try {
      const yamlContent = readFileSync(join(configsDir, file), 'utf-8');
      const { config, validation } = await parseAgentConfigYaml(yamlContent);

      if (config && validation.valid) {
        agentRegistry.set(config.id, config);
        agents.push(config);
        console.log(`✓ Loaded agent: ${config.name} (${config.id})`);
      } else {
        console.error(`✗ Failed to load ${file}:`, validation.errors);
      }
    } catch (error) {
      // Keep failures local to this file so that the return value stays
      // consistent with what has already been written to the registry.
      console.error(`✗ Failed to load ${file}:`, error);
    }
  }

  console.log(`\n📦 Agent Registry: ${agents.length} agents loaded\n`);
  return agents;
}
|
|
60
|
+
|
|
61
|
+
/**
 * Looks up a single agent configuration in the registry.
 *
 * @param agentId - ID of the agent to look up
 * @returns The matching config, or `undefined` when nothing is registered under that ID
 */
export function getAgent(agentId: string): AgentConfig | undefined {
  const match = agentRegistry.get(agentId);
  return match;
}
|
|
70
|
+
|
|
71
|
+
/**
 * Lists the IDs of every agent currently in the registry.
 *
 * @returns A new array containing the registered agent IDs
 */
export function getAllAgentIds(): string[] {
  return [...agentRegistry.keys()];
}
|
|
79
|
+
|
|
80
|
+
/**
 * Lists every agent configuration currently in the registry.
 *
 * @returns A new array containing the registered agent configs
 */
export function getAllAgents(): AgentConfig[] {
  return [...agentRegistry.values()];
}
|
|
88
|
+
|
|
89
|
+
/**
 * Reports whether the registry contains an agent with the given ID.
 *
 * @param agentId - ID of the agent to look for
 * @returns `true` when an agent is registered under that ID, otherwise `false`
 */
export function hasAgent(agentId: string): boolean {
  const isRegistered = agentRegistry.has(agentId);
  return isRegistered;
}
|
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Property-based tests for Tournament Agent Configuration Schema Validation
|
|
3
|
+
*
|
|
4
|
+
* **Feature: tournament-seed-bounties, Property 10: Agent Configuration Schema Validation**
|
|
5
|
+
* **Validates: Requirements 6.5, 8.3, 8.4**
|
|
6
|
+
*
|
|
7
|
+
* Property 10: Agent Configuration Schema Validation
|
|
8
|
+
* *For any* agent configuration loaded from YAML, the configuration SHALL validate
|
|
9
|
+
* successfully against the AgentSchema, or the system SHALL throw a descriptive error.
|
|
10
|
+
*
|
|
11
|
+
* @module agents/tournament-configs.property.test
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import { describe, test, expect } from 'vitest';
|
|
15
|
+
import * as fc from 'fast-check';
|
|
16
|
+
import { readFile } from 'node:fs/promises';
|
|
17
|
+
import { parse } from 'yaml';
|
|
18
|
+
import { validateAgentConfig, type AgentConfig } from './agent.schema.js';
|
|
19
|
+
|
|
20
|
+
// Tournament agent configuration files, one per competing agent.
// The paths are relative, so they resolve against the directory the tests run from.
const TOURNAMENT_CONFIG_FILES = ['coderabbit', 'claude-sonnet', 'gpt4o', 'llama3'].map(
  (agent) => `src/agents/configs/tournament-${agent}.yaml`
);
|
|
27
|
+
|
|
28
|
+
/**
 * Arbitrary for generating valid agent configurations.
 *
 * The `id` is built constructively from the allowed alphabet (lowercase
 * letters, digits and hyphens, 3-50 characters) instead of filtering
 * arbitrary strings against the pattern: rejection sampling discards most
 * candidates and almost only ever yields minimum-length ids, whereas this
 * form covers the whole length range.
 */
const validAgentConfigArb = fc.record({
  id: fc
    .array(fc.constantFrom(...'abcdefghijklmnopqrstuvwxyz0123456789-'.split('')), {
      minLength: 3,
      maxLength: 50,
    })
    .map((chars) => chars.join('')),
  // The filters below reject strings that are too short once trimmed.
  name: fc.string({ minLength: 3, maxLength: 100 }).filter((s) => s.trim().length >= 3),
  prompt: fc.string({ minLength: 20, maxLength: 2000 }).filter((s) => s.trim().length >= 20),
  strategyDescription: fc
    .string({ minLength: 20, maxLength: 500 })
    .filter((s) => s.trim().length >= 20),
  toolAccess: fc.array(fc.string({ minLength: 1, maxLength: 50 }), { maxLength: 10 }),
  costCeiling: fc.integer({ min: 1000, max: 50000 }),
  modelProvider: fc.constantFrom('claude', 'openai'),
  modelId: fc.string({ minLength: 3, maxLength: 100 }).filter((s) => s.trim().length >= 3),
});
|
|
41
|
+
|
|
42
|
+
/**
 * Arbitrary for generating agent configurations that the tests expect to be
 * rejected. It draws from three shapes, each broken in a different way.
 */

// Shape 1: required fields (id, strategyDescription, etc.) are missing.
const configMissingRequiredFieldsArb = fc.record({
  name: fc.string(),
  prompt: fc.string(),
});

// Shape 2: all fields present, but `id` and `costCeiling` have the wrong type.
const configWithWrongFieldTypesArb = fc.record({
  id: fc.integer(), // Should be string
  name: fc.string(),
  prompt: fc.string(),
  strategyDescription: fc.string(),
  toolAccess: fc.array(fc.string()),
  costCeiling: fc.string(), // Should be number
  modelProvider: fc.constantFrom('claude', 'openai'),
  modelId: fc.string(),
});

// Shape 3: correct types, but a non-positive cost ceiling and an unknown provider.
const configWithInvalidValuesArb = fc.record({
  id: fc.string(),
  name: fc.string(),
  prompt: fc.string(),
  strategyDescription: fc.string(),
  toolAccess: fc.array(fc.string()),
  costCeiling: fc.integer({ min: -1000, max: 0 }), // Invalid cost ceiling
  modelProvider: fc.constantFrom('invalid-provider'), // Invalid provider
  modelId: fc.string(),
});

const invalidAgentConfigArb = fc.oneof(
  configMissingRequiredFieldsArb,
  configWithWrongFieldTypesArb,
  configWithInvalidValuesArb
);
|
|
75
|
+
|
|
76
|
+
describe('Tournament Agent Configuration Schema Validation - Property Tests', () => {
  // **Feature: tournament-seed-bounties, Property 10: Agent Configuration Schema Validation**
  // (applies to every test in this suite)

  /**
   * Reads one tournament config file and parses its YAML.
   * The cast to AgentConfig is unchecked: nothing has validated the parsed
   * value at this point, so callers must not rely on its shape.
   */
  const loadTournamentConfig = async (configFile: string): Promise<AgentConfig> => {
    const yamlContent = await readFile(configFile, 'utf-8');
    return parse(yamlContent) as AgentConfig;
  };

  test('all tournament agent configs validate against schema', async () => {
    for (const configFile of TOURNAMENT_CONFIG_FILES) {
      const result = validateAgentConfig(await loadTournamentConfig(configFile));

      // Check the error list first and name the file, so a failure shows
      // which config broke and why.
      expect(result.errors, `${configFile} should have no validation errors`).toEqual([]);
      expect(result.valid, `${configFile} should be valid`).toBe(true);
    }
  });

  test('valid agent configurations pass validation', () => {
    fc.assert(
      fc.property(validAgentConfigArb, (config) => {
        const result = validateAgentConfig(config);
        expect(result.valid).toBe(true);
        expect(result.errors).toHaveLength(0);
      }),
      { numRuns: 100 }
    );
  });

  test('invalid agent configurations fail validation with descriptive errors', () => {
    fc.assert(
      fc.property(invalidAgentConfigArb, (config) => {
        const result = validateAgentConfig(config);
        expect(result.valid).toBe(false);
        expect(result.errors.length).toBeGreaterThan(0);

        // Errors should be descriptive strings
        expect(result.errors.every((error) => typeof error === 'string')).toBe(true);
        expect(result.errors.every((error) => error.length > 0)).toBe(true);
      }),
      { numRuns: 100 }
    );
  });

  test('validation is deterministic', () => {
    fc.assert(
      fc.property(validAgentConfigArb, (config) => {
        const result1 = validateAgentConfig(config);
        const result2 = validateAgentConfig(config);

        expect(result1.valid).toBe(result2.valid);
        expect(result1.errors).toEqual(result2.errors);
      }),
      { numRuns: 100 }
    );
  });

  test('tournament configs have required tournament-specific fields', async () => {
    for (const configFile of TOURNAMENT_CONFIG_FILES) {
      const config = await loadTournamentConfig(configFile);

      // Tournament configs should have specific characteristics.
      // The file path is passed as the assertion message to identify failures.
      expect(config.id, configFile).toMatch(/^tournament-/);
      expect(config.name, configFile).toBeTruthy();
      expect(config.prompt, configFile).toBeTruthy();
      expect(config.strategyDescription, configFile).toBeTruthy();
      expect(Array.isArray(config.toolAccess), configFile).toBe(true);
      expect(typeof config.costCeiling, configFile).toBe('number');
      expect(config.costCeiling, configFile).toBeGreaterThan(0);
      expect(['claude', 'openai', 'ollama'].includes(config.modelProvider), configFile).toBe(true);
      expect(config.modelId, configFile).toBeTruthy();
    }
  });

  test('tournament configs are outcome-aware', async () => {
    // Keywords accepted in the prompt, and the subset accepted in the strategy description.
    const promptKeywords = [
      'tournament',
      'code review',
      'lead gen',
      'security',
      'performance',
      'linkedin',
    ];
    const descriptionKeywords = ['tournament', 'code review', 'lead gen'];

    for (const configFile of TOURNAMENT_CONFIG_FILES) {
      const config = await loadTournamentConfig(configFile);

      // Tournament configs should mention tournament-specific concepts
      const promptLower = config.prompt.toLowerCase();
      const descriptionLower = config.strategyDescription.toLowerCase();

      const hasTournamentConcepts =
        promptKeywords.some((keyword) => promptLower.includes(keyword)) ||
        descriptionKeywords.some((keyword) => descriptionLower.includes(keyword));

      expect(hasTournamentConcepts, `${configFile} should mention a tournament concept`).toBe(true);
    }
  });

  test('validation result structure is consistent', () => {
    fc.assert(
      fc.property(fc.oneof(validAgentConfigArb, invalidAgentConfigArb), (config) => {
        const result = validateAgentConfig(config);

        // Must have valid boolean and errors array
        expect(typeof result.valid).toBe('boolean');
        expect(Array.isArray(result.errors)).toBe(true);

        // If valid, errors should be empty; if invalid, errors should have content
        if (result.valid) {
          expect(result.errors).toHaveLength(0);
        } else {
          expect(result.errors.length).toBeGreaterThan(0);
        }
      }),
      { numRuns: 100 }
    );
  });

  test('cost ceiling validation works correctly', () => {
    fc.assert(
      fc.property(fc.integer({ min: -10000, max: 100000 }), (costCeiling) => {
        const config = {
          id: 'test-agent',
          name: 'Test Agent',
          prompt: 'Test prompt for validation',
          strategyDescription: 'Test strategy description',
          toolAccess: [],
          costCeiling,
          modelProvider: 'claude' as const,
          modelId: 'claude-3-sonnet',
        };

        const result = validateAgentConfig(config);

        if (costCeiling > 0) {
          expect(result.valid).toBe(true);
        } else {
          expect(result.valid).toBe(false);
          // The reported error is expected to mention the offending field.
          expect(
            result.errors.some(
              (error) =>
                error.toLowerCase().includes('cost') || error.toLowerCase().includes('ceiling')
            )
          ).toBe(true);
        }
      }),
      { numRuns: 100 }
    );
  });

  test('model provider validation works correctly', () => {
    fc.assert(
      fc.property(fc.string(), (modelProvider) => {
        const config = {
          id: 'test-agent',
          name: 'Test Agent',
          prompt: 'Test prompt for validation',
          strategyDescription: 'Test strategy description',
          toolAccess: [],
          costCeiling: 5000,
          modelProvider: modelProvider as any,
          modelId: 'test-model',
        };

        const result = validateAgentConfig(config);

        // NOTE(review): the tournament-specific test in this suite also accepts
        // 'ollama' as a provider, while only 'claude' and 'openai' are treated
        // as valid here. Confirm which list matches the schema.
        if (['claude', 'openai'].includes(modelProvider)) {
          expect(result.valid).toBe(true);
        } else {
          expect(result.valid).toBe(false);
          expect(
            result.errors.some(
              (error) =>
                error.toLowerCase().includes('provider') || error.toLowerCase().includes('model')
            )
          ).toBe(true);
        }
      }),
      { numRuns: 100 }
    );
  });
});
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
# CLI Module
|
|
2
|
+
|
|
3
|
+
Command-line interface for the Earnd Bounty Engine using Commander.js.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
The CLI is included with the package. Run it directly from source with ts-node, or from the compiled output after building:
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
npx ts-node src/cli/index.ts
|
|
11
|
+
# or after compilation
|
|
12
|
+
node dist/cli/index.js
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
## Commands
|
|
16
|
+
|
|
17
|
+
### outcome run
|
|
18
|
+
|
|
19
|
+
Start a league run with N agents competing for an outcome.
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
earnd outcome run <name> [options]
|
|
23
|
+
|
|
24
|
+
Arguments:
|
|
25
|
+
name Outcome name to run (e.g., qualified_sales_interest)
|
|
26
|
+
|
|
27
|
+
Options:
|
|
28
|
+
-a, --agents <number> Number of agents to run in parallel (default: "3")
|
|
29
|
+
-c, --ceiling <number> Global spend ceiling in tokens (default: "50000")
|
|
30
|
+
--mock Run in mock mode without real API calls
|
|
31
|
+
--provider <provider> Model provider: claude or openai (default: "claude")
|
|
32
|
+
--model <model> Model ID to use (default: "claude-3-sonnet-20240229")
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
**Examples:**
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
# Run with 3 agents in mock mode
|
|
39
|
+
earnd outcome run qualified_sales_interest --mock
|
|
40
|
+
|
|
41
|
+
# Run with 5 agents using the real API
|
|
42
|
+
earnd outcome run qualified_sales_interest --agents 5
|
|
43
|
+
|
|
44
|
+
# Run with OpenAI
|
|
45
|
+
earnd outcome run qualified_sales_interest --provider openai --model gpt-4
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
### outcome logs
|
|
49
|
+
|
|
50
|
+
Display all logs for an outcome.
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
earnd outcome logs <name> [options]
|
|
54
|
+
|
|
55
|
+
Arguments:
|
|
56
|
+
name Outcome name to show logs for
|
|
57
|
+
|
|
58
|
+
Options:
|
|
59
|
+
-a, --agent <id> Filter logs by agent ID
|
|
60
|
+
--json Output logs in JSON format
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
**Examples:**
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
# Show all logs for an outcome
|
|
67
|
+
earnd outcome logs qualified_sales_interest
|
|
68
|
+
|
|
69
|
+
# Show logs for specific agent
|
|
70
|
+
earnd outcome logs qualified_sales_interest --agent agent-0
|
|
71
|
+
|
|
72
|
+
# Output as JSON
|
|
73
|
+
earnd outcome logs qualified_sales_interest --json
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
### outcome payout
|
|
77
|
+
|
|
78
|
+
Trigger payout for a winning agent.
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
earnd outcome payout <name> --agent <id> [options]
|
|
82
|
+
|
|
83
|
+
Arguments:
|
|
84
|
+
name Outcome name
|
|
85
|
+
|
|
86
|
+
Options:
|
|
87
|
+
--agent <id> Agent ID to trigger payout for (required)
|
|
88
|
+
--dry-run Show payout details without executing
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
**Examples:**
|
|
92
|
+
|
|
93
|
+
```bash
|
|
94
|
+
# Check payout eligibility (dry run)
|
|
95
|
+
earnd outcome payout qualified_sales_interest --agent agent-0 --dry-run
|
|
96
|
+
|
|
97
|
+
# Execute payout
|
|
98
|
+
earnd outcome payout qualified_sales_interest --agent agent-0
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
## Environment Variables
|
|
102
|
+
|
|
103
|
+
For real API calls (non-mock mode), set the appropriate API key:
|
|
104
|
+
|
|
105
|
+
```bash
|
|
106
|
+
# For Claude
|
|
107
|
+
export ANTHROPIC_API_KEY=sk-ant-...
|
|
108
|
+
|
|
109
|
+
# For OpenAI
|
|
110
|
+
export OPENAI_API_KEY=sk-...
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
## Requirements Reference
|
|
114
|
+
|
|
115
|
+
- **7.1** - `outcome run <name> --agents=N` starts a league run
|
|
116
|
+
- **7.2** - `outcome logs <name>` displays all logs for the outcome
|
|
117
|
+
- **7.3** - `outcome payout <name> --agent=<id>` triggers payout
|
|
118
|
+
- **7.4** - CLI output is readable and demo-friendly
|
|
119
|
+
|
|
120
|
+
## Output Format
|
|
121
|
+
|
|
122
|
+
The CLI uses emoji and formatting for demo-friendly output:
|
|
123
|
+
|
|
124
|
+
```text
|
|
125
|
+
🚀 Earnd Bounty Engine - League Run
|
|
126
|
+
══════════════════════════════════════════════════
|
|
127
|
+
📋 Outcome: qualified_sales_interest
|
|
128
|
+
👥 Agents: 3
|
|
129
|
+
💰 Global Ceiling: 50,000 tokens
|
|
130
|
+
🤖 Provider: claude
|
|
131
|
+
══════════════════════════════════════════════════
|
|
132
|
+
|
|
133
|
+
🏃 Starting league run...
|
|
134
|
+
|
|
135
|
+
✅ Agent:agent-0 | SUCCESS
|
|
136
|
+
❌ Agent:agent-1 | FAILURE | Reason: Competitor won first
|
|
137
|
+
❌ Agent:agent-2 | FAILURE | Reason: Competitor won first
|
|
138
|
+
|
|
139
|
+
══════════════════════════════════════════════════
|
|
140
|
+
📊 League Results
|
|
141
|
+
══════════════════════════════════════════════════
|
|
142
|
+
|
|
143
|
+
🏆 Winner: agent-0
|
|
144
|
+
💵 Payout: $250
|
|
145
|
+
```
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Outcome Define Command
|
|
3
|
+
*
|
|
4
|
+
* Defines a new outcome with custom success criteria.
|
|
5
|
+
* Useful for creating domain-specific outcomes for leagues.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { Command } from 'commander';
|
|
9
|
+
import { writeFileSync } from 'fs';
|
|
10
|
+
import { resolve } from 'path';
|
|
11
|
+
import { Outcome, SuccessCriterion } from '../../outcomes/outcome.schema.js';
|
|
12
|
+
|
|
13
|
+
/**
 * Builds the `define` sub-command, which writes a new outcome definition to a
 * JSON file. The outcome carries a name, a description, a payout amount, a
 * time limit and a single success criterion.
 *
 * The action prints an error and exits the process with code 1 when
 * `--params` is not valid JSON, is not a JSON object, or when writing the
 * file fails.
 *
 * @returns The configured Commander command
 */
export function createDefineCommand(): Command {
  const defineCmd = new Command('define');

  defineCmd
    .description('Define a new custom outcome')
    .argument('<name>', 'Name of the new outcome')
    .argument('<description>', 'Description of what the outcome achieves')
    // No value parser here: a parser is applied to the value the user passes,
    // so wrapping it as `outcomes/<value>.json` turned `--file out.json` into
    // `outcomes/out.json.json`. The default path is built in the action instead.
    .option('-f, --file <file>', 'Output file path (default: outcomes/<name>.json)')
    .option('-p, --payout <amount>', 'Payout amount in dollars (default: 100)', parseFloat, 100)
    .option('-t, --timeout <minutes>', 'Time limit in minutes (default: 30)', parseFloat, 30)
    .option('--validator <validator>', 'Validator function name to use', 'validateSuccess')
    .option('--params <json>', 'JSON string of validator parameters', '{}')
    .action(async (name: string, description: string, options: {
      file?: string;
      payout?: number;
      timeout?: number;
      validator?: string;
      params?: string;
    }) => {
      try {
        // Parse validator params
        let validatorParams: Record<string, any> = {};
        try {
          validatorParams = JSON.parse(options.params || '{}');
        } catch {
          console.error('Error parsing validator params JSON');
          process.exit(1);
        }

        // JSON.parse also accepts null, arrays and primitives; only a plain
        // object is usable as a parameter map.
        if (
          validatorParams === null ||
          typeof validatorParams !== 'object' ||
          Array.isArray(validatorParams)
        ) {
          console.error('Validator params must be a JSON object');
          process.exit(1);
        }

        // Create success criteria
        const criteria: SuccessCriterion[] = [{
          name: `${name} Success`,
          description: description,
          validator: options.validator || 'validateSuccess',
          params: validatorParams,
        }];

        // `||` rather than `??`: any falsy value (0, or NaN from an
        // unparsable number) falls back to the default.
        const payoutAmount = options.payout || 100;
        const timeoutMinutes = options.timeout || 30;

        // Create the outcome
        const outcome: Outcome = {
          name,
          description,
          payoutAmount,
          timeLimitMs: timeoutMinutes * 60 * 1000,
          successCriteria: criteria,
        };

        // Write to file
        const outputPath = resolve(options.file || `outcomes/${name}.json`);
        writeFileSync(outputPath, JSON.stringify(outcome, null, 2));

        console.log(`✅ Created outcome "${name}"`);
        console.log(`📁 Saved to: ${outputPath}`);
        console.log(`💰 Payout: $${outcome.payoutAmount}`);
        console.log(`⏱️ Time limit: ${timeoutMinutes} minutes`);
        console.log(`📋 Criteria: ${criteria.length} (${criteria[0].validator})`);

        console.log('\nTo use this outcome:');
        console.log(` outcome run ${name} --config your-config.json`);

      } catch (error) {
        console.error(`Error: ${error instanceof Error ? error.message : String(error)}`);
        process.exit(1);
      }
    });

  return defineCmd;
}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* List Command - Show available outcomes (sales + code delivery)
|
|
3
|
+
*/
|
|
4
|
+
import { Command } from 'commander';
|
|
5
|
+
import { CODE_DELIVERY_OUTCOMES } from '../../outcomes/code-delivery-outcomes.js';
|
|
6
|
+
import { CODE_OUTCOMES } from '../../outcomes/code-outcomes.js';
|
|
7
|
+
import { qualifiedSalesInterest } from '../../outcomes/qualified_sales_interest.js';
|
|
8
|
+
import { codeReviewBattle } from '../../outcomes/code_review_battle.js';
|
|
9
|
+
import { leadGenBattle } from '../../outcomes/lead_gen_battle.js';
|
|
10
|
+
|
|
11
|
+
/**
 * `list` command: prints every known outcome with its category and description.
 * The individually imported outcomes come first, followed by the code-delivery
 * outcomes and then the code outcomes.
 */
export const listCommand = new Command('list')
  .description('List available outcomes and presets')
  .action(() => {
    const entries: Array<{ name: string; description: string; category: string }> = [
      {
        name: qualifiedSalesInterest.name,
        description: qualifiedSalesInterest.description,
        category: 'sales',
      },
      {
        name: codeReviewBattle.name,
        description: codeReviewBattle.description,
        category: 'code-review',
      },
      {
        name: leadGenBattle.name,
        description: leadGenBattle.description,
        category: 'lead-gen',
      },
      ...Object.values(CODE_DELIVERY_OUTCOMES).map((outcome) => ({
        name: outcome.name,
        description: outcome.description,
        category: 'code-delivery',
      })),
      // Code outcomes carry their own category, which is prefixed with `code-`.
      ...Object.values(CODE_OUTCOMES).map((outcome) => ({
        name: outcome.name,
        description: outcome.description,
        category: `code-${outcome.category}`,
      })),
    ];

    console.log('\nAvailable outcomes:\n');
    entries.forEach(({ name, description, category }) => {
      console.log(`- ${name} [${category}]`);
      console.log(` ${description}`);
    });
    console.log('');
  });
|