outcome-cli 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +261 -0
- package/package.json +95 -0
- package/src/agents/README.md +139 -0
- package/src/agents/adapters/anthropic.adapter.ts +166 -0
- package/src/agents/adapters/dalle.adapter.ts +145 -0
- package/src/agents/adapters/gemini.adapter.ts +134 -0
- package/src/agents/adapters/imagen.adapter.ts +106 -0
- package/src/agents/adapters/nano-banana.adapter.ts +129 -0
- package/src/agents/adapters/openai.adapter.ts +165 -0
- package/src/agents/adapters/veo.adapter.ts +130 -0
- package/src/agents/agent.schema.property.test.ts +379 -0
- package/src/agents/agent.schema.test.ts +148 -0
- package/src/agents/agent.schema.ts +263 -0
- package/src/agents/index.ts +60 -0
- package/src/agents/registered-agent.schema.ts +356 -0
- package/src/agents/registry.ts +97 -0
- package/src/agents/tournament-configs.property.test.ts +266 -0
- package/src/cli/README.md +145 -0
- package/src/cli/commands/define.ts +79 -0
- package/src/cli/commands/list.ts +46 -0
- package/src/cli/commands/logs.ts +83 -0
- package/src/cli/commands/run.ts +416 -0
- package/src/cli/commands/verify.ts +110 -0
- package/src/cli/index.ts +81 -0
- package/src/config/README.md +128 -0
- package/src/config/env.ts +262 -0
- package/src/config/index.ts +19 -0
- package/src/eval/README.md +318 -0
- package/src/eval/ai-judge.test.ts +435 -0
- package/src/eval/ai-judge.ts +368 -0
- package/src/eval/code-validators.ts +414 -0
- package/src/eval/evaluateOutcome.property.test.ts +1174 -0
- package/src/eval/evaluateOutcome.ts +591 -0
- package/src/eval/immigration-validators.ts +122 -0
- package/src/eval/index.ts +90 -0
- package/src/eval/judge-cache.ts +402 -0
- package/src/eval/tournament-validators.property.test.ts +439 -0
- package/src/eval/validators.property.test.ts +1118 -0
- package/src/eval/validators.ts +1199 -0
- package/src/eval/weighted-scorer.ts +285 -0
- package/src/index.ts +17 -0
- package/src/league/README.md +188 -0
- package/src/league/health-check.ts +353 -0
- package/src/league/index.ts +93 -0
- package/src/league/killAgent.ts +151 -0
- package/src/league/league.test.ts +1151 -0
- package/src/league/runLeague.ts +843 -0
- package/src/league/scoreAgent.ts +175 -0
- package/src/modules/omnibridge/__tests__/.gitkeep +1 -0
- package/src/modules/omnibridge/__tests__/auth-tunnel.property.test.ts +524 -0
- package/src/modules/omnibridge/__tests__/deterministic-logger.property.test.ts +965 -0
- package/src/modules/omnibridge/__tests__/ghost-api.property.test.ts +461 -0
- package/src/modules/omnibridge/__tests__/omnibridge-integration.test.ts +542 -0
- package/src/modules/omnibridge/__tests__/parallel-executor.property.test.ts +671 -0
- package/src/modules/omnibridge/__tests__/semantic-normalizer.property.test.ts +521 -0
- package/src/modules/omnibridge/__tests__/semantic-normalizer.test.ts +254 -0
- package/src/modules/omnibridge/__tests__/session-vault.property.test.ts +367 -0
- package/src/modules/omnibridge/__tests__/shadow-session.property.test.ts +523 -0
- package/src/modules/omnibridge/__tests__/triangulation-engine.property.test.ts +292 -0
- package/src/modules/omnibridge/__tests__/verification-engine.property.test.ts +769 -0
- package/src/modules/omnibridge/api/.gitkeep +1 -0
- package/src/modules/omnibridge/api/ghost-api.ts +1087 -0
- package/src/modules/omnibridge/auth/.gitkeep +1 -0
- package/src/modules/omnibridge/auth/auth-tunnel.ts +843 -0
- package/src/modules/omnibridge/auth/session-vault.ts +577 -0
- package/src/modules/omnibridge/core/.gitkeep +1 -0
- package/src/modules/omnibridge/core/semantic-normalizer.ts +702 -0
- package/src/modules/omnibridge/core/triangulation-engine.ts +530 -0
- package/src/modules/omnibridge/core/types.ts +610 -0
- package/src/modules/omnibridge/execution/.gitkeep +1 -0
- package/src/modules/omnibridge/execution/deterministic-logger.ts +629 -0
- package/src/modules/omnibridge/execution/parallel-executor.ts +542 -0
- package/src/modules/omnibridge/execution/shadow-session.ts +794 -0
- package/src/modules/omnibridge/index.ts +212 -0
- package/src/modules/omnibridge/omnibridge.ts +510 -0
- package/src/modules/omnibridge/verification/.gitkeep +1 -0
- package/src/modules/omnibridge/verification/verification-engine.ts +783 -0
- package/src/outcomes/README.md +75 -0
- package/src/outcomes/acquire-pilot-customer.ts +297 -0
- package/src/outcomes/code-delivery-outcomes.ts +89 -0
- package/src/outcomes/code-outcomes.ts +256 -0
- package/src/outcomes/code_review_battle.test.ts +135 -0
- package/src/outcomes/code_review_battle.ts +135 -0
- package/src/outcomes/cold_email_battle.ts +97 -0
- package/src/outcomes/content_creation_battle.ts +160 -0
- package/src/outcomes/f1_stem_opt_compliance.ts +61 -0
- package/src/outcomes/index.ts +107 -0
- package/src/outcomes/lead_gen_battle.test.ts +113 -0
- package/src/outcomes/lead_gen_battle.ts +99 -0
- package/src/outcomes/outcome.schema.property.test.ts +229 -0
- package/src/outcomes/outcome.schema.ts +187 -0
- package/src/outcomes/qualified_sales_interest.ts +118 -0
- package/src/outcomes/swarm_planner.property.test.ts +370 -0
- package/src/outcomes/swarm_planner.ts +96 -0
- package/src/outcomes/web_extraction.ts +234 -0
- package/src/runtime/README.md +220 -0
- package/src/runtime/agentRunner.test.ts +341 -0
- package/src/runtime/agentRunner.ts +746 -0
- package/src/runtime/claudeAdapter.ts +232 -0
- package/src/runtime/costTracker.ts +123 -0
- package/src/runtime/index.ts +34 -0
- package/src/runtime/modelAdapter.property.test.ts +305 -0
- package/src/runtime/modelAdapter.ts +144 -0
- package/src/runtime/openaiAdapter.ts +235 -0
- package/src/utils/README.md +122 -0
- package/src/utils/command-runner.ts +134 -0
- package/src/utils/cost-guard.ts +379 -0
- package/src/utils/errors.test.ts +290 -0
- package/src/utils/errors.ts +442 -0
- package/src/utils/index.ts +37 -0
- package/src/utils/logger.test.ts +361 -0
- package/src/utils/logger.ts +419 -0
- package/src/utils/output-parsers.ts +216 -0
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Logs Command - Display logs for an outcome
|
|
3
|
+
*
|
|
4
|
+
* @module cli/commands/logs
|
|
5
|
+
* @see Requirements 7.2
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { getLogs, getAgentLogs, formatLogsForCli, type LogEntry } from '../../utils/logger.js';
|
|
9
|
+
|
|
10
|
+
/**
 * Flags accepted by the `logs` command handler.
 */
interface LogsOptions {
  /** When true, entries are printed as a JSON array instead of CLI text. */
  json: boolean;
  /** Optional agent ID; when set, only that agent's entries are fetched. */
  agent?: string;
}
|
|
17
|
+
|
|
18
|
+
/**
 * Builds the plain object emitted for one log entry in `--json` mode.
 *
 * The six core fields are always copied. `failureReason` and `metadata`
 * are attached only when they hold a truthy value, so absent or empty
 * values never appear as keys in the output.
 *
 * @param entry - Log entry to convert
 * @returns Object holding the entry's fields in a fixed key order
 */
function formatLogAsJson(entry: LogEntry): object {
  const { timestamp, agentId, outcomeId, promptVersion, tokensSpent, result } = entry;
  const formatted: Record<string, unknown> = {
    timestamp,
    agentId,
    outcomeId,
    promptVersion,
    tokensSpent,
    result,
  };

  if (entry.failureReason) {
    formatted.failureReason = entry.failureReason;
  }
  if (entry.metadata) {
    formatted.metadata = entry.metadata;
  }

  return formatted;
}
|
|
33
|
+
|
|
34
|
+
/**
 * Handler for the `outcome logs` command.
 *
 * Fetches the log entries for an outcome (optionally restricted to one
 * agent) and prints them either as a JSON array or as CLI text followed
 * by a summary of result counts and total tokens.
 *
 * @param name - Outcome name to show logs for
 * @param options - Command options
 *
 * @see Requirements 7.2
 */
export async function logsCommand(name: string, options: LogsOptions): Promise<void> {
  const { agent, json } = options;

  // Pick the data source according to the agent filter
  const logs: LogEntry[] = agent ? getAgentLogs(name, agent) : getLogs(name);

  // JSON mode prints the entries and nothing else
  if (json) {
    const jsonOutput = logs.map(formatLogAsJson);
    console.log(JSON.stringify(jsonOutput, null, 2));
    return;
  }

  // Nothing to show: explain why and hint at how to produce logs
  if (logs.length === 0) {
    console.log(`\n📋 No logs found for outcome: ${name}`);
    if (agent) {
      console.log(` (filtered by agent: ${agent})`);
    }
    console.log('\nTip: Run a league first with: earnd outcome run <name>');
    return;
  }

  // NOTE(review): formatLogsForCli receives only the outcome name, so the
  // listing it prints may not honour the agent filter applied to `logs` — confirm.
  console.log(formatLogsForCli(name));

  if (agent) {
    console.log(`Filtered by agent: ${agent}`);
  }

  // Summary statistics, gathered in a single pass over the entries
  let successCount = 0;
  let failureCount = 0;
  let pendingCount = 0;
  let totalTokens = 0;
  for (const entry of logs) {
    if (entry.result === 'SUCCESS') {
      successCount += 1;
    } else if (entry.result === 'FAILURE') {
      failureCount += 1;
    } else if (entry.result === 'PENDING') {
      pendingCount += 1;
    }
    totalTokens += entry.tokensSpent;
  }

  console.log('\n📊 Summary:');
  console.log(` Total Entries: ${logs.length}`);
  console.log(` Success: ${successCount}, Failure: ${failureCount}, Pending: ${pendingCount}`);
  console.log(` Total Tokens: ${totalTokens.toLocaleString()}`);
}
|
|
@@ -0,0 +1,416 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Run Command - Start a league run with N agents
|
|
3
|
+
*
|
|
4
|
+
* @module cli/commands/run
|
|
5
|
+
* @see Requirements 7.1
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { runLeague, runLeagueMock, type LeagueConfig } from '../../league/index.js';
|
|
9
|
+
import { qualifiedSalesInterest } from '../../outcomes/qualified_sales_interest.js';
|
|
10
|
+
import { codeReviewBattle } from '../../outcomes/code_review_battle.js';
|
|
11
|
+
import { leadGenBattle } from '../../outcomes/lead_gen_battle.js';
|
|
12
|
+
import { CODE_DELIVERY_OUTCOMES } from '../../outcomes/code-delivery-outcomes.js';
|
|
13
|
+
import type { AgentConfig } from '../../agents/agent.schema.js';
|
|
14
|
+
import type { Lead } from '../../jobs/job.interface.js';
|
|
15
|
+
|
|
16
|
+
/**
 * Flags accepted by the `run` command handler.
 *
 * `agents` and `ceiling` arrive as strings and are parsed to integers by
 * the handler.
 */
interface RunOptions {
  /** Number of agents to start, as a base-10 integer string. */
  agents: string;
  /** Global spend ceiling, as a base-10 integer string. */
  ceiling: string;
  /** When true, the mock league runner is used and no API key is required. */
  mock: boolean;
  /** Model provider used for default (non-tournament) agents and the API key check. */
  provider: 'claude' | 'openai';
  /** Model ID used for default (non-tournament) agents. */
  model: string;
  /** Execution mode. NOTE(review): not read by the run handler in this file — confirm where it is consumed. */
  mode: 'league' | 'swarm' | 'pipeline';
  /** Optional goal. NOTE(review): not read by the run handler in this file. */
  goal?: string;
  /** Optional target URL. NOTE(review): not read by the run handler in this file. */
  targetUrl?: string;
  /** Optional dry-run flag. NOTE(review): not read by the run handler in this file. */
  dryRun?: boolean;
  /** Optional JSON-output flag. NOTE(review): not read by the run handler in this file. */
  json?: boolean;
}
|
|
31
|
+
|
|
32
|
+
/**
 * Builds the fixed sample artifact used for code delivery outcomes
 * (feature / refactor / test generation).
 *
 * The artifact bundles repository locations, the commands to run, canned
 * results for each command, and a placeholder code snippet.
 *
 * @returns A fresh sample artifact object on every call
 */
function createSampleCodeArtifact(): any {
  // Where the code lives
  const locations = {
    repoPath: '/tmp/repo',
    worktreePath: '/tmp/repo-worktree',
    commitSha: 'HEAD',
  };

  // Commands associated with the artifact
  const commands = {
    testCommand: 'npm test',
    buildCommand: 'npm run build',
    lintCommand: 'npm run lint',
    benchmarkCommand: 'npm run bench',
    securityScanCommand: 'npm run scan',
  };

  // Canned results, one per command above
  const results = {
    testResult: { passed: true, failCount: 0, totalCount: 120 },
    buildResult: { success: true },
    lintResult: { errors: 0, warnings: 0 },
    benchmarkResult: { p95Ms: 180, maxMs: 220 },
    securityScanResult: { findings: [] },
  };

  return {
    ...locations,
    ...commands,
    ...results,
    code: '// feature implementation placeholder',
    language: 'typescript',
  };
}
|
|
54
|
+
|
|
55
|
+
/**
 * Builds the configuration for the agent at a given position in the league.
 *
 * For the tournament outcomes (`code_review_battle`, `lead_gen_battle`) the
 * agent is picked round-robin from a fixed roster and its prompt, tools,
 * cost ceiling, provider and model come from the tournament helpers; the
 * `provider` and `modelId` arguments are not used in that case. For every
 * other outcome a sales qualification agent is returned that uses the
 * given provider and model.
 *
 * @param index Agent index
 * @param provider Model provider
 * @param modelId Model ID
 * @param outcomeName Outcome name
 * @returns Agent configuration
 */
function createDefaultAgent(
  index: number,
  provider: 'claude' | 'openai',
  modelId: string,
  outcomeName: string
): AgentConfig {
  const isTournamentOutcome = ['code_review_battle', 'lead_gen_battle'].includes(outcomeName);

  if (!isTournamentOutcome) {
    // Default sales agent for qualified_sales_interest
    const salesPrompt = `You are a professional sales qualification agent for a B2B SaaS company.
Your goal is to engage with prospects and qualify them as potential customers.

## Your Objective
Generate a qualified sales lead by having a conversation that:
1. Identifies buying intent (mentions of pricing, demo requests, or next steps)
2. Confirms the prospect's company has at least 50 employees
3. Verifies the prospect holds a decision-making role (not an intern or student)
4. Produces a substantive response (at least 20 words)
5. Obtains a valid email address for follow-up

## Response Format
When you have gathered all required information, structure your response as:
- A summary of the prospect's needs and interest level
- Key qualification details (company size, role, buying signals)
- Proposed next steps (demo, pricing discussion, etc.)
- Contact information for follow-up`;

    return {
      id: `agent-${index}`,
      name: `Agent ${index}`,
      prompt: salesPrompt,
      strategyDescription: 'Consultative sales approach to qualify leads',
      toolAccess: [],
      costCeiling: 10000,
      modelProvider: provider,
      modelId,
    };
  }

  // Tournament roster; agents are assigned by cycling through it
  const roster = [
    'tournament-coderabbit',
    'tournament-claude-sonnet',
    'tournament-gpt4o',
    'tournament-llama3'
  ];
  const rosterId = roster[index % roster.length];

  return {
    id: `${rosterId}-${index}`,
    name: `Tournament Agent ${index} (${rosterId})`,
    prompt: getTournamentPrompt(rosterId, outcomeName),
    strategyDescription: `Tournament-optimized for ${outcomeName}`,
    toolAccess: getTournamentTools(rosterId),
    costCeiling: getTournamentCostCeiling(rosterId),
    modelProvider: getTournamentProvider(rosterId),
    modelId: getTournamentModelId(rosterId),
  };
}
|
|
122
|
+
|
|
123
|
+
/**
 * Gets tournament-specific prompt for an agent.
 *
 * The prompt is composed of three paragraphs separated by blank lines:
 * the agent's persona, outcome-specific focus instructions, and a closing
 * goal statement naming the outcome.
 *
 * @param agentId - Tournament roster ID (e.g. `tournament-gpt4o`)
 * @param outcomeName - Outcome name; `code_review_battle` selects the code
 *   review focus, anything else selects the lead generation focus
 * @returns The assembled prompt text
 */
function getTournamentPrompt(agentId: string, outcomeName: string): string {
  const basePrompts = {
    'tournament-coderabbit': `You are CodeRabbit, an AI-powered code review specialist designed for tournament competition.
Your expertise is in comprehensive code analysis, identifying security vulnerabilities, performance bottlenecks, and suggesting complexity-reducing refactors.`,
    'tournament-claude-sonnet': `You are Claude Sonnet, a tournament-optimized generalist AI agent with exceptional reasoning capabilities across multiple domains.
Your strength lies in balanced, methodical analysis that excels in both code review and lead generation challenges.`,
    'tournament-gpt4o': `You are GPT-4o, the high-accuracy tournament elite designed for maximum precision in competitive AI challenges.
Your competitive advantage is exceptional accuracy and attention to detail.`,
    'tournament-llama3': `You are Llama3, the cost-efficient tournament competitor designed to deliver competitive performance while maintaining optimal cost-effectiveness.
Your competitive strategy focuses on efficient resource utilization.`
  };

  // Used when agentId is not on the roster. Without it the template below
  // would render the literal text "undefined" as the persona paragraph.
  const fallbackPrompt = `You are a tournament-optimized AI agent designed for competitive AI challenges.
Your strength lies in careful, methodical work that satisfies every success criterion.`;

  // Own-key check so inherited names such as "constructor" are not
  // mistaken for roster entries.
  const basePrompt = Object.prototype.hasOwnProperty.call(basePrompts, agentId)
    ? basePrompts[agentId as keyof typeof basePrompts]
    : fallbackPrompt;

  const outcomeSpecific = outcomeName === 'code_review_battle'
    ? `Focus on identifying CRITICAL security vulnerabilities, performance bottlenecks, ensuring zero noise comments, and suggesting complexity-reducing refactors.`
    : `Focus on precise lead qualification with valid email formats, company size validation, role verification, and LinkedIn URL validation.`;

  return `${basePrompt}

${outcomeSpecific}

Your goal is to achieve perfect scores in tournament ${outcomeName} by meeting all success criteria with surgical precision.`;
}
|
|
148
|
+
|
|
149
|
+
/**
 * Looks up the tool names granted to a tournament agent.
 *
 * @param agentId - Tournament roster ID
 * @returns The agent's tool names, or an empty list when the ID has no entry
 */
function getTournamentTools(agentId: string): string[] {
  const toolsByAgent = {
    'tournament-coderabbit': [
      'code_analyzer',
      'security_scanner',
      'performance_profiler',
    ],
    'tournament-claude-sonnet': [
      'code_analyzer',
      'email_validator',
      'company_research',
      'linkedin_lookup',
    ],
    'tournament-gpt4o': [
      'advanced_code_analyzer',
      'security_vulnerability_scanner',
      'email_validator',
      'company_research',
      'linkedin_validator',
    ],
    'tournament-llama3': [
      'basic_code_analyzer',
      'email_validator',
      'company_lookup',
    ],
  };

  const tools = toolsByAgent[agentId as keyof typeof toolsByAgent];
  return tools ? tools : [];
}
|
|
162
|
+
|
|
163
|
+
/**
 * Looks up the per-agent cost ceiling for a tournament agent.
 *
 * @param agentId - Tournament roster ID
 * @returns The agent's ceiling, or 8000 when the ID has no entry
 */
function getTournamentCostCeiling(agentId: string): number {
  const ceilingsByAgent = {
    'tournament-coderabbit': 10000,
    'tournament-claude-sonnet': 8000,
    'tournament-gpt4o': 12000,
    'tournament-llama3': 3000,
  };

  const ceiling = ceilingsByAgent[agentId as keyof typeof ceilingsByAgent];
  if (ceiling) {
    return ceiling;
  }
  return 8000;
}
|
|
176
|
+
|
|
177
|
+
/**
 * Looks up the model provider a tournament agent runs on.
 *
 * @param agentId - Tournament roster ID
 * @returns The agent's provider, or `'claude'` when the ID has no entry
 */
function getTournamentProvider(agentId: string): 'claude' | 'openai' {
  const providersByAgent: { [id: string]: 'claude' | 'openai' } = {
    'tournament-coderabbit': 'claude',
    'tournament-claude-sonnet': 'claude',
    'tournament-gpt4o': 'openai',
    // Fallback to claude for CLI compatibility
    'tournament-llama3': 'claude',
  };

  const provider = providersByAgent[agentId];
  return provider ? provider : 'claude';
}
|
|
190
|
+
|
|
191
|
+
/**
 * Looks up the model ID a tournament agent runs on.
 *
 * @param agentId - Tournament roster ID
 * @returns The agent's model ID, or the Claude Sonnet model ID when the
 *   roster ID has no entry
 */
function getTournamentModelId(agentId: string): string {
  const claudeSonnet = 'claude-sonnet-4-20250514';

  const modelsByAgent = {
    'tournament-coderabbit': claudeSonnet,
    'tournament-claude-sonnet': claudeSonnet,
    'tournament-gpt4o': 'gpt-4o',
    // Fallback for CLI compatibility
    'tournament-llama3': claudeSonnet,
  };

  const model = modelsByAgent[agentId as keyof typeof modelsByAgent];
  return model ? model : claudeSonnet;
}
|
|
204
|
+
|
|
205
|
+
/**
 * Builds the fixed sample lead used when running the sales outcome.
 *
 * @returns A fresh lead object on every call
 */
function createSampleLead(): Lead {
  const sampleLead: Lead = {
    email: 'sarah.chen@innovatetech.com',
    company: 'InnovateTech Solutions',
    companySize: 150,
    role: 'VP of Engineering',
    previousInteractions: [
      'Downloaded whitepaper',
      'Attended webinar',
    ],
  };

  return sampleLead;
}
|
|
217
|
+
|
|
218
|
+
/**
 * Builds the fixed code review challenge used for tournament testing.
 *
 * The challenge pairs a deliberately flawed authentication snippet with
 * the list of review requirements given to the agents.
 *
 * @returns Object with `sourceCode` (the snippet text) and `requirements`
 */
function createSampleCodeReview(): any {
  // The snippet is data handed to the agents, not code executed here
  const sourceCode = `
// Vulnerable authentication function for code review
function authenticateUser(username, password) {
const query = "SELECT * FROM users WHERE username = '" + username + "' AND password = '" + password + "'";
const result = db.query(query);

if (result.length > 0) {
for (let i = 0; i < result.length; i++) {
const user = result[i];
const permissions = db.query("SELECT * FROM permissions WHERE user_id = " + user.id);
if (permissions.length > 0) {
return { success: true, user: user, permissions: permissions };
}
}
}
return { success: false };
}
`;

  const requirements = [
    'Identify CRITICAL security vulnerabilities (SQL injection, XSS)',
    'Identify performance bottlenecks (N+1 queries)',
    'Ensure all comments reference actual source lines (zero noise)',
    'Suggest refactoring to reduce cyclomatic complexity by 2+ points'
  ];

  return { sourceCode, requirements };
}
|
|
249
|
+
|
|
250
|
+
/**
 * Builds the fixed lead generation challenge used for tournament testing.
 *
 * @returns Object with `targetCriteria` (human-readable rule descriptions)
 *   and `requirements` (the task list given to the agents)
 */
function createSampleLeadGenChallenge(): any {
  const targetCriteria = {
    emailFormat: 'Valid email format matching /^[^\\s@]+@[^\\s@]+\\.[^\\s@]+$/',
    companySize: 'Minimum 50 employees',
    roleQualification: 'Not "intern" or "student" (case-insensitive)',
    linkedInFormat: 'Must start with "https://www.linkedin.com/in/"'
  };

  const requirements = [
    'Generate valid email address',
    'Verify company size >= 50 employees',
    'Ensure role is not intern/student',
    'Provide valid LinkedIn URL format'
  ];

  return { targetCriteria, requirements };
}
|
|
269
|
+
|
|
270
|
+
/**
 * Handler for the `outcome run` command.
 *
 * Steps, in order:
 * 1. Parse and validate the agent count and the global spend ceiling.
 * 2. Resolve the outcome definition by name.
 * 3. Build one agent configuration per agent and the sample input for the
 *    outcome type.
 * 4. Unless running in mock mode, require the API key for the selected
 *    provider and warn about missing keys for any other provider the
 *    agents use.
 * 5. Run the league (mock or real), print the results, and exit with
 *    code 0 when there is a winner and 1 otherwise.
 *
 * This function terminates the process via `process.exit` on every path.
 *
 * @param name - Outcome name to run
 * @param options - Command options
 *
 * @see Requirements 7.1
 */
export async function runCommand(name: string, options: RunOptions): Promise<void> {
  const agentCount = Number.parseInt(options.agents, 10);
  const globalSpendCeiling = Number.parseInt(options.ceiling, 10);

  console.log('\n🚀 Earnd Bounty Engine - League Run');
  console.log('═'.repeat(50));
  console.log(`📋 Outcome: ${name}`);
  console.log(`👥 Agents: ${agentCount}`);
  console.log(`💰 Global Ceiling: ${globalSpendCeiling.toLocaleString()} tokens`);
  console.log(`🤖 Provider: ${options.provider}`);
  console.log(`📦 Model: ${options.model}`);
  console.log(`🧪 Mock Mode: ${options.mock ? 'Yes' : 'No'}`);
  console.log('═'.repeat(50));

  // Validate agent count
  if (Number.isNaN(agentCount) || agentCount < 1) {
    console.error('❌ Error: Agent count must be a positive number');
    process.exit(1);
  }

  // Validate the spend ceiling as well; an unparsable value would otherwise
  // reach the league configuration as NaN.
  if (Number.isNaN(globalSpendCeiling) || globalSpendCeiling < 1) {
    console.error('❌ Error: Global spend ceiling must be a positive number');
    process.exit(1);
  }

  // Get the outcome
  let outcome;
  if (name === 'qualified_sales_interest') {
    outcome = qualifiedSalesInterest;
  } else if (name === 'code_review_battle') {
    outcome = codeReviewBattle;
  } else if (name === 'lead_gen_battle') {
    outcome = leadGenBattle;
  } else if (
    // Own-key check: names such as "constructor" or "toString" must not
    // resolve to members inherited from Object.prototype.
    Object.prototype.hasOwnProperty.call(CODE_DELIVERY_OUTCOMES, name) &&
    CODE_DELIVERY_OUTCOMES[name]
  ) {
    outcome = CODE_DELIVERY_OUTCOMES[name];
  } else {
    console.error(`❌ Error: Unknown outcome "${name}"`);
    console.error('Available outcomes: qualified_sales_interest, code_review_battle, lead_gen_battle, feature_implementation, refactor_task, test_generation');
    process.exit(1);
  }

  // Create agent configurations
  const agentConfigs: AgentConfig[] = [];
  for (let i = 0; i < agentCount; i++) {
    agentConfigs.push(createDefaultAgent(i, options.provider, options.model, name));
  }

  // Create sample data based on outcome type
  let lead;
  if (name === 'qualified_sales_interest') {
    lead = createSampleLead();
    console.log('\n📍 Lead Information:');
    console.log(` Email: ${lead.email}`);
    console.log(` Company: ${lead.company} (${lead.companySize} employees)`);
    console.log(` Role: ${lead.role}`);
  } else if (name === 'code_review_battle') {
    lead = createSampleCodeReview();
    console.log('\n📍 Code Review Challenge:');
    console.log(` Target: Security vulnerabilities and performance bottlenecks`);
    console.log(` Requirements: CRITICAL security issues, performance bottlenecks, zero noise, complexity reduction`);
  } else if (name === 'lead_gen_battle') {
    lead = createSampleLeadGenChallenge();
    console.log('\n📍 Lead Generation Challenge:');
    console.log(` Target: Generate qualified leads with precision validation`);
    console.log(` Requirements: Valid email, company size >= 50, qualified role, valid LinkedIn URL`);
  } else {
    lead = createSampleCodeArtifact();
    console.log('\n📍 Code Delivery Challenge:');
    console.log(` Repo: ${lead.repoPath}`);
    console.log(` Test cmd: ${lead.testCommand}`);
    console.log(` Build cmd: ${lead.buildCommand}`);
    console.log(` Lint cmd: ${lead.lintCommand}`);
    console.log(` Benchmark cmd: ${lead.benchmarkCommand}`);
  }
  console.log('');

  // Check for API key if not in mock mode
  if (!options.mock) {
    const apiKeyEnvVar = options.provider === 'claude' ? 'ANTHROPIC_API_KEY' : 'OPENAI_API_KEY';
    const apiKey = process.env[apiKeyEnvVar];

    if (!apiKey) {
      console.error(`❌ Error: ${apiKeyEnvVar} environment variable is required`);
      console.error(` Set it with: export ${apiKeyEnvVar}=your-api-key`);
      console.error(' Or use --mock flag to run without real API calls');
      process.exit(1);
    }

    // Tournament agents choose their own provider regardless of
    // options.provider, so the check above can miss a key they need.
    // Warn instead of exiting so existing invocations keep working.
    const envVarByProvider: Record<string, string> = {
      claude: 'ANTHROPIC_API_KEY',
      openai: 'OPENAI_API_KEY',
    };
    const warnedEnvVars: string[] = [];
    agentConfigs.forEach((config) => {
      const envVar = envVarByProvider[String(config.modelProvider)];
      const alreadyHandled = !envVar || envVar === apiKeyEnvVar || warnedEnvVars.includes(envVar);
      if (alreadyHandled || process.env[envVar]) {
        return;
      }
      warnedEnvVars.push(envVar);
      console.warn(`⚠️ Warning: ${envVar} is not set but agent "${config.id}" uses provider "${config.modelProvider}"`);
    });
  }

  // Build league configuration
  const leagueConfig: LeagueConfig = {
    outcomeId: name,
    agentCount,
    globalSpendCeiling,
    agentConfigs,
    outcome,
    lead,
    mockMode: options.mock,
  };

  console.log('🏃 Starting league run...\n');

  try {
    const startTime = Date.now();
    const result = options.mock
      ? await runLeagueMock(leagueConfig)
      : await runLeague(leagueConfig);
    const elapsed = Date.now() - startTime;

    console.log('\n' + '═'.repeat(50));
    console.log('📊 League Results');
    console.log('═'.repeat(50));

    if (result.winnerId) {
      console.log(`\n🏆 Winner: ${result.winnerId}`);
      console.log(`💵 Payout: $${outcome.payoutAmount}`);
    } else {
      console.log('\n❌ No winner - all agents failed');
    }

    console.log(`\n📈 Statistics:`);
    console.log(` Total Cost: ${result.totalCost.toLocaleString()} tokens`);
    console.log(` Duration: ${(elapsed / 1000).toFixed(2)}s`);
    console.log(` Global Ceiling Hit: ${result.globalCeilingHit ? 'Yes' : 'No'}`);

    console.log(`\n👥 Agent Results:`);
    for (const agent of result.agents) {
      const statusIcon = agent.status === 'winner' ? '🏆' : agent.status === 'killed' ? '💀' : '❌';
      console.log(` ${statusIcon} ${agent.agentId}: ${agent.status}`);
      console.log(` Attempts: ${agent.attempts}, Tokens: ${agent.tokensSpent}`);
      if (agent.killReason) {
        console.log(` Kill Reason: ${agent.killReason}`);
      }
    }

    console.log('\n' + '═'.repeat(50));

    // Exit with appropriate code
    process.exit(result.winnerId ? 0 : 1);
  } catch (error) {
    console.error('\n❌ League run failed:', error instanceof Error ? error.message : error);
    process.exit(1);
  }
}
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Outcome Verify Command
|
|
3
|
+
*
|
|
4
|
+
* Verifies that a given artifact meets the success criteria for an outcome.
|
|
5
|
+
* Useful for testing outcomes and artifacts before running a full league.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { Command } from 'commander';
|
|
9
|
+
import { readFileSync } from 'fs';
|
|
10
|
+
import { resolve } from 'path';
|
|
11
|
+
import { evaluateOutcome } from '../../eval/evaluateOutcome.js';
|
|
12
|
+
import { getOutcomeByName } from '../../outcomes/index.js';
|
|
13
|
+
import { enrichCodeExecution } from '../../league/runLeague.js';
|
|
14
|
+
import { CODE_DELIVERY_OUTCOMES } from '../../outcomes/code-delivery-outcomes.js';
|
|
15
|
+
|
|
16
|
+
/**
 * Builds the `verify` subcommand.
 *
 * The command loads an outcome by name, reads an artifact from a JSON
 * file, optionally enriches a code delivery artifact by executing its
 * commands, evaluates the artifact against the outcome, and prints the
 * result as text or JSON. The process exits with code 0 when the
 * evaluation status is `SUCCESS` and 1 otherwise (including on errors).
 *
 * With `--json`, progress messages are written to stderr so that stdout
 * carries only the JSON document.
 *
 * @returns The configured commander `Command`
 */
export function createVerifyCommand(): Command {
  const verifyCmd = new Command('verify');

  verifyCmd
    .description('Verify an artifact against outcome success criteria')
    .argument('<outcome>', 'Name of the outcome to verify against')
    .argument('<artifact-file>', 'Path to JSON file containing the artifact')
    .option('--execute-commands', 'Execute test/build/lint commands if present (default: false)', false)
    .option('--json', 'Output results as JSON', false)
    .action(async (outcomeName: string, artifactFile: string, options: { executeCommands?: boolean; json?: boolean }) => {
      // Progress text must not share stdout with the JSON payload, or
      // consumers piping `--json` output into a parser would fail.
      const logProgress = (message: string): void => {
        if (options.json) {
          console.error(message);
        } else {
          console.log(message);
        }
      };

      try {
        // Load the outcome
        const outcome = getOutcomeByName(outcomeName);
        if (!outcome) {
          console.error(`Error: Unknown outcome "${outcomeName}"`);
          console.error('Use "outcome list" to see available outcomes');
          process.exit(1);
        }

        // Load the artifact
        const artifactPath = resolve(artifactFile);
        const artifactData = JSON.parse(readFileSync(artifactPath, 'utf-8'));

        // If it's a code outcome and execution is enabled, enrich with command results
        let artifact = artifactData;
        const isCodeOutcome = Boolean(CODE_DELIVERY_OUTCOMES[outcome.name]);
        if (isCodeOutcome && options.executeCommands) {
          logProgress('Executing commands...');
          artifact = await enrichCodeExecution(artifact);
        }

        // Evaluate the artifact
        logProgress(`Verifying artifact against "${outcomeName}"...`);
        const result = await evaluateOutcome(outcome, artifact);

        if (options.json) {
          console.log(JSON.stringify(result, null, 2));
        } else {
          console.log(`\nVerification Result: ${result.status}`);
          if (result.status === 'SUCCESS') {
            console.log('✅ Artifact meets all success criteria');
          } else {
            console.log('❌ Artifact failed validation');
            console.log(`Reason: ${result.reason}`);
          }

          if (result.criteriaResults && result.criteriaResults.length > 0) {
            console.log('\nCriteria Results:');
            result.criteriaResults.forEach((criteria, index) => {
              const icon = criteria.passed ? '✅' : '❌';
              console.log(` ${index + 1}. ${icon} ${criteria.name}`);
              if (!criteria.passed && criteria.errors.length > 0) {
                criteria.errors.forEach(error => {
                  console.log(` ${error}`);
                });
              }
            });
          }

          if (isCodeOutcome && options.executeCommands) {
            console.log('\nCommand Execution Results:');
            // Guard against an artifact without `content`; dereferencing it
            // unguarded would turn a completed verification into an error exit.
            const content = ((artifact && artifact.content) || {}) as any;
            if (content.testResult) {
              const test = content.testResult as any;
              console.log(` Tests: ${test.success ? '✅' : '❌'} (${test.passedTests}/${test.totalTests} passed)`);
            }
            if (content.buildResult) {
              const build = content.buildResult as any;
              console.log(` Build: ${build.success ? '✅' : '❌'} (exit ${build.exitCode})`);
            }
            if (content.lintResult) {
              const lint = content.lintResult as any;
              console.log(` Lint: ${lint.success ? '✅' : '❌'} (${lint.errorCount} errors, ${lint.warningCount} warnings)`);
            }
            if (content.benchmarkResult) {
              const bench = content.benchmarkResult as any;
              console.log(` Benchmark: ${bench.success ? '✅' : '❌'} (${bench.totalBenchmarks} benchmarks)`);
            }
            if (content.securityScanResult) {
              const sec = content.securityScanResult as any;
              console.log(` Security: ${sec.success ? '✅' : '❌'} (${sec.totalVulnerabilities} vulnerabilities)`);
            }
          }
        }

        process.exit(result.status === 'SUCCESS' ? 0 : 1);

      } catch (error) {
        console.error(`Error: ${error instanceof Error ? error.message : String(error)}`);
        process.exit(1);
      }
    });

  return verifyCmd;
}
|
package/src/cli/index.ts
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* CLI Entry Point - Command line interface for Earnd Bounty Engine
|
|
4
|
+
*
|
|
5
|
+
* Commands:
|
|
6
|
+
* - outcome run <name> --agents=N - Start a league run with N agents
|
|
7
|
+
* - outcome logs <name> - Display logs for an outcome
|
|
8
|
+
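* - outcome payout <name> --agent=<id> - Trigger payout for winning agent
* - outcome list, outcome verify, outcome define - additional subcommands registered below (implemented in ./commands/)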
|
|
9
|
+
*
|
|
10
|
+
* @module cli
|
|
11
|
+
* @see Requirements 7.1, 7.2, 7.3, 7.4
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import { Command } from 'commander';
|
|
15
|
+
import { runCommand } from './commands/run.js';
|
|
16
|
+
import { logsCommand } from './commands/logs.js';
|
|
17
|
+
import { payoutCommand } from './commands/payout.js';
|
|
18
|
+
import { listCommand } from './commands/list.js';
|
|
19
|
+
import { createVerifyCommand } from './commands/verify.js';
|
|
20
|
+
import { createDefineCommand } from './commands/define.js';
|
|
21
|
+
|
|
22
|
+
// Root program. Every user-facing command is attached to the `outcome` group
// created below, so invocations take the form `earnd outcome <subcommand> ...`.
const program = new Command();

program
  .name('earnd')
  .description('Earnd Bounty Engine - Outcome-based AI agent competition system')
  .version('1.0.0');

// Add the outcome command group
const outcome = program
  .command('outcome')
  .description('Manage outcomes and run agent competitions');

// outcome list
outcome.addCommand(listCommand);

// outcome verify
outcome.addCommand(createVerifyCommand());

// outcome define
outcome.addCommand(createDefineCommand());

// outcome run <name> --agents=N
// Option defaults are passed as strings ('3', '50000'); no parser callback is
// attached here, so runCommand receives them as strings.
outcome
  .command('run <name>')
  .description('Start a league run with N agents competing for an outcome')
  .option('-a, --agents <number>', 'Number of agents to run in parallel', '3')
  .option('-c, --ceiling <number>', 'Global spend ceiling in tokens', '50000')
  .option('--mock', 'Run in mock mode without real API calls', false)
  .option('--provider <provider>', 'Model provider (claude or openai)', 'claude')
  .option('--model <model>', 'Model ID to use', 'claude-sonnet-4-20250514')
  .option('-m, --mode <mode>', 'Execution mode: league, swarm, or pipeline', 'league')
  .option('-g, --goal <goal>', 'High-level goal for swarm and pipeline modes')
  .option('-u, --target-url <url>', 'Target URL for OmniBridge extraction')
  .option('--execute-commands', 'Execute test/build/lint commands for code outcomes (default: true)', true)
  // A boolean flag that defaults to true cannot be switched off unless a
  // negating form exists. Declaring it after the positive flag keeps the
  // default (true) and lets users opt out explicitly.
  .option('--no-execute-commands', 'Skip executing test/build/lint commands for code outcomes')
  .option('--dry-run', 'Show execution plan without running', false)
  .option('--json', 'Output results in JSON format', false)
  .action(runCommand);

// outcome logs <name>
outcome
  .command('logs <name>')
  .description('Display all logs for the specified outcome')
  .option('-a, --agent <id>', 'Filter logs by agent ID')
  .option('--json', 'Output logs in JSON format', false)
  .action(logsCommand);

// outcome payout <name> --agent=<id>
// NOTE(review): payoutCommand is imported from ./commands/payout.js — confirm
// that module is actually shipped with the package alongside the other commands.
outcome
  .command('payout <name>')
  .description('Trigger payout for the winning agent')
  .requiredOption('--agent <id>', 'Agent ID to trigger payout for')
  .option('--method <method>', 'Payout method: auto, tremendous, or usdc', 'auto')
  .option('--email <email>', 'Recipient email (for Tremendous payouts)')
  .option('--wallet <address>', 'Recipient wallet address (for USDC payouts)')
  .option('--name <name>', 'Recipient name')
  .option('--dry-run', 'Show payout details without executing', false)
  .action(payoutCommand);

// Parse arguments.
// Action handlers may be async, so use parseAsync() and surface any rejection
// as a one-line error with a non-zero exit code instead of an unhandled
// promise rejection.
program.parseAsync().catch((error) => {
  console.error(`Error: ${error instanceof Error ? error.message : String(error)}`);
  process.exit(1);
});
|