outcome-cli 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +261 -0
- package/package.json +95 -0
- package/src/agents/README.md +139 -0
- package/src/agents/adapters/anthropic.adapter.ts +166 -0
- package/src/agents/adapters/dalle.adapter.ts +145 -0
- package/src/agents/adapters/gemini.adapter.ts +134 -0
- package/src/agents/adapters/imagen.adapter.ts +106 -0
- package/src/agents/adapters/nano-banana.adapter.ts +129 -0
- package/src/agents/adapters/openai.adapter.ts +165 -0
- package/src/agents/adapters/veo.adapter.ts +130 -0
- package/src/agents/agent.schema.property.test.ts +379 -0
- package/src/agents/agent.schema.test.ts +148 -0
- package/src/agents/agent.schema.ts +263 -0
- package/src/agents/index.ts +60 -0
- package/src/agents/registered-agent.schema.ts +356 -0
- package/src/agents/registry.ts +97 -0
- package/src/agents/tournament-configs.property.test.ts +266 -0
- package/src/cli/README.md +145 -0
- package/src/cli/commands/define.ts +79 -0
- package/src/cli/commands/list.ts +46 -0
- package/src/cli/commands/logs.ts +83 -0
- package/src/cli/commands/run.ts +416 -0
- package/src/cli/commands/verify.ts +110 -0
- package/src/cli/index.ts +81 -0
- package/src/config/README.md +128 -0
- package/src/config/env.ts +262 -0
- package/src/config/index.ts +19 -0
- package/src/eval/README.md +318 -0
- package/src/eval/ai-judge.test.ts +435 -0
- package/src/eval/ai-judge.ts +368 -0
- package/src/eval/code-validators.ts +414 -0
- package/src/eval/evaluateOutcome.property.test.ts +1174 -0
- package/src/eval/evaluateOutcome.ts +591 -0
- package/src/eval/immigration-validators.ts +122 -0
- package/src/eval/index.ts +90 -0
- package/src/eval/judge-cache.ts +402 -0
- package/src/eval/tournament-validators.property.test.ts +439 -0
- package/src/eval/validators.property.test.ts +1118 -0
- package/src/eval/validators.ts +1199 -0
- package/src/eval/weighted-scorer.ts +285 -0
- package/src/index.ts +17 -0
- package/src/league/README.md +188 -0
- package/src/league/health-check.ts +353 -0
- package/src/league/index.ts +93 -0
- package/src/league/killAgent.ts +151 -0
- package/src/league/league.test.ts +1151 -0
- package/src/league/runLeague.ts +843 -0
- package/src/league/scoreAgent.ts +175 -0
- package/src/modules/omnibridge/__tests__/.gitkeep +1 -0
- package/src/modules/omnibridge/__tests__/auth-tunnel.property.test.ts +524 -0
- package/src/modules/omnibridge/__tests__/deterministic-logger.property.test.ts +965 -0
- package/src/modules/omnibridge/__tests__/ghost-api.property.test.ts +461 -0
- package/src/modules/omnibridge/__tests__/omnibridge-integration.test.ts +542 -0
- package/src/modules/omnibridge/__tests__/parallel-executor.property.test.ts +671 -0
- package/src/modules/omnibridge/__tests__/semantic-normalizer.property.test.ts +521 -0
- package/src/modules/omnibridge/__tests__/semantic-normalizer.test.ts +254 -0
- package/src/modules/omnibridge/__tests__/session-vault.property.test.ts +367 -0
- package/src/modules/omnibridge/__tests__/shadow-session.property.test.ts +523 -0
- package/src/modules/omnibridge/__tests__/triangulation-engine.property.test.ts +292 -0
- package/src/modules/omnibridge/__tests__/verification-engine.property.test.ts +769 -0
- package/src/modules/omnibridge/api/.gitkeep +1 -0
- package/src/modules/omnibridge/api/ghost-api.ts +1087 -0
- package/src/modules/omnibridge/auth/.gitkeep +1 -0
- package/src/modules/omnibridge/auth/auth-tunnel.ts +843 -0
- package/src/modules/omnibridge/auth/session-vault.ts +577 -0
- package/src/modules/omnibridge/core/.gitkeep +1 -0
- package/src/modules/omnibridge/core/semantic-normalizer.ts +702 -0
- package/src/modules/omnibridge/core/triangulation-engine.ts +530 -0
- package/src/modules/omnibridge/core/types.ts +610 -0
- package/src/modules/omnibridge/execution/.gitkeep +1 -0
- package/src/modules/omnibridge/execution/deterministic-logger.ts +629 -0
- package/src/modules/omnibridge/execution/parallel-executor.ts +542 -0
- package/src/modules/omnibridge/execution/shadow-session.ts +794 -0
- package/src/modules/omnibridge/index.ts +212 -0
- package/src/modules/omnibridge/omnibridge.ts +510 -0
- package/src/modules/omnibridge/verification/.gitkeep +1 -0
- package/src/modules/omnibridge/verification/verification-engine.ts +783 -0
- package/src/outcomes/README.md +75 -0
- package/src/outcomes/acquire-pilot-customer.ts +297 -0
- package/src/outcomes/code-delivery-outcomes.ts +89 -0
- package/src/outcomes/code-outcomes.ts +256 -0
- package/src/outcomes/code_review_battle.test.ts +135 -0
- package/src/outcomes/code_review_battle.ts +135 -0
- package/src/outcomes/cold_email_battle.ts +97 -0
- package/src/outcomes/content_creation_battle.ts +160 -0
- package/src/outcomes/f1_stem_opt_compliance.ts +61 -0
- package/src/outcomes/index.ts +107 -0
- package/src/outcomes/lead_gen_battle.test.ts +113 -0
- package/src/outcomes/lead_gen_battle.ts +99 -0
- package/src/outcomes/outcome.schema.property.test.ts +229 -0
- package/src/outcomes/outcome.schema.ts +187 -0
- package/src/outcomes/qualified_sales_interest.ts +118 -0
- package/src/outcomes/swarm_planner.property.test.ts +370 -0
- package/src/outcomes/swarm_planner.ts +96 -0
- package/src/outcomes/web_extraction.ts +234 -0
- package/src/runtime/README.md +220 -0
- package/src/runtime/agentRunner.test.ts +341 -0
- package/src/runtime/agentRunner.ts +746 -0
- package/src/runtime/claudeAdapter.ts +232 -0
- package/src/runtime/costTracker.ts +123 -0
- package/src/runtime/index.ts +34 -0
- package/src/runtime/modelAdapter.property.test.ts +305 -0
- package/src/runtime/modelAdapter.ts +144 -0
- package/src/runtime/openaiAdapter.ts +235 -0
- package/src/utils/README.md +122 -0
- package/src/utils/command-runner.ts +134 -0
- package/src/utils/cost-guard.ts +379 -0
- package/src/utils/errors.test.ts +290 -0
- package/src/utils/errors.ts +442 -0
- package/src/utils/index.ts +37 -0
- package/src/utils/logger.test.ts +361 -0
- package/src/utils/logger.ts +419 -0
- package/src/utils/output-parsers.ts +216 -0
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
# Runtime Module
|
|
2
|
+
|
|
3
|
+
The runtime module handles agent execution, model adapters, and cost tracking for the Earnd Bounty Engine.
|
|
4
|
+
|
|
5
|
+
## Components
|
|
6
|
+
|
|
7
|
+
### Model Adapter (`modelAdapter.ts`, `claudeAdapter.ts`, `openaiAdapter.ts`)
|
|
8
|
+
|
|
9
|
+
A unified interface for AI model providers. It abstracts Claude and OpenAI behind one common interface with normalized responses and tool-calling support.
|
|
10
|
+
|
|
11
|
+
**Key Interfaces:**
|
|
12
|
+
|
|
13
|
+
- `ModelAdapter` - Common interface for all model providers (includes `provider` and `modelId` readonly properties)
|
|
14
|
+
- `ModelResponse` - Normalized response format (content, tokensUsed, model, toolCalls, requiresToolResponse)
|
|
15
|
+
- `ModelOptions` - Request configuration (maxTokens, temperature, systemPrompt, tools)
|
|
16
|
+
- `ModelAdapterConfig` - Configuration for creating adapters (provider, modelId, apiKey)
|
|
17
|
+
- `ConversationMessage` - Message format for multi-turn tool use conversations
|
|
18
|
+
- `ToolDefinition` - Tool schema definition (from skills module)
|
|
19
|
+
- `ToolCall` - Tool invocation request from model
|
|
20
|
+
|
|
21
|
+
**Key Functions:**
|
|
22
|
+
|
|
23
|
+
- `createAdapter(config)` - Factory function to create adapter based on provider
|
|
24
|
+
- `createClaudeAdapter(apiKey, modelId)` - Creates Claude-specific adapter
|
|
25
|
+
- `createOpenAIAdapter(apiKey, modelId)` - Creates OpenAI-specific adapter
|
|
26
|
+
|
|
27
|
+
**Usage:**
|
|
28
|
+
|
|
29
|
+
```typescript
|
|
30
|
+
import { createAdapter, createClaudeAdapter, createOpenAIAdapter } from './index.js';
|
|
31
|
+
|
|
32
|
+
// Using factory function
|
|
33
|
+
const adapter = await createAdapter({
|
|
34
|
+
provider: 'claude',
|
|
35
|
+
modelId: 'claude-3-sonnet-20240229',
|
|
36
|
+
apiKey: process.env.ANTHROPIC_API_KEY!
|
|
37
|
+
});
|
|
38
|
+
|
|
39
|
+
// Or directly create specific adapters
|
|
40
|
+
const claudeAdapter = createClaudeAdapter(
|
|
41
|
+
process.env.ANTHROPIC_API_KEY!,
|
|
42
|
+
'claude-3-sonnet-20240229'
|
|
43
|
+
);
|
|
44
|
+
|
|
45
|
+
const openaiAdapter = createOpenAIAdapter(
|
|
46
|
+
process.env.OPENAI_API_KEY!,
|
|
47
|
+
'gpt-4-turbo-preview'
|
|
48
|
+
);
|
|
49
|
+
|
|
50
|
+
// Make completion request
|
|
51
|
+
const response = await adapter.complete('Hello, how are you?', {
|
|
52
|
+
maxTokens: 1024,
|
|
53
|
+
temperature: 0.7,
|
|
54
|
+
systemPrompt: 'You are a helpful assistant.'
|
|
55
|
+
});
|
|
56
|
+
|
|
57
|
+
console.log(response.content); // Generated text
|
|
58
|
+
console.log(response.tokensUsed); // Total tokens used
|
|
59
|
+
console.log(response.model); // Model identifier
|
|
60
|
+
|
|
61
|
+
// Access adapter properties
|
|
62
|
+
console.log(adapter.provider); // 'claude' or 'openai'
|
|
63
|
+
console.log(adapter.modelId); // The specific model ID
|
|
64
|
+
|
|
65
|
+
// Estimate tokens before sending
|
|
66
|
+
const estimatedTokens = adapter.countTokens('Some text to count');
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
**Tool Calling:**
|
|
70
|
+
|
|
71
|
+
Both adapters support function/tool calling with a unified interface:
|
|
72
|
+
|
|
73
|
+
```typescript
|
|
74
|
+
import type { ToolDefinition, ConversationMessage } from './modelAdapter.js';
|
|
75
|
+
|
|
76
|
+
// Define tools available to the model
|
|
77
|
+
const tools: ToolDefinition[] = [
|
|
78
|
+
{
|
|
79
|
+
name: 'get_company_info',
|
|
80
|
+
description: 'Get information about a company',
|
|
81
|
+
inputSchema: {
|
|
82
|
+
type: 'object',
|
|
83
|
+
properties: {
|
|
84
|
+
companyName: { type: 'string', description: 'Name of the company' }
|
|
85
|
+
},
|
|
86
|
+
required: ['companyName']
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
];
|
|
90
|
+
|
|
91
|
+
// Request with tools
|
|
92
|
+
const response = await adapter.complete('Look up TechCorp Industries', {
|
|
93
|
+
tools,
|
|
94
|
+
systemPrompt: 'You are a sales assistant.'
|
|
95
|
+
});
|
|
96
|
+
|
|
97
|
+
// Check if model wants to call tools
|
|
98
|
+
if (response.toolCalls && response.toolCalls.length > 0) {
|
|
99
|
+
// Execute tool calls and gather results
|
|
100
|
+
const messages: ConversationMessage[] = [
|
|
101
|
+
{ role: 'user', content: 'Look up TechCorp Industries' },
|
|
102
|
+
{ role: 'assistant', content: response.content, toolCalls: response.toolCalls },
|
|
103
|
+
];
|
|
104
|
+
|
|
105
|
+
// Add tool results
|
|
106
|
+
for (const toolCall of response.toolCalls) {
|
|
107
|
+
const result = await executeToolCall(toolCall); // Your tool execution logic
|
|
108
|
+
messages.push({
|
|
109
|
+
role: 'tool',
|
|
110
|
+
toolCallId: toolCall.id,
|
|
111
|
+
content: JSON.stringify(result)
|
|
112
|
+
});
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
// Continue conversation with tool results
|
|
116
|
+
const finalResponse = await adapter.continueWithToolResults(messages, { tools });
|
|
117
|
+
console.log(finalResponse.content);
|
|
118
|
+
}
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
### Agent Runner (`agentRunner.ts`)
|
|
122
|
+
|
|
123
|
+
Agent execution engine with an attempt loop, cost tracking, and retry logic.
|
|
124
|
+
|
|
125
|
+
**Key Interfaces:**
|
|
126
|
+
|
|
127
|
+
- `AgentRun` - Result of running an agent (agentId, outcomeId, status, attempts, tokensSpent, artifacts)
|
|
128
|
+
- `AgentRunConfig` - Configuration for running an agent (agent, outcome, lead, apiKey, onKillSignal)
|
|
129
|
+
- `KillReason` - Reason for agent termination ('cost_exceeded', 'attempts_exceeded', 'timeout', 'competitor_won', 'success')
|
|
130
|
+
|
|
131
|
+
**Key Functions:**
|
|
132
|
+
|
|
133
|
+
- `runAgent(config)` - Executes an agent to achieve an outcome with full tracking
|
|
134
|
+
- `runAgentMock(config)` - Mock runner for testing without API calls
|
|
135
|
+
|
|
136
|
+
**Features:**
|
|
137
|
+
|
|
138
|
+
- **Attempt Loop** - Tracks attempts against maxAttempts limit
|
|
139
|
+
- **Cost Integration** - Records token usage to CostTracker in real-time
|
|
140
|
+
- **Exponential Backoff** - Retries failed API calls with backoff (max 3 retries)
|
|
141
|
+
- **Kill Conditions** - Monitors cost ceiling, attempt limit, runtime limit, and external signals
|
|
142
|
+
- **Artifact Generation** - Creates AgentArtifact from model responses
|
|
143
|
+
|
|
144
|
+
**Usage:**
|
|
145
|
+
|
|
146
|
+
```typescript
|
|
147
|
+
import { runAgent, runAgentMock } from './agentRunner.js';
|
|
148
|
+
|
|
149
|
+
// Run agent with real API calls
|
|
150
|
+
const result = await runAgent({
|
|
151
|
+
agent: salesAgentConfig,
|
|
152
|
+
outcome: qualifiedSalesInterest,
|
|
153
|
+
lead: leadData,
|
|
154
|
+
apiKey: process.env.ANTHROPIC_API_KEY,
|
|
155
|
+
onKillSignal: () => competitorWon, // Optional external kill signal
|
|
156
|
+
});
|
|
157
|
+
|
|
158
|
+
// Check result
|
|
159
|
+
if (result.status === 'completed') {
|
|
160
|
+
console.log('Agent completed with', result.artifacts.length, 'artifacts');
|
|
161
|
+
} else if (result.status === 'killed') {
|
|
162
|
+
console.log('Agent killed:', result.killReason);
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
// For testing without API calls
|
|
166
|
+
const mockResult = await runAgentMock(config);
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
### Cost Tracker (`costTracker.ts`)
|
|
170
|
+
|
|
171
|
+
Real-time token and cost tracking per agent. Enforces hard token caps to prevent runaway costs.
|
|
172
|
+
|
|
173
|
+
**Key Functions:**
|
|
174
|
+
|
|
175
|
+
- `createCostTracker(agentId, ceiling)` - Creates a new tracker for an agent
|
|
176
|
+
- `recordUsage(tracker, tokens)` - Records token usage in real-time
|
|
177
|
+
- `isOverBudget(tracker)` - Checks if agent exceeded its ceiling
|
|
178
|
+
- `getRemainingBudget(tracker)` - Gets remaining token budget
|
|
179
|
+
- `getBudgetUsagePercent(tracker)` - Gets percentage of budget used
|
|
180
|
+
|
|
181
|
+
**Usage:**
|
|
182
|
+
|
|
183
|
+
```typescript
|
|
184
|
+
import { createCostTracker, recordUsage, isOverBudget } from './costTracker.js';
|
|
185
|
+
|
|
186
|
+
// Create tracker with 10,000 token ceiling
|
|
187
|
+
const tracker = createCostTracker('agent-1', 10000);
|
|
188
|
+
|
|
189
|
+
// Record usage after each model call
|
|
190
|
+
recordUsage(tracker, 500);
|
|
191
|
+
|
|
192
|
+
// Check if agent should be terminated
|
|
193
|
+
if (isOverBudget(tracker)) {
|
|
194
|
+
// Kill agent with reason 'cost_exceeded'
|
|
195
|
+
}
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
## Requirements Reference
|
|
199
|
+
|
|
200
|
+
- **4.2** - Track cost per agent independently
|
|
201
|
+
- **4.3** - Terminate agent on attempt limit exceeded
|
|
202
|
+
- **10.1** - Enforce hard token cap per agent
|
|
203
|
+
- **10.2** - Enforce max runtime per agent
|
|
204
|
+
- **10.5** - Track tokens spent per agent in real-time
|
|
205
|
+
- **11.1** - Abstract underlying AI model provider
|
|
206
|
+
- **11.2** - Route requests to Claude API
|
|
207
|
+
- **11.3** - Route requests to OpenAI API
|
|
208
|
+
- **11.4** - Normalize response format
|
|
209
|
+
|
|
210
|
+
## Design Principles
|
|
211
|
+
|
|
212
|
+
1. **Isolation** - Each agent has its own tracker, no cross-contamination
|
|
213
|
+
2. **Real-time** - Usage is tracked immediately after each model call
|
|
214
|
+
3. **Fail-closed** - When over budget, agents are terminated with no payout
|
|
215
|
+
4. **Provider Agnostic** - Same interface regardless of Claude or OpenAI
|
|
216
|
+
5. **Normalized Responses** - Consistent format for all model responses
|
|
217
|
+
|
|
218
|
+
## Related Modules
|
|
219
|
+
|
|
220
|
+
- **Skills** (`/src/skills`) - Defines agent tools/capabilities that can be invoked via model function calling. The `ToolDefinition`, `ToolCall`, and `ToolCallResult` types are imported from the skills module and re-exported by `modelAdapter.ts` for convenience.
|
|
@@ -0,0 +1,341 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Agent Runner Tests
|
|
3
|
+
*
|
|
4
|
+
* Tests for the agent execution engine including:
|
|
5
|
+
* - Attempt loop logic
|
|
6
|
+
* - Cost tracking integration
|
|
7
|
+
* - Kill condition handling
|
|
8
|
+
* - Mock runner functionality
|
|
9
|
+
*
|
|
10
|
+
* @module runtime/agentRunner.test
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
|
|
14
|
+
import {
|
|
15
|
+
runAgent,
|
|
16
|
+
runAgentMock,
|
|
17
|
+
type AgentRunConfig,
|
|
18
|
+
type AgentRun,
|
|
19
|
+
} from './agentRunner.js';
|
|
20
|
+
import type { AgentConfig } from '../agents/agent.schema.js';
|
|
21
|
+
import type { Outcome } from '../outcomes/outcome.schema.js';
|
|
22
|
+
import type { Lead } from '../jobs/job.interface.js';
|
|
23
|
+
|
|
24
|
+
/**
|
|
25
|
+
* Test fixtures
|
|
26
|
+
*/
|
|
27
|
+
const createTestAgent = (overrides?: Partial<AgentConfig>): AgentConfig => ({
|
|
28
|
+
id: 'test-agent-001',
|
|
29
|
+
name: 'Test Agent',
|
|
30
|
+
prompt: 'You are a test agent. Respond with a helpful message.',
|
|
31
|
+
strategyDescription: 'Test strategy',
|
|
32
|
+
toolAccess: [],
|
|
33
|
+
costCeiling: 5000,
|
|
34
|
+
modelProvider: 'claude',
|
|
35
|
+
modelId: 'claude-3-sonnet-20240229',
|
|
36
|
+
...overrides,
|
|
37
|
+
});
|
|
38
|
+
|
|
39
|
+
const createTestOutcome = (overrides?: Partial<Outcome>): Outcome => ({
|
|
40
|
+
name: 'test_outcome',
|
|
41
|
+
description: 'Test outcome for unit tests',
|
|
42
|
+
payoutAmount: 100,
|
|
43
|
+
maxAttempts: 3,
|
|
44
|
+
timeLimitMs: 60000,
|
|
45
|
+
successCriteria: [
|
|
46
|
+
{
|
|
47
|
+
name: 'test_criterion',
|
|
48
|
+
validator: 'validateMessageLength',
|
|
49
|
+
params: { minWords: 5 },
|
|
50
|
+
},
|
|
51
|
+
],
|
|
52
|
+
failureReasons: ['Test failure reason'],
|
|
53
|
+
...overrides,
|
|
54
|
+
});
|
|
55
|
+
|
|
56
|
+
const createTestLead = (overrides?: Partial<Lead>): Lead => ({
|
|
57
|
+
email: 'test@example.com',
|
|
58
|
+
company: 'Test Company',
|
|
59
|
+
companySize: 100,
|
|
60
|
+
role: 'Manager',
|
|
61
|
+
previousInteractions: ['Downloaded whitepaper'],
|
|
62
|
+
...overrides,
|
|
63
|
+
});
|
|
64
|
+
|
|
65
|
+
describe('agentRunner', () => {
|
|
66
|
+
describe('runAgentMock', () => {
|
|
67
|
+
it('should return a completed run with artifact', async () => {
|
|
68
|
+
const config: AgentRunConfig = {
|
|
69
|
+
agent: createTestAgent(),
|
|
70
|
+
outcome: createTestOutcome(),
|
|
71
|
+
lead: createTestLead(),
|
|
72
|
+
};
|
|
73
|
+
|
|
74
|
+
const result = await runAgentMock(config);
|
|
75
|
+
|
|
76
|
+
expect(result.agentId).toBe('test-agent-001');
|
|
77
|
+
expect(result.outcomeId).toBe('test_outcome');
|
|
78
|
+
expect(result.status).toBe('completed');
|
|
79
|
+
expect(result.attempts).toBe(1);
|
|
80
|
+
expect(result.tokensSpent).toBeGreaterThan(0);
|
|
81
|
+
expect(result.artifacts).toHaveLength(1);
|
|
82
|
+
expect(result.durationMs).toBeGreaterThan(0);
|
|
83
|
+
});
|
|
84
|
+
|
|
85
|
+
it('should include lead data in artifact content', async () => {
|
|
86
|
+
const lead = createTestLead({
|
|
87
|
+
email: 'custom@test.com',
|
|
88
|
+
company: 'Custom Corp',
|
|
89
|
+
companySize: 250,
|
|
90
|
+
role: 'Director',
|
|
91
|
+
});
|
|
92
|
+
|
|
93
|
+
const config: AgentRunConfig = {
|
|
94
|
+
agent: createTestAgent(),
|
|
95
|
+
outcome: createTestOutcome(),
|
|
96
|
+
lead,
|
|
97
|
+
};
|
|
98
|
+
|
|
99
|
+
const result = await runAgentMock(config);
|
|
100
|
+
const artifact = result.artifacts[0];
|
|
101
|
+
|
|
102
|
+
expect(artifact.content.targetEmail).toBe('custom@test.com');
|
|
103
|
+
expect(artifact.content.targetCompany).toBe('Custom Corp');
|
|
104
|
+
expect(artifact.content.targetCompanySize).toBe(250);
|
|
105
|
+
expect(artifact.content.targetRole).toBe('Director');
|
|
106
|
+
});
|
|
107
|
+
|
|
108
|
+
it('should set correct timestamp on artifact', async () => {
|
|
109
|
+
const config: AgentRunConfig = {
|
|
110
|
+
agent: createTestAgent(),
|
|
111
|
+
outcome: createTestOutcome(),
|
|
112
|
+
lead: createTestLead(),
|
|
113
|
+
};
|
|
114
|
+
|
|
115
|
+
const beforeRun = new Date().toISOString();
|
|
116
|
+
const result = await runAgentMock(config);
|
|
117
|
+
const afterRun = new Date().toISOString();
|
|
118
|
+
|
|
119
|
+
const artifactTime = result.artifacts[0].timestamp;
|
|
120
|
+
expect(artifactTime >= beforeRun).toBe(true);
|
|
121
|
+
expect(artifactTime <= afterRun).toBe(true);
|
|
122
|
+
});
|
|
123
|
+
});
|
|
124
|
+
|
|
125
|
+
describe('runAgent - configuration validation', () => {
|
|
126
|
+
it('should fail without API key', async () => {
|
|
127
|
+
const originalEnv = process.env.ANTHROPIC_API_KEY;
|
|
128
|
+
delete process.env.ANTHROPIC_API_KEY;
|
|
129
|
+
|
|
130
|
+
const config: AgentRunConfig = {
|
|
131
|
+
agent: createTestAgent(),
|
|
132
|
+
outcome: createTestOutcome(),
|
|
133
|
+
lead: createTestLead(),
|
|
134
|
+
};
|
|
135
|
+
|
|
136
|
+
const result = await runAgent(config);
|
|
137
|
+
|
|
138
|
+
expect(result.status).toBe('killed');
|
|
139
|
+
expect(result.error).toContain('Missing API key');
|
|
140
|
+
|
|
141
|
+
// Restore env
|
|
142
|
+
if (originalEnv) {
|
|
143
|
+
process.env.ANTHROPIC_API_KEY = originalEnv;
|
|
144
|
+
}
|
|
145
|
+
});
|
|
146
|
+
|
|
147
|
+
it('should use provided API key over environment variable', async () => {
|
|
148
|
+
const config: AgentRunConfig = {
|
|
149
|
+
agent: createTestAgent(),
|
|
150
|
+
outcome: createTestOutcome({ timeLimitMs: 100 }), // Short timeout for test
|
|
151
|
+
lead: createTestLead(),
|
|
152
|
+
apiKey: 'test-api-key',
|
|
153
|
+
onKillSignal: () => true, // Immediately kill to avoid API call
|
|
154
|
+
};
|
|
155
|
+
|
|
156
|
+
const result = await runAgent(config);
|
|
157
|
+
|
|
158
|
+
// Should have been killed by signal, not due to missing API key
|
|
159
|
+
// error is undefined when killed by signal (no API key error)
|
|
160
|
+
expect(result.error === undefined || !result.error.includes('Missing API key')).toBe(true);
|
|
161
|
+
expect(result.killReason).toBe('competitor_won');
|
|
162
|
+
});
|
|
163
|
+
});
|
|
164
|
+
|
|
165
|
+
describe('runAgent - kill conditions', () => {
|
|
166
|
+
it('should kill agent when cost ceiling is exceeded via onKillSignal', async () => {
|
|
167
|
+
let killSignalCalled = false;
|
|
168
|
+
const config: AgentRunConfig = {
|
|
169
|
+
agent: createTestAgent({ costCeiling: 100 }),
|
|
170
|
+
outcome: createTestOutcome(),
|
|
171
|
+
lead: createTestLead(),
|
|
172
|
+
apiKey: 'test-key',
|
|
173
|
+
onKillSignal: () => {
|
|
174
|
+
killSignalCalled = true;
|
|
175
|
+
return true; // Simulate competitor won
|
|
176
|
+
},
|
|
177
|
+
};
|
|
178
|
+
|
|
179
|
+
const result = await runAgent(config);
|
|
180
|
+
|
|
181
|
+
expect(killSignalCalled).toBe(true);
|
|
182
|
+
expect(result.status).toBe('killed');
|
|
183
|
+
expect(result.killReason).toBe('competitor_won');
|
|
184
|
+
});
|
|
185
|
+
});
|
|
186
|
+
|
|
187
|
+
describe('AgentRun structure', () => {
|
|
188
|
+
it('should have all required fields', async () => {
|
|
189
|
+
const config: AgentRunConfig = {
|
|
190
|
+
agent: createTestAgent(),
|
|
191
|
+
outcome: createTestOutcome(),
|
|
192
|
+
lead: createTestLead(),
|
|
193
|
+
};
|
|
194
|
+
|
|
195
|
+
const result = await runAgentMock(config);
|
|
196
|
+
|
|
197
|
+
// Check all required AgentRun fields exist
|
|
198
|
+
expect(result).toHaveProperty('agentId');
|
|
199
|
+
expect(result).toHaveProperty('outcomeId');
|
|
200
|
+
expect(result).toHaveProperty('status');
|
|
201
|
+
expect(result).toHaveProperty('attempts');
|
|
202
|
+
expect(result).toHaveProperty('tokensSpent');
|
|
203
|
+
expect(result).toHaveProperty('artifacts');
|
|
204
|
+
expect(result).toHaveProperty('durationMs');
|
|
205
|
+
|
|
206
|
+
// Check types
|
|
207
|
+
expect(typeof result.agentId).toBe('string');
|
|
208
|
+
expect(typeof result.outcomeId).toBe('string');
|
|
209
|
+
expect(['running', 'completed', 'killed']).toContain(result.status);
|
|
210
|
+
expect(typeof result.attempts).toBe('number');
|
|
211
|
+
expect(typeof result.tokensSpent).toBe('number');
|
|
212
|
+
expect(Array.isArray(result.artifacts)).toBe(true);
|
|
213
|
+
expect(typeof result.durationMs).toBe('number');
|
|
214
|
+
});
|
|
215
|
+
|
|
216
|
+
it('should track attempt count correctly', async () => {
|
|
217
|
+
const config: AgentRunConfig = {
|
|
218
|
+
agent: createTestAgent(),
|
|
219
|
+
outcome: createTestOutcome({ maxAttempts: 5 }),
|
|
220
|
+
lead: createTestLead(),
|
|
221
|
+
};
|
|
222
|
+
|
|
223
|
+
const result = await runAgentMock(config);
|
|
224
|
+
|
|
225
|
+
expect(result.attempts).toBeGreaterThanOrEqual(1);
|
|
226
|
+
expect(result.attempts).toBeLessThanOrEqual(5);
|
|
227
|
+
});
|
|
228
|
+
});
|
|
229
|
+
|
|
230
|
+
describe('Artifact structure', () => {
|
|
231
|
+
it('should create valid artifact with all required fields', async () => {
|
|
232
|
+
const config: AgentRunConfig = {
|
|
233
|
+
agent: createTestAgent({ id: 'artifact-test-agent' }),
|
|
234
|
+
outcome: createTestOutcome({ name: 'artifact_test_outcome' }),
|
|
235
|
+
lead: createTestLead(),
|
|
236
|
+
};
|
|
237
|
+
|
|
238
|
+
const result = await runAgentMock(config);
|
|
239
|
+
const artifact = result.artifacts[0];
|
|
240
|
+
|
|
241
|
+
expect(artifact.agentId).toBe('artifact-test-agent');
|
|
242
|
+
expect(artifact.outcomeId).toBe('artifact_test_outcome');
|
|
243
|
+
expect(artifact.attemptNumber).toBe(1);
|
|
244
|
+
expect(artifact.content).toBeDefined();
|
|
245
|
+
expect(artifact.timestamp).toBeDefined();
|
|
246
|
+
|
|
247
|
+
// Check content structure
|
|
248
|
+
expect(artifact.content).toHaveProperty('message');
|
|
249
|
+
expect(artifact.content).toHaveProperty('targetEmail');
|
|
250
|
+
expect(artifact.content).toHaveProperty('targetCompany');
|
|
251
|
+
expect(artifact.content).toHaveProperty('targetCompanySize');
|
|
252
|
+
expect(artifact.content).toHaveProperty('targetRole');
|
|
253
|
+
});
|
|
254
|
+
});
|
|
255
|
+
});
|
|
256
|
+
|
|
257
|
+
describe('AgentRunner - Property Tests', () => {
|
|
258
|
+
/**
|
|
259
|
+
* **Feature: earnd-bounty-engine, Property: Agent Run Structure Compliance**
|
|
260
|
+
* For any agent run, the result SHALL conform to the AgentRun interface
|
|
261
|
+
* with all required fields populated.
|
|
262
|
+
* **Validates: Requirements 4.2, 4.3**
|
|
263
|
+
*/
|
|
264
|
+
it('Property: AgentRun always has valid structure', async () => {
|
|
265
|
+
const configs = [
|
|
266
|
+
{
|
|
267
|
+
agent: createTestAgent(),
|
|
268
|
+
outcome: createTestOutcome(),
|
|
269
|
+
lead: createTestLead(),
|
|
270
|
+
},
|
|
271
|
+
{
|
|
272
|
+
agent: createTestAgent({ id: 'agent-2', costCeiling: 1000 }),
|
|
273
|
+
outcome: createTestOutcome({ maxAttempts: 1 }),
|
|
274
|
+
lead: createTestLead({ companySize: 10 }),
|
|
275
|
+
},
|
|
276
|
+
{
|
|
277
|
+
agent: createTestAgent({ id: 'agent-3', modelProvider: 'openai' as const }),
|
|
278
|
+
outcome: createTestOutcome({ timeLimitMs: 1000 }),
|
|
279
|
+
lead: createTestLead({ role: 'Intern' }),
|
|
280
|
+
},
|
|
281
|
+
];
|
|
282
|
+
|
|
283
|
+
for (const config of configs) {
|
|
284
|
+
const result = await runAgentMock(config);
|
|
285
|
+
|
|
286
|
+
// Verify structure invariants
|
|
287
|
+
expect(result.agentId).toBe(config.agent.id);
|
|
288
|
+
expect(result.outcomeId).toBe(config.outcome.name);
|
|
289
|
+
expect(['running', 'completed', 'killed']).toContain(result.status);
|
|
290
|
+
expect(result.attempts).toBeGreaterThanOrEqual(0);
|
|
291
|
+
expect(result.tokensSpent).toBeGreaterThanOrEqual(0);
|
|
292
|
+
expect(Array.isArray(result.artifacts)).toBe(true);
|
|
293
|
+
expect(result.durationMs).toBeGreaterThanOrEqual(0);
|
|
294
|
+
}
|
|
295
|
+
});
|
|
296
|
+
|
|
297
|
+
/**
|
|
298
|
+
* **Feature: earnd-bounty-engine, Property: Completed runs have artifacts**
|
|
299
|
+
* For any completed agent run, there SHALL be at least one artifact.
|
|
300
|
+
*/
|
|
301
|
+
it('Property: Completed runs always have at least one artifact', async () => {
|
|
302
|
+
const config: AgentRunConfig = {
|
|
303
|
+
agent: createTestAgent(),
|
|
304
|
+
outcome: createTestOutcome(),
|
|
305
|
+
lead: createTestLead(),
|
|
306
|
+
};
|
|
307
|
+
|
|
308
|
+
const result = await runAgentMock(config);
|
|
309
|
+
|
|
310
|
+
if (result.status === 'completed') {
|
|
311
|
+
expect(result.artifacts.length).toBeGreaterThanOrEqual(1);
|
|
312
|
+
}
|
|
313
|
+
});
|
|
314
|
+
|
|
315
|
+
/**
|
|
316
|
+
* **Feature: earnd-bounty-engine, Property: Killed runs have kill reason**
|
|
317
|
+
* For any killed agent run, there SHALL be a kill reason specified.
|
|
318
|
+
*/
|
|
319
|
+
it('Property: Killed runs always have kill reason', async () => {
|
|
320
|
+
const config: AgentRunConfig = {
|
|
321
|
+
agent: createTestAgent(),
|
|
322
|
+
outcome: createTestOutcome(),
|
|
323
|
+
lead: createTestLead(),
|
|
324
|
+
apiKey: 'test-key',
|
|
325
|
+
onKillSignal: () => true, // Immediately signal kill
|
|
326
|
+
};
|
|
327
|
+
|
|
328
|
+
const result = await runAgent(config);
|
|
329
|
+
|
|
330
|
+
if (result.status === 'killed') {
|
|
331
|
+
expect(result.killReason).toBeDefined();
|
|
332
|
+
expect([
|
|
333
|
+
'cost_exceeded',
|
|
334
|
+
'attempts_exceeded',
|
|
335
|
+
'timeout',
|
|
336
|
+
'competitor_won',
|
|
337
|
+
'success',
|
|
338
|
+
]).toContain(result.killReason);
|
|
339
|
+
}
|
|
340
|
+
});
|
|
341
|
+
});
|