outcome-cli 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113)
  1. package/README.md +261 -0
  2. package/package.json +95 -0
  3. package/src/agents/README.md +139 -0
  4. package/src/agents/adapters/anthropic.adapter.ts +166 -0
  5. package/src/agents/adapters/dalle.adapter.ts +145 -0
  6. package/src/agents/adapters/gemini.adapter.ts +134 -0
  7. package/src/agents/adapters/imagen.adapter.ts +106 -0
  8. package/src/agents/adapters/nano-banana.adapter.ts +129 -0
  9. package/src/agents/adapters/openai.adapter.ts +165 -0
  10. package/src/agents/adapters/veo.adapter.ts +130 -0
  11. package/src/agents/agent.schema.property.test.ts +379 -0
  12. package/src/agents/agent.schema.test.ts +148 -0
  13. package/src/agents/agent.schema.ts +263 -0
  14. package/src/agents/index.ts +60 -0
  15. package/src/agents/registered-agent.schema.ts +356 -0
  16. package/src/agents/registry.ts +97 -0
  17. package/src/agents/tournament-configs.property.test.ts +266 -0
  18. package/src/cli/README.md +145 -0
  19. package/src/cli/commands/define.ts +79 -0
  20. package/src/cli/commands/list.ts +46 -0
  21. package/src/cli/commands/logs.ts +83 -0
  22. package/src/cli/commands/run.ts +416 -0
  23. package/src/cli/commands/verify.ts +110 -0
  24. package/src/cli/index.ts +81 -0
  25. package/src/config/README.md +128 -0
  26. package/src/config/env.ts +262 -0
  27. package/src/config/index.ts +19 -0
  28. package/src/eval/README.md +318 -0
  29. package/src/eval/ai-judge.test.ts +435 -0
  30. package/src/eval/ai-judge.ts +368 -0
  31. package/src/eval/code-validators.ts +414 -0
  32. package/src/eval/evaluateOutcome.property.test.ts +1174 -0
  33. package/src/eval/evaluateOutcome.ts +591 -0
  34. package/src/eval/immigration-validators.ts +122 -0
  35. package/src/eval/index.ts +90 -0
  36. package/src/eval/judge-cache.ts +402 -0
  37. package/src/eval/tournament-validators.property.test.ts +439 -0
  38. package/src/eval/validators.property.test.ts +1118 -0
  39. package/src/eval/validators.ts +1199 -0
  40. package/src/eval/weighted-scorer.ts +285 -0
  41. package/src/index.ts +17 -0
  42. package/src/league/README.md +188 -0
  43. package/src/league/health-check.ts +353 -0
  44. package/src/league/index.ts +93 -0
  45. package/src/league/killAgent.ts +151 -0
  46. package/src/league/league.test.ts +1151 -0
  47. package/src/league/runLeague.ts +843 -0
  48. package/src/league/scoreAgent.ts +175 -0
  49. package/src/modules/omnibridge/__tests__/.gitkeep +1 -0
  50. package/src/modules/omnibridge/__tests__/auth-tunnel.property.test.ts +524 -0
  51. package/src/modules/omnibridge/__tests__/deterministic-logger.property.test.ts +965 -0
  52. package/src/modules/omnibridge/__tests__/ghost-api.property.test.ts +461 -0
  53. package/src/modules/omnibridge/__tests__/omnibridge-integration.test.ts +542 -0
  54. package/src/modules/omnibridge/__tests__/parallel-executor.property.test.ts +671 -0
  55. package/src/modules/omnibridge/__tests__/semantic-normalizer.property.test.ts +521 -0
  56. package/src/modules/omnibridge/__tests__/semantic-normalizer.test.ts +254 -0
  57. package/src/modules/omnibridge/__tests__/session-vault.property.test.ts +367 -0
  58. package/src/modules/omnibridge/__tests__/shadow-session.property.test.ts +523 -0
  59. package/src/modules/omnibridge/__tests__/triangulation-engine.property.test.ts +292 -0
  60. package/src/modules/omnibridge/__tests__/verification-engine.property.test.ts +769 -0
  61. package/src/modules/omnibridge/api/.gitkeep +1 -0
  62. package/src/modules/omnibridge/api/ghost-api.ts +1087 -0
  63. package/src/modules/omnibridge/auth/.gitkeep +1 -0
  64. package/src/modules/omnibridge/auth/auth-tunnel.ts +843 -0
  65. package/src/modules/omnibridge/auth/session-vault.ts +577 -0
  66. package/src/modules/omnibridge/core/.gitkeep +1 -0
  67. package/src/modules/omnibridge/core/semantic-normalizer.ts +702 -0
  68. package/src/modules/omnibridge/core/triangulation-engine.ts +530 -0
  69. package/src/modules/omnibridge/core/types.ts +610 -0
  70. package/src/modules/omnibridge/execution/.gitkeep +1 -0
  71. package/src/modules/omnibridge/execution/deterministic-logger.ts +629 -0
  72. package/src/modules/omnibridge/execution/parallel-executor.ts +542 -0
  73. package/src/modules/omnibridge/execution/shadow-session.ts +794 -0
  74. package/src/modules/omnibridge/index.ts +212 -0
  75. package/src/modules/omnibridge/omnibridge.ts +510 -0
  76. package/src/modules/omnibridge/verification/.gitkeep +1 -0
  77. package/src/modules/omnibridge/verification/verification-engine.ts +783 -0
  78. package/src/outcomes/README.md +75 -0
  79. package/src/outcomes/acquire-pilot-customer.ts +297 -0
  80. package/src/outcomes/code-delivery-outcomes.ts +89 -0
  81. package/src/outcomes/code-outcomes.ts +256 -0
  82. package/src/outcomes/code_review_battle.test.ts +135 -0
  83. package/src/outcomes/code_review_battle.ts +135 -0
  84. package/src/outcomes/cold_email_battle.ts +97 -0
  85. package/src/outcomes/content_creation_battle.ts +160 -0
  86. package/src/outcomes/f1_stem_opt_compliance.ts +61 -0
  87. package/src/outcomes/index.ts +107 -0
  88. package/src/outcomes/lead_gen_battle.test.ts +113 -0
  89. package/src/outcomes/lead_gen_battle.ts +99 -0
  90. package/src/outcomes/outcome.schema.property.test.ts +229 -0
  91. package/src/outcomes/outcome.schema.ts +187 -0
  92. package/src/outcomes/qualified_sales_interest.ts +118 -0
  93. package/src/outcomes/swarm_planner.property.test.ts +370 -0
  94. package/src/outcomes/swarm_planner.ts +96 -0
  95. package/src/outcomes/web_extraction.ts +234 -0
  96. package/src/runtime/README.md +220 -0
  97. package/src/runtime/agentRunner.test.ts +341 -0
  98. package/src/runtime/agentRunner.ts +746 -0
  99. package/src/runtime/claudeAdapter.ts +232 -0
  100. package/src/runtime/costTracker.ts +123 -0
  101. package/src/runtime/index.ts +34 -0
  102. package/src/runtime/modelAdapter.property.test.ts +305 -0
  103. package/src/runtime/modelAdapter.ts +144 -0
  104. package/src/runtime/openaiAdapter.ts +235 -0
  105. package/src/utils/README.md +122 -0
  106. package/src/utils/command-runner.ts +134 -0
  107. package/src/utils/cost-guard.ts +379 -0
  108. package/src/utils/errors.test.ts +290 -0
  109. package/src/utils/errors.ts +442 -0
  110. package/src/utils/index.ts +37 -0
  111. package/src/utils/logger.test.ts +361 -0
  112. package/src/utils/logger.ts +419 -0
  113. package/src/utils/output-parsers.ts +216 -0
@@ -0,0 +1,220 @@
1
+ # Runtime Module
2
+
3
+ The runtime module handles agent execution, model adapters, and cost tracking for the Earnd Bounty Engine.
4
+
5
+ ## Components
6
+
7
+ ### Model Adapter (`modelAdapter.ts`, `claudeAdapter.ts`, `openaiAdapter.ts`)
8
+
9
+ Unified interface for AI model providers. Abstracts Claude and OpenAI behind a common interface with normalized responses and tool calling support.
10
+
11
+ **Key Interfaces:**
12
+
13
+ - `ModelAdapter` - Common interface for all model providers (includes `provider` and `modelId` readonly properties)
14
+ - `ModelResponse` - Normalized response format (content, tokensUsed, model, toolCalls, requiresToolResponse)
15
+ - `ModelOptions` - Request configuration (maxTokens, temperature, systemPrompt, tools)
16
+ - `ModelAdapterConfig` - Configuration for creating adapters (provider, modelId, apiKey)
17
+ - `ConversationMessage` - Message format for multi-turn tool use conversations
18
+ - `ToolDefinition` - Tool schema definition (from skills module)
19
+ - `ToolCall` - Tool invocation request from model
20
+
21
+ **Key Functions:**
22
+
23
+ - `createAdapter(config)` - Factory function to create adapter based on provider
24
+ - `createClaudeAdapter(apiKey, modelId)` - Creates Claude-specific adapter
25
+ - `createOpenAIAdapter(apiKey, modelId)` - Creates OpenAI-specific adapter
26
+
27
+ **Usage:**
28
+
29
+ ```typescript
30
+ import { createAdapter, createClaudeAdapter, createOpenAIAdapter } from './index.js';
31
+
32
+ // Using factory function
33
+ const adapter = await createAdapter({
34
+ provider: 'claude',
35
+ modelId: 'claude-3-sonnet-20240229',
36
+ apiKey: process.env.ANTHROPIC_API_KEY!
37
+ });
38
+
39
+ // Or directly create specific adapters
40
+ const claudeAdapter = createClaudeAdapter(
41
+ process.env.ANTHROPIC_API_KEY!,
42
+ 'claude-3-sonnet-20240229'
43
+ );
44
+
45
+ const openaiAdapter = createOpenAIAdapter(
46
+ process.env.OPENAI_API_KEY!,
47
+ 'gpt-4-turbo-preview'
48
+ );
49
+
50
+ // Make completion request
51
+ const response = await adapter.complete('Hello, how are you?', {
52
+ maxTokens: 1024,
53
+ temperature: 0.7,
54
+ systemPrompt: 'You are a helpful assistant.'
55
+ });
56
+
57
+ console.log(response.content); // Generated text
58
+ console.log(response.tokensUsed); // Total tokens used
59
+ console.log(response.model); // Model identifier
60
+
61
+ // Access adapter properties
62
+ console.log(adapter.provider); // 'claude' or 'openai'
63
+ console.log(adapter.modelId); // The specific model ID
64
+
65
+ // Estimate tokens before sending
66
+ const estimatedTokens = adapter.countTokens('Some text to count');
67
+ ```
68
+
69
+ **Tool Calling:**
70
+
71
+ Both adapters support function/tool calling with a unified interface:
72
+
73
+ ```typescript
74
+ import type { ToolDefinition, ConversationMessage } from './modelAdapter.js';
75
+
76
+ // Define tools available to the model
77
+ const tools: ToolDefinition[] = [
78
+ {
79
+ name: 'get_company_info',
80
+ description: 'Get information about a company',
81
+ inputSchema: {
82
+ type: 'object',
83
+ properties: {
84
+ companyName: { type: 'string', description: 'Name of the company' }
85
+ },
86
+ required: ['companyName']
87
+ }
88
+ }
89
+ ];
90
+
91
+ // Request with tools
92
+ const response = await adapter.complete('Look up TechCorp Industries', {
93
+ tools,
94
+ systemPrompt: 'You are a sales assistant.'
95
+ });
96
+
97
+ // Check if model wants to call tools
98
+ if (response.toolCalls && response.toolCalls.length > 0) {
99
+ // Execute tool calls and gather results
100
+ const messages: ConversationMessage[] = [
101
+ { role: 'user', content: 'Look up TechCorp Industries' },
102
+ { role: 'assistant', content: response.content, toolCalls: response.toolCalls },
103
+ ];
104
+
105
+ // Add tool results
106
+ for (const toolCall of response.toolCalls) {
107
+ const result = await executeToolCall(toolCall); // Your tool execution logic
108
+ messages.push({
109
+ role: 'tool',
110
+ toolCallId: toolCall.id,
111
+ content: JSON.stringify(result)
112
+ });
113
+ }
114
+
115
+ // Continue conversation with tool results
116
+ const finalResponse = await adapter.continueWithToolResults(messages, { tools });
117
+ console.log(finalResponse.content);
118
+ }
119
+ ```
120
+
121
+ ### Agent Runner (`agentRunner.ts`)
122
+
123
+ Agent execution engine with attempt loop, cost tracking, and retry logic.
124
+
125
+ **Key Interfaces:**
126
+
127
+ - `AgentRun` - Result of running an agent (agentId, outcomeId, status, attempts, tokensSpent, artifacts)
128
+ - `AgentRunConfig` - Configuration for running an agent (agent, outcome, lead, apiKey, onKillSignal)
129
+ - `KillReason` - Reason for agent termination ('cost_exceeded', 'attempts_exceeded', 'timeout', 'competitor_won', 'success')
130
+
131
+ **Key Functions:**
132
+
133
+ - `runAgent(config)` - Executes an agent to achieve an outcome with full tracking
134
+ - `runAgentMock(config)` - Mock runner for testing without API calls
135
+
136
+ **Features:**
137
+
138
+ - **Attempt Loop** - Tracks attempts against maxAttempts limit
139
+ - **Cost Integration** - Records token usage to CostTracker in real-time
140
+ - **Exponential Backoff** - Retries failed API calls with backoff (max 3 retries)
141
+ - **Kill Conditions** - Monitors cost ceiling, attempt limit, runtime limit, and external signals
142
+ - **Artifact Generation** - Creates AgentArtifact from model responses
143
+
144
+ **Usage:**
145
+
146
+ ```typescript
147
+ import { runAgent, runAgentMock } from './agentRunner.js';
148
+
149
+ // Run agent with real API calls
150
+ const result = await runAgent({
151
+ agent: salesAgentConfig,
152
+ outcome: qualifiedSalesInterest,
153
+ lead: leadData,
154
+ apiKey: process.env.ANTHROPIC_API_KEY,
155
+ onKillSignal: () => competitorWon, // Optional external kill signal
156
+ });
157
+
158
+ // Check result
159
+ if (result.status === 'completed') {
160
+ console.log('Agent completed with', result.artifacts.length, 'artifacts');
161
+ } else if (result.status === 'killed') {
162
+ console.log('Agent killed:', result.killReason);
163
+ }
164
+
165
+ // For testing without API calls (`config` is an AgentRunConfig like the object passed to runAgent above)
166
+ const mockResult = await runAgentMock(config);
167
+ ```
168
+
169
+ ### Cost Tracker (`costTracker.ts`)
170
+
171
+ Real-time token and cost tracking per agent. Enforces hard token caps to prevent runaway costs.
172
+
173
+ **Key Functions:**
174
+
175
+ - `createCostTracker(agentId, ceiling)` - Creates a new tracker for an agent
176
+ - `recordUsage(tracker, tokens)` - Records token usage in real-time
177
+ - `isOverBudget(tracker)` - Checks whether the agent has exceeded its token ceiling
178
+ - `getRemainingBudget(tracker)` - Gets remaining token budget
179
+ - `getBudgetUsagePercent(tracker)` - Gets percentage of budget used
180
+
181
+ **Usage:**
182
+
183
+ ```typescript
184
+ import { createCostTracker, recordUsage, isOverBudget } from './costTracker.js';
185
+
186
+ // Create tracker with 10,000 token ceiling
187
+ const tracker = createCostTracker('agent-1', 10000);
188
+
189
+ // Record usage after each model call
190
+ recordUsage(tracker, 500);
191
+
192
+ // Check if agent should be terminated
193
+ if (isOverBudget(tracker)) {
194
+ // Kill agent with reason 'cost_exceeded'
195
+ }
196
+ ```
197
+
198
+ ## Requirements Reference
199
+
200
+ - **4.2** - Track cost per agent independently
201
+ - **4.3** - Terminate agent on attempt limit exceeded
202
+ - **10.1** - Enforce hard token cap per agent
203
+ - **10.2** - Enforce max runtime per agent
204
+ - **10.5** - Track tokens spent per agent in real-time
205
+ - **11.1** - Abstract underlying AI model provider
206
+ - **11.2** - Route requests to Claude API
207
+ - **11.3** - Route requests to OpenAI API
208
+ - **11.4** - Normalize response format
209
+
210
+ ## Design Principles
211
+
212
+ 1. **Isolation** - Each agent has its own tracker, no cross-contamination
213
+ 2. **Real-time** - Usage is tracked immediately after each model call
214
+ 3. **Fail-closed** - When over budget, agents are terminated with no payout
215
+ 4. **Provider Agnostic** - Same interface regardless of Claude or OpenAI
216
+ 5. **Normalized Responses** - Consistent format for all model responses
217
+
218
+ ## Related Modules
219
+
220
+ - **Skills** (`/src/skills`) - Defines agent tools/capabilities that can be invoked via model function calling. The `ToolDefinition`, `ToolCall`, and `ToolCallResult` types are imported from the skills module and re-exported by `modelAdapter.ts` for convenience.
@@ -0,0 +1,341 @@
1
+ /**
2
+ * Agent Runner Tests
3
+ *
4
+ * Tests for the agent execution engine including:
5
+ * - Attempt loop logic
6
+ * - Cost tracking integration
7
+ * - Kill condition handling
8
+ * - Mock runner functionality
9
+ *
10
+ * @module runtime/agentRunner.test
11
+ */
12
+
13
+ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
14
+ import {
15
+ runAgent,
16
+ runAgentMock,
17
+ type AgentRunConfig,
18
+ type AgentRun,
19
+ } from './agentRunner.js';
20
+ import type { AgentConfig } from '../agents/agent.schema.js';
21
+ import type { Outcome } from '../outcomes/outcome.schema.js';
22
+ import type { Lead } from '../jobs/job.interface.js';
23
+
24
+ /** Test fixtures. createTestAgent: default Claude agent config; fields in `overrides` replace the defaults. */
25
+ const createTestAgent = (overrides?: Partial<AgentConfig>): AgentConfig => {
26
+   const defaults: AgentConfig = {
27
+     id: 'test-agent-001',
28
+     name: 'Test Agent',
29
+     prompt: 'You are a test agent. Respond with a helpful message.',
30
+     strategyDescription: 'Test strategy',
31
+     toolAccess: [],
32
+     costCeiling: 5000,
33
+     modelProvider: 'claude',
34
+     modelId: 'claude-3-sonnet-20240229',
35
+   };
36
+   return { ...defaults, ...overrides };
37
+ };
38
+ /**
39
+  * Test fixture: outcome with a single message-length criterion; fields in `overrides` replace the defaults.
40
+  */
41
+ const createTestOutcome = (overrides?: Partial<Outcome>): Outcome => {
42
+   const defaults: Outcome = {
43
+     name: 'test_outcome',
44
+     description: 'Test outcome for unit tests',
45
+     payoutAmount: 100,
46
+     maxAttempts: 3,
47
+     timeLimitMs: 60000,
48
+     successCriteria: [
49
+       { name: 'test_criterion', validator: 'validateMessageLength', params: { minWords: 5 } },
50
+     ],
51
+     failureReasons: ['Test failure reason'],
52
+   };
53
+   return { ...defaults, ...overrides };
54
+ };
55
+ /** Test fixture: a lead with one prior interaction; fields in `overrides` replace the defaults. */
56
+ const createTestLead = (overrides: Partial<Lead> = {}): Lead => ({
57
+   email: 'test@example.com',
58
+   company: 'Test Company',
59
+   companySize: 100,
60
+   role: 'Manager',
61
+   previousInteractions: ['Downloaded whitepaper'],
62
+   ...overrides,
63
+ });
64
+
65
+ describe('agentRunner', () => {
66
+ describe('runAgentMock', () => {
67
+     it('should return a completed run with artifact', async () => {
68
+       // Default fixtures: the ids asserted below come from createTestAgent/createTestOutcome
69
+       const run = await runAgentMock({
70
+         agent: createTestAgent(),
71
+         outcome: createTestOutcome(),
72
+         lead: createTestLead(),
73
+       });
74
+
75
+       // Identity, terminal state and accounting of the mocked run
76
+       expect(run.agentId).toBe('test-agent-001');
77
+       expect(run.outcomeId).toBe('test_outcome');
78
+       expect(run.status).toBe('completed');
79
+       expect(run.attempts).toBe(1);
80
+       expect(run.tokensSpent).toBeGreaterThan(0);
81
+       expect(run.artifacts).toHaveLength(1);
82
+       expect(run.durationMs).toBeGreaterThan(0);
83
+     });
84
+
85
+     it('should include lead data in artifact content', async () => {
86
+       // Non-default lead values, so the assertions cannot pass on fixture defaults
87
+       const customLead = createTestLead({
88
+         email: 'custom@test.com',
89
+         company: 'Custom Corp',
90
+         companySize: 250,
91
+         role: 'Director',
92
+       });
93
+
94
+       const { artifacts } = await runAgentMock({
95
+         agent: createTestAgent(),
96
+         outcome: createTestOutcome(),
97
+         lead: customLead,
98
+       });
99
+       const [firstArtifact] = artifacts;
100
+
101
+       // Each lead field must be echoed into the artifact content under its target* key
102
+       expect(firstArtifact.content.targetEmail).toBe('custom@test.com');
103
+       expect(firstArtifact.content.targetCompany).toBe('Custom Corp');
104
+       expect(firstArtifact.content.targetCompanySize).toBe(250);
105
+       expect(firstArtifact.content.targetRole).toBe('Director');
106
+     });
107
+
108
+     it('should set correct timestamp on artifact', async () => {
109
+       const config: AgentRunConfig = {
110
+         agent: createTestAgent(),
111
+         outcome: createTestOutcome(),
112
+         lead: createTestLead(),
113
+       };
114
+
115
+       // Bracket the run with ISO strings; assumes artifact.timestamp is an ISO string too, so string order is time order
116
+       const startedAt = new Date().toISOString();
117
+       const { artifacts } = await runAgentMock(config);
118
+       const finishedAt = new Date().toISOString();
119
+       const stampedAt = artifacts[0].timestamp;
120
+       expect(stampedAt >= startedAt).toBe(true);
121
+       expect(stampedAt <= finishedAt).toBe(true);
122
+     });
123
+ });
124
+
125
+ describe('runAgent - configuration validation', () => {
126
+     it('should fail without API key', async () => {
127
+       const originalEnv = process.env.ANTHROPIC_API_KEY;
128
+       delete process.env.ANTHROPIC_API_KEY;
129
+
130
+       const config: AgentRunConfig = {
131
+         agent: createTestAgent(),
132
+         outcome: createTestOutcome(),
133
+         lead: createTestLead(),
134
+       };
135
+       try {
136
+         const result = await runAgent(config);
137
+         expect(result.status).toBe('killed');
138
+         expect(result.error).toContain('Missing API key');
139
+       } finally {
140
+         // Restore env even if an assertion above throws, so later tests see the original value
141
+         if (originalEnv !== undefined) {
142
+           process.env.ANTHROPIC_API_KEY = originalEnv;
143
+         }
144
+       }
145
+     });
146
+
147
+     it('should use provided API key over environment variable', async () => {
148
+       const { error, killReason } = await runAgent({
149
+         agent: createTestAgent(),
150
+         outcome: createTestOutcome({ timeLimitMs: 100 }), // Short timeout for test
151
+         lead: createTestLead(),
152
+         apiKey: 'test-api-key',
153
+         onKillSignal: () => true, // Immediately kill to avoid API call
154
+       });
155
+
156
+       // The run must end because of the kill signal, not because of a missing key;
157
+       // `error` is allowed to be undefined when the signal is what stopped the run.
158
+       const reportsMissingKey = error !== undefined && error.includes('Missing API key');
159
+
160
+       expect(reportsMissingKey).toBe(false);
161
+       expect(killReason).toBe('competitor_won');
162
+     });
163
+ });
164
+
165
+ describe('runAgent - kill conditions', () => {
166
+     it('should kill agent with competitor_won when onKillSignal returns true', async () => {
167
+       // Spy stands in for the external kill signal; vi.fn records whether the runner consulted it
168
+       const onKillSignal = vi.fn(() => true);
169
+       const config: AgentRunConfig = {
170
+         agent: createTestAgent({ costCeiling: 100 }),
171
+         outcome: createTestOutcome(),
172
+         lead: createTestLead(),
173
+         apiKey: 'test-key',
174
+         onKillSignal,
175
+       };
176
+
177
+       const result = await runAgent(config);
178
+
179
+       // The runner must call the signal and report it as the kill reason,
180
+       // rather than a cost or attempt failure.
181
+       expect(onKillSignal).toHaveBeenCalled();
182
+       expect(result.status).toBe('killed');
183
+       expect(result.killReason).toBe('competitor_won');
184
+     });
185
+ });
186
+
187
+ describe('AgentRun structure', () => {
188
+     it('should have all required fields', async () => {
189
+       const run = await runAgentMock({
190
+         agent: createTestAgent(),
191
+         outcome: createTestOutcome(),
192
+         lead: createTestLead(),
193
+       });
194
+
195
+       // Presence: every required AgentRun field exists on the result
196
+       // (one expectation per field so a failure names the missing one)
197
+       expect(run).toHaveProperty('agentId');
198
+       expect(run).toHaveProperty('outcomeId');
199
+       expect(run).toHaveProperty('status');
200
+       expect(run).toHaveProperty('attempts');
201
+       expect(run).toHaveProperty('tokensSpent');
202
+       expect(run).toHaveProperty('artifacts');
203
+       expect(run).toHaveProperty('durationMs');
204
+
205
+       // Types: ids are strings, counters are numbers, artifacts is an array,
206
+       // and status is one of the three known run states
207
+       expect(typeof run.agentId).toBe('string');
208
+       expect(typeof run.outcomeId).toBe('string');
209
+       expect(['running', 'completed', 'killed']).toContain(run.status);
210
+       expect(typeof run.attempts).toBe('number');
211
+       expect(typeof run.tokensSpent).toBe('number');
212
+       expect(Array.isArray(run.artifacts)).toBe(true);
213
+       expect(typeof run.durationMs).toBe('number');
214
+     });
215
+
216
+     it('should track attempt count correctly', async () => {
217
+       const maxAttempts = 5;
218
+       const { attempts } = await runAgentMock({
219
+         agent: createTestAgent(),
220
+         outcome: createTestOutcome({ maxAttempts }),
221
+         lead: createTestLead(),
222
+       });
223
+
224
+       // At least one attempt is made, and never more than the outcome allows
225
+       expect(attempts).toBeGreaterThanOrEqual(1);
226
+       expect(attempts).toBeLessThanOrEqual(maxAttempts);
227
+     });
228
+ });
229
+
230
+ describe('Artifact structure', () => {
231
+     it('should create valid artifact with all required fields', async () => {
232
+       const { artifacts } = await runAgentMock({
233
+         agent: createTestAgent({ id: 'artifact-test-agent' }),
234
+         outcome: createTestOutcome({ name: 'artifact_test_outcome' }),
235
+         lead: createTestLead(),
236
+       });
237
+       const [firstArtifact] = artifacts;
238
+
239
+       // Envelope: ids echo the overridden fixture values and the first attempt is numbered 1
240
+       expect(firstArtifact.agentId).toBe('artifact-test-agent');
241
+       expect(firstArtifact.outcomeId).toBe('artifact_test_outcome');
242
+       expect(firstArtifact.attemptNumber).toBe(1);
243
+       expect(firstArtifact.content).toBeDefined();
244
+       expect(firstArtifact.timestamp).toBeDefined();
245
+
246
+       // Payload: the message plus the target* fields must all be present
247
+       const { content } = firstArtifact;
248
+       expect(content).toHaveProperty('message');
249
+       expect(content).toHaveProperty('targetEmail');
250
+       expect(content).toHaveProperty('targetCompany');
251
+       expect(content).toHaveProperty('targetCompanySize');
252
+       expect(content).toHaveProperty('targetRole');
253
+     });
254
+ });
255
+ });
256
+
257
+ describe('AgentRunner - Property Tests', () => {
258
+ /**
259
+ * **Feature: earnd-bounty-engine, Property: Agent Run Structure Compliance**
260
+ * For any agent run, the result SHALL conform to the AgentRun interface
261
+ * with all required fields populated.
262
+ * **Validates: Requirements 4.2, 4.3**
263
+ */
264
+   it('Property: AgentRun always has valid structure', async () => {
265
+     // Three fixture variations: defaults, a smaller budget with one attempt, and the OpenAI provider
266
+     const scenarios: AgentRunConfig[] = [
267
+       {
268
+         agent: createTestAgent(),
269
+         outcome: createTestOutcome(),
270
+         lead: createTestLead(),
271
+       },
272
+       {
273
+         agent: createTestAgent({ id: 'agent-2', costCeiling: 1000 }),
274
+         outcome: createTestOutcome({ maxAttempts: 1 }),
275
+         lead: createTestLead({ companySize: 10 }),
276
+       },
277
+       {
278
+         agent: createTestAgent({ id: 'agent-3', modelProvider: 'openai' }),
279
+         outcome: createTestOutcome({ timeLimitMs: 1000 }),
280
+         lead: createTestLead({ role: 'Intern' }),
281
+       },
282
+     ];
283
+     for (const scenario of scenarios) {
284
+       const run = await runAgentMock(scenario);
285
+
286
+       // Structure invariants that must hold for every scenario
287
+       expect(run.agentId).toBe(scenario.agent.id);
288
+       expect(run.outcomeId).toBe(scenario.outcome.name);
289
+       expect(['running', 'completed', 'killed']).toContain(run.status);
290
+       expect(run.attempts).toBeGreaterThanOrEqual(0);
291
+       expect(run.tokensSpent).toBeGreaterThanOrEqual(0);
292
+       expect(Array.isArray(run.artifacts)).toBe(true);
293
+       expect(run.durationMs).toBeGreaterThanOrEqual(0);
294
+     }
295
+   });
296
+
297
+ /**
298
+ * **Feature: earnd-bounty-engine, Property: Completed runs have artifacts**
299
+ * For any completed agent run, there SHALL be at least one artifact.
300
+ */
301
+   it('Property: Completed runs always have at least one artifact', async () => {
302
+     const config: AgentRunConfig = {
303
+       agent: createTestAgent(),
304
+       outcome: createTestOutcome(),
305
+       lead: createTestLead(),
306
+     };
307
+
308
+     const result = await runAgentMock(config);
309
+     // Assert the precondition instead of branching on it: an `if` guard would let this
310
+     // test pass without checking anything if the mock ever stopped returning 'completed'.
311
+     expect(result.status).toBe('completed');
312
+     expect(result.artifacts.length).toBeGreaterThanOrEqual(1);
313
+   });
314
+
315
+ /**
316
+ * **Feature: earnd-bounty-engine, Property: Killed runs have kill reason**
317
+ * For any killed agent run, there SHALL be a kill reason specified.
318
+ */
319
+   it('Property: Killed runs always have kill reason', async () => {
320
+     const config: AgentRunConfig = {
321
+       agent: createTestAgent(),
322
+       outcome: createTestOutcome(),
323
+       lead: createTestLead(),
324
+       apiKey: 'test-key',
325
+       onKillSignal: () => true, // Immediately signal kill
326
+     };
327
+
328
+     const result = await runAgent(config);
329
+     // The immediate kill signal must produce a killed run; asserting that (rather than
330
+     // guarding with `if`) keeps the property from passing vacuously.
331
+     expect(result.status).toBe('killed');
332
+     expect(result.killReason).toBeDefined();
333
+     expect([
334
+       'cost_exceeded',
335
+       'attempts_exceeded',
336
+       'timeout',
337
+       'competitor_won',
338
+       'success',
339
+     ]).toContain(result.killReason);
340
+   });
341
+ });