outcome-cli 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. package/README.md +261 -0
  2. package/package.json +95 -0
  3. package/src/agents/README.md +139 -0
  4. package/src/agents/adapters/anthropic.adapter.ts +166 -0
  5. package/src/agents/adapters/dalle.adapter.ts +145 -0
  6. package/src/agents/adapters/gemini.adapter.ts +134 -0
  7. package/src/agents/adapters/imagen.adapter.ts +106 -0
  8. package/src/agents/adapters/nano-banana.adapter.ts +129 -0
  9. package/src/agents/adapters/openai.adapter.ts +165 -0
  10. package/src/agents/adapters/veo.adapter.ts +130 -0
  11. package/src/agents/agent.schema.property.test.ts +379 -0
  12. package/src/agents/agent.schema.test.ts +148 -0
  13. package/src/agents/agent.schema.ts +263 -0
  14. package/src/agents/index.ts +60 -0
  15. package/src/agents/registered-agent.schema.ts +356 -0
  16. package/src/agents/registry.ts +97 -0
  17. package/src/agents/tournament-configs.property.test.ts +266 -0
  18. package/src/cli/README.md +145 -0
  19. package/src/cli/commands/define.ts +79 -0
  20. package/src/cli/commands/list.ts +46 -0
  21. package/src/cli/commands/logs.ts +83 -0
  22. package/src/cli/commands/run.ts +416 -0
  23. package/src/cli/commands/verify.ts +110 -0
  24. package/src/cli/index.ts +81 -0
  25. package/src/config/README.md +128 -0
  26. package/src/config/env.ts +262 -0
  27. package/src/config/index.ts +19 -0
  28. package/src/eval/README.md +318 -0
  29. package/src/eval/ai-judge.test.ts +435 -0
  30. package/src/eval/ai-judge.ts +368 -0
  31. package/src/eval/code-validators.ts +414 -0
  32. package/src/eval/evaluateOutcome.property.test.ts +1174 -0
  33. package/src/eval/evaluateOutcome.ts +591 -0
  34. package/src/eval/immigration-validators.ts +122 -0
  35. package/src/eval/index.ts +90 -0
  36. package/src/eval/judge-cache.ts +402 -0
  37. package/src/eval/tournament-validators.property.test.ts +439 -0
  38. package/src/eval/validators.property.test.ts +1118 -0
  39. package/src/eval/validators.ts +1199 -0
  40. package/src/eval/weighted-scorer.ts +285 -0
  41. package/src/index.ts +17 -0
  42. package/src/league/README.md +188 -0
  43. package/src/league/health-check.ts +353 -0
  44. package/src/league/index.ts +93 -0
  45. package/src/league/killAgent.ts +151 -0
  46. package/src/league/league.test.ts +1151 -0
  47. package/src/league/runLeague.ts +843 -0
  48. package/src/league/scoreAgent.ts +175 -0
  49. package/src/modules/omnibridge/__tests__/.gitkeep +1 -0
  50. package/src/modules/omnibridge/__tests__/auth-tunnel.property.test.ts +524 -0
  51. package/src/modules/omnibridge/__tests__/deterministic-logger.property.test.ts +965 -0
  52. package/src/modules/omnibridge/__tests__/ghost-api.property.test.ts +461 -0
  53. package/src/modules/omnibridge/__tests__/omnibridge-integration.test.ts +542 -0
  54. package/src/modules/omnibridge/__tests__/parallel-executor.property.test.ts +671 -0
  55. package/src/modules/omnibridge/__tests__/semantic-normalizer.property.test.ts +521 -0
  56. package/src/modules/omnibridge/__tests__/semantic-normalizer.test.ts +254 -0
  57. package/src/modules/omnibridge/__tests__/session-vault.property.test.ts +367 -0
  58. package/src/modules/omnibridge/__tests__/shadow-session.property.test.ts +523 -0
  59. package/src/modules/omnibridge/__tests__/triangulation-engine.property.test.ts +292 -0
  60. package/src/modules/omnibridge/__tests__/verification-engine.property.test.ts +769 -0
  61. package/src/modules/omnibridge/api/.gitkeep +1 -0
  62. package/src/modules/omnibridge/api/ghost-api.ts +1087 -0
  63. package/src/modules/omnibridge/auth/.gitkeep +1 -0
  64. package/src/modules/omnibridge/auth/auth-tunnel.ts +843 -0
  65. package/src/modules/omnibridge/auth/session-vault.ts +577 -0
  66. package/src/modules/omnibridge/core/.gitkeep +1 -0
  67. package/src/modules/omnibridge/core/semantic-normalizer.ts +702 -0
  68. package/src/modules/omnibridge/core/triangulation-engine.ts +530 -0
  69. package/src/modules/omnibridge/core/types.ts +610 -0
  70. package/src/modules/omnibridge/execution/.gitkeep +1 -0
  71. package/src/modules/omnibridge/execution/deterministic-logger.ts +629 -0
  72. package/src/modules/omnibridge/execution/parallel-executor.ts +542 -0
  73. package/src/modules/omnibridge/execution/shadow-session.ts +794 -0
  74. package/src/modules/omnibridge/index.ts +212 -0
  75. package/src/modules/omnibridge/omnibridge.ts +510 -0
  76. package/src/modules/omnibridge/verification/.gitkeep +1 -0
  77. package/src/modules/omnibridge/verification/verification-engine.ts +783 -0
  78. package/src/outcomes/README.md +75 -0
  79. package/src/outcomes/acquire-pilot-customer.ts +297 -0
  80. package/src/outcomes/code-delivery-outcomes.ts +89 -0
  81. package/src/outcomes/code-outcomes.ts +256 -0
  82. package/src/outcomes/code_review_battle.test.ts +135 -0
  83. package/src/outcomes/code_review_battle.ts +135 -0
  84. package/src/outcomes/cold_email_battle.ts +97 -0
  85. package/src/outcomes/content_creation_battle.ts +160 -0
  86. package/src/outcomes/f1_stem_opt_compliance.ts +61 -0
  87. package/src/outcomes/index.ts +107 -0
  88. package/src/outcomes/lead_gen_battle.test.ts +113 -0
  89. package/src/outcomes/lead_gen_battle.ts +99 -0
  90. package/src/outcomes/outcome.schema.property.test.ts +229 -0
  91. package/src/outcomes/outcome.schema.ts +187 -0
  92. package/src/outcomes/qualified_sales_interest.ts +118 -0
  93. package/src/outcomes/swarm_planner.property.test.ts +370 -0
  94. package/src/outcomes/swarm_planner.ts +96 -0
  95. package/src/outcomes/web_extraction.ts +234 -0
  96. package/src/runtime/README.md +220 -0
  97. package/src/runtime/agentRunner.test.ts +341 -0
  98. package/src/runtime/agentRunner.ts +746 -0
  99. package/src/runtime/claudeAdapter.ts +232 -0
  100. package/src/runtime/costTracker.ts +123 -0
  101. package/src/runtime/index.ts +34 -0
  102. package/src/runtime/modelAdapter.property.test.ts +305 -0
  103. package/src/runtime/modelAdapter.ts +144 -0
  104. package/src/runtime/openaiAdapter.ts +235 -0
  105. package/src/utils/README.md +122 -0
  106. package/src/utils/command-runner.ts +134 -0
  107. package/src/utils/cost-guard.ts +379 -0
  108. package/src/utils/errors.test.ts +290 -0
  109. package/src/utils/errors.ts +442 -0
  110. package/src/utils/index.ts +37 -0
  111. package/src/utils/logger.test.ts +361 -0
  112. package/src/utils/logger.ts +419 -0
  113. package/src/utils/output-parsers.ts +216 -0
@@ -0,0 +1,144 @@
1
+ /**
2
+ * Model Adapter - AI model abstraction for Claude and OpenAI
3
+ *
4
+ * Provides a unified interface for interacting with different AI model providers.
5
+ * Normalizes responses to a common format regardless of underlying provider.
6
+ *
7
+ * @module runtime/modelAdapter
8
+ */
9
+
10
+ import type { ToolDefinition, ToolCall, ToolCallResult } from '../skills/skill.interface.js';
11
+
/**
 * Per-request settings accepted by every model adapter.
 * Every field is optional.
 */
export interface ModelOptions {
  /** Upper bound on the number of tokens the model may generate. */
  maxTokens?: number;
  /** Sampling temperature controlling response randomness (0-1). */
  temperature?: number;
  /** System prompt that establishes context for the request. */
  systemPrompt?: string;
  /** Tools the model is allowed to call while producing its answer. */
  tools?: ToolDefinition[];
}
25
+
/**
 * Provider-independent shape of a model reply.
 * Claude and OpenAI responses are both mapped onto this structure.
 */
export interface ModelResponse {
  /** Text generated by the model. */
  content: string;
  /** Combined input and output token usage for the request. */
  tokensUsed: number;
  /** Identifier of the model that produced the response. */
  model: string;
  /** Tool invocations requested by the model, when there are any. */
  toolCalls?: ToolCall[];
  /** True when the model expects tool results before it continues. */
  requiresToolResponse?: boolean;
}
42
+
/**
 * One entry of a multi-turn conversation that may involve tool use.
 */
export interface ConversationMessage {
  /** Who produced the message. */
  role: 'user' | 'assistant' | 'tool';
  /** Text payload of the message. */
  content: string;
  /** ID of the tool call this message answers (used by `tool` messages). */
  toolCallId?: string;
  /** Tool calls issued by the assistant in this message. */
  toolCalls?: ToolCall[];
}
54
+
55
+ // Re-export types for convenience
56
+ export type { ToolDefinition, ToolCall, ToolCallResult };
57
+
/**
 * Contract implemented by every AI model adapter.
 * Callers program against this interface and stay unaware of whether
 * Claude or OpenAI is serving the request.
 */
export interface ModelAdapter {
  /**
   * Issues a single completion request.
   *
   * @param prompt - User prompt to send to the model
   * @param options - Optional per-request settings
   * @returns The provider response in normalized form
   */
  complete(prompt: string, options?: ModelOptions): Promise<ModelResponse>;

  /**
   * Resumes a multi-turn tool-use exchange.
   *
   * @param messages - Conversation so far, including the tool results
   * @param options - Optional per-request settings
   * @returns The provider response in normalized form
   */
  continueWithToolResults(
    messages: ConversationMessage[],
    options?: ModelOptions
  ): Promise<ModelResponse>;

  /**
   * Produces an estimate of how many tokens a text occupies.
   *
   * @param text - Text to measure
   * @returns Estimated token count
   */
  countTokens(text: string): number;

  /** Name of the backing provider ('claude' or 'openai'). */
  readonly provider: 'claude' | 'openai';

  /** ID of the concrete model this adapter talks to. */
  readonly modelId: string;
}
99
+
/**
 * Settings required to construct a model adapter.
 */
export interface ModelAdapterConfig {
  /** Which AI provider should serve requests. */
  provider: 'claude' | 'openai';
  /** Concrete model ID (e.g., 'claude-3-sonnet-20240229', 'gpt-4-turbo-preview'). */
  modelId: string;
  /** API key used to authenticate with the provider. */
  apiKey: string;
}
111
+
112
+
/**
 * Builds the model adapter that matches the requested provider.
 *
 * The provider-specific module is loaded with a dynamic import, so only the
 * adapter that is actually selected gets imported.
 *
 * @param config - Provider, model ID and API key to use
 * @returns ModelAdapter bound to the requested provider
 * @throws Error if the provider is not one of the supported values
 *
 * @example
 * const adapter = await createAdapter({
 *   provider: 'claude',
 *   modelId: 'claude-3-sonnet-20240229',
 *   apiKey: process.env.ANTHROPIC_API_KEY!
 * });
 *
 * @see Requirements 11.1, 11.2, 11.3
 */
export async function createAdapter(config: ModelAdapterConfig): Promise<ModelAdapter> {
  const { provider, apiKey, modelId } = config;

  if (provider === 'claude') {
    const claudeModule = await import('./claudeAdapter.js');
    return claudeModule.createClaudeAdapter(apiKey, modelId);
  }

  if (provider === 'openai') {
    const openaiModule = await import('./openaiAdapter.js');
    return openaiModule.createOpenAIAdapter(apiKey, modelId);
  }

  // Both union members are handled above, so `provider` is `never` here.
  // Adding a provider without a branch turns this line into a compile error.
  const unsupportedProvider: never = provider;
  throw new Error(`Unsupported model provider: ${unsupportedProvider}`);
}
@@ -0,0 +1,235 @@
1
+ /**
2
+ * OpenAI Adapter - OpenAI API integration
3
+ *
4
+ * Implements the ModelAdapter interface for OpenAI models.
5
+ * Uses the official openai npm package following Chat Completions API.
6
+ *
7
+ * @module runtime/openaiAdapter
8
+ * @see https://platform.openai.com/docs/api-reference
9
+ */
10
+
11
+ import OpenAI from 'openai';
12
+ import type {
13
+ ModelAdapter,
14
+ ModelOptions,
15
+ ModelResponse,
16
+ ConversationMessage,
17
+ ToolDefinition,
18
+ ToolCall,
19
+ } from './modelAdapter.js';
20
+
/** Token limit applied to OpenAI responses when the caller sets none. */
const DEFAULT_MAX_TOKENS = 1024;

/**
 * Rough number of characters that make up one token, used for estimates.
 * OpenAI averages about 4 characters per token.
 */
const CHARS_PER_TOKEN = 4;
31
+
/**
 * Maps normalized tool definitions onto OpenAI's function-tool format.
 *
 * @param tools - Tool definitions in the adapter-neutral shape
 * @returns The same tools expressed as OpenAI `function` tools
 */
function toOpenAITools(tools: ToolDefinition[]): OpenAI.ChatCompletionTool[] {
  const converted: OpenAI.ChatCompletionTool[] = [];

  for (const { name, description, inputSchema } of tools) {
    converted.push({
      type: 'function',
      function: {
        name,
        description,
        // Only `properties` and `required` are carried over; the schema
        // type is always emitted as 'object'.
        parameters: {
          type: 'object',
          properties: inputSchema.properties,
          required: inputSchema.required,
        },
      },
    });
  }

  return converted;
}
52
+
/**
 * Converts our conversation messages to OpenAI's message format.
 *
 * - `user` messages are passed through unchanged.
 * - `assistant` messages send `null` content when the text is empty and carry
 *   their tool calls with the arguments serialized to a JSON string.
 * - `tool` messages must reference the tool call they answer.
 *
 * @param messages - Our normalized conversation messages
 * @returns OpenAI-formatted messages, in the same order
 * @throws Error if a `tool` message has no `toolCallId`
 */
function toOpenAIMessages(messages: ConversationMessage[]): OpenAI.ChatCompletionMessageParam[] {
  const openaiMessages: OpenAI.ChatCompletionMessageParam[] = [];

  for (const msg of messages) {
    switch (msg.role) {
      case 'user':
        openaiMessages.push({ role: 'user', content: msg.content });
        break;

      case 'assistant': {
        const assistantMsg: OpenAI.ChatCompletionAssistantMessageParam = {
          role: 'assistant',
          // An empty string becomes null, e.g. for a turn that only calls tools.
          content: msg.content || null,
        };
        if (msg.toolCalls && msg.toolCalls.length > 0) {
          assistantMsg.tool_calls = msg.toolCalls.map((tc) => ({
            id: tc.id,
            type: 'function' as const,
            function: {
              name: tc.name,
              arguments: JSON.stringify(tc.arguments),
            },
          }));
        }
        openaiMessages.push(assistantMsg);
        break;
      }

      case 'tool': {
        // Fail fast with a clear message instead of sending
        // `tool_call_id: undefined` to the API.
        if (!msg.toolCallId) {
          throw new Error(
            'Tool message is missing toolCallId; every tool result must reference the tool call it answers'
          );
        }
        openaiMessages.push({
          role: 'tool',
          tool_call_id: msg.toolCallId,
          content: msg.content,
        });
        break;
      }
    }
  }

  return openaiMessages;
}
94
+
/**
 * Parses the JSON argument string of a single tool call.
 *
 * The argument string is produced by the model, so it is not guaranteed to be
 * valid JSON. An empty string is treated as "no arguments".
 *
 * @param toolName - Name of the tool, used only for error messages
 * @param rawArguments - JSON text returned by the model
 * @returns The parsed arguments object
 * @throws Error if the text is not valid JSON or is not a JSON object
 */
function parseToolArguments(toolName: string, rawArguments: string): Record<string, unknown> {
  if (rawArguments.trim() === '') {
    return {};
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(rawArguments);
  } catch (error: unknown) {
    const reason = error instanceof Error ? error.message : String(error);
    throw new Error(`Invalid JSON arguments for tool call "${toolName}": ${reason}`);
  }

  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
    throw new Error(`Arguments for tool call "${toolName}" must be a JSON object`);
  }

  return parsed as Record<string, unknown>;
}

/**
 * Extracts tool calls from OpenAI's response.
 *
 * Only calls of type `function` are kept; their JSON argument strings are
 * parsed into objects.
 *
 * @param toolCalls - OpenAI tool calls from response (may be absent)
 * @returns Array of normalized tool calls; empty when there are none
 * @throws Error if a call's arguments are not a valid JSON object
 */
function extractToolCalls(
  toolCalls: OpenAI.ChatCompletionMessageToolCall[] | undefined
): ToolCall[] {
  if (!toolCalls) return [];

  return toolCalls
    .filter((tc): tc is OpenAI.ChatCompletionMessageToolCall & { type: 'function' } =>
      tc.type === 'function'
    )
    .map((tc) => ({
      id: tc.id,
      name: tc.function.name,
      arguments: parseToolArguments(tc.function.name, tc.function.arguments),
    }));
}
116
+
/**
 * Creates an OpenAI model adapter.
 *
 * `complete` and `continueWithToolResults` differ only in how the message
 * list is built; both go through one shared request/normalize helper so the
 * two code paths cannot drift apart.
 *
 * @param apiKey - OpenAI API key
 * @param modelId - OpenAI model ID (e.g., 'gpt-4-turbo-preview')
 * @returns ModelAdapter implementation for OpenAI
 *
 * @example
 * const adapter = createOpenAIAdapter(process.env.OPENAI_API_KEY!, 'gpt-4-turbo-preview');
 * const response = await adapter.complete('Hello, GPT');
 *
 * @see Requirements 11.1, 11.3, 11.4
 */
export function createOpenAIAdapter(apiKey: string, modelId: string): ModelAdapter {
  const client = new OpenAI({ apiKey });

  /** Puts the system prompt, when one is provided, in front of the messages. */
  const withSystemPrompt = (
    messages: OpenAI.ChatCompletionMessageParam[],
    options?: ModelOptions
  ): OpenAI.ChatCompletionMessageParam[] =>
    options?.systemPrompt
      ? [{ role: 'system', content: options.systemPrompt }, ...messages]
      : messages;

  /** Sends one Chat Completions request and normalizes the reply. */
  const requestCompletion = async (
    messages: OpenAI.ChatCompletionMessageParam[],
    options?: ModelOptions
  ): Promise<ModelResponse> => {
    const requestParams: OpenAI.ChatCompletionCreateParams = {
      model: modelId,
      max_tokens: options?.maxTokens ?? DEFAULT_MAX_TOKENS,
      // Temperature is only sent when the caller set it explicitly.
      ...(options?.temperature !== undefined && { temperature: options.temperature }),
      messages,
    };

    // Add tools if provided
    if (options?.tools && options.tools.length > 0) {
      requestParams.tools = toOpenAITools(options.tools);
    }

    const completion = await client.chat.completions.create(requestParams);

    // Only the first choice is read.
    const choice = completion.choices[0];
    const toolCalls = extractToolCalls(choice?.message?.tool_calls);

    return {
      content: choice?.message?.content ?? '',
      tokensUsed: completion.usage?.total_tokens ?? 0,
      model: completion.model,
      toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
      // 'tool_calls' as the finish reason means the model is waiting for tool output.
      requiresToolResponse: choice?.finish_reason === 'tool_calls',
    };
  };

  return {
    provider: 'openai',
    modelId,

    async complete(prompt: string, options?: ModelOptions): Promise<ModelResponse> {
      const userMessages: OpenAI.ChatCompletionMessageParam[] = [
        { role: 'user', content: prompt },
      ];
      return requestCompletion(withSystemPrompt(userMessages, options), options);
    },

    async continueWithToolResults(
      messages: ConversationMessage[],
      options?: ModelOptions
    ): Promise<ModelResponse> {
      return requestCompletion(withSystemPrompt(toOpenAIMessages(messages), options), options);
    },

    countTokens(text: string): number {
      // Approximate token count based on character length
      // OpenAI uses ~4 characters per token on average
      return Math.ceil(text.length / CHARS_PER_TOKEN);
    },
  };
}
@@ -0,0 +1,122 @@
1
+ # Utils Module
2
+
3
+ Shared utilities for the Earnd Bounty Engine, including structured logging and typed error classes.
4
+
5
+ ## Components
6
+
7
+ ### Logger (`logger.ts`)
8
+
9
+ Mandatory structured logging for all agent attempts. Every agent attempt is logged for auditing and debugging.
10
+
11
+ **Key Interfaces:**
12
+
13
+ - `LogEntry` - Structured log entry with timestamp, agentId, outcomeId, promptVersion, tokensSpent, result, failureReason
14
+ - `LogEntryInput` - Input for creating log entries (timestamp auto-generated)
15
+ - `LogResult` - Result status: 'SUCCESS' | 'FAILURE' | 'PENDING'
16
+
17
+ **Key Functions:**
18
+
19
+ - `log(entry)` - Creates and stores a log entry
20
+ - `getLogs(outcomeId)` - Retrieves all logs for an outcome
21
+ - `getAgentLogs(outcomeId, agentId)` - Retrieves logs for a specific agent
22
+ - `clearLogs(outcomeId)` - Clears logs for an outcome
23
+ - `formatLogsForCli(outcomeId)` - Formats logs for CLI display
24
+
25
+ **Convenience Functions:**
26
+
27
+ - `logSuccess(...)` - Log a successful attempt
28
+ - `logFailure(...)` - Log a failed attempt with reason
29
+ - `logPending(...)` - Log a pending attempt
30
+
31
+ **Usage:**
32
+
33
+ ```typescript
34
+ import { log, getLogs, logSuccess, logFailure, formatLogsForCli } from './logger.js';
35
+
36
+ // Log an attempt
37
+ log({
38
+ agentId: 'agent-001',
39
+ outcomeId: 'qualified_sales_interest',
40
+ promptVersion: 'v1.0.0',
41
+ tokensSpent: 500,
42
+ result: 'SUCCESS'
43
+ });
44
+
45
+ // Or use convenience functions
46
+ logSuccess('agent-001', 'qualified_sales_interest', 'v1.0.0', 500);
47
+ logFailure('agent-002', 'qualified_sales_interest', 'v1.0.0', 300, 'Company too small');
48
+
49
+ // Retrieve logs
50
+ const logs = getLogs('qualified_sales_interest');
51
+ console.log(`Found ${logs.length} log entries`);
52
+
53
+ // Format for CLI display
54
+ console.log(formatLogsForCli('qualified_sales_interest'));
55
+ ```
56
+
57
+ **Output Format:**
58
+
59
+ ```text
60
+ ✅ [14:32:15] Agent:agent-001 | Outcome:qualified_sales_interest | Tokens:500 | SUCCESS
61
+ ❌ [14:32:18] Agent:agent-002 | Outcome:qualified_sales_interest | Tokens:300 | FAILURE | Reason: Company too small
62
+ ```
63
+
64
+ ## Requirements Reference
65
+
66
+ - **6.1** - Record agent ID, outcome ID, prompt version, tokens spent, result, and failure reason
67
+ - **6.2** - Provide logs viewable via CLI
68
+ - **6.3** - Persist all attempt logs after agent run completes
69
+
70
+ ### Error Types (`errors.ts`)
71
+
72
+ Custom error classes for structured error handling across the system.
73
+
74
+ **Error Classes:**
75
+
76
+ - `ValidationError` - Invalid schemas, malformed data (not recoverable)
77
+ - `ExecutionError` - Model API failures, timeouts, network issues (may be recoverable)
78
+ - `LimitError` - Cost ceiling, attempt limit, time limit exceeded (not recoverable)
79
+ - `SystemError` - Infrastructure failures, state corruption (not recoverable)
80
+
81
+ **Key Interfaces:**
82
+
83
+ - `ErrorResponse` - Structured error format with code, message, details, recoverable flag
84
+ - `ErrorCode` - Enum of all error codes for programmatic handling
85
+
86
+ **Type Guards:**
87
+
88
+ - `isEarndError(error)` - Check if error is any EarndError subclass
89
+ - `isValidationError(error)` - Check if error is ValidationError
90
+ - `isExecutionError(error)` - Check if error is ExecutionError
91
+ - `isLimitError(error)` - Check if error is LimitError
92
+ - `isSystemError(error)` - Check if error is SystemError
93
+
94
+ **Usage:**
95
+
96
+ ```typescript
97
+ import { ValidationError, LimitError, toErrorResponse } from './errors.js';
98
+
99
+ // Create validation error
100
+ throw ValidationError.missingField('email', 'Lead');
101
+
102
+ // Create limit error
103
+ throw LimitError.costExceeded('agent-1', 15000, 10000);
104
+
105
+ // Convert any error to ErrorResponse
106
+ try {
107
+ // ... code that may throw
108
+ } catch (error) {
109
+ const response = toErrorResponse(error);
110
+ console.log(response.code, response.message, response.recoverable);
111
+ }
112
+ ```
113
+
114
+ ## Design Principles
115
+
116
+ 1. **Mandatory Logging** - Every agent attempt must be logged
117
+ 2. **Structured Data** - All logs follow the LogEntry schema
118
+ 3. **Audit Trail** - Logs persist for debugging and compliance
119
+ 4. **CLI-Friendly** - Output formatted for terminal readability
120
+ 5. **No Silent Failures** - All errors and failures are logged with reasons
121
+ 6. **Fail Closed** - Limit errors terminate agents with no payout possibility
122
+ 7. **Typed Errors** - All errors have codes for programmatic handling
@@ -0,0 +1,134 @@
1
+ /**
2
+ * Safe Command Execution - Async command runner with timeouts and limits
3
+ *
4
+ * Safely executes shell commands with:
5
+ * - Timeouts to prevent hanging
6
+ * - Output size limits to prevent memory exhaustion
7
+ * - Proper error handling and cleanup
8
+ *
9
+ * Used for running tests, builds, linting, benchmarks, and security scans
10
+ * in the Outcome evaluation pipeline.
11
+ */
12
+
13
+ import { spawn } from 'child_process';
14
+
/**
 * Outcome of a command executed through `runCommand`.
 */
export interface CommandResult {
  /** Exit code of the process; -1 when the command timed out or could not be spawned. */
  exitCode: number;
  /** Captured standard output. */
  stdout: string;
  /** Captured standard error. */
  stderr: string;
  /** True when the captured output exceeded the size limit and was cut. */
  truncated?: boolean;
  /** True when the command was stopped because it ran past its timeout. */
  timeout?: boolean;
}
22
+
/**
 * Optional settings for `runCommand` and `runCommandLine`.
 */
export interface CommandOptions {
  /** Working directory in which the command is started. */
  cwd?: string;
  /** Time limit in milliseconds before the command is killed (defaults to 30000). */
  timeoutMs?: number;
  /** Max chars for stdout+stderr combined (defaults to 100000). */
  maxOutputSize?: number;
  /** Environment variables for the command (defaults to `process.env`). */
  env?: Record<string, string>;
}
29
+
/**
 * Safely execute a shell command asynchronously.
 *
 * The returned promise never rejects; every failure mode is reported through
 * the result:
 * - normal exit: `exitCode` is the process exit code
 * - killed by a signal: `exitCode` is -1 (never reported as success)
 * - timeout: `exitCode` is -1 and `timeout` is true
 * - spawn failure: `exitCode` is -1 and the error message is appended to `stderr`
 *
 * Once stdout and stderr together exceed `maxOutputSize`, each stream is cut
 * to half of the limit, `truncated` is set, and all later output is discarded.
 *
 * NOTE: the command runs with `shell: true`, so `command` and `args` are
 * interpreted by the shell. Never pass unsanitized external input.
 *
 * @param command - Command to execute (e.g., "npm test")
 * @param args - Command arguments (if needed)
 * @param options - Execution options (cwd, timeout, output limit, environment)
 * @returns Promise resolving to CommandResult
 */
export async function runCommand(
  command: string,
  args: string[] = [],
  options: CommandOptions = {}
): Promise<CommandResult> {
  const {
    cwd,
    timeoutMs = 30000, // 30 seconds default
    maxOutputSize = 100000, // 100k chars
    env = process.env,
  } = options;

  return new Promise((resolve) => {
    let stdout = '';
    let stderr = '';
    let truncated = false;
    let settled = false;
    let timeoutId: ReturnType<typeof setTimeout> | undefined;

    const proc = spawn(command, args, {
      cwd,
      env,
      stdio: ['ignore', 'pipe', 'pipe'],
      shell: true, // Allow shell commands like "npm test"
    });

    // Resolve exactly once, whichever of timeout / close / error comes first.
    const finish = (result: CommandResult): void => {
      if (settled) return;
      settled = true;
      if (timeoutId) clearTimeout(timeoutId);
      resolve(result);
    };

    // Accumulate output until the combined limit is exceeded, then stop.
    // Dropping later chunks keeps memory bounded and avoids a silent gap
    // between the truncated prefix and newer data.
    const collect = (target: 'stdout' | 'stderr', chunk: string): void => {
      if (truncated) return;
      if (target === 'stdout') {
        stdout += chunk;
      } else {
        stderr += chunk;
      }
      if (stdout.length + stderr.length > maxOutputSize) {
        truncated = true;
        const perStreamLimit = Math.max(0, Math.floor(maxOutputSize / 2));
        stdout = stdout.slice(0, perStreamLimit);
        stderr = stderr.slice(0, perStreamLimit);
      }
    };

    timeoutId = setTimeout(() => {
      proc.kill();
      finish({
        exitCode: -1,
        stdout,
        stderr,
        truncated,
        timeout: true,
      });
    }, timeoutMs);

    // Decode as UTF-8 at the stream level so a multi-byte character split
    // across two chunks is not corrupted.
    proc.stdout?.setEncoding('utf8');
    proc.stderr?.setEncoding('utf8');

    proc.stdout?.on('data', (chunk: string) => collect('stdout', chunk));
    proc.stderr?.on('data', (chunk: string) => collect('stderr', chunk));

    proc.on('close', (code) => {
      finish({
        // `code` is null when the process was terminated by a signal;
        // that must not be reported as a successful exit.
        exitCode: code ?? -1,
        stdout,
        stderr,
        truncated,
      });
    });

    proc.on('error', (error) => {
      proc.kill();
      // Surface the spawn failure instead of discarding it.
      const separator = stderr.length > 0 && !stderr.endsWith('\n') ? '\n' : '';
      finish({
        exitCode: -1,
        stdout,
        stderr: `${stderr}${separator}${error.message}`,
        truncated,
      });
    });
  });
}
119
+
/**
 * Convenience wrapper that runs a complete command line given as one string.
 *
 * The string is split on single spaces: the first piece becomes the command
 * and the remaining pieces become its arguments. Quoting is not interpreted
 * by the split itself.
 *
 * @param commandLine - Full command line (e.g., "npm test --watchAll=false")
 * @param options - Execution options
 * @returns Promise resolving to CommandResult
 */
export async function runCommandLine(
  commandLine: string,
  options: CommandOptions = {}
): Promise<CommandResult> {
  // Plain space split - not perfect but sufficient for our use cases
  const tokens = commandLine.split(' ');
  const cmd = tokens[0];
  const args = tokens.slice(1);
  return runCommand(cmd, args, options);
}