outcome-cli 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +261 -0
- package/package.json +95 -0
- package/src/agents/README.md +139 -0
- package/src/agents/adapters/anthropic.adapter.ts +166 -0
- package/src/agents/adapters/dalle.adapter.ts +145 -0
- package/src/agents/adapters/gemini.adapter.ts +134 -0
- package/src/agents/adapters/imagen.adapter.ts +106 -0
- package/src/agents/adapters/nano-banana.adapter.ts +129 -0
- package/src/agents/adapters/openai.adapter.ts +165 -0
- package/src/agents/adapters/veo.adapter.ts +130 -0
- package/src/agents/agent.schema.property.test.ts +379 -0
- package/src/agents/agent.schema.test.ts +148 -0
- package/src/agents/agent.schema.ts +263 -0
- package/src/agents/index.ts +60 -0
- package/src/agents/registered-agent.schema.ts +356 -0
- package/src/agents/registry.ts +97 -0
- package/src/agents/tournament-configs.property.test.ts +266 -0
- package/src/cli/README.md +145 -0
- package/src/cli/commands/define.ts +79 -0
- package/src/cli/commands/list.ts +46 -0
- package/src/cli/commands/logs.ts +83 -0
- package/src/cli/commands/run.ts +416 -0
- package/src/cli/commands/verify.ts +110 -0
- package/src/cli/index.ts +81 -0
- package/src/config/README.md +128 -0
- package/src/config/env.ts +262 -0
- package/src/config/index.ts +19 -0
- package/src/eval/README.md +318 -0
- package/src/eval/ai-judge.test.ts +435 -0
- package/src/eval/ai-judge.ts +368 -0
- package/src/eval/code-validators.ts +414 -0
- package/src/eval/evaluateOutcome.property.test.ts +1174 -0
- package/src/eval/evaluateOutcome.ts +591 -0
- package/src/eval/immigration-validators.ts +122 -0
- package/src/eval/index.ts +90 -0
- package/src/eval/judge-cache.ts +402 -0
- package/src/eval/tournament-validators.property.test.ts +439 -0
- package/src/eval/validators.property.test.ts +1118 -0
- package/src/eval/validators.ts +1199 -0
- package/src/eval/weighted-scorer.ts +285 -0
- package/src/index.ts +17 -0
- package/src/league/README.md +188 -0
- package/src/league/health-check.ts +353 -0
- package/src/league/index.ts +93 -0
- package/src/league/killAgent.ts +151 -0
- package/src/league/league.test.ts +1151 -0
- package/src/league/runLeague.ts +843 -0
- package/src/league/scoreAgent.ts +175 -0
- package/src/modules/omnibridge/__tests__/.gitkeep +1 -0
- package/src/modules/omnibridge/__tests__/auth-tunnel.property.test.ts +524 -0
- package/src/modules/omnibridge/__tests__/deterministic-logger.property.test.ts +965 -0
- package/src/modules/omnibridge/__tests__/ghost-api.property.test.ts +461 -0
- package/src/modules/omnibridge/__tests__/omnibridge-integration.test.ts +542 -0
- package/src/modules/omnibridge/__tests__/parallel-executor.property.test.ts +671 -0
- package/src/modules/omnibridge/__tests__/semantic-normalizer.property.test.ts +521 -0
- package/src/modules/omnibridge/__tests__/semantic-normalizer.test.ts +254 -0
- package/src/modules/omnibridge/__tests__/session-vault.property.test.ts +367 -0
- package/src/modules/omnibridge/__tests__/shadow-session.property.test.ts +523 -0
- package/src/modules/omnibridge/__tests__/triangulation-engine.property.test.ts +292 -0
- package/src/modules/omnibridge/__tests__/verification-engine.property.test.ts +769 -0
- package/src/modules/omnibridge/api/.gitkeep +1 -0
- package/src/modules/omnibridge/api/ghost-api.ts +1087 -0
- package/src/modules/omnibridge/auth/.gitkeep +1 -0
- package/src/modules/omnibridge/auth/auth-tunnel.ts +843 -0
- package/src/modules/omnibridge/auth/session-vault.ts +577 -0
- package/src/modules/omnibridge/core/.gitkeep +1 -0
- package/src/modules/omnibridge/core/semantic-normalizer.ts +702 -0
- package/src/modules/omnibridge/core/triangulation-engine.ts +530 -0
- package/src/modules/omnibridge/core/types.ts +610 -0
- package/src/modules/omnibridge/execution/.gitkeep +1 -0
- package/src/modules/omnibridge/execution/deterministic-logger.ts +629 -0
- package/src/modules/omnibridge/execution/parallel-executor.ts +542 -0
- package/src/modules/omnibridge/execution/shadow-session.ts +794 -0
- package/src/modules/omnibridge/index.ts +212 -0
- package/src/modules/omnibridge/omnibridge.ts +510 -0
- package/src/modules/omnibridge/verification/.gitkeep +1 -0
- package/src/modules/omnibridge/verification/verification-engine.ts +783 -0
- package/src/outcomes/README.md +75 -0
- package/src/outcomes/acquire-pilot-customer.ts +297 -0
- package/src/outcomes/code-delivery-outcomes.ts +89 -0
- package/src/outcomes/code-outcomes.ts +256 -0
- package/src/outcomes/code_review_battle.test.ts +135 -0
- package/src/outcomes/code_review_battle.ts +135 -0
- package/src/outcomes/cold_email_battle.ts +97 -0
- package/src/outcomes/content_creation_battle.ts +160 -0
- package/src/outcomes/f1_stem_opt_compliance.ts +61 -0
- package/src/outcomes/index.ts +107 -0
- package/src/outcomes/lead_gen_battle.test.ts +113 -0
- package/src/outcomes/lead_gen_battle.ts +99 -0
- package/src/outcomes/outcome.schema.property.test.ts +229 -0
- package/src/outcomes/outcome.schema.ts +187 -0
- package/src/outcomes/qualified_sales_interest.ts +118 -0
- package/src/outcomes/swarm_planner.property.test.ts +370 -0
- package/src/outcomes/swarm_planner.ts +96 -0
- package/src/outcomes/web_extraction.ts +234 -0
- package/src/runtime/README.md +220 -0
- package/src/runtime/agentRunner.test.ts +341 -0
- package/src/runtime/agentRunner.ts +746 -0
- package/src/runtime/claudeAdapter.ts +232 -0
- package/src/runtime/costTracker.ts +123 -0
- package/src/runtime/index.ts +34 -0
- package/src/runtime/modelAdapter.property.test.ts +305 -0
- package/src/runtime/modelAdapter.ts +144 -0
- package/src/runtime/openaiAdapter.ts +235 -0
- package/src/utils/README.md +122 -0
- package/src/utils/command-runner.ts +134 -0
- package/src/utils/cost-guard.ts +379 -0
- package/src/utils/errors.test.ts +290 -0
- package/src/utils/errors.ts +442 -0
- package/src/utils/index.ts +37 -0
- package/src/utils/logger.test.ts +361 -0
- package/src/utils/logger.ts +419 -0
- package/src/utils/output-parsers.ts +216 -0
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Model Adapter - AI model abstraction for Claude and OpenAI
|
|
3
|
+
*
|
|
4
|
+
* Provides a unified interface for interacting with different AI model providers.
|
|
5
|
+
* Normalizes responses to a common format regardless of underlying provider.
|
|
6
|
+
*
|
|
7
|
+
* @module runtime/modelAdapter
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import type { ToolDefinition, ToolCall, ToolCallResult } from '../skills/skill.interface.js';
|
|
11
|
+
|
|
12
|
+
/**
 * Options for model completion requests.
 *
 * All fields are optional; adapters fall back to provider defaults
 * when a field is omitted (e.g. the OpenAI adapter applies its own
 * default max-token budget and omits temperature from the request).
 */
export interface ModelOptions {
  /** Maximum tokens to generate in the response */
  maxTokens?: number;
  /** Temperature for response randomness (0-1); omitted = provider default */
  temperature?: number;
  /** System prompt to set context */
  systemPrompt?: string;
  /** Tool definitions available to the model */
  tools?: ToolDefinition[];
}
|
|
25
|
+
|
|
26
|
+
/**
 * Normalized response from any model provider.
 * Ensures consistent format regardless of Claude or OpenAI.
 */
export interface ModelResponse {
  /** The generated text content (empty string when the model returned none) */
  content: string;
  /** Total tokens used (input + output); 0 when the provider reported no usage */
  tokensUsed: number;
  /** The model identifier that generated the response */
  model: string;
  /** Tool calls requested by the model; omitted (not empty) when there are none */
  toolCalls?: ToolCall[];
  /** Whether the model stopped to wait for tool results (multi-turn tool use) */
  requiresToolResponse?: boolean;
}
|
|
42
|
+
|
|
43
|
+
/**
 * Message in a conversation for multi-turn tool use.
 */
export interface ConversationMessage {
  /** Originator of the message; 'tool' carries a tool execution result */
  role: 'user' | 'assistant' | 'tool';
  /** Message text (or the serialized tool result for role 'tool') */
  content: string;
  /** Tool call ID (required for 'tool' role messages — adapters assert it is set) */
  toolCallId?: string;
  /** Tool calls made by assistant ('assistant' role only) */
  toolCalls?: ToolCall[];
}
|
|
54
|
+
|
|
55
|
+
// Re-export types for convenience
|
|
56
|
+
export type { ToolDefinition, ToolCall, ToolCallResult };
|
|
57
|
+
|
|
58
|
+
/**
 * Unified interface for AI model adapters.
 * Abstracts the underlying provider (Claude or OpenAI).
 */
export interface ModelAdapter {
  /**
   * Sends a completion request to the model.
   *
   * @param prompt - The user prompt to send
   * @param options - Optional configuration for the request
   * @returns Normalized model response
   */
  complete(prompt: string, options?: ModelOptions): Promise<ModelResponse>;

  /**
   * Continues a conversation with tool results.
   * Used for multi-turn tool use interactions.
   *
   * @param messages - The conversation history including tool results
   * @param options - Optional configuration for the request
   * @returns Normalized model response
   */
  continueWithToolResults(
    messages: ConversationMessage[],
    options?: ModelOptions
  ): Promise<ModelResponse>;

  /**
   * Estimates token count for a given text.
   * Implementations may use a heuristic (e.g. ~4 chars per token)
   * rather than a real tokenizer, so treat the result as approximate.
   *
   * @param text - The text to count tokens for
   * @returns Estimated token count
   */
  countTokens(text: string): number;

  /** The provider name ('claude' or 'openai') */
  readonly provider: 'claude' | 'openai';

  /** The specific model ID being used */
  readonly modelId: string;
}
|
|
99
|
+
|
|
100
|
+
/**
 * Configuration for creating a model adapter.
 * Consumed by {@link createAdapter} to pick and construct the implementation.
 */
export interface ModelAdapterConfig {
  /** The AI provider to use */
  provider: 'claude' | 'openai';
  /** The specific model ID (e.g., 'claude-3-sonnet-20240229', 'gpt-4-turbo-preview') */
  modelId: string;
  /** API key for the provider */
  apiKey: string;
}
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
/**
|
|
114
|
+
* Creates a model adapter based on the provided configuration.
|
|
115
|
+
*
|
|
116
|
+
* @param config - Configuration specifying provider, model, and API key
|
|
117
|
+
* @returns ModelAdapter for the specified provider
|
|
118
|
+
* @throws Error if provider is not supported
|
|
119
|
+
*
|
|
120
|
+
* @example
|
|
121
|
+
* const adapter = await createAdapter({
|
|
122
|
+
* provider: 'claude',
|
|
123
|
+
* modelId: 'claude-3-sonnet-20240229',
|
|
124
|
+
* apiKey: process.env.ANTHROPIC_API_KEY!
|
|
125
|
+
* });
|
|
126
|
+
*
|
|
127
|
+
* @see Requirements 11.1, 11.2, 11.3
|
|
128
|
+
*/
|
|
129
|
+
export async function createAdapter(config: ModelAdapterConfig): Promise<ModelAdapter> {
|
|
130
|
+
switch (config.provider) {
|
|
131
|
+
case 'claude': {
|
|
132
|
+
const { createClaudeAdapter } = await import('./claudeAdapter.js');
|
|
133
|
+
return createClaudeAdapter(config.apiKey, config.modelId);
|
|
134
|
+
}
|
|
135
|
+
case 'openai': {
|
|
136
|
+
const { createOpenAIAdapter } = await import('./openaiAdapter.js');
|
|
137
|
+
return createOpenAIAdapter(config.apiKey, config.modelId);
|
|
138
|
+
}
|
|
139
|
+
default: {
|
|
140
|
+
const exhaustiveCheck: never = config.provider;
|
|
141
|
+
throw new Error(`Unsupported model provider: ${exhaustiveCheck}`);
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
}
|
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OpenAI Adapter - OpenAI API integration
|
|
3
|
+
*
|
|
4
|
+
* Implements the ModelAdapter interface for OpenAI models.
|
|
5
|
+
* Uses the official openai npm package following Chat Completions API.
|
|
6
|
+
*
|
|
7
|
+
* @module runtime/openaiAdapter
|
|
8
|
+
* @see https://platform.openai.com/docs/api-reference
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import OpenAI from 'openai';
|
|
12
|
+
import type {
|
|
13
|
+
ModelAdapter,
|
|
14
|
+
ModelOptions,
|
|
15
|
+
ModelResponse,
|
|
16
|
+
ConversationMessage,
|
|
17
|
+
ToolDefinition,
|
|
18
|
+
ToolCall,
|
|
19
|
+
} from './modelAdapter.js';
|
|
20
|
+
|
|
21
|
+
/**
 * Default max tokens for OpenAI responses.
 * Applied whenever the caller omits ModelOptions.maxTokens.
 */
const DEFAULT_MAX_TOKENS = 1024;

/**
 * Approximate characters per token for estimation.
 * OpenAI uses ~4 characters per token on average; countTokens() divides
 * by this rather than running a real tokenizer.
 */
const CHARS_PER_TOKEN = 4;
|
|
31
|
+
|
|
32
|
+
/**
|
|
33
|
+
* Converts our tool definitions to OpenAI's tool format.
|
|
34
|
+
*
|
|
35
|
+
* @param tools - Our normalized tool definitions
|
|
36
|
+
* @returns OpenAI-formatted tools
|
|
37
|
+
*/
|
|
38
|
+
function toOpenAITools(tools: ToolDefinition[]): OpenAI.ChatCompletionTool[] {
|
|
39
|
+
return tools.map((tool) => ({
|
|
40
|
+
type: 'function' as const,
|
|
41
|
+
function: {
|
|
42
|
+
name: tool.name,
|
|
43
|
+
description: tool.description,
|
|
44
|
+
parameters: {
|
|
45
|
+
type: 'object',
|
|
46
|
+
properties: tool.inputSchema.properties,
|
|
47
|
+
required: tool.inputSchema.required,
|
|
48
|
+
},
|
|
49
|
+
},
|
|
50
|
+
}));
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
/**
|
|
54
|
+
* Converts our conversation messages to OpenAI's message format.
|
|
55
|
+
*
|
|
56
|
+
* @param messages - Our normalized conversation messages
|
|
57
|
+
* @returns OpenAI-formatted messages
|
|
58
|
+
*/
|
|
59
|
+
function toOpenAIMessages(messages: ConversationMessage[]): OpenAI.ChatCompletionMessageParam[] {
|
|
60
|
+
const openaiMessages: OpenAI.ChatCompletionMessageParam[] = [];
|
|
61
|
+
|
|
62
|
+
for (const msg of messages) {
|
|
63
|
+
if (msg.role === 'user') {
|
|
64
|
+
openaiMessages.push({ role: 'user', content: msg.content });
|
|
65
|
+
} else if (msg.role === 'assistant') {
|
|
66
|
+
// Assistant message with potential tool calls
|
|
67
|
+
const assistantMsg: OpenAI.ChatCompletionAssistantMessageParam = {
|
|
68
|
+
role: 'assistant',
|
|
69
|
+
content: msg.content || null,
|
|
70
|
+
};
|
|
71
|
+
if (msg.toolCalls && msg.toolCalls.length > 0) {
|
|
72
|
+
assistantMsg.tool_calls = msg.toolCalls.map((tc) => ({
|
|
73
|
+
id: tc.id,
|
|
74
|
+
type: 'function' as const,
|
|
75
|
+
function: {
|
|
76
|
+
name: tc.name,
|
|
77
|
+
arguments: JSON.stringify(tc.arguments),
|
|
78
|
+
},
|
|
79
|
+
}));
|
|
80
|
+
}
|
|
81
|
+
openaiMessages.push(assistantMsg);
|
|
82
|
+
} else if (msg.role === 'tool') {
|
|
83
|
+
// Tool result
|
|
84
|
+
openaiMessages.push({
|
|
85
|
+
role: 'tool',
|
|
86
|
+
tool_call_id: msg.toolCallId!,
|
|
87
|
+
content: msg.content,
|
|
88
|
+
});
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
return openaiMessages;
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
/**
|
|
96
|
+
* Extracts tool calls from OpenAI's response.
|
|
97
|
+
*
|
|
98
|
+
* @param toolCalls - OpenAI tool calls from response
|
|
99
|
+
* @returns Array of normalized tool calls
|
|
100
|
+
*/
|
|
101
|
+
function extractToolCalls(
|
|
102
|
+
toolCalls: OpenAI.ChatCompletionMessageToolCall[] | undefined
|
|
103
|
+
): ToolCall[] {
|
|
104
|
+
if (!toolCalls) return [];
|
|
105
|
+
|
|
106
|
+
return toolCalls
|
|
107
|
+
.filter((tc): tc is OpenAI.ChatCompletionMessageToolCall & { type: 'function' } =>
|
|
108
|
+
tc.type === 'function'
|
|
109
|
+
)
|
|
110
|
+
.map((tc) => ({
|
|
111
|
+
id: tc.id,
|
|
112
|
+
name: tc.function.name,
|
|
113
|
+
arguments: JSON.parse(tc.function.arguments) as Record<string, unknown>,
|
|
114
|
+
}));
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
/**
|
|
118
|
+
* Creates an OpenAI model adapter.
|
|
119
|
+
*
|
|
120
|
+
* @param apiKey - OpenAI API key
|
|
121
|
+
* @param modelId - OpenAI model ID (e.g., 'gpt-4-turbo-preview')
|
|
122
|
+
* @returns ModelAdapter implementation for OpenAI
|
|
123
|
+
*
|
|
124
|
+
* @example
|
|
125
|
+
* const adapter = createOpenAIAdapter(process.env.OPENAI_API_KEY!, 'gpt-4-turbo-preview');
|
|
126
|
+
* const response = await adapter.complete('Hello, GPT');
|
|
127
|
+
*
|
|
128
|
+
* @see Requirements 11.1, 11.3, 11.4
|
|
129
|
+
*/
|
|
130
|
+
export function createOpenAIAdapter(apiKey: string, modelId: string): ModelAdapter {
|
|
131
|
+
const client = new OpenAI({ apiKey });
|
|
132
|
+
|
|
133
|
+
return {
|
|
134
|
+
provider: 'openai',
|
|
135
|
+
modelId,
|
|
136
|
+
|
|
137
|
+
async complete(prompt: string, options?: ModelOptions): Promise<ModelResponse> {
|
|
138
|
+
const messages: OpenAI.ChatCompletionMessageParam[] = [];
|
|
139
|
+
|
|
140
|
+
// Add system message if provided
|
|
141
|
+
if (options?.systemPrompt) {
|
|
142
|
+
messages.push({ role: 'system', content: options.systemPrompt });
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
// Add user message
|
|
146
|
+
messages.push({ role: 'user', content: prompt });
|
|
147
|
+
|
|
148
|
+
const requestParams: OpenAI.ChatCompletionCreateParams = {
|
|
149
|
+
model: modelId,
|
|
150
|
+
max_tokens: options?.maxTokens ?? DEFAULT_MAX_TOKENS,
|
|
151
|
+
...(options?.temperature !== undefined && { temperature: options.temperature }),
|
|
152
|
+
messages,
|
|
153
|
+
};
|
|
154
|
+
|
|
155
|
+
// Add tools if provided
|
|
156
|
+
if (options?.tools && options.tools.length > 0) {
|
|
157
|
+
requestParams.tools = toOpenAITools(options.tools);
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
const completion = await client.chat.completions.create(requestParams);
|
|
161
|
+
|
|
162
|
+
// Extract content from response
|
|
163
|
+
const content = completion.choices[0]?.message?.content ?? '';
|
|
164
|
+
|
|
165
|
+
// Extract tool calls if any
|
|
166
|
+
const toolCalls = extractToolCalls(completion.choices[0]?.message?.tool_calls);
|
|
167
|
+
|
|
168
|
+
// Calculate total tokens used
|
|
169
|
+
const tokensUsed = completion.usage?.total_tokens ?? 0;
|
|
170
|
+
|
|
171
|
+
// Check if model wants to use tools
|
|
172
|
+
const finishReason = completion.choices[0]?.finish_reason;
|
|
173
|
+
|
|
174
|
+
return {
|
|
175
|
+
content,
|
|
176
|
+
tokensUsed,
|
|
177
|
+
model: completion.model,
|
|
178
|
+
toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
|
|
179
|
+
requiresToolResponse: finishReason === 'tool_calls',
|
|
180
|
+
};
|
|
181
|
+
},
|
|
182
|
+
|
|
183
|
+
async continueWithToolResults(
|
|
184
|
+
messages: ConversationMessage[],
|
|
185
|
+
options?: ModelOptions
|
|
186
|
+
): Promise<ModelResponse> {
|
|
187
|
+
const openaiMessages = toOpenAIMessages(messages);
|
|
188
|
+
|
|
189
|
+
// Add system message at the beginning if provided
|
|
190
|
+
if (options?.systemPrompt) {
|
|
191
|
+
openaiMessages.unshift({ role: 'system', content: options.systemPrompt });
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
const requestParams: OpenAI.ChatCompletionCreateParams = {
|
|
195
|
+
model: modelId,
|
|
196
|
+
max_tokens: options?.maxTokens ?? DEFAULT_MAX_TOKENS,
|
|
197
|
+
...(options?.temperature !== undefined && { temperature: options.temperature }),
|
|
198
|
+
messages: openaiMessages,
|
|
199
|
+
};
|
|
200
|
+
|
|
201
|
+
// Add tools if provided
|
|
202
|
+
if (options?.tools && options.tools.length > 0) {
|
|
203
|
+
requestParams.tools = toOpenAITools(options.tools);
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
const completion = await client.chat.completions.create(requestParams);
|
|
207
|
+
|
|
208
|
+
// Extract content from response
|
|
209
|
+
const content = completion.choices[0]?.message?.content ?? '';
|
|
210
|
+
|
|
211
|
+
// Extract tool calls if any
|
|
212
|
+
const toolCalls = extractToolCalls(completion.choices[0]?.message?.tool_calls);
|
|
213
|
+
|
|
214
|
+
// Calculate total tokens used
|
|
215
|
+
const tokensUsed = completion.usage?.total_tokens ?? 0;
|
|
216
|
+
|
|
217
|
+
// Check if model wants to use tools
|
|
218
|
+
const finishReason = completion.choices[0]?.finish_reason;
|
|
219
|
+
|
|
220
|
+
return {
|
|
221
|
+
content,
|
|
222
|
+
tokensUsed,
|
|
223
|
+
model: completion.model,
|
|
224
|
+
toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
|
|
225
|
+
requiresToolResponse: finishReason === 'tool_calls',
|
|
226
|
+
};
|
|
227
|
+
},
|
|
228
|
+
|
|
229
|
+
countTokens(text: string): number {
|
|
230
|
+
// Approximate token count based on character length
|
|
231
|
+
// OpenAI uses ~4 characters per token on average
|
|
232
|
+
return Math.ceil(text.length / CHARS_PER_TOKEN);
|
|
233
|
+
},
|
|
234
|
+
};
|
|
235
|
+
}
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
# Utils Module
|
|
2
|
+
|
|
3
|
+
Shared utilities for the Earnd Bounty Engine, including structured logging.
|
|
4
|
+
|
|
5
|
+
## Components
|
|
6
|
+
|
|
7
|
+
### Logger (`logger.ts`)
|
|
8
|
+
|
|
9
|
+
Mandatory structured logging for all agent attempts. Every agent attempt is logged for auditing and debugging.
|
|
10
|
+
|
|
11
|
+
**Key Interfaces:**
|
|
12
|
+
|
|
13
|
+
- `LogEntry` - Structured log entry with timestamp, agentId, outcomeId, promptVersion, tokensSpent, result, failureReason
|
|
14
|
+
- `LogEntryInput` - Input for creating log entries (timestamp auto-generated)
|
|
15
|
+
- `LogResult` - Result status: 'SUCCESS' | 'FAILURE' | 'PENDING'
|
|
16
|
+
|
|
17
|
+
**Key Functions:**
|
|
18
|
+
|
|
19
|
+
- `log(entry)` - Creates and stores a log entry
|
|
20
|
+
- `getLogs(outcomeId)` - Retrieves all logs for an outcome
|
|
21
|
+
- `getAgentLogs(outcomeId, agentId)` - Retrieves logs for a specific agent
|
|
22
|
+
- `clearLogs(outcomeId)` - Clears logs for an outcome
|
|
23
|
+
- `formatLogsForCli(outcomeId)` - Formats logs for CLI display
|
|
24
|
+
|
|
25
|
+
**Convenience Functions:**
|
|
26
|
+
|
|
27
|
+
- `logSuccess(...)` - Log a successful attempt
|
|
28
|
+
- `logFailure(...)` - Log a failed attempt with reason
|
|
29
|
+
- `logPending(...)` - Log a pending attempt
|
|
30
|
+
|
|
31
|
+
**Usage:**
|
|
32
|
+
|
|
33
|
+
```typescript
|
|
34
|
+
import { log, getLogs, logSuccess, logFailure, formatLogsForCli } from './logger.js';
|
|
35
|
+
|
|
36
|
+
// Log an attempt
|
|
37
|
+
log({
|
|
38
|
+
agentId: 'agent-001',
|
|
39
|
+
outcomeId: 'qualified_sales_interest',
|
|
40
|
+
promptVersion: 'v1.0.0',
|
|
41
|
+
tokensSpent: 500,
|
|
42
|
+
result: 'SUCCESS'
|
|
43
|
+
});
|
|
44
|
+
|
|
45
|
+
// Or use convenience functions
|
|
46
|
+
logSuccess('agent-001', 'qualified_sales_interest', 'v1.0.0', 500);
|
|
47
|
+
logFailure('agent-002', 'qualified_sales_interest', 'v1.0.0', 300, 'Company too small');
|
|
48
|
+
|
|
49
|
+
// Retrieve logs
|
|
50
|
+
const logs = getLogs('qualified_sales_interest');
|
|
51
|
+
console.log(`Found ${logs.length} log entries`);
|
|
52
|
+
|
|
53
|
+
// Format for CLI display
|
|
54
|
+
console.log(formatLogsForCli('qualified_sales_interest'));
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
**Output Format:**
|
|
58
|
+
|
|
59
|
+
```text
|
|
60
|
+
✅ [14:32:15] Agent:agent-001 | Outcome:qualified_sales_interest | Tokens:500 | SUCCESS
|
|
61
|
+
❌ [14:32:18] Agent:agent-002 | Outcome:qualified_sales_interest | Tokens:300 | FAILURE | Reason: Company too small
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## Requirements Reference
|
|
65
|
+
|
|
66
|
+
- **6.1** - Record agent ID, outcome ID, prompt version, tokens spent, result, and failure reason
|
|
67
|
+
- **6.2** - Provide logs viewable via CLI
|
|
68
|
+
- **6.3** - Persist all attempt logs after agent run completes
|
|
69
|
+
|
|
70
|
+
### Error Types (`errors.ts`)
|
|
71
|
+
|
|
72
|
+
Custom error classes for structured error handling across the system.
|
|
73
|
+
|
|
74
|
+
**Error Classes:**
|
|
75
|
+
|
|
76
|
+
- `ValidationError` - Invalid schemas, malformed data (not recoverable)
|
|
77
|
+
- `ExecutionError` - Model API failures, timeouts, network issues (may be recoverable)
|
|
78
|
+
- `LimitError` - Cost ceiling, attempt limit, time limit exceeded (not recoverable)
|
|
79
|
+
- `SystemError` - Infrastructure failures, state corruption (not recoverable)
|
|
80
|
+
|
|
81
|
+
**Key Interfaces:**
|
|
82
|
+
|
|
83
|
+
- `ErrorResponse` - Structured error format with code, message, details, recoverable flag
|
|
84
|
+
- `ErrorCode` - Enum of all error codes for programmatic handling
|
|
85
|
+
|
|
86
|
+
**Type Guards:**
|
|
87
|
+
|
|
88
|
+
- `isEarndError(error)` - Check if error is any EarndError subclass
|
|
89
|
+
- `isValidationError(error)` - Check if error is ValidationError
|
|
90
|
+
- `isExecutionError(error)` - Check if error is ExecutionError
|
|
91
|
+
- `isLimitError(error)` - Check if error is LimitError
|
|
92
|
+
- `isSystemError(error)` - Check if error is SystemError
|
|
93
|
+
|
|
94
|
+
**Usage:**
|
|
95
|
+
|
|
96
|
+
```typescript
|
|
97
|
+
import { ValidationError, LimitError, toErrorResponse } from './errors.js';
|
|
98
|
+
|
|
99
|
+
// Create validation error
|
|
100
|
+
throw ValidationError.missingField('email', 'Lead');
|
|
101
|
+
|
|
102
|
+
// Create limit error
|
|
103
|
+
throw LimitError.costExceeded('agent-1', 15000, 10000);
|
|
104
|
+
|
|
105
|
+
// Convert any error to ErrorResponse
|
|
106
|
+
try {
|
|
107
|
+
// ... code that may throw
|
|
108
|
+
} catch (error) {
|
|
109
|
+
const response = toErrorResponse(error);
|
|
110
|
+
console.log(response.code, response.message, response.recoverable);
|
|
111
|
+
}
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
## Design Principles
|
|
115
|
+
|
|
116
|
+
1. **Mandatory Logging** - Every agent attempt must be logged
|
|
117
|
+
2. **Structured Data** - All logs follow the LogEntry schema
|
|
118
|
+
3. **Audit Trail** - Logs persist for debugging and compliance
|
|
119
|
+
4. **CLI-Friendly** - Output formatted for terminal readability
|
|
120
|
+
5. **No Silent Failures** - All errors and failures are logged with reasons
|
|
121
|
+
6. **Fail Closed** - Limit errors terminate agents with no payout possibility
|
|
122
|
+
7. **Typed Errors** - All errors have codes for programmatic handling
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Safe Command Execution - Async command runner with timeouts and limits
|
|
3
|
+
*
|
|
4
|
+
* Safely executes shell commands with:
|
|
5
|
+
* - Timeouts to prevent hanging
|
|
6
|
+
* - Output size limits to prevent memory exhaustion
|
|
7
|
+
* - Proper error handling and cleanup
|
|
8
|
+
*
|
|
9
|
+
* Used for running tests, builds, linting, benchmarks, and security scans
|
|
10
|
+
* in the Outcome evaluation pipeline.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import { spawn } from 'child_process';
|
|
14
|
+
|
|
15
|
+
/** Outcome of a command execution; failures are reported here, never thrown. */
export interface CommandResult {
  /** Process exit code; -1 when the command timed out or failed to spawn */
  exitCode: number;
  /** Captured standard output (possibly truncated) */
  stdout: string;
  /** Captured standard error (possibly truncated) */
  stderr: string;
  /** True when combined output exceeded maxOutputSize and was cut */
  truncated?: boolean;
  /** True when the process was killed after timeoutMs elapsed */
  timeout?: boolean;
}
|
|
22
|
+
|
|
23
|
+
/** Options controlling how a command is spawned and limited. */
export interface CommandOptions {
  /** Working directory for the spawned process */
  cwd?: string;
  /** Kill the process after this many milliseconds (default: 30000) */
  timeoutMs?: number;
  maxOutputSize?: number; // max chars for stdout+stderr combined (default: 100000)
  /** Environment variables for the process (default: process.env) */
  env?: Record<string, string>;
}
|
|
29
|
+
|
|
30
|
+
/**
|
|
31
|
+
* Safely execute a shell command asynchronously.
|
|
32
|
+
*
|
|
33
|
+
* @param command - Command to execute (e.g., "npm test")
|
|
34
|
+
* @param args - Command arguments (if needed)
|
|
35
|
+
* @param options - Execution options
|
|
36
|
+
* @returns Promise resolving to CommandResult
|
|
37
|
+
*/
|
|
38
|
+
export async function runCommand(
|
|
39
|
+
command: string,
|
|
40
|
+
args: string[] = [],
|
|
41
|
+
options: CommandOptions = {}
|
|
42
|
+
): Promise<CommandResult> {
|
|
43
|
+
const {
|
|
44
|
+
cwd,
|
|
45
|
+
timeoutMs = 30000, // 30 seconds default
|
|
46
|
+
maxOutputSize = 100000, // 100k chars
|
|
47
|
+
env = process.env,
|
|
48
|
+
} = options;
|
|
49
|
+
|
|
50
|
+
return new Promise((resolve) => {
|
|
51
|
+
let stdout = '';
|
|
52
|
+
let stderr = '';
|
|
53
|
+
let truncated = false;
|
|
54
|
+
|
|
55
|
+
const proc = spawn(command, args, {
|
|
56
|
+
cwd,
|
|
57
|
+
env,
|
|
58
|
+
stdio: ['ignore', 'pipe', 'pipe'],
|
|
59
|
+
shell: true, // Allow shell commands like "npm test"
|
|
60
|
+
});
|
|
61
|
+
|
|
62
|
+
let timeoutId: NodeJS.Timeout | undefined;
|
|
63
|
+
|
|
64
|
+
const cleanup = () => {
|
|
65
|
+
if (timeoutId) clearTimeout(timeoutId);
|
|
66
|
+
proc.kill();
|
|
67
|
+
};
|
|
68
|
+
|
|
69
|
+
const checkTruncation = () => {
|
|
70
|
+
if (stdout.length + stderr.length > maxOutputSize) {
|
|
71
|
+
truncated = true;
|
|
72
|
+
stdout = stdout.slice(0, maxOutputSize / 2);
|
|
73
|
+
stderr = stderr.slice(0, maxOutputSize / 2);
|
|
74
|
+
}
|
|
75
|
+
};
|
|
76
|
+
|
|
77
|
+
// Set timeout
|
|
78
|
+
timeoutId = setTimeout(() => {
|
|
79
|
+
cleanup();
|
|
80
|
+
resolve({
|
|
81
|
+
exitCode: -1,
|
|
82
|
+
stdout,
|
|
83
|
+
stderr,
|
|
84
|
+
timeout: true,
|
|
85
|
+
});
|
|
86
|
+
}, timeoutMs);
|
|
87
|
+
|
|
88
|
+
proc.stdout?.on('data', (data) => {
|
|
89
|
+
stdout += data.toString();
|
|
90
|
+
checkTruncation();
|
|
91
|
+
});
|
|
92
|
+
|
|
93
|
+
proc.stderr?.on('data', (data) => {
|
|
94
|
+
stderr += data.toString();
|
|
95
|
+
checkTruncation();
|
|
96
|
+
});
|
|
97
|
+
|
|
98
|
+
proc.on('close', (code) => {
|
|
99
|
+
if (timeoutId) clearTimeout(timeoutId);
|
|
100
|
+
resolve({
|
|
101
|
+
exitCode: code ?? 0,
|
|
102
|
+
stdout,
|
|
103
|
+
stderr,
|
|
104
|
+
truncated,
|
|
105
|
+
});
|
|
106
|
+
});
|
|
107
|
+
|
|
108
|
+
proc.on('error', (error) => {
|
|
109
|
+
cleanup();
|
|
110
|
+
resolve({
|
|
111
|
+
exitCode: -1,
|
|
112
|
+
stdout,
|
|
113
|
+
stderr,
|
|
114
|
+
truncated,
|
|
115
|
+
});
|
|
116
|
+
});
|
|
117
|
+
});
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
/**
|
|
121
|
+
* Execute a single command string (convenience wrapper).
|
|
122
|
+
*
|
|
123
|
+
* @param commandLine - Full command line (e.g., "npm test --watchAll=false")
|
|
124
|
+
* @param options - Execution options
|
|
125
|
+
* @returns Promise resolving to CommandResult
|
|
126
|
+
*/
|
|
127
|
+
export async function runCommandLine(
|
|
128
|
+
commandLine: string,
|
|
129
|
+
options: CommandOptions = {}
|
|
130
|
+
): Promise<CommandResult> {
|
|
131
|
+
// Simple split on spaces - not perfect but works for our use cases
|
|
132
|
+
const [cmd, ...args] = commandLine.split(' ');
|
|
133
|
+
return runCommand(cmd, args, options);
|
|
134
|
+
}
|