outcome-cli 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. package/README.md +261 -0
  2. package/package.json +95 -0
  3. package/src/agents/README.md +139 -0
  4. package/src/agents/adapters/anthropic.adapter.ts +166 -0
  5. package/src/agents/adapters/dalle.adapter.ts +145 -0
  6. package/src/agents/adapters/gemini.adapter.ts +134 -0
  7. package/src/agents/adapters/imagen.adapter.ts +106 -0
  8. package/src/agents/adapters/nano-banana.adapter.ts +129 -0
  9. package/src/agents/adapters/openai.adapter.ts +165 -0
  10. package/src/agents/adapters/veo.adapter.ts +130 -0
  11. package/src/agents/agent.schema.property.test.ts +379 -0
  12. package/src/agents/agent.schema.test.ts +148 -0
  13. package/src/agents/agent.schema.ts +263 -0
  14. package/src/agents/index.ts +60 -0
  15. package/src/agents/registered-agent.schema.ts +356 -0
  16. package/src/agents/registry.ts +97 -0
  17. package/src/agents/tournament-configs.property.test.ts +266 -0
  18. package/src/cli/README.md +145 -0
  19. package/src/cli/commands/define.ts +79 -0
  20. package/src/cli/commands/list.ts +46 -0
  21. package/src/cli/commands/logs.ts +83 -0
  22. package/src/cli/commands/run.ts +416 -0
  23. package/src/cli/commands/verify.ts +110 -0
  24. package/src/cli/index.ts +81 -0
  25. package/src/config/README.md +128 -0
  26. package/src/config/env.ts +262 -0
  27. package/src/config/index.ts +19 -0
  28. package/src/eval/README.md +318 -0
  29. package/src/eval/ai-judge.test.ts +435 -0
  30. package/src/eval/ai-judge.ts +368 -0
  31. package/src/eval/code-validators.ts +414 -0
  32. package/src/eval/evaluateOutcome.property.test.ts +1174 -0
  33. package/src/eval/evaluateOutcome.ts +591 -0
  34. package/src/eval/immigration-validators.ts +122 -0
  35. package/src/eval/index.ts +90 -0
  36. package/src/eval/judge-cache.ts +402 -0
  37. package/src/eval/tournament-validators.property.test.ts +439 -0
  38. package/src/eval/validators.property.test.ts +1118 -0
  39. package/src/eval/validators.ts +1199 -0
  40. package/src/eval/weighted-scorer.ts +285 -0
  41. package/src/index.ts +17 -0
  42. package/src/league/README.md +188 -0
  43. package/src/league/health-check.ts +353 -0
  44. package/src/league/index.ts +93 -0
  45. package/src/league/killAgent.ts +151 -0
  46. package/src/league/league.test.ts +1151 -0
  47. package/src/league/runLeague.ts +843 -0
  48. package/src/league/scoreAgent.ts +175 -0
  49. package/src/modules/omnibridge/__tests__/.gitkeep +1 -0
  50. package/src/modules/omnibridge/__tests__/auth-tunnel.property.test.ts +524 -0
  51. package/src/modules/omnibridge/__tests__/deterministic-logger.property.test.ts +965 -0
  52. package/src/modules/omnibridge/__tests__/ghost-api.property.test.ts +461 -0
  53. package/src/modules/omnibridge/__tests__/omnibridge-integration.test.ts +542 -0
  54. package/src/modules/omnibridge/__tests__/parallel-executor.property.test.ts +671 -0
  55. package/src/modules/omnibridge/__tests__/semantic-normalizer.property.test.ts +521 -0
  56. package/src/modules/omnibridge/__tests__/semantic-normalizer.test.ts +254 -0
  57. package/src/modules/omnibridge/__tests__/session-vault.property.test.ts +367 -0
  58. package/src/modules/omnibridge/__tests__/shadow-session.property.test.ts +523 -0
  59. package/src/modules/omnibridge/__tests__/triangulation-engine.property.test.ts +292 -0
  60. package/src/modules/omnibridge/__tests__/verification-engine.property.test.ts +769 -0
  61. package/src/modules/omnibridge/api/.gitkeep +1 -0
  62. package/src/modules/omnibridge/api/ghost-api.ts +1087 -0
  63. package/src/modules/omnibridge/auth/.gitkeep +1 -0
  64. package/src/modules/omnibridge/auth/auth-tunnel.ts +843 -0
  65. package/src/modules/omnibridge/auth/session-vault.ts +577 -0
  66. package/src/modules/omnibridge/core/.gitkeep +1 -0
  67. package/src/modules/omnibridge/core/semantic-normalizer.ts +702 -0
  68. package/src/modules/omnibridge/core/triangulation-engine.ts +530 -0
  69. package/src/modules/omnibridge/core/types.ts +610 -0
  70. package/src/modules/omnibridge/execution/.gitkeep +1 -0
  71. package/src/modules/omnibridge/execution/deterministic-logger.ts +629 -0
  72. package/src/modules/omnibridge/execution/parallel-executor.ts +542 -0
  73. package/src/modules/omnibridge/execution/shadow-session.ts +794 -0
  74. package/src/modules/omnibridge/index.ts +212 -0
  75. package/src/modules/omnibridge/omnibridge.ts +510 -0
  76. package/src/modules/omnibridge/verification/.gitkeep +1 -0
  77. package/src/modules/omnibridge/verification/verification-engine.ts +783 -0
  78. package/src/outcomes/README.md +75 -0
  79. package/src/outcomes/acquire-pilot-customer.ts +297 -0
  80. package/src/outcomes/code-delivery-outcomes.ts +89 -0
  81. package/src/outcomes/code-outcomes.ts +256 -0
  82. package/src/outcomes/code_review_battle.test.ts +135 -0
  83. package/src/outcomes/code_review_battle.ts +135 -0
  84. package/src/outcomes/cold_email_battle.ts +97 -0
  85. package/src/outcomes/content_creation_battle.ts +160 -0
  86. package/src/outcomes/f1_stem_opt_compliance.ts +61 -0
  87. package/src/outcomes/index.ts +107 -0
  88. package/src/outcomes/lead_gen_battle.test.ts +113 -0
  89. package/src/outcomes/lead_gen_battle.ts +99 -0
  90. package/src/outcomes/outcome.schema.property.test.ts +229 -0
  91. package/src/outcomes/outcome.schema.ts +187 -0
  92. package/src/outcomes/qualified_sales_interest.ts +118 -0
  93. package/src/outcomes/swarm_planner.property.test.ts +370 -0
  94. package/src/outcomes/swarm_planner.ts +96 -0
  95. package/src/outcomes/web_extraction.ts +234 -0
  96. package/src/runtime/README.md +220 -0
  97. package/src/runtime/agentRunner.test.ts +341 -0
  98. package/src/runtime/agentRunner.ts +746 -0
  99. package/src/runtime/claudeAdapter.ts +232 -0
  100. package/src/runtime/costTracker.ts +123 -0
  101. package/src/runtime/index.ts +34 -0
  102. package/src/runtime/modelAdapter.property.test.ts +305 -0
  103. package/src/runtime/modelAdapter.ts +144 -0
  104. package/src/runtime/openaiAdapter.ts +235 -0
  105. package/src/utils/README.md +122 -0
  106. package/src/utils/command-runner.ts +134 -0
  107. package/src/utils/cost-guard.ts +379 -0
  108. package/src/utils/errors.test.ts +290 -0
  109. package/src/utils/errors.ts +442 -0
  110. package/src/utils/index.ts +37 -0
  111. package/src/utils/logger.test.ts +361 -0
  112. package/src/utils/logger.ts +419 -0
  113. package/src/utils/output-parsers.ts +216 -0
@@ -0,0 +1,746 @@
1
+ /**
2
+ * Agent Runner - Agent execution engine with attempt loop
3
+ *
4
+ * Implements the agent execution loop with:
5
+ * - Attempt tracking and limits
6
+ * - Cost tracking integration
7
+ * - Model adapter integration
8
+ * - Exponential backoff for retries
9
+ * - Runtime limit enforcement
10
+ * - Skill (tool) execution support
11
+ *
12
+ * @module runtime/agentRunner
13
+ * @see Requirements 4.2, 4.3, 10.1, 10.2, 11.2
14
+ */
15
+
16
+ import type { AgentConfig } from '../agents/agent.schema.js';
17
+ import type { Outcome } from '../outcomes/outcome.schema.js';
18
+ import type { Lead } from '../jobs/job.interface.js';
19
+ import type { ArtifactContent, AgentArtifact } from '../eval/evaluateOutcome.js';
20
+ import {
21
+ type CostTracker,
22
+ createCostTracker,
23
+ recordUsage,
24
+ isOverBudget,
25
+ } from './costTracker.js';
26
+ import {
27
+ type ModelAdapter,
28
+ type ModelResponse,
29
+ type ConversationMessage,
30
+ type ToolDefinition,
31
+ createAdapter,
32
+ } from './modelAdapter.js';
33
+ import {
34
+ type Skill,
35
+ type SkillRegistry,
36
+ allSkills,
37
+ createFilteredRegistry,
38
+ } from '../skills/index.js';
39
+
40
/**
 * Why an agent run stopped.
 *
 * Within this module, `checkKillConditions` produces `competitor_won`
 * (external kill signal), `cost_exceeded` (cost tracker over budget),
 * `attempts_exceeded` (attempt count reached the outcome's limit) and
 * `timeout` (elapsed time reached the outcome's limit). `success` is part
 * of the union but is not assigned anywhere in this module.
 *
 * @see GLOSSARY.md - Kill Reason
 */
export type KillReason =
  | 'cost_exceeded'
  | 'attempts_exceeded'
  | 'timeout'
  | 'competitor_won'
  | 'success';
51
+
52
/**
 * Lifecycle state of an agent run: still looping (`running`), finished with
 * an artifact (`completed`), or stopped early (`killed`).
 */
export type AgentRunStatus =
  | 'running'
  | 'completed'
  | 'killed';
56
+
57
/**
 * Snapshot of one agent execution: its identity, progress counters,
 * produced artifacts and, when applicable, why it stopped.
 *
 * @see design.md - Runtime System
 */
export interface AgentRun {
  /** Identifier of the agent that executed. */
  agentId: string;
  /** Identifier of the outcome the agent was working towards. */
  outcomeId: string;
  /** Where the run currently stands in its lifecycle. */
  status: AgentRunStatus;
  /** How many attempts have been started so far. */
  attempts: number;
  /** Cumulative token usage over every attempt. */
  tokensSpent: number;
  /** Every artifact the run has produced. */
  artifacts: AgentArtifact[];
  /** Set when the run was killed; explains why. */
  killReason?: KillReason;
  /** Wall-clock duration of the run, in milliseconds. */
  durationMs: number;
  /** Most recent error message recorded during the run, if any. */
  error?: string;
}
82
+
83
/**
 * Everything `runAgent` needs to execute a single agent against an outcome.
 */
export interface AgentRunConfig {
  /** The agent definition (prompt, model, limits, tool access). */
  agent: AgentConfig;
  /** The outcome the agent should try to achieve. */
  outcome: Outcome;
  /** The lead record the agent works on. */
  lead: Lead;
  /** Explicit API key; when omitted the provider's environment variable is used. */
  apiKey?: string;
  /**
   * Polled around each attempt; returning `true` stops the run with the
   * `competitor_won` kill reason (e.g., a competing agent already won).
   */
  onKillSignal?: () => boolean;
}
98
+
99
/**
 * Outcome of one attempt (including its internal retries).
 */
interface AttemptResult {
  /** `true` when the model call sequence finished without throwing. */
  success: boolean;
  /** Artifact built from the model's final response; present on success. */
  artifact?: AgentArtifact;
  /** Tokens consumed (or estimated, for failed calls) during this attempt. */
  tokensUsed: number;
  /** Failure description; present when the attempt did not succeed. */
  error?: string;
}
112
+
113
/**
 * Retry and tool-loop tuning defaults.
 */
/** Number of times a single attempt calls the model before giving up. */
const DEFAULT_MAX_RETRIES = 3;
/** Starting point for exponential backoff, in milliseconds. */
const DEFAULT_BASE_DELAY_MS = 1000;
/** Upper bound applied to the exponential backoff, in milliseconds. */
const DEFAULT_MAX_DELAY_MS = 10000;
/** Maximum number of tool-call rounds within one model conversation. */
const MAX_TOOL_ITERATIONS = 10;
120
+
121
/**
 * Calculates exponential backoff delay with jitter.
 *
 * The delay grows as `baseDelayMs * 2^attempt`, is capped at `maxDelayMs`,
 * and then receives ±25% random jitter so that concurrent callers do not
 * retry in lockstep. The jittered value is clamped again so the returned
 * delay never exceeds `maxDelayMs` and never drops below zero.
 *
 * @param attempt - Current retry attempt (0-indexed)
 * @param baseDelayMs - Base delay in milliseconds
 * @param maxDelayMs - Maximum delay cap (hard upper bound on the result)
 * @returns Delay in whole milliseconds, within `[0, maxDelayMs]`
 */
function calculateBackoffDelay(
  attempt: number,
  baseDelayMs: number = DEFAULT_BASE_DELAY_MS,
  maxDelayMs: number = DEFAULT_MAX_DELAY_MS
): number {
  // Exponential backoff: base * 2^attempt
  const exponentialDelay = baseDelayMs * 2 ** attempt;
  // Cap before jitter so the jitter magnitude is bounded as well
  const cappedDelay = Math.min(exponentialDelay, maxDelayMs);
  // Add jitter (±25%) to prevent thundering herd
  const jitter = cappedDelay * 0.25 * (Math.random() * 2 - 1);
  const jitteredDelay = Math.floor(cappedDelay + jitter);
  // Positive jitter on an already-capped delay would overshoot the cap,
  // so clamp once more to honour maxDelayMs as a real upper bound.
  return Math.min(Math.max(jitteredDelay, 0), maxDelayMs);
}
142
+
143
/**
 * Pauses the calling async flow.
 *
 * @param ms - How long to wait, in milliseconds
 * @returns A promise that settles once the timer has fired
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
151
+
152
/**
 * Builds the skill registry an agent is allowed to use.
 *
 * Delegates to `createFilteredRegistry`, passing the full skill list and the
 * agent's `toolAccess` names.
 *
 * @param toolAccess - Names of the skills the agent may access
 * @returns Registry produced by `createFilteredRegistry` for those names
 */
function loadSkillsForAgent(toolAccess: string[]): SkillRegistry {
  const permittedRegistry = createFilteredRegistry(allSkills, toolAccess);
  return permittedRegistry;
}
161
+
162
/**
 * Reads the model-facing tool definitions out of a skill registry.
 *
 * @param registry - Registry to query
 * @returns Whatever `registry.getToolDefinitions()` reports
 */
function getToolDefinitions(registry: SkillRegistry): ToolDefinition[] {
  const definitions = registry.getToolDefinitions();
  return definitions;
}
171
+
172
/**
 * Executes a skill and returns the result as a JSON string for the model.
 *
 * On success the skill's `data` is serialized; on failure an
 * `{ "error": ... }` object is serialized instead. The return value is
 * always a string: when `data` has no JSON representation (for example it
 * is `undefined`), the literal `'null'` is returned rather than letting
 * `JSON.stringify` hand back `undefined`.
 *
 * @param skill - The skill to execute
 * @param args - Arguments to pass to the skill
 * @returns JSON text describing the skill's result or its error
 */
async function executeSkill(skill: Skill, args: Record<string, unknown>): Promise<string> {
  const result = await skill.execute(args);
  if (result.success) {
    // JSON.stringify returns undefined (not a string) for undefined,
    // functions and symbols; fall back to JSON null so the tool message
    // always carries string content.
    const serialized: string | undefined = JSON.stringify(result.data);
    return serialized ?? 'null';
  }
  return JSON.stringify({ error: (result as { success: false; error: string }).error });
}
186
+
187
/**
 * Assembles the full prompt sent to the model.
 *
 * The agent's own prompt always comes first. What follows depends on the
 * outcome name: `code_review_battle` and `lead_gen_battle` append a fixed
 * challenge description with a required JSON response shape, while every
 * other outcome appends the lead's details.
 *
 * @param agentPrompt - The agent's base prompt
 * @param outcome - The outcome being attempted (only its `name` is read)
 * @param lead - Lead data; only used by the default (sales-style) branch
 * @returns Complete prompt string
 */
function buildPrompt(agentPrompt: string, outcome: Outcome, lead: Lead): string {
  switch (outcome.name) {
    case 'code_review_battle':
      return `${agentPrompt}

## Code Review Challenge

You are performing a code review battle. Your task is to analyze the following vulnerable code and provide a comprehensive review.

**Source Code to Review:**
\`\`\`javascript
function authenticateUser(username, password) {
const query = "SELECT * FROM users WHERE username = '" + username + "' AND password = '" + password + "'";
const result = db.query(query);

if (result.length > 0) {
for (let i = 0; i < result.length; i++) {
const user = result[i];
const permissions = db.query("SELECT * FROM permissions WHERE user_id = " + user.id);
if (permissions.length > 0) {
return { success: true, user: user, permissions: permissions };
}
}
}
return { success: false };
}
\`\`\`

**Your Response Must Be Valid JSON** with this exact structure:
\`\`\`json
{
"issues": [
{
"type": "security" | "performance" | "style" | "logic",
"severity": "CRITICAL" | "HIGH" | "MEDIUM" | "LOW",
"description": "Description of the issue",
"lineNumber": 1
}
],
"comments": [
{
"lineContent": "exact line from code above",
"comment": "your comment about this line",
"lineNumber": 1
}
],
"refactorSuggestion": {
"originalComplexity": 8,
"suggestedComplexity": 4,
"description": "How to refactor to reduce complexity"
}
}
\`\`\`

**Requirements:**
- Identify at least one CRITICAL security vulnerability (SQL injection, XSS)
- Identify at least one performance bottleneck (N+1 queries)
- All comments must reference actual lines from the source code above
- Suggest refactoring that reduces cyclomatic complexity by at least 2 points
- Respond ONLY with valid JSON, no additional text`;

    case 'lead_gen_battle':
      return `${agentPrompt}

## Lead Generation Challenge

You are competing in a lead generation precision battle. Your task is to generate a qualified lead that meets all validation criteria.

**Your Response Must Be Valid JSON** with this exact structure:
\`\`\`json
{
"email": "valid.email@company.com",
"companySize": 100,
"role": "Decision Maker Role",
"linkedIn": "https://www.linkedin.com/in/profile"
}
\`\`\`

**Requirements:**
- Email must have valid format (user@domain.com)
- Company size must be at least 50 employees
- Role must NOT be "intern" or "student" (case-insensitive)
- LinkedIn URL must start with "https://www.linkedin.com/in/"
- Respond ONLY with valid JSON, no additional text`;

    default: {
      // Default for qualified_sales_interest and other outcomes
      const interactionSummary = lead.previousInteractions.join(', ') || 'None';
      const sourceLine = lead.leadSource ? `- Lead Source: ${lead.leadSource}` : '';
      const originLine = lead.leadOrigin ? `- Lead Origin: ${lead.leadOrigin}` : '';
      return `${agentPrompt}

## Current Lead Information
- Email: ${lead.email}
- Company: ${lead.company}
- Company Size: ${lead.companySize} employees
- Role: ${lead.role}
- Previous Interactions: ${interactionSummary}
${sourceLine}
${originLine}

Please engage with this lead and work towards qualifying them based on the criteria outlined above.`;
    }
  }
}
295
+
296
/**
 * Shapes raw model output into an `ArtifactContent` value.
 *
 * If `content` is JSON describing an object that already carries both a
 * `message` and a `targetEmail` key, that object is returned as-is.
 * Otherwise the raw text becomes the `message` and the target fields are
 * filled in from the lead. Agents are expected to generate real content;
 * this function only provides structure.
 *
 * @param outcomeId - ID of the outcome; currently every outcome shares the
 *   same artifact structure, so the value does not affect the result
 * @param content - Raw text returned by the model
 * @param lead - Lead whose details populate the target fields
 * @returns The structured artifact content
 */
function createArtifactContent(outcomeId: string, content: string, lead: Lead): ArtifactContent {
  // Kept for interface compatibility; all outcomes produce the same structure.
  void outcomeId;

  // Try to parse the content as JSON first (for structured outcomes)
  try {
    const parsed: unknown = JSON.parse(content);
    const isPrebuiltArtifact =
      typeof parsed === 'object' &&
      parsed !== null &&
      'message' in parsed &&
      'targetEmail' in parsed;
    if (isPrebuiltArtifact) {
      return parsed as ArtifactContent;
    }
  } catch {
    // Content is not JSON, continue with text processing
  }

  // Single fallback shared by code_review_battle, lead_gen_battle,
  // qualified_sales_interest and any other outcome.
  return {
    message: content,
    targetEmail: lead.email,
    targetCompany: lead.company,
    targetCompanySize: lead.companySize,
    targetRole: lead.role,
  };
}
342
+
343
/**
 * Wraps model output in an `AgentArtifact` record.
 *
 * The content is structured via `createArtifactContent` and the artifact is
 * stamped with the current time in ISO-8601 form.
 *
 * @param agentId - ID of the agent that produced the output
 * @param outcomeId - ID of the outcome being attempted
 * @param attemptNumber - Attempt during which the output was produced
 * @param content - Raw generated content
 * @param lead - Lead data used when structuring the content
 * @returns The assembled artifact
 */
function createArtifact(
  agentId: string,
  outcomeId: string,
  attemptNumber: number,
  content: string,
  lead: Lead
): AgentArtifact {
  const structuredContent = createArtifactContent(outcomeId, content, lead);
  const createdAt = new Date().toISOString();

  return {
    agentId,
    outcomeId,
    attemptNumber,
    content: structuredContent,
    timestamp: createdAt,
  };
}
368
+
369
/**
 * Runs one attempt against the model, retrying on thrown errors and
 * servicing tool calls along the way.
 *
 * Each retry sends the prompt afresh. If the model asks for tools, the
 * requested skills are executed and their results fed back, for at most
 * `MAX_TOOL_ITERATIONS` rounds. The final response content is turned into
 * an artifact. When a call throws, the prompt's token count is added as an
 * estimate and, unless it was the last retry, the function backs off before
 * trying again.
 *
 * @param adapter - Model adapter to use
 * @param prompt - Prompt to send
 * @param agentId - Agent ID recorded on the artifact
 * @param outcomeId - Outcome ID recorded on the artifact
 * @param attemptNumber - Attempt number recorded on the artifact
 * @param lead - Lead data used when building the artifact
 * @param skillRegistry - Skills the model may call; omit for no tool support
 * @returns Success with artifact and token usage, or failure with the last error
 */
async function executeAttemptWithRetry(
  adapter: ModelAdapter,
  prompt: string,
  agentId: string,
  outcomeId: string,
  attemptNumber: number,
  lead: Lead,
  skillRegistry?: SkillRegistry
): Promise<AttemptResult> {
  // Tool definitions are only offered when a registry with at least one skill exists
  const tools = skillRegistry ? getToolDefinitions(skillRegistry) : undefined;
  const hasTools = tools && tools.length > 0;

  let tokensSoFar = 0;
  let mostRecentError: string | undefined;

  for (let retryIndex = 0; retryIndex < DEFAULT_MAX_RETRIES; retryIndex += 1) {
    try {
      let response: ModelResponse = await adapter.complete(prompt, {
        maxTokens: 1024,
        temperature: 0.7,
        tools: hasTools ? tools : undefined,
      });
      tokensSoFar += response.tokensUsed;

      if (hasTools && response.toolCalls && response.toolCalls.length > 0) {
        const conversation: ConversationMessage[] = [{ role: 'user', content: prompt }];

        // Keep answering tool requests until the model stops asking or the round limit is hit
        for (
          let round = 0;
          round < MAX_TOOL_ITERATIONS && response.toolCalls && response.toolCalls.length > 0;
          round += 1
        ) {
          // Record the assistant turn that requested the tools
          conversation.push({
            role: 'assistant',
            content: response.content,
            toolCalls: response.toolCalls,
          });

          // Run the requested tools one after another, in request order
          for (const toolCall of response.toolCalls) {
            const skill = skillRegistry!.get(toolCall.name);
            const resultContent: string = skill
              ? await executeSkill(skill, toolCall.arguments)
              : JSON.stringify({
                  error: `Unknown tool: ${toolCall.name}`,
                });

            conversation.push({
              role: 'tool',
              content: resultContent,
              toolCallId: toolCall.id,
            });
          }

          // Hand the tool results back to the model
          response = await adapter.continueWithToolResults(conversation, {
            maxTokens: 1024,
            temperature: 0.7,
            tools,
          });
          tokensSoFar += response.tokensUsed;
        }
      }

      return {
        success: true,
        artifact: createArtifact(agentId, outcomeId, attemptNumber, response.content, lead),
        tokensUsed: tokensSoFar,
      };
    } catch (error) {
      mostRecentError = error instanceof Error ? error.message : String(error);
      // Estimate tokens for the failed call from the prompt itself
      tokensSoFar += adapter.countTokens(prompt);

      // No point waiting after the final retry
      const isLastRetry = retryIndex >= DEFAULT_MAX_RETRIES - 1;
      if (!isLastRetry) {
        await sleep(calculateBackoffDelay(retryIndex));
      }
    }
  }

  return {
    success: false,
    tokensUsed: tokensSoFar,
    error: `Failed after ${DEFAULT_MAX_RETRIES} retries: ${mostRecentError}`,
  };
}
493
+
494
/**
 * Decides whether an agent run must stop right now.
 *
 * Conditions are evaluated in a fixed order and the first match wins:
 * external kill signal, cost ceiling, attempt limit, then runtime limit.
 *
 * @param attempts - Attempts made so far
 * @param maxAttempts - Maximum allowed attempts
 * @param costTracker - Cost tracker consulted for the budget check
 * @param startTime - Run start time (epoch milliseconds)
 * @param timeLimitMs - Time limit in milliseconds
 * @param onKillSignal - Optional external kill signal callback
 * @returns The matching KillReason, or null when the run may continue
 */
function checkKillConditions(
  attempts: number,
  maxAttempts: number,
  costTracker: CostTracker,
  startTime: number,
  timeLimitMs: number,
  onKillSignal?: () => boolean
): KillReason | null {
  // External kill signal (competitor won)
  const externallyKilled = onKillSignal ? onKillSignal() : false;
  if (externallyKilled) return 'competitor_won';

  // Cost ceiling (Requirement 10.1)
  if (isOverBudget(costTracker)) return 'cost_exceeded';

  // Attempt limit (Requirement 4.3)
  if (attempts >= maxAttempts) return 'attempts_exceeded';

  // Runtime limit (Requirement 10.2)
  const elapsedMs = Date.now() - startTime;
  return elapsedMs >= timeLimitMs ? 'timeout' : null;
}
536
+
537
/**
 * Runs an agent to achieve an outcome.
 *
 * Flow:
 * 1. Set up cost tracking (Requirement 4.2), the run record and, when the
 *    agent has tool access, its skill registry (Requirement 11.2).
 * 2. Resolve an API key and create the model adapter; any failure here ends
 *    the run as `killed` with the error message recorded.
 * 3. Loop: check kill conditions (attempt limit - Requirement 4.3, token cap
 *    - Requirement 10.1, runtime limit - Requirement 10.2, external signal),
 *    execute an attempt (with internal retries and exponential backoff),
 *    record token usage, and either complete on the first successful
 *    attempt or re-check the kill conditions after a failed one.
 *
 * Evaluation of the produced artifact is not done here; it happens in the
 * league system.
 *
 * @param config - Agent run configuration
 * @returns AgentRun result
 *
 * @example
 * const result = await runAgent({
 *   agent: salesAgent,
 *   outcome: qualifiedSalesInterest,
 *   lead: leadData,
 * });
 *
 * @see Requirements 4.2, 4.3, 10.1, 10.2, 11.2
 */
export async function runAgent(config: AgentRunConfig): Promise<AgentRun> {
  const { agent, outcome, lead, apiKey, onKillSignal } = config;
  const startTime = Date.now();

  // Cost tracking (Requirement 4.2)
  const costTracker = createCostTracker(agent.id, agent.costCeiling);

  const run: AgentRun = {
    agentId: agent.id,
    outcomeId: outcome.name,
    status: 'running',
    attempts: 0,
    tokensSpent: 0,
    artifacts: [],
    durationMs: 0,
  };

  // Skills are only loaded when the agent lists any tool access (Requirement 11.2)
  const skillRegistry =
    agent.toolAccess.length > 0 ? loadSkillsForAgent(agent.toolAccess) : undefined;

  // Evaluates the kill conditions against the run's current state
  const currentKillReason = (): KillReason | null =>
    checkKillConditions(
      run.attempts,
      outcome.maxAttempts,
      costTracker,
      startTime,
      outcome.timeLimitMs,
      onKillSignal
    );

  // Model adapter setup; a missing key or adapter failure kills the run
  let adapter: ModelAdapter;
  try {
    const envVarName =
      agent.modelProvider === 'claude' ? 'ANTHROPIC_API_KEY' : 'OPENAI_API_KEY';
    const resolvedApiKey = apiKey ?? process.env[envVarName];

    if (!resolvedApiKey) {
      throw new Error(
        `Missing API key for ${agent.modelProvider}. Set ${envVarName} environment variable.`
      );
    }

    adapter = await createAdapter({
      // Ollama agents are served through the OpenAI adapter
      provider: agent.modelProvider === 'ollama' ? 'openai' : agent.modelProvider,
      modelId: agent.modelId,
      apiKey: resolvedApiKey,
    });
  } catch (error) {
    run.status = 'killed';
    run.error = error instanceof Error ? error.message : String(error);
    run.durationMs = Date.now() - startTime;
    return run;
  }

  // Prompt with outcome-specific context
  const prompt = buildPrompt(agent.prompt, outcome, lead);

  // Main attempt loop
  while (run.status === 'running') {
    // Limits are checked before every attempt
    const preAttemptReason = currentKillReason();
    if (preAttemptReason) {
      run.status = 'killed';
      run.killReason = preAttemptReason;
      break;
    }

    run.attempts += 1;
    const attemptResult = await executeAttemptWithRetry(
      adapter,
      prompt,
      agent.id,
      outcome.name,
      run.attempts,
      lead,
      skillRegistry
    );

    // Record token usage (Requirement 10.5)
    recordUsage(costTracker, attemptResult.tokensUsed);
    run.tokensSpent = costTracker.tokensSpent;

    if (attemptResult.success && attemptResult.artifact) {
      // First successful attempt completes the run - evaluation happens in league system
      run.artifacts.push(attemptResult.artifact);
      run.status = 'completed';
      continue;
    }

    if (!attemptResult.error) {
      continue;
    }

    // Remember the error; the run continues if limits allow
    run.error = attemptResult.error;

    // The failed attempt may itself have exhausted a limit
    const postAttemptReason = currentKillReason();
    if (postAttemptReason) {
      run.status = 'killed';
      run.killReason = postAttemptReason;
    }
  }

  run.durationMs = Date.now() - startTime;
  return run;
}
677
+
678
/**
 * Produces a canned agent run without calling any model API (for tests).
 *
 * Waits for a short delay derived from the agent ID (100-299 ms, the same
 * for a given ID every time), then returns a completed run holding a single
 * artifact: a two-task plan for the `swarm_planner` outcome, or a fixed
 * sales-style message for every other outcome.
 *
 * @param config - Agent run configuration (`agent`, `outcome` and `lead` are used)
 * @returns Mock AgentRun result with one attempt and 500 tokens spent
 */
export async function runAgentMock(config: AgentRunConfig): Promise<AgentRun> {
  const { agent, outcome, lead } = config;
  const startTime = Date.now();

  // Fold the agent ID's UTF-16 code units into a 32-bit hash so the
  // simulated processing time is stable per agent.
  const idHash = agent.id
    .split('')
    .reduce((acc, unit) => ((acc << 5) - acc + unit.charCodeAt(0)) & 0xffffffff, 0);
  await sleep(100 + (Math.abs(idHash) % 200));

  // Mock artifact content depends on the outcome
  const mockContent: string =
    outcome.name === 'swarm_planner'
      ? JSON.stringify({
          tasks: [
            {
              id: 'task-1',
              description: 'Mock task 1: Analyze input data',
              input: { data: lead },
              priority: 1,
            },
            {
              id: 'task-2',
              description: 'Mock task 2: Generate response',
              input: { data: lead },
              priority: 2,
            },
          ],
        })
      : `Hello! I've analyzed ${lead.company} and their mission.
Score: 9/10. Fit: Excellent. Evidence: They are a high-growth product company.
Summary: Strong ICP match with high revenue potential.
Recommendation: Prioritize for enterprise outreach.
Based on our conversation, I understand that you're looking for a solution to improve your sales process. With your team of ${lead.companySize} employees, you're at the perfect size to benefit from our platform.

I'd love to schedule a demo to show you how we can help. The pricing starts at $99/month for teams your size, and we can discuss next steps during a call.

Could you confirm your email at ${lead.email} so I can send over the meeting invite?`;

  const artifact = createArtifact(agent.id, outcome.name, 1, mockContent, lead);

  const mockRun: AgentRun = {
    agentId: agent.id,
    outcomeId: outcome.name,
    status: 'completed',
    attempts: 1,
    tokensSpent: 500,
    artifacts: [artifact],
    durationMs: Date.now() - startTime,
  };
  return mockRun;
}