outcome-cli 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. package/README.md +261 -0
  2. package/package.json +95 -0
  3. package/src/agents/README.md +139 -0
  4. package/src/agents/adapters/anthropic.adapter.ts +166 -0
  5. package/src/agents/adapters/dalle.adapter.ts +145 -0
  6. package/src/agents/adapters/gemini.adapter.ts +134 -0
  7. package/src/agents/adapters/imagen.adapter.ts +106 -0
  8. package/src/agents/adapters/nano-banana.adapter.ts +129 -0
  9. package/src/agents/adapters/openai.adapter.ts +165 -0
  10. package/src/agents/adapters/veo.adapter.ts +130 -0
  11. package/src/agents/agent.schema.property.test.ts +379 -0
  12. package/src/agents/agent.schema.test.ts +148 -0
  13. package/src/agents/agent.schema.ts +263 -0
  14. package/src/agents/index.ts +60 -0
  15. package/src/agents/registered-agent.schema.ts +356 -0
  16. package/src/agents/registry.ts +97 -0
  17. package/src/agents/tournament-configs.property.test.ts +266 -0
  18. package/src/cli/README.md +145 -0
  19. package/src/cli/commands/define.ts +79 -0
  20. package/src/cli/commands/list.ts +46 -0
  21. package/src/cli/commands/logs.ts +83 -0
  22. package/src/cli/commands/run.ts +416 -0
  23. package/src/cli/commands/verify.ts +110 -0
  24. package/src/cli/index.ts +81 -0
  25. package/src/config/README.md +128 -0
  26. package/src/config/env.ts +262 -0
  27. package/src/config/index.ts +19 -0
  28. package/src/eval/README.md +318 -0
  29. package/src/eval/ai-judge.test.ts +435 -0
  30. package/src/eval/ai-judge.ts +368 -0
  31. package/src/eval/code-validators.ts +414 -0
  32. package/src/eval/evaluateOutcome.property.test.ts +1174 -0
  33. package/src/eval/evaluateOutcome.ts +591 -0
  34. package/src/eval/immigration-validators.ts +122 -0
  35. package/src/eval/index.ts +90 -0
  36. package/src/eval/judge-cache.ts +402 -0
  37. package/src/eval/tournament-validators.property.test.ts +439 -0
  38. package/src/eval/validators.property.test.ts +1118 -0
  39. package/src/eval/validators.ts +1199 -0
  40. package/src/eval/weighted-scorer.ts +285 -0
  41. package/src/index.ts +17 -0
  42. package/src/league/README.md +188 -0
  43. package/src/league/health-check.ts +353 -0
  44. package/src/league/index.ts +93 -0
  45. package/src/league/killAgent.ts +151 -0
  46. package/src/league/league.test.ts +1151 -0
  47. package/src/league/runLeague.ts +843 -0
  48. package/src/league/scoreAgent.ts +175 -0
  49. package/src/modules/omnibridge/__tests__/.gitkeep +1 -0
  50. package/src/modules/omnibridge/__tests__/auth-tunnel.property.test.ts +524 -0
  51. package/src/modules/omnibridge/__tests__/deterministic-logger.property.test.ts +965 -0
  52. package/src/modules/omnibridge/__tests__/ghost-api.property.test.ts +461 -0
  53. package/src/modules/omnibridge/__tests__/omnibridge-integration.test.ts +542 -0
  54. package/src/modules/omnibridge/__tests__/parallel-executor.property.test.ts +671 -0
  55. package/src/modules/omnibridge/__tests__/semantic-normalizer.property.test.ts +521 -0
  56. package/src/modules/omnibridge/__tests__/semantic-normalizer.test.ts +254 -0
  57. package/src/modules/omnibridge/__tests__/session-vault.property.test.ts +367 -0
  58. package/src/modules/omnibridge/__tests__/shadow-session.property.test.ts +523 -0
  59. package/src/modules/omnibridge/__tests__/triangulation-engine.property.test.ts +292 -0
  60. package/src/modules/omnibridge/__tests__/verification-engine.property.test.ts +769 -0
  61. package/src/modules/omnibridge/api/.gitkeep +1 -0
  62. package/src/modules/omnibridge/api/ghost-api.ts +1087 -0
  63. package/src/modules/omnibridge/auth/.gitkeep +1 -0
  64. package/src/modules/omnibridge/auth/auth-tunnel.ts +843 -0
  65. package/src/modules/omnibridge/auth/session-vault.ts +577 -0
  66. package/src/modules/omnibridge/core/.gitkeep +1 -0
  67. package/src/modules/omnibridge/core/semantic-normalizer.ts +702 -0
  68. package/src/modules/omnibridge/core/triangulation-engine.ts +530 -0
  69. package/src/modules/omnibridge/core/types.ts +610 -0
  70. package/src/modules/omnibridge/execution/.gitkeep +1 -0
  71. package/src/modules/omnibridge/execution/deterministic-logger.ts +629 -0
  72. package/src/modules/omnibridge/execution/parallel-executor.ts +542 -0
  73. package/src/modules/omnibridge/execution/shadow-session.ts +794 -0
  74. package/src/modules/omnibridge/index.ts +212 -0
  75. package/src/modules/omnibridge/omnibridge.ts +510 -0
  76. package/src/modules/omnibridge/verification/.gitkeep +1 -0
  77. package/src/modules/omnibridge/verification/verification-engine.ts +783 -0
  78. package/src/outcomes/README.md +75 -0
  79. package/src/outcomes/acquire-pilot-customer.ts +297 -0
  80. package/src/outcomes/code-delivery-outcomes.ts +89 -0
  81. package/src/outcomes/code-outcomes.ts +256 -0
  82. package/src/outcomes/code_review_battle.test.ts +135 -0
  83. package/src/outcomes/code_review_battle.ts +135 -0
  84. package/src/outcomes/cold_email_battle.ts +97 -0
  85. package/src/outcomes/content_creation_battle.ts +160 -0
  86. package/src/outcomes/f1_stem_opt_compliance.ts +61 -0
  87. package/src/outcomes/index.ts +107 -0
  88. package/src/outcomes/lead_gen_battle.test.ts +113 -0
  89. package/src/outcomes/lead_gen_battle.ts +99 -0
  90. package/src/outcomes/outcome.schema.property.test.ts +229 -0
  91. package/src/outcomes/outcome.schema.ts +187 -0
  92. package/src/outcomes/qualified_sales_interest.ts +118 -0
  93. package/src/outcomes/swarm_planner.property.test.ts +370 -0
  94. package/src/outcomes/swarm_planner.ts +96 -0
  95. package/src/outcomes/web_extraction.ts +234 -0
  96. package/src/runtime/README.md +220 -0
  97. package/src/runtime/agentRunner.test.ts +341 -0
  98. package/src/runtime/agentRunner.ts +746 -0
  99. package/src/runtime/claudeAdapter.ts +232 -0
  100. package/src/runtime/costTracker.ts +123 -0
  101. package/src/runtime/index.ts +34 -0
  102. package/src/runtime/modelAdapter.property.test.ts +305 -0
  103. package/src/runtime/modelAdapter.ts +144 -0
  104. package/src/runtime/openaiAdapter.ts +235 -0
  105. package/src/utils/README.md +122 -0
  106. package/src/utils/command-runner.ts +134 -0
  107. package/src/utils/cost-guard.ts +379 -0
  108. package/src/utils/errors.test.ts +290 -0
  109. package/src/utils/errors.ts +442 -0
  110. package/src/utils/index.ts +37 -0
  111. package/src/utils/logger.test.ts +361 -0
  112. package/src/utils/logger.ts +419 -0
  113. package/src/utils/output-parsers.ts +216 -0
@@ -0,0 +1,285 @@
1
+ /**
2
+ * Weighted Scoring System
3
+ *
4
+ * Implements granular scoring with weighted criteria instead of binary evaluation.
5
+ * Agents are rewarded for partial success based on criterion importance.
6
+ *
7
+ * @module eval/weighted-scorer
8
+ * @see Requirements 9.1, 9.2, 9.3, 9.4
9
+ */
10
+
11
+ /**
12
+ * Result returned by a single criterion validator.
13
+ *
14
+ * Unlike a binary pass/fail check, it carries a numeric score (0.0 to 1.0)
15
+ * representing partial success; calculateWeightedScore reads only `score` and `reason`.
16
+ *
17
+ * @see Requirements 9.1
18
+ */
19
+ export interface WeightedValidationResult {
20
+ /** Whether the validator considers the criterion passed (score >= a threshold chosen by the validator) */
21
+ success: boolean;
22
+ /** Numeric score, nominally 0.0 to 1.0; calculateWeightedScore clamps out-of-range values */
23
+ score: number;
24
+ /** Human-readable reason for the result */
25
+ reason: string;
26
+ /** Optional additional details (free-form key/value data) */
27
+ details?: Record<string, unknown>;
28
+ }
29
+
30
+ // Default DAWS weights for code-generation tasks (Outcome pivot); the five weights below sum to 1.0.
31
+ // GCR: goal completion (tests pass), QAS: quality alignment (AI review), CEF: cost efficiency, DTT: decision turnaround time, RES: resilience
32
+ export const DAWS_CODE_WEIGHTS = {
33
+ GCR: 0.35,
34
+ QAS: 0.25,
35
+ CEF: 0.15,
36
+ DTT: 0.15,
37
+ RES: 0.10,
38
+ } as const;
39
+
40
+ export type DawsMetric = keyof typeof DAWS_CODE_WEIGHTS;
41
+
42
+ export interface DawsScores {
43
+ GCR: number; // goal completion: tests / correctness; calculateDawsCodeScore marks it passed only at >= 1
44
+ QAS: number; // quality alignment: AI review score (0..1); pass mark 0.7
45
+ CEF: number; // cost efficiency, normalized 0..1; pass mark 0.5
46
+ DTT: number; // decision turnaround time (speed), normalized 0..1; pass mark 0.5
47
+ RES: number; // resilience / retries, normalized 0..1; pass mark 0.5
48
+ }
49
+
50
/**
 * Scores a code-generation run using the DAWS code-weight preset.
 *
 * Every DAWS metric is turned into one weighted criterion whose validator
 * reports the metric value taken from `scores`, a per-metric pass flag and a
 * fixed reason string. The criteria are then handed to
 * `calculateWeightedScore` with its default pass threshold.
 *
 * @param scores - Metric values for the run, one per DAWS metric
 * @returns The scoring result produced by `calculateWeightedScore`
 */
export function calculateDawsCodeScore(scores: DawsScores): ScoringResult {
  // Builds the criterion for one metric; the validator ignores its artifact
  // argument and reads the metric from `scores` when it is invoked.
  const criterionFor = (
    metric: keyof typeof DAWS_CODE_WEIGHTS,
    passMark: number,
    reason: string
  ): WeightedCriterion => ({
    name: metric,
    weight: DAWS_CODE_WEIGHTS[metric],
    validator: () => ({
      success: scores[metric] >= passMark,
      score: scores[metric],
      reason,
    }),
  });

  const daws: WeightedCriterion[] = [
    criterionFor('GCR', 1, 'Goal completion (tests)'),
    criterionFor('QAS', 0.7, 'Quality alignment'),
    criterionFor('CEF', 0.5, 'Cost efficiency'),
    criterionFor('DTT', 0.5, 'Decision turnaround time'),
    criterionFor('RES', 0.5, 'Resilience'),
  ];

  return calculateWeightedScore(scores, daws);
}
64
+
65
+ /**
66
+ * One scoring criterion: a named validator together with its weight.
67
+ *
68
+ * The weight is the criterion's share of the final score (calculateWeightedScore multiplies it by the criterion's score).
69
+ * The sum of all weights should equal 1.0; validateWeights checks the sum, calculateWeightedScore itself does not.
70
+ *
71
+ * @see Requirements 9.1
72
+ */
73
+ export interface WeightedCriterion {
74
+ /** Name of the criterion; copied into the matching CriterionEvaluationResult */
75
+ name: string;
76
+ /** Weight of this criterion (0.0 to 1.0, sum of all weights should be 1.0) */
77
+ weight: number;
78
+ /** Validator invoked with the artifact under evaluation; if it throws, calculateWeightedScore records a score of 0 */
79
+ validator: (artifact: unknown) => WeightedValidationResult;
80
+ }
81
+
82
+ /**
83
+ * Per-criterion outcome as recorded by calculateWeightedScore.
84
+ */
85
+ export interface CriterionEvaluationResult {
86
+ /** Name of the criterion (copied from the WeightedCriterion) */
87
+ name: string;
88
+ /** Weight of this criterion (copied from the WeightedCriterion) */
89
+ weight: number;
90
+ /** Score achieved: the validator's score clamped to the 0.0 to 1.0 range, or 0 if the validator threw */
91
+ score: number;
92
+ /** Human-readable reason from the validator, or "Validator error: <message>" if the validator threw */
93
+ reason: string;
94
+ }
95
+
96
+ /**
97
+ * Final scoring result returned by calculateWeightedScore after evaluating all criteria.
98
+ *
99
+ * @see Requirements 9.2
100
+ */
101
+ export interface ScoringResult {
102
+ /** Sum of weight x score over all criteria; stays within 0.0 to 1.0 when the weights are non-negative and sum to 1.0 */
103
+ finalScore: number;
104
+ /** Whether the artifact passed (finalScore >= passThreshold; the threshold defaults to 0.7) */
105
+ passed: boolean;
106
+ /** Results for each individual criterion, in the same order as the input criteria */
107
+ criteriaResults: CriterionEvaluationResult[];
108
+ }
109
+
110
+ /**
111
+ * Leaderboard entry with weighted scoring fields, as built by createLeaderboardEntry and ordered by rankLeaderboardEntries.
112
+ *
113
+ * @see Requirements 9.3, 9.4
114
+ */
115
+ export interface WeightedLeaderboardEntry {
116
+ /** Rank position (1-indexed); createLeaderboardEntry uses 0 as a placeholder until rankLeaderboardEntries assigns it */
117
+ rank: number;
118
+ /** Agent identifier */
119
+ agentId: string;
120
+ /** Agent display name */
121
+ agentName: string;
122
+ /** Owner user ID */
123
+ userId: string;
124
+ /** Cumulative weighted score: the sum (not the average) of the scores across all battles */
125
+ cumulativeScore: number;
126
+ /** Total tokens used across all battles */
127
+ totalTokensUsed: number;
128
+ /** Efficiency: score per token (used as tiebreaker); Infinity when the score is positive and no tokens were used */
129
+ efficiency: number;
130
+ /** Number of battles participated */
131
+ battlesCount: number;
132
+ /** Total earnings in USD */
133
+ totalEarnings: number;
134
+ }
135
+
136
/**
 * Validates a set of criterion weights.
 *
 * The weights are valid when every weight is a finite, non-negative number
 * and the weights sum to 1.0 within a small floating-point tolerance.
 * Checking only the sum is not enough: a list such as `[1.5, -0.5]` sums to
 * 1.0 but would let the weighted score leave the 0.0 to 1.0 range.
 *
 * An empty list is invalid (its sum is 0).
 *
 * @param criteria - Array of weighted criteria
 * @returns true if all weights are finite and non-negative and sum to 1.0
 *          (within floating point tolerance)
 */
export function validateWeights(criteria: WeightedCriterion[]): boolean {
  const WEIGHT_SUM_TOLERANCE = 0.0001;

  let sum = 0;
  for (const { weight } of criteria) {
    // Reject NaN, +/-Infinity and negative weights outright; a weight above
    // 1.0 is already caught by the sum check once negatives are excluded.
    if (!Number.isFinite(weight) || weight < 0) {
      return false;
    }
    sum += weight;
  }

  return Math.abs(sum - 1.0) < WEIGHT_SUM_TOLERANCE;
}
146
+
147
/**
 * Calculates the weighted score for an artifact against a set of criteria.
 *
 * The final score is the weighted sum of all criterion scores:
 *   finalScore = Σ(weight_i × score_i)
 * which is a weighted average when the weights sum to 1.0. The weights are
 * used as given; this function does not check that they sum to 1.0.
 *
 * Each validator's score is clamped to [0, 1]. Validators fail closed: a
 * validator that throws, or that returns a score that is NaN or not a number,
 * contributes a score of 0 and a "Validator error: ..." reason, so a single
 * bad validator cannot turn the final score into NaN.
 *
 * @param artifact - The artifact to evaluate
 * @param criteria - Array of weighted criteria to evaluate against
 * @param passThreshold - Minimum score to pass (default 0.7)
 * @returns ScoringResult with final score and individual criterion results,
 *          in the same order as `criteria`
 *
 * @example
 * const result = calculateWeightedScore(artifact, [
 *   { name: 'accuracy', weight: 0.4, validator: validateAccuracy },
 *   { name: 'completeness', weight: 0.3, validator: validateCompleteness },
 *   { name: 'format', weight: 0.3, validator: validateFormat },
 * ]);
 *
 * @see Requirements 9.2
 */
export function calculateWeightedScore(
  artifact: unknown,
  criteria: WeightedCriterion[],
  passThreshold: number = 0.7
): ScoringResult {
  // Evaluate each criterion
  const criteriaResults: CriterionEvaluationResult[] = criteria.map((criterion) => {
    const { name, weight } = criterion;
    try {
      const result = criterion.validator(artifact);
      // Treated as unknown because a misbehaving validator may not honour its type.
      const rawScore: unknown = result.score;

      if (typeof rawScore !== 'number' || Number.isNaN(rawScore)) {
        // Fail closed: Math.min/Math.max would propagate NaN into finalScore.
        return {
          name,
          weight,
          score: 0,
          reason: `Validator error: invalid score (${String(rawScore)})`,
        };
      }

      return {
        name,
        weight,
        score: Math.max(0, Math.min(1, rawScore)), // Clamp to [0, 1]
        reason: result.reason,
      };
    } catch (error) {
      // Fail closed on validator errors
      const errorMessage = error instanceof Error ? error.message : 'Unknown error';
      return {
        name,
        weight,
        score: 0,
        reason: `Validator error: ${errorMessage}`,
      };
    }
  });

  // Weighted sum of the per-criterion scores
  const finalScore = criteriaResults.reduce(
    (sum, result) => sum + result.weight * result.score,
    0
  );

  return {
    finalScore,
    passed: finalScore >= passThreshold,
    criteriaResults,
  };
}
206
+
207
/**
 * Computes score-per-token efficiency, the leaderboard tiebreaker.
 *
 * When no tokens were used (`tokensUsed` is zero or negative) the ratio is
 * undefined, so a positive score is reported as `Infinity` and any other
 * score as 0.
 *
 * @param score - The weighted score achieved
 * @param tokensUsed - Number of tokens used
 * @returns Efficiency ratio (higher is better)
 *
 * @see Requirements 9.4
 */
export function calculateEfficiency(score: number, tokensUsed: number): number {
  const noTokensSpent = tokensUsed <= 0;

  if (!noTokensSpent) {
    return score / tokensUsed;
  }
  if (score > 0) {
    return Infinity;
  }
  return 0;
}
222
+
223
/**
 * Orders leaderboard entries and assigns their rank positions.
 *
 * Entries are ordered by cumulative score, highest first. Two entries whose
 * cumulative scores differ by no more than 0.0001 are treated as tied and are
 * ordered by efficiency (score per token), highest first.
 *
 * The input array is not modified; the result contains copies of the entries
 * with `rank` set to the 1-indexed position.
 *
 * @param entries - Array of leaderboard entries to rank
 * @returns Sorted array with updated rank positions
 *
 * @see Requirements 9.3, 9.4
 */
export function rankLeaderboardEntries(
  entries: WeightedLeaderboardEntry[]
): WeightedLeaderboardEntry[] {
  const SCORE_TIE_TOLERANCE = 0.0001;

  const byScoreThenEfficiency = (
    a: WeightedLeaderboardEntry,
    b: WeightedLeaderboardEntry
  ): number => {
    const scoreGap = b.cumulativeScore - a.cumulativeScore;
    // Outside the tolerance the score decides; otherwise efficiency breaks the tie.
    return Math.abs(scoreGap) > SCORE_TIE_TOLERANCE
      ? scoreGap
      : b.efficiency - a.efficiency;
  };

  const ordered = entries.slice().sort(byScoreThenEfficiency);

  return ordered.map((entry, position) => ({
    ...entry,
    rank: position + 1,
  }));
}
254
+
255
/**
 * Builds a leaderboard entry for one agent by totalling its battle results.
 *
 * Score, tokens and earnings are each summed over `battleResults`; efficiency
 * is derived from the two totals via `calculateEfficiency`. The returned
 * `rank` is the placeholder 0 and is meant to be filled in by
 * `rankLeaderboardEntries`.
 *
 * @param agentId - Agent identifier
 * @param agentName - Agent display name
 * @param userId - Owner user ID
 * @param battleResults - Array of battle results with scores and tokens
 * @returns WeightedLeaderboardEntry with calculated fields
 */
export function createLeaderboardEntry(
  agentId: string,
  agentName: string,
  userId: string,
  battleResults: Array<{ score: number; tokensUsed: number; earnings: number }>
): WeightedLeaderboardEntry {
  let cumulativeScore = 0;
  let totalTokensUsed = 0;
  let totalEarnings = 0;

  for (const battle of battleResults) {
    cumulativeScore += battle.score;
    totalTokensUsed += battle.tokensUsed;
    totalEarnings += battle.earnings;
  }

  const efficiency = calculateEfficiency(cumulativeScore, totalTokensUsed);

  return {
    rank: 0, // Placeholder until rankLeaderboardEntries assigns the position
    agentId,
    agentName,
    userId,
    cumulativeScore,
    totalTokensUsed,
    efficiency,
    battlesCount: battleResults.length,
    totalEarnings,
  };
}
package/src/index.ts ADDED
@@ -0,0 +1,17 @@
1
+ /**
2
+ * Earnd Bounty Engine
3
+ *
4
+ * Outcome-based AI agent competition system where business outcomes are defined as code,
5
+ * multiple agents compete to achieve outcomes, success is deterministically evaluated,
6
+ * and payment is only possible when success criteria are met.
7
+ */
8
+
9
+ export const VERSION = '1.0.0';
10
+
11
+ // Re-export each module under its own namespace to avoid name conflicts. NOTE(review): './jobs/index.js' is re-exported below, but no src/jobs/ file appears among the 113 files listed for this package version - confirm the jobs module is actually shipped.
12
+ export * as outcomes from './outcomes/index.js';
13
+ export * as agents from './agents/index.js';
14
+ export * as eval from './eval/index.js';
15
+ export * as jobs from './jobs/index.js';
16
+ export * as runtime from './runtime/index.js';
17
+ export * as utils from './utils/index.js';
@@ -0,0 +1,188 @@
1
+ # League Module
2
+
3
+ The league module implements the parallel agent competition system in which N agents compete to achieve the same outcome. The first agent whose result is evaluated as a success wins the bounty, and the remaining agents are terminated.
4
+
5
+ ## Components
6
+
7
+ ### League Runner (`runLeague.ts`)
8
+
9
+ Main orchestration for parallel agent execution.
10
+
11
+ **Key Interfaces:**
12
+
13
+ - `LeagueConfig` - Configuration for a league run (outcomeId, agentCount, globalSpendCeiling, etc.)
14
+ - `LeagueResult` - Result of a league run (winnerId, agents, totalCost, duration)
15
+ - `AgentResult` - Result for individual agent (status, killReason, tokensSpent, evaluationResult)
16
+
17
+ **Key Functions:**
18
+
19
+ - `runLeague(config)` - Runs N agents in parallel competing for an outcome
20
+ - `runLeagueMock(config)` - Runs league in mock mode without real API calls
21
+
22
+ **Usage:**
23
+
24
+ ```typescript
25
+ import { runLeague, runLeagueMock } from './runLeague.js';
26
+
27
+ const result = await runLeague({
28
+ outcomeId: 'qualified_sales_interest',
29
+ agentCount: 3,
30
+ globalSpendCeiling: 50000,
31
+ agentConfigs: [agent1, agent2, agent3],
32
+ outcome: qualifiedSalesInterest,
33
+ lead: leadData,
34
+ apiKey: process.env.ANTHROPIC_API_KEY,
35
+ });
36
+
37
+ if (result.winnerId) {
38
+ console.log(`Winner: ${result.winnerId}`);
39
+ console.log(`Total cost: ${result.totalCost} tokens`);
40
+ }
41
+ ```
42
+
43
+ ### Kill Agent (`killAgent.ts`)
44
+
45
+ Agent termination logic based on limits.
46
+
47
+ **Key Functions:**
48
+
49
+ - `shouldKillAgent(agent, limits)` - Checks if agent should be terminated
50
+ - `killAgent(agentId, reason)` - Terminates an agent
51
+ - `checkAllAgents(agents, limits)` - Checks all agents for termination
52
+
53
+ **Kill Reasons:**
54
+
55
+ - `cost_exceeded` - Agent exceeded token ceiling
56
+ - `attempts_exceeded` - Agent exceeded max attempts
57
+ - `timeout` - Agent exceeded runtime limit
58
+ - `competitor_won` - Another agent achieved success first
59
+
60
+ ### Score Agent (`scoreAgent.ts`)
61
+
62
+ Agent scoring and winner determination.
63
+
64
+ **Key Functions:**
65
+
66
+ - `scoreAgent(result, metrics)` - Creates a score for an agent
67
+ - `determineWinner(scores)` - Finds the winning agent
68
+ - `rankAgents(scores)` - Ranks all agents by performance
69
+ - `calculateLeagueStats(scores)` - Calculates aggregate statistics
70
+
71
+ ## League Execution Flow
72
+
73
+ ```text
74
+ 1. Start N agents in parallel
75
+ 2. Each agent attempts to achieve the outcome
76
+ 3. First agent to get SUCCESS evaluation wins
77
+ 4. All other agents are terminated with 'competitor_won'
78
+ 5. If no agent succeeds, the league ends with no winner
79
+ 6. If the global spend ceiling is exceeded, all agents are terminated
80
+ ```
81
+
82
+ ## Requirements Reference
83
+
84
+ - **4.1** - Spin up N agents in parallel
85
+ - **4.3** - Terminate agent on attempt limit exceeded
86
+ - **4.4** - Terminate agent on cost ceiling exceeded
87
+ - **4.5** - Promote winning agent and halt others
88
+ - **10.2** - Enforce max runtime per agent
89
+ - **10.3** - Enforce global spend ceiling
90
+
91
+ ### Multi-Step Orchestrator (`multi-step-orchestrator.ts`)
92
+
93
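+ Manages multi-step bounty execution, in which tasks run in dependency order and each task receives the outputs of the tasks it depends on. Note: `multi-step-orchestrator.ts` is not among the files listed for the 1.0.0 package.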
94
+
95
+ **Key Interfaces:**
96
+
97
+ - `TaskNode` - A single task in a multi-step bounty (id, name, description, dependencies, validator)
98
+ - `MultiStepBounty` - Bounty definition with task graph (tasks, finalTaskId, payoutAmount)
99
+ - `TaskContext` - Context passed to agent for task execution (task, dependencyOutputs, bounty info)
100
+ - `TaskExecutionResult` - Result of a single task (success, score, output, tokensUsed)
101
+ - `MultiStepResult` - Overall bounty execution result (taskResults, skippedTaskIds, overallScore)
102
+ - `MultiStepAgent` - Agent interface for multi-step execution
103
+
104
+ **Error Classes:**
105
+
106
+ - `CyclicDependencyError` - Thrown when task graph contains cycles
107
+ - `InvalidDependencyError` - Thrown when task references non-existent dependency
108
+ - `InvalidFinalTaskError` - Thrown when final task ID doesn't exist
109
+ - `TaskExecutionError` - Thrown when a task fails during execution
110
+
111
+ **Usage:**
112
+
113
+ ```typescript
114
+ import {
115
+ MultiStepBounty,
116
+ TaskNode,
117
+ CyclicDependencyError
118
+ } from './multi-step-orchestrator.js';
119
+
120
+ const bounty: MultiStepBounty = {
121
+ id: 'research-bounty',
122
+ name: 'Market Research',
123
+ description: 'Complete market research workflow',
124
+ tasks: [
125
+ { id: 'gather', name: 'Gather Data', dependencies: [], ... },
126
+ { id: 'analyze', name: 'Analyze Data', dependencies: ['gather'], ... },
127
+ { id: 'report', name: 'Generate Report', dependencies: ['analyze'], ... },
128
+ ],
129
+ finalTaskId: 'report',
130
+ payoutAmount: 500,
131
+ };
132
+ ```
133
+
134
+ ### Team Coordinator (`team-coordinator.ts`)
135
+
136
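+ Manages team battles in which multiple agents collaborate on a single bounty through shared state. Note: `team-coordinator.ts` is not among the files listed for the 1.0.0 package.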
137
+
138
+ **Key Interfaces:**
139
+
140
+ - `TeamConfig` - Team configuration (teamId, memberIds, sharedStateEnabled)
141
+ - `TeamState` - Shared state with optimistic locking (data, version, lastModifiedBy)
142
+ - `StateUpdateResult` - Result of state update operation
143
+ - `MemberContribution` - Contribution metrics per team member
144
+ - `TeamPayoutDistribution` - Payout distribution across team members
145
+ - `TeamStateChangeEvent` - Event emitted on state changes
146
+
147
+ **Error Classes:**
148
+
149
+ - `StateConflictError` - Thrown when optimistic locking detects a conflict
150
+ - `TeamNotFoundError` - Thrown when team doesn't exist
151
+ - `NotTeamMemberError` - Thrown when agent isn't a team member
152
+
153
+ **Usage:**
154
+
155
+ ```typescript
156
+ import {
157
+ TeamConfig,
158
+ TeamState,
159
+ StateConflictError
160
+ } from './team-coordinator.js';
161
+
162
+ const team: TeamConfig = {
163
+ teamId: 'team-alpha',
164
+ memberIds: ['agent-1', 'agent-2', 'agent-3'],
165
+ sharedStateEnabled: true,
166
+ };
167
+
168
+ // Shared state uses optimistic locking
169
+ const state: TeamState = {
170
+ data: { researchNotes: [], completedTasks: [] },
171
+ version: 1,
172
+ lastModifiedBy: 'agent-1',
173
+ lastModifiedAt: new Date(),
174
+ };
175
+ ```
176
+
177
+ ## Design Principles
178
+
179
+ 1. **Parallel Execution** - All agents run simultaneously
180
+ 2. **First Win** - First successful agent wins, no ties
181
+ 3. **Fail Closed** - Limit violations terminate without payout
182
+ 4. **Isolation** - Each agent tracks costs independently (solo battles)
183
+ 5. **Observable** - All terminations and results are logged
184
+ 6. **Team Coordination** - Shared state with optimistic locking for team battles
185
+ 7. **Fair Distribution** - Contribution-based payout distribution for teams
186
+ 8. **DAG Validation** - Multi-step bounties validate task graphs for cycles
187
+ 9. **Output Propagation** - Task outputs flow to dependent tasks automatically
188
+ 10. **Partial Completion** - When a multi-step bounty fails, it records which tasks had already completed