outcome-cli 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113)
  1. package/README.md +261 -0
  2. package/package.json +95 -0
  3. package/src/agents/README.md +139 -0
  4. package/src/agents/adapters/anthropic.adapter.ts +166 -0
  5. package/src/agents/adapters/dalle.adapter.ts +145 -0
  6. package/src/agents/adapters/gemini.adapter.ts +134 -0
  7. package/src/agents/adapters/imagen.adapter.ts +106 -0
  8. package/src/agents/adapters/nano-banana.adapter.ts +129 -0
  9. package/src/agents/adapters/openai.adapter.ts +165 -0
  10. package/src/agents/adapters/veo.adapter.ts +130 -0
  11. package/src/agents/agent.schema.property.test.ts +379 -0
  12. package/src/agents/agent.schema.test.ts +148 -0
  13. package/src/agents/agent.schema.ts +263 -0
  14. package/src/agents/index.ts +60 -0
  15. package/src/agents/registered-agent.schema.ts +356 -0
  16. package/src/agents/registry.ts +97 -0
  17. package/src/agents/tournament-configs.property.test.ts +266 -0
  18. package/src/cli/README.md +145 -0
  19. package/src/cli/commands/define.ts +79 -0
  20. package/src/cli/commands/list.ts +46 -0
  21. package/src/cli/commands/logs.ts +83 -0
  22. package/src/cli/commands/run.ts +416 -0
  23. package/src/cli/commands/verify.ts +110 -0
  24. package/src/cli/index.ts +81 -0
  25. package/src/config/README.md +128 -0
  26. package/src/config/env.ts +262 -0
  27. package/src/config/index.ts +19 -0
  28. package/src/eval/README.md +318 -0
  29. package/src/eval/ai-judge.test.ts +435 -0
  30. package/src/eval/ai-judge.ts +368 -0
  31. package/src/eval/code-validators.ts +414 -0
  32. package/src/eval/evaluateOutcome.property.test.ts +1174 -0
  33. package/src/eval/evaluateOutcome.ts +591 -0
  34. package/src/eval/immigration-validators.ts +122 -0
  35. package/src/eval/index.ts +90 -0
  36. package/src/eval/judge-cache.ts +402 -0
  37. package/src/eval/tournament-validators.property.test.ts +439 -0
  38. package/src/eval/validators.property.test.ts +1118 -0
  39. package/src/eval/validators.ts +1199 -0
  40. package/src/eval/weighted-scorer.ts +285 -0
  41. package/src/index.ts +17 -0
  42. package/src/league/README.md +188 -0
  43. package/src/league/health-check.ts +353 -0
  44. package/src/league/index.ts +93 -0
  45. package/src/league/killAgent.ts +151 -0
  46. package/src/league/league.test.ts +1151 -0
  47. package/src/league/runLeague.ts +843 -0
  48. package/src/league/scoreAgent.ts +175 -0
  49. package/src/modules/omnibridge/__tests__/.gitkeep +1 -0
  50. package/src/modules/omnibridge/__tests__/auth-tunnel.property.test.ts +524 -0
  51. package/src/modules/omnibridge/__tests__/deterministic-logger.property.test.ts +965 -0
  52. package/src/modules/omnibridge/__tests__/ghost-api.property.test.ts +461 -0
  53. package/src/modules/omnibridge/__tests__/omnibridge-integration.test.ts +542 -0
  54. package/src/modules/omnibridge/__tests__/parallel-executor.property.test.ts +671 -0
  55. package/src/modules/omnibridge/__tests__/semantic-normalizer.property.test.ts +521 -0
  56. package/src/modules/omnibridge/__tests__/semantic-normalizer.test.ts +254 -0
  57. package/src/modules/omnibridge/__tests__/session-vault.property.test.ts +367 -0
  58. package/src/modules/omnibridge/__tests__/shadow-session.property.test.ts +523 -0
  59. package/src/modules/omnibridge/__tests__/triangulation-engine.property.test.ts +292 -0
  60. package/src/modules/omnibridge/__tests__/verification-engine.property.test.ts +769 -0
  61. package/src/modules/omnibridge/api/.gitkeep +1 -0
  62. package/src/modules/omnibridge/api/ghost-api.ts +1087 -0
  63. package/src/modules/omnibridge/auth/.gitkeep +1 -0
  64. package/src/modules/omnibridge/auth/auth-tunnel.ts +843 -0
  65. package/src/modules/omnibridge/auth/session-vault.ts +577 -0
  66. package/src/modules/omnibridge/core/.gitkeep +1 -0
  67. package/src/modules/omnibridge/core/semantic-normalizer.ts +702 -0
  68. package/src/modules/omnibridge/core/triangulation-engine.ts +530 -0
  69. package/src/modules/omnibridge/core/types.ts +610 -0
  70. package/src/modules/omnibridge/execution/.gitkeep +1 -0
  71. package/src/modules/omnibridge/execution/deterministic-logger.ts +629 -0
  72. package/src/modules/omnibridge/execution/parallel-executor.ts +542 -0
  73. package/src/modules/omnibridge/execution/shadow-session.ts +794 -0
  74. package/src/modules/omnibridge/index.ts +212 -0
  75. package/src/modules/omnibridge/omnibridge.ts +510 -0
  76. package/src/modules/omnibridge/verification/.gitkeep +1 -0
  77. package/src/modules/omnibridge/verification/verification-engine.ts +783 -0
  78. package/src/outcomes/README.md +75 -0
  79. package/src/outcomes/acquire-pilot-customer.ts +297 -0
  80. package/src/outcomes/code-delivery-outcomes.ts +89 -0
  81. package/src/outcomes/code-outcomes.ts +256 -0
  82. package/src/outcomes/code_review_battle.test.ts +135 -0
  83. package/src/outcomes/code_review_battle.ts +135 -0
  84. package/src/outcomes/cold_email_battle.ts +97 -0
  85. package/src/outcomes/content_creation_battle.ts +160 -0
  86. package/src/outcomes/f1_stem_opt_compliance.ts +61 -0
  87. package/src/outcomes/index.ts +107 -0
  88. package/src/outcomes/lead_gen_battle.test.ts +113 -0
  89. package/src/outcomes/lead_gen_battle.ts +99 -0
  90. package/src/outcomes/outcome.schema.property.test.ts +229 -0
  91. package/src/outcomes/outcome.schema.ts +187 -0
  92. package/src/outcomes/qualified_sales_interest.ts +118 -0
  93. package/src/outcomes/swarm_planner.property.test.ts +370 -0
  94. package/src/outcomes/swarm_planner.ts +96 -0
  95. package/src/outcomes/web_extraction.ts +234 -0
  96. package/src/runtime/README.md +220 -0
  97. package/src/runtime/agentRunner.test.ts +341 -0
  98. package/src/runtime/agentRunner.ts +746 -0
  99. package/src/runtime/claudeAdapter.ts +232 -0
  100. package/src/runtime/costTracker.ts +123 -0
  101. package/src/runtime/index.ts +34 -0
  102. package/src/runtime/modelAdapter.property.test.ts +305 -0
  103. package/src/runtime/modelAdapter.ts +144 -0
  104. package/src/runtime/openaiAdapter.ts +235 -0
  105. package/src/utils/README.md +122 -0
  106. package/src/utils/command-runner.ts +134 -0
  107. package/src/utils/cost-guard.ts +379 -0
  108. package/src/utils/errors.test.ts +290 -0
  109. package/src/utils/errors.ts +442 -0
  110. package/src/utils/index.ts +37 -0
  111. package/src/utils/logger.test.ts +361 -0
  112. package/src/utils/logger.ts +419 -0
  113. package/src/utils/output-parsers.ts +216 -0
@@ -0,0 +1,671 @@
1
+ /**
2
+ * Parallel Executor Property Tests
3
+ *
4
+ * Property-based tests for competition orchestration, identical starting states,
5
+ * and crash isolation.
6
+ *
7
+ * Requirements: 7.2, 7.6
8
+ */
9
+
10
+ import { describe, test, expect, beforeEach, afterEach } from 'vitest';
11
+ import * as fc from 'fast-check';
12
+ import {
13
+ ParallelExecutor,
14
+ createParallelExecutor,
15
+ type CompetitionConfig,
16
+ type AgentExecutionTask,
17
+ type AgentExecutionResult,
18
+ } from '../execution/parallel-executor.js';
19
+ import { createSessionVault } from '../auth/session-vault.js';
20
+ import { createShadowSessionOrchestrator } from '../execution/shadow-session.js';
21
+ import type { SerializedSession, EncryptedBlob } from '../core/types.js';
22
+
23
+ // =============================================================================
24
+ // Test Setup
25
+ // =============================================================================
26
+
27
/** Executor under test; `beforeEach` assigns a freshly built instance for each test. */
let executor: ParallelExecutor;

beforeEach(() => {
  // One vault instance is handed to both the orchestrator and the executor.
  const sessionVault = createSessionVault();
  const sessionOrchestrator = createShadowSessionOrchestrator({
    vault: sessionVault,
    heartbeatIntervalMs: 60000,
    sessionTimeoutMs: 3600000,
  });

  executor = createParallelExecutor({
    sessionOrchestrator,
    sessionVault,
    maxConcurrentCompetitions: 10,
    defaultTimeoutMs: 5000,
  });
});

afterEach(async () => {
  // Drop whatever the test left registered on the executor.
  await executor.clear();
});
48
+
49
+ // =============================================================================
50
+ // Arbitraries
51
+ // =============================================================================
52
+
53
/** Bounty identifiers: `bounty_` followed by 8-16 lowercase alphanumerics. */
const bountyIdArbitrary = fc.stringMatching(/^bounty_[a-z0-9]{8,16}$/);

/** Domain names: a lowercase label under one of the .com/.org/.net/.io TLDs. */
const domainArbitrary = fc.stringMatching(/^[a-z][a-z0-9-]{2,20}\.(com|org|net|io)$/);

/** Number of agents per competition, kept small (2-10) so runs stay cheap. */
const agentCountArbitrary = fc.integer({ min: 2, max: 10 });

/** Agent identifiers: `agent_` followed by 8-16 lowercase alphanumerics. */
const agentIdArbitrary = fc.stringMatching(/^agent_[a-z0-9]{8,16}$/);

/** Arbitrary text of at most 100 characters, used for each storage field below. */
const storagePayloadArbitrary = fc.string({ minLength: 0, maxLength: 100 });

/** Raw (not yet encrypted) session data: cookies plus both web storage areas. */
const sessionDataArbitrary = fc.record({
  cookies: storagePayloadArbitrary,
  localStorage: storagePayloadArbitrary,
  sessionStorage: storagePayloadArbitrary,
});
81
+
82
+ // =============================================================================
83
+ // Property 15: Identical Starting States
84
+ // =============================================================================
85
+
86
describe('Property 15: Identical Starting States', () => {
  /**
   * Creates a competition on the shared `executor` and asserts that creation
   * succeeded, so every property body starts from a usable competition.
   *
   * @param config - Competition configuration passed to `createCompetition`.
   * @returns The created competition.
   * @throws If either creation expectation fails.
   */
  async function createCompetitionOrThrow(config: CompetitionConfig) {
    const result = await executor.createCompetition(config);

    expect(result.success).toBe(true);
    expect(result.competition).toBeDefined();

    return result.competition!;
  }

  /**
   * **Feature: omnibridge, Property 15: Identical Starting States**
   *
   * *For any* Competition with N agents, all N Shadow_Sessions SHALL have
   * byte-identical `startState` serializations.
   *
   * **Validates: Requirements 7.2**
   */
  test(
    'Property 15: all agents in a competition have byte-identical starting states',
    async () => {
      await fc.assert(
        fc.asyncProperty(
          bountyIdArbitrary,
          domainArbitrary,
          agentCountArbitrary,
          async (bountyId, domain, agentCount) => {
            const competition = await createCompetitionOrThrow({
              bountyId,
              agentCount,
              targetDomain: domain,
            });

            // Clean up in `finally`: the executor is shared by every run of this
            // property, so a competition left behind by a failing run would still
            // be present while fast-check re-runs the property to shrink the input.
            try {
              // One session per requested agent.
              expect(competition.sessions.length).toBe(agentCount);

              const startState = competition.startState;
              const startStateStr = JSON.stringify(startState);

              // NOTE(review): each iteration re-serializes the shared
              // `competition.startState`; no per-session state is read, so this
              // verifies that serialization is deterministic rather than comparing
              // the sessions' own states. If sessions expose their start state,
              // compare those instead — confirm against the session type.
              for (let i = 0; i < competition.sessions.length; i++) {
                const sessionStartState = executor.serializeStartState(startState);

                // Byte-identical check on the JSON form.
                expect(JSON.stringify(sessionStartState)).toBe(startStateStr);
              }

              // The executor's own equality helper must agree for serialized copies.
              for (let i = 0; i < competition.sessions.length - 1; i++) {
                const state1 = executor.serializeStartState(startState);
                const state2 = executor.serializeStartState(startState);
                expect(executor.areStatesIdentical(state1, state2)).toBe(true);
              }
            } finally {
              await executor.cleanupCompetition(competition.id);
            }
          }
        ),
        { numRuns: 100 }
      );
    }
  );

  /**
   * Custom initial state should be preserved identically for all agents.
   */
  test(
    'custom initial state is byte-identical for all agents',
    async () => {
      // Separate vault used only to produce encrypted fixture values.
      const vault = createSessionVault();

      await fc.assert(
        fc.asyncProperty(
          bountyIdArbitrary,
          domainArbitrary,
          agentCountArbitrary,
          sessionDataArbitrary,
          async (bountyId, domain, agentCount, sessionData) => {
            // Build the caller-supplied initial state from the generated data.
            const customState: SerializedSession = {
              cookies: vault.encrypt(sessionData.cookies),
              localStorage: vault.encrypt(sessionData.localStorage),
              sessionStorage: vault.encrypt(sessionData.sessionStorage),
              expiresAt: Date.now() + 86400000,
            };

            const competition = await createCompetitionOrThrow({
              bountyId,
              agentCount,
              targetDomain: domain,
              initialState: customState,
            });

            try {
              // The competition's start state must serialize exactly like the
              // state that was passed in.
              const startStateStr = JSON.stringify(competition.startState);
              expect(startStateStr).toBe(JSON.stringify(customState));

              // Every serialized copy must match as well.
              for (let i = 0; i < agentCount; i++) {
                const copy = executor.serializeStartState(competition.startState);
                expect(JSON.stringify(copy)).toBe(startStateStr);
              }
            } finally {
              await executor.cleanupCompetition(competition.id);
            }
          }
        ),
        { numRuns: 100 }
      );
    }
  );

  /**
   * Sessions in a competition should all be isolated from each other.
   */
  test(
    'all sessions in a competition are isolated',
    async () => {
      await fc.assert(
        fc.asyncProperty(
          bountyIdArbitrary,
          domainArbitrary,
          agentCountArbitrary,
          async (bountyId, domain, agentCount) => {
            const competition = await createCompetitionOrThrow({
              bountyId,
              agentCount,
              targetDomain: domain,
            });

            try {
              const orchestrator = executor.getSessionOrchestrator();
              const { sessions } = competition;

              // Check every unordered pair of sessions exactly once.
              for (let i = 0; i < sessions.length; i++) {
                for (let j = i + 1; j < sessions.length; j++) {
                  expect(
                    orchestrator.areSessionsIsolated(sessions[i].id, sessions[j].id)
                  ).toBe(true);
                }
              }
            } finally {
              await executor.cleanupCompetition(competition.id);
            }
          }
        ),
        { numRuns: 100 }
      );
    }
  );
});
261
+
262
+ // =============================================================================
263
+ // Property 17: Crash Isolation
264
+ // =============================================================================
265
+
266
describe('Property 17: Crash Isolation', () => {
  /**
   * Creates a competition on the shared `executor` and asserts that creation
   * succeeded, so every property body starts from a usable competition.
   *
   * @param config - Competition configuration passed to `createCompetition`.
   * @returns The created competition.
   * @throws If either creation expectation fails.
   */
  async function createCompetitionOrThrow(config: CompetitionConfig) {
    const result = await executor.createCompetition(config);

    expect(result.success).toBe(true);
    expect(result.competition).toBeDefined();

    return result.competition!;
  }

  /**
   * Builds the result a well-behaved agent returns in these tests.
   *
   * @param agentId - ID reported in the result.
   * @param sessionId - Session the agent ran in.
   * @param executionTimeMs - Reported duration, used for both the top-level
   *   field and the result metadata.
   */
  function completedResult(
    agentId: string,
    sessionId: string,
    executionTimeMs: number
  ): AgentExecutionResult {
    return {
      agentId,
      sessionId,
      status: 'completed',
      actionLog: [],
      executionTimeMs,
      result: {
        data: { success: true },
        metadata: {
          confidence: 1.0,
          executionTimeMs,
          actionsPerformed: 1,
          triangulationHeals: 0,
        },
        verificationHash: 'test_hash',
      },
    };
  }

  /**
   * **Feature: omnibridge, Property 17: Crash Isolation**
   *
   * *For any* Competition where agent A's session crashes,
   * all other agents' sessions SHALL continue executing and return results independently.
   *
   * **Validates: Requirements 7.6**
   */
  test(
    'Property 17: crashed session does not affect other sessions',
    async () => {
      await fc.assert(
        fc.asyncProperty(
          bountyIdArbitrary,
          domainArbitrary,
          fc.integer({ min: 3, max: 8 }), // Need at least 3 agents to test isolation
          fc.integer({ min: 0, max: 7 }), // Index of agent to crash
          async (bountyId, domain, agentCount, crashIndexRaw) => {
            // Fold the raw index into the valid range for this agent count.
            const crashIndex = crashIndexRaw % agentCount;

            const competition = await createCompetitionOrThrow({
              bountyId,
              agentCount,
              targetDomain: domain,
            });

            // Clean up in `finally`: the executor is shared by every run of this
            // property, so a competition left behind by a failing run would still
            // be present while fast-check re-runs the property to shrink the input.
            try {
              // One task per session; exactly one of them throws.
              const tasks: AgentExecutionTask[] = competition.sessions.map((session, index) => ({
                agentId: `agent_${index}`,
                sessionId: session.id,
                execute: async (): Promise<AgentExecutionResult> => {
                  if (index === crashIndex) {
                    // Simulate a crash
                    throw new Error('Simulated crash');
                  }

                  return completedResult(`agent_${index}`, session.id, 100);
                },
              }));

              const completedResults: string[] = [];
              const failedResults: string[] = [];

              await executor.startAll(competition, tasks, {
                timeoutMs: 5000,
                onAgentComplete: (agentResult) => {
                  if (agentResult.status === 'completed') {
                    completedResults.push(agentResult.agentId);
                  } else {
                    failedResults.push(agentResult.agentId);
                  }
                },
              });

              // 1. The crashed agent is reported as not completed.
              expect(failedResults).toContain(`agent_${crashIndex}`);

              // 2. Every other agent completed.
              for (let i = 0; i < agentCount; i++) {
                if (i !== crashIndex) {
                  expect(completedResults).toContain(`agent_${i}`);
                }
              }

              // 3. Nothing beyond those agents completed.
              expect(completedResults.length).toBe(agentCount - 1);

              // 4. The executor itself reports no collateral damage.
              expect(
                executor.areOtherSessionsAffected(
                  competition.id,
                  competition.sessions[crashIndex].id
                )
              ).toBe(false);
            } finally {
              await executor.cleanupCompetition(competition.id);
            }
          }
        ),
        { numRuns: 100 }
      );
    }
  );

  /**
   * Multiple crashes should not cascade to other agents.
   */
  test(
    'multiple crashes do not cascade to other agents',
    async () => {
      await fc.assert(
        fc.asyncProperty(
          bountyIdArbitrary,
          domainArbitrary,
          fc.integer({ min: 5, max: 10 }), // Need enough agents
          fc.integer({ min: 1, max: 3 }), // Number of crashes
          async (bountyId, domain, agentCount, crashCount) => {
            // Never crash every agent: at least one must be left to complete.
            const actualCrashCount = Math.min(crashCount, agentCount - 1);

            const competition = await createCompetitionOrThrow({
              bountyId,
              agentCount,
              targetDomain: domain,
            });

            try {
              // The first `actualCrashCount` agents (by index) are the ones that crash.
              const crashIndices = new Set<number>(
                Array.from({ length: actualCrashCount }, (_, i) => i)
              );

              const tasks: AgentExecutionTask[] = competition.sessions.map((session, index) => ({
                agentId: `agent_${index}`,
                sessionId: session.id,
                execute: async (): Promise<AgentExecutionResult> => {
                  if (crashIndices.has(index)) {
                    throw new Error(`Simulated crash for agent ${index}`);
                  }

                  return completedResult(`agent_${index}`, session.id, 50);
                },
              }));

              let completedCount = 0;
              let failedCount = 0;

              await executor.startAll(competition, tasks, {
                timeoutMs: 5000,
                onAgentComplete: (agentResult) => {
                  if (agentResult.status === 'completed') {
                    completedCount++;
                  } else {
                    failedCount++;
                  }
                },
              });

              // 1. Exactly actualCrashCount agents failed
              expect(failedCount).toBe(actualCrashCount);

              // 2. All other agents completed successfully
              expect(completedCount).toBe(agentCount - actualCrashCount);
            } finally {
              await executor.cleanupCompetition(competition.id);
            }
          }
        ),
        { numRuns: 100 }
      );
    }
  );

  /**
   * Timeout should be treated as a crash and isolated.
   */
  test(
    'timeout is isolated like a crash',
    { timeout: 30000 },
    async () => {
      await fc.assert(
        fc.asyncProperty(
          bountyIdArbitrary,
          domainArbitrary,
          fc.integer({ min: 3, max: 6 }),
          async (bountyId, domain, agentCount) => {
            const competition = await createCompetitionOrThrow({
              bountyId,
              agentCount,
              targetDomain: domain,
            });

            try {
              // Agent 0 sleeps for 500 ms, well past the 50 ms limit passed to
              // `startAll` below; every other agent returns immediately.
              const tasks: AgentExecutionTask[] = competition.sessions.map((session, index) => ({
                agentId: `agent_${index}`,
                sessionId: session.id,
                execute: async (): Promise<AgentExecutionResult> => {
                  if (index === 0) {
                    await new Promise((resolve) => setTimeout(resolve, 500));
                  }

                  return completedResult(`agent_${index}`, session.id, 10);
                },
              }));

              let completedCount = 0;
              let timeoutCount = 0;

              await executor.startAll(competition, tasks, {
                timeoutMs: 50, // Very short timeout to trigger timeout quickly
                onAgentComplete: (agentResult) => {
                  // Any status other than these two is deliberately not counted.
                  if (agentResult.status === 'completed') {
                    completedCount++;
                  } else if (agentResult.status === 'timeout') {
                    timeoutCount++;
                  }
                },
              });

              // Agent 0 should timeout
              expect(timeoutCount).toBe(1);

              // All other agents should complete
              expect(completedCount).toBe(agentCount - 1);
            } finally {
              await executor.cleanupCompetition(competition.id);
            }
          }
        ),
        { numRuns: 20 } // Reduced runs for timeout test
      );
    }
  );
});
558
+
559
+ // =============================================================================
560
+ // Additional Property Tests
561
+ // =============================================================================
562
+
563
describe('Competition Creation', () => {
  /**
   * Creates a competition on the shared `executor` and asserts that creation
   * succeeded, so every property body starts from a usable competition.
   *
   * @param config - Competition configuration passed to `createCompetition`.
   * @returns The created competition.
   * @throws If either creation expectation fails.
   */
  async function createCompetitionOrThrow(config: CompetitionConfig) {
    const result = await executor.createCompetition(config);

    expect(result.success).toBe(true);
    expect(result.competition).toBeDefined();

    return result.competition!;
  }

  /**
   * Competition should create the correct number of sessions.
   */
  test(
    'competition creates correct number of sessions',
    async () => {
      await fc.assert(
        fc.asyncProperty(
          bountyIdArbitrary,
          domainArbitrary,
          agentCountArbitrary,
          async (bountyId, domain, agentCount) => {
            const competition = await createCompetitionOrThrow({
              bountyId,
              agentCount,
              targetDomain: domain,
            });

            // Clean up in `finally`: the executor is shared by every run of this
            // property, so a competition left behind by a failing run would still
            // be present while fast-check re-runs the property to shrink the input.
            try {
              expect(competition.sessions.length).toBe(agentCount);
            } finally {
              await executor.cleanupCompetition(competition.id);
            }
          }
        ),
        { numRuns: 100 }
      );
    }
  );

  /**
   * Competition with custom agent IDs should use those IDs.
   */
  test(
    'competition uses provided agent IDs',
    async () => {
      await fc.assert(
        fc.asyncProperty(
          bountyIdArbitrary,
          domainArbitrary,
          // `uniqueArray` guarantees at least two distinct IDs, so no run has to
          // be skipped because the generated IDs happened to collide.
          fc.uniqueArray(agentIdArbitrary, { minLength: 2, maxLength: 5 }),
          async (bountyId, domain, uniqueAgentIds) => {
            const competition = await createCompetitionOrThrow({
              bountyId,
              agentCount: uniqueAgentIds.length,
              targetDomain: domain,
              agentIds: uniqueAgentIds,
            });

            try {
              // NOTE(review): only the session count is asserted. How a session
              // exposes its agent ID is not visible from this file; once known,
              // also assert that the sessions carry exactly `uniqueAgentIds`.
              expect(competition.sessions.length).toBe(uniqueAgentIds.length);
            } finally {
              await executor.cleanupCompetition(competition.id);
            }
          }
        ),
        { numRuns: 100 }
      );
    }
  );

  /**
   * Invalid agent count should fail.
   */
  test(
    'invalid agent count fails gracefully',
    async () => {
      await fc.assert(
        fc.asyncProperty(
          bountyIdArbitrary,
          domainArbitrary,
          fc.integer({ min: -10, max: 0 }),
          async (bountyId, domain, invalidCount) => {
            const config: CompetitionConfig = {
              bountyId,
              agentCount: invalidCount,
              targetDomain: domain,
            };

            const result = await executor.createCompetition(config);

            try {
              // Zero or negative counts must be rejected with an error, not thrown.
              expect(result.success).toBe(false);
              expect(result.error).toBeDefined();
            } finally {
              // If creation unexpectedly succeeded, do not leave the competition behind.
              if (result.competition) {
                await executor.cleanupCompetition(result.competition.id);
              }
            }
          }
        ),
        { numRuns: 100 }
      );
    }
  );
});