outcome-cli 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113):
  1. package/README.md +261 -0
  2. package/package.json +95 -0
  3. package/src/agents/README.md +139 -0
  4. package/src/agents/adapters/anthropic.adapter.ts +166 -0
  5. package/src/agents/adapters/dalle.adapter.ts +145 -0
  6. package/src/agents/adapters/gemini.adapter.ts +134 -0
  7. package/src/agents/adapters/imagen.adapter.ts +106 -0
  8. package/src/agents/adapters/nano-banana.adapter.ts +129 -0
  9. package/src/agents/adapters/openai.adapter.ts +165 -0
  10. package/src/agents/adapters/veo.adapter.ts +130 -0
  11. package/src/agents/agent.schema.property.test.ts +379 -0
  12. package/src/agents/agent.schema.test.ts +148 -0
  13. package/src/agents/agent.schema.ts +263 -0
  14. package/src/agents/index.ts +60 -0
  15. package/src/agents/registered-agent.schema.ts +356 -0
  16. package/src/agents/registry.ts +97 -0
  17. package/src/agents/tournament-configs.property.test.ts +266 -0
  18. package/src/cli/README.md +145 -0
  19. package/src/cli/commands/define.ts +79 -0
  20. package/src/cli/commands/list.ts +46 -0
  21. package/src/cli/commands/logs.ts +83 -0
  22. package/src/cli/commands/run.ts +416 -0
  23. package/src/cli/commands/verify.ts +110 -0
  24. package/src/cli/index.ts +81 -0
  25. package/src/config/README.md +128 -0
  26. package/src/config/env.ts +262 -0
  27. package/src/config/index.ts +19 -0
  28. package/src/eval/README.md +318 -0
  29. package/src/eval/ai-judge.test.ts +435 -0
  30. package/src/eval/ai-judge.ts +368 -0
  31. package/src/eval/code-validators.ts +414 -0
  32. package/src/eval/evaluateOutcome.property.test.ts +1174 -0
  33. package/src/eval/evaluateOutcome.ts +591 -0
  34. package/src/eval/immigration-validators.ts +122 -0
  35. package/src/eval/index.ts +90 -0
  36. package/src/eval/judge-cache.ts +402 -0
  37. package/src/eval/tournament-validators.property.test.ts +439 -0
  38. package/src/eval/validators.property.test.ts +1118 -0
  39. package/src/eval/validators.ts +1199 -0
  40. package/src/eval/weighted-scorer.ts +285 -0
  41. package/src/index.ts +17 -0
  42. package/src/league/README.md +188 -0
  43. package/src/league/health-check.ts +353 -0
  44. package/src/league/index.ts +93 -0
  45. package/src/league/killAgent.ts +151 -0
  46. package/src/league/league.test.ts +1151 -0
  47. package/src/league/runLeague.ts +843 -0
  48. package/src/league/scoreAgent.ts +175 -0
  49. package/src/modules/omnibridge/__tests__/.gitkeep +1 -0
  50. package/src/modules/omnibridge/__tests__/auth-tunnel.property.test.ts +524 -0
  51. package/src/modules/omnibridge/__tests__/deterministic-logger.property.test.ts +965 -0
  52. package/src/modules/omnibridge/__tests__/ghost-api.property.test.ts +461 -0
  53. package/src/modules/omnibridge/__tests__/omnibridge-integration.test.ts +542 -0
  54. package/src/modules/omnibridge/__tests__/parallel-executor.property.test.ts +671 -0
  55. package/src/modules/omnibridge/__tests__/semantic-normalizer.property.test.ts +521 -0
  56. package/src/modules/omnibridge/__tests__/semantic-normalizer.test.ts +254 -0
  57. package/src/modules/omnibridge/__tests__/session-vault.property.test.ts +367 -0
  58. package/src/modules/omnibridge/__tests__/shadow-session.property.test.ts +523 -0
  59. package/src/modules/omnibridge/__tests__/triangulation-engine.property.test.ts +292 -0
  60. package/src/modules/omnibridge/__tests__/verification-engine.property.test.ts +769 -0
  61. package/src/modules/omnibridge/api/.gitkeep +1 -0
  62. package/src/modules/omnibridge/api/ghost-api.ts +1087 -0
  63. package/src/modules/omnibridge/auth/.gitkeep +1 -0
  64. package/src/modules/omnibridge/auth/auth-tunnel.ts +843 -0
  65. package/src/modules/omnibridge/auth/session-vault.ts +577 -0
  66. package/src/modules/omnibridge/core/.gitkeep +1 -0
  67. package/src/modules/omnibridge/core/semantic-normalizer.ts +702 -0
  68. package/src/modules/omnibridge/core/triangulation-engine.ts +530 -0
  69. package/src/modules/omnibridge/core/types.ts +610 -0
  70. package/src/modules/omnibridge/execution/.gitkeep +1 -0
  71. package/src/modules/omnibridge/execution/deterministic-logger.ts +629 -0
  72. package/src/modules/omnibridge/execution/parallel-executor.ts +542 -0
  73. package/src/modules/omnibridge/execution/shadow-session.ts +794 -0
  74. package/src/modules/omnibridge/index.ts +212 -0
  75. package/src/modules/omnibridge/omnibridge.ts +510 -0
  76. package/src/modules/omnibridge/verification/.gitkeep +1 -0
  77. package/src/modules/omnibridge/verification/verification-engine.ts +783 -0
  78. package/src/outcomes/README.md +75 -0
  79. package/src/outcomes/acquire-pilot-customer.ts +297 -0
  80. package/src/outcomes/code-delivery-outcomes.ts +89 -0
  81. package/src/outcomes/code-outcomes.ts +256 -0
  82. package/src/outcomes/code_review_battle.test.ts +135 -0
  83. package/src/outcomes/code_review_battle.ts +135 -0
  84. package/src/outcomes/cold_email_battle.ts +97 -0
  85. package/src/outcomes/content_creation_battle.ts +160 -0
  86. package/src/outcomes/f1_stem_opt_compliance.ts +61 -0
  87. package/src/outcomes/index.ts +107 -0
  88. package/src/outcomes/lead_gen_battle.test.ts +113 -0
  89. package/src/outcomes/lead_gen_battle.ts +99 -0
  90. package/src/outcomes/outcome.schema.property.test.ts +229 -0
  91. package/src/outcomes/outcome.schema.ts +187 -0
  92. package/src/outcomes/qualified_sales_interest.ts +118 -0
  93. package/src/outcomes/swarm_planner.property.test.ts +370 -0
  94. package/src/outcomes/swarm_planner.ts +96 -0
  95. package/src/outcomes/web_extraction.ts +234 -0
  96. package/src/runtime/README.md +220 -0
  97. package/src/runtime/agentRunner.test.ts +341 -0
  98. package/src/runtime/agentRunner.ts +746 -0
  99. package/src/runtime/claudeAdapter.ts +232 -0
  100. package/src/runtime/costTracker.ts +123 -0
  101. package/src/runtime/index.ts +34 -0
  102. package/src/runtime/modelAdapter.property.test.ts +305 -0
  103. package/src/runtime/modelAdapter.ts +144 -0
  104. package/src/runtime/openaiAdapter.ts +235 -0
  105. package/src/utils/README.md +122 -0
  106. package/src/utils/command-runner.ts +134 -0
  107. package/src/utils/cost-guard.ts +379 -0
  108. package/src/utils/errors.test.ts +290 -0
  109. package/src/utils/errors.ts +442 -0
  110. package/src/utils/index.ts +37 -0
  111. package/src/utils/logger.test.ts +361 -0
  112. package/src/utils/logger.ts +419 -0
  113. package/src/utils/output-parsers.ts +216 -0
@@ -0,0 +1,542 @@
1
+ /**
2
+ * OmniBridge Integration Tests
3
+ *
4
+ * End-to-end tests for OmniBridge WAIESL integration including:
5
+ * - Ghost-API flow
6
+ * - Competition flow with verification
7
+ * - Web league execution
8
+ *
9
+ * @module modules/omnibridge/__tests__/omnibridge-integration.test
10
+ * @see Requirements 7.1-7.6, 8.1-8.6
11
+ */
12
+
13
+ import { describe, test, expect, beforeEach } from 'vitest';
14
+
15
+ import {
16
+ OmniBridge,
17
+ createOmniBridge,
18
+ type GhostResponse,
19
+ type ActionLogEntry,
20
+ } from '../index.js';
21
+
22
+ import {
23
+ createWebExtractionOutcome,
24
+ type WebExtractionOutcome,
25
+ } from '../../../outcomes/web_extraction.js';
26
+
27
+ import { runWebLeagueMock } from '../../../league/runWebLeague.js';
28
+ import type { AgentConfig } from '../../../agents/agent.schema.js';
29
+ import type { Lead } from '../../../jobs/job.interface.js';
30
+
31
/**
 * Integration suite for the OmniBridge facade.
 *
 * Covers, in order: the Ghost-API define/execute flow, competition creation
 * and bounty execution, proof/comparison/confidence verification helpers,
 * HTML normalization, and capability-token issue/revoke.
 */
describe('OmniBridge Integration', () => {
  let omni: OmniBridge;

  // A new instance is built (with debug enabled) before every test.
  beforeEach(() => {
    omni = createOmniBridge({ debug: true });
  });

  describe('Ghost-API Flow', () => {
    test('defines goal and generates schema mapping', async () => {
      const ghostApi = omni.getGhostApi();

      const schema = await ghostApi.defineGoal({
        name: 'fetch_invoices',
        targetUrl: 'https://billing.example.com/invoices',
        description: 'Fetch all unpaid invoices',
      });

      expect(schema).toBeDefined();
      expect(schema.endpoint).toContain('fetch'); // Endpoint contains goal name parts
      expect(typeof schema.estimatedActions).toBe('number');
    });

    test('executes goal and returns structured response', async () => {
      const ghostApi = omni.getGhostApi();

      // A goal must be defined first; execute() is driven by the endpoint it returns.
      const schema = await ghostApi.defineGoal({
        name: 'test_extraction',
        targetUrl: 'https://example.com/data',
        description: 'Extract test data',
      });

      const response = await ghostApi.execute(schema.endpoint);

      expect(response).toBeDefined();
      expect(response.data).toBeDefined();
      expect(response.metadata).toBeDefined();
      // Confidence is expected to be a value in the closed range [0, 1].
      expect(response.metadata.confidence).toBeGreaterThanOrEqual(0);
      expect(response.metadata.confidence).toBeLessThanOrEqual(1);
      expect(typeof response.metadata.executionTimeMs).toBe('number');
      expect(typeof response.metadata.actionsPerformed).toBe('number');
      expect(typeof response.verificationHash).toBe('string');
    });

    test('response includes all required metadata fields', async () => {
      const ghostApi = omni.getGhostApi();

      const schema = await ghostApi.defineGoal({
        name: 'metadata_test',
        targetUrl: 'https://example.com',
        description: 'Test metadata fields',
      });

      const response = await ghostApi.execute(schema.endpoint);

      // Verify Property 8: Ghost-API Response Conformance
      expect(response.metadata).toHaveProperty('confidence');
      expect(response.metadata).toHaveProperty('executionTimeMs');
      expect(response.metadata).toHaveProperty('actionsPerformed');
      expect(response.metadata).toHaveProperty('triangulationHeals');
      expect(response).toHaveProperty('verificationHash');
    });
  });

  describe('Competition Flow', () => {
    test('creates competition with identical starting states', async () => {
      const result = await omni.createCompetition('test-bounty', 5);

      expect(result.competitionId).toBeDefined();
      expect(result.sessionCount).toBe(5);
      expect(result.identicalStartStates).toBe(true);
      expect(result.competition).toBeDefined();
      expect(result.competition.sessions).toHaveLength(5);
    });

    test('executes bounty with parallel agents', async () => {
      const competition = await omni.createCompetition('parallel-test', 3, 'example.com');

      // Two agents resolve with a response; the third always rejects.
      const agentTasks = [
        {
          agentId: 'agent-1',
          execute: async (): Promise<GhostResponse> => ({
            data: { result: 'success' },
            metadata: {
              confidence: 0.95,
              executionTimeMs: 100,
              actionsPerformed: 5,
              triangulationHeals: 0,
            },
            verificationHash: 'hash-1',
          }),
        },
        {
          agentId: 'agent-2',
          execute: async (): Promise<GhostResponse> => ({
            data: { result: 'success' },
            metadata: {
              confidence: 0.90,
              executionTimeMs: 150,
              actionsPerformed: 6,
              triangulationHeals: 1,
            },
            verificationHash: 'hash-2',
          }),
        },
        {
          agentId: 'agent-3',
          execute: async (): Promise<GhostResponse> => {
            throw new Error('Agent failed');
          },
        },
      ];

      const result = await omni.executeBounty(
        competition.competition,
        {
          targetUrl: 'https://example.com',
          description: 'Test extraction',
        },
        agentTasks
      );

      // First successful agent wins (agent-1 or agent-2 depending on execution
      // order), so the failing agent-3 must never be reported as the winner.
      expect(result.winnerId).toBeDefined();
      expect(['agent-1', 'agent-2']).toContain(result.winnerId);
      expect(result.results).toHaveLength(3);
      expect(result.executionTimeMs).toBeGreaterThanOrEqual(0);

      // Check that at least one agent completed successfully
      const completedAgents = result.results.filter(r => r.status === 'completed');
      expect(completedAgents.length).toBeGreaterThanOrEqual(1);

      // Check that agent-3 failed
      const agent3Result = result.results.find(r => r.agentId === 'agent-3');
      expect(agent3Result?.status).toBe('failed');
    });

    test('detects hallucinations in agent results', async () => {
      const competition = await omni.createCompetition('hallucination-test', 2, 'example.com');

      // Log a single action intended to belong to agent-1.
      // NOTE(review): assumes agent-1 runs in the session named 'session-0' - confirm.
      omni.logAction({
        timestamp: Date.now(),
        sessionId: 'session-0',
        action: 'click',
        intentId: 'ACTION_ID:SUBMIT',
        result: 'success',
      });

      const agentTasks = [
        {
          agentId: 'agent-1',
          execute: async (): Promise<GhostResponse> => ({
            // Claims to have extracted 100 items but only clicked once
            data: { items: Array(100).fill({ id: 'fake' }) },
            metadata: {
              confidence: 0.99,
              executionTimeMs: 50,
              actionsPerformed: 100, // Suspicious: claims 100 actions
              triangulationHeals: 0,
            },
            verificationHash: 'suspicious-hash',
          }),
        },
        {
          agentId: 'agent-2',
          execute: async (): Promise<GhostResponse> => ({
            data: { items: [{ id: 'real' }] },
            metadata: {
              confidence: 0.85,
              executionTimeMs: 200,
              actionsPerformed: 3,
              triangulationHeals: 0,
            },
            verificationHash: 'honest-hash',
          }),
        },
      ];

      const result = await omni.executeBounty(
        competition.competition,
        {
          targetUrl: 'https://example.com',
          description: 'Test hallucination detection',
        },
        agentTasks
      );

      // The system should process results (hallucination detection is internal).
      // TODO(review): nothing here asserts that agent-1 was actually flagged;
      // add an assertion once the bounty result exposes a hallucination signal.
      expect(result).toBeDefined();
      expect(result.results).toHaveLength(2);
    });
  });

  describe('Verification Flow', () => {
    test('generates cryptographic proof for session', () => {
      const sessionId = 'proof-test-session';
      // Single base timestamp so the three entries are exactly 100 ms apart.
      const startedAt = Date.now();

      omni.logAction({
        timestamp: startedAt,
        sessionId,
        action: 'navigate',
        intentId: 'NAV_ID:HOME',
        result: 'success',
      });

      omni.logAction({
        timestamp: startedAt + 100,
        sessionId,
        action: 'click',
        intentId: 'ACTION_ID:LOGIN',
        result: 'success',
      });

      omni.logAction({
        timestamp: startedAt + 200,
        sessionId,
        action: 'extract',
        intentId: 'DISPLAY_ID:DATA',
        result: 'success',
      });

      const proof = omni.generateProof(sessionId);

      expect(proof).toBeDefined();
      expect(proof.sessionId).toBe(sessionId);
      // One proof entry is expected per logged action.
      expect(proof.actionCount).toBe(3);
      expect(typeof proof.hash).toBe('string');
      expect(proof.hash.length).toBeGreaterThan(0);
    });

    test('compares action logs of two agents', () => {
      const agentA = 'compare-agent-a';
      const agentB = 'compare-agent-b';

      // Agent A actions
      omni.logAction({
        timestamp: 1000,
        sessionId: agentA,
        action: 'navigate',
        intentId: 'NAV_ID:START',
        result: 'success',
      });
      omni.logAction({
        timestamp: 1100,
        sessionId: agentA,
        action: 'click',
        intentId: 'ACTION_ID:BUTTON_A',
        result: 'success',
      });

      // Agent B actions (diverges at second action)
      omni.logAction({
        timestamp: 1000,
        sessionId: agentB,
        action: 'navigate',
        intentId: 'NAV_ID:START',
        result: 'success',
      });
      omni.logAction({
        timestamp: 1100,
        sessionId: agentB,
        action: 'click',
        intentId: 'ACTION_ID:BUTTON_B', // Different button
        result: 'success',
      });

      const comparison = omni.compareAgents(agentA, agentB);

      expect(comparison).toBeDefined();
      expect(comparison.analysis).toBeDefined();
      expect(comparison.analysis?.divergencePoint).toBeGreaterThanOrEqual(0);
    });

    test('calculates confidence score for results', () => {
      // Single base timestamp so the two entries are exactly 100 ms apart.
      const startedAt = Date.now();
      const actionLog: ActionLogEntry[] = [
        {
          timestamp: startedAt,
          sessionId: 'confidence-test',
          action: 'navigate',
          intentId: 'NAV_ID:PAGE',
          result: 'success',
        },
        {
          timestamp: startedAt + 100,
          sessionId: 'confidence-test',
          action: 'extract',
          intentId: 'DISPLAY_ID:TABLE',
          result: 'success',
        },
      ];

      const claimedResult = {
        data: [{ id: 1 }, { id: 2 }],
      };

      const confidenceResult = omni.calculateConfidence(claimedResult, actionLog);

      expect(confidenceResult).toBeDefined();
      expect(confidenceResult.score).toBeGreaterThanOrEqual(0);
      expect(confidenceResult.score).toBeLessThanOrEqual(1);
      expect(confidenceResult.isReliable).toBeDefined();
    });
  });

  describe('Semantic Normalization', () => {
    test('normalizes HTML to Intent Document', () => {
      const html = `
<html>
<head><style>.btn { color: red; }</style></head>
<body>
<button aria-label="Submit Form" class="btn primary">Submit</button>
<input type="email" placeholder="Enter email" required />
</body>
</html>
`;

      const document = omni.normalize(html);

      expect(document).toBeDefined();
      expect(document.elements).toBeDefined();
      // Token reduction can be negative for very small HTML with lots of semantic content
      expect(typeof document.tokenReduction).toBe('number');
    });
  });

  describe('Capability Tokens', () => {
    test('issues and revokes capability tokens', async () => {
      // First store a session for the domain using RawSessionData format
      const vault = omni.getSessionVault();
      await vault.store('example.com', {
        cookies: 'test-cookie-data',
        localStorage: 'test-local-storage',
        sessionStorage: 'test-session-storage',
      });

      const token = await omni.issueCapabilityToken('example.com', {
        allowedActions: ['read', 'extract'],
        blockedActions: ['delete', 'modify'],
        maxExecutions: 10,
        expiresAt: Date.now() + 3600000, // one hour from now
      });

      expect(token).toBeDefined();
      expect(token.id).toBeDefined();
      expect(token.domain).toBe('example.com');
      expect(token.scope.allowedActions).toContain('read');
      expect(token.scope.blockedActions).toContain('delete');

      // Revoking a freshly issued token must not throw.
      // TODO(review): the post-revocation state is not verified here; assert
      // that the token is rejected once a validity check is available.
      expect(() => omni.revokeCapabilityToken(token.id)).not.toThrow();
    });
  });
});
388
+
389
/**
 * Integration suite for the mock web league runner (runWebLeagueMock).
 *
 * NOTE(review): runWebLeagueMock is imported from league/runWebLeague.js and
 * the Lead type from jobs/job.interface.js; neither path appears in the
 * package's published file list. Confirm those modules ship with the package,
 * otherwise this test file cannot be loaded.
 */
describe('Web League Integration', () => {
  // Three agent fixtures; they differ only in id, name, provider and model.
  const mockAgentConfigs: AgentConfig[] = [
    {
      id: 'test-agent-1',
      name: 'Test Agent 1',
      modelProvider: 'openai',
      modelId: 'gpt-4',
      prompt: 'You are a test agent for web extraction',
      strategyDescription: 'Test strategy for web extraction',
      toolAccess: ['web_extract', 'navigate'],
      costCeiling: 1000,
    },
    {
      id: 'test-agent-2',
      name: 'Test Agent 2',
      modelProvider: 'claude',
      modelId: 'claude-3-sonnet',
      prompt: 'You are a test agent for web extraction',
      strategyDescription: 'Test strategy for web extraction',
      toolAccess: ['web_extract', 'navigate'],
      costCeiling: 1000,
    },
    {
      id: 'test-agent-3',
      name: 'Test Agent 3',
      modelProvider: 'openai',
      modelId: 'gpt-4-turbo',
      prompt: 'You are a test agent for web extraction',
      strategyDescription: 'Test strategy for web extraction',
      toolAccess: ['web_extract', 'navigate'],
      costCeiling: 1000,
    },
  ];

  const mockLead: Lead = {
    email: 'test@example.com',
    company: 'Test Company',
    companySize: 100,
    role: 'Engineer',
    previousInteractions: [],
  };

  const mockOutcome: WebExtractionOutcome = createWebExtractionOutcome({
    name: 'test_extraction',
    description: 'Test web extraction',
    payoutAmount: 50,
    targetUrl: 'https://example.com/data',
    goalDescription: 'Extract test data from the page',
    outputSchema: {
      type: 'object',
      properties: {
        items: { type: 'array' },
        count: { type: 'number' },
      },
    },
  });

  /**
   * Builds the options object passed to runWebLeagueMock.
   *
   * Only the outcome id, agent count and (optionally) the agent config list
   * vary between tests; spend ceiling, outcome and lead are shared fixtures.
   *
   * @param outcomeId - Identifier forwarded as `outcomeId`.
   * @param agentCount - Number of agents requested for the league.
   * @param agentConfigs - Agent configs to offer; defaults to all three fixtures.
   * @returns The league options object.
   */
  const leagueOptions = (
    outcomeId: string,
    agentCount: number,
    agentConfigs: AgentConfig[] = mockAgentConfigs
  ) => ({
    outcomeId,
    agentCount,
    globalSpendCeiling: 50000,
    agentConfigs,
    outcome: mockOutcome,
    lead: mockLead,
  });

  test('runs web league in mock mode', async () => {
    const result = await runWebLeagueMock(leagueOptions('test_extraction', 3));

    expect(result).toBeDefined();
    expect(result.agents).toHaveLength(3);
    expect(result.duration).toBeGreaterThan(0);
    expect(typeof result.hallucinationsDetected).toBe('boolean');
    expect(result.confidence).toBeGreaterThanOrEqual(0);
  });

  test('determines winner from parallel execution', async () => {
    const result = await runWebLeagueMock(
      leagueOptions('winner_test', 2, mockAgentConfigs.slice(0, 2))
    );

    // In mock mode, we should have results
    expect(result).toBeDefined();
    expect(result.agents).toHaveLength(2);

    // Check that at least one agent has a result. The bound is 1, not 0:
    // a length is never negative, so ">= 0" could not fail.
    const agentsWithResults = result.agents.filter(a => a.response !== undefined);
    expect(agentsWithResults.length).toBeGreaterThanOrEqual(1);
  });

  test('validates agent count configuration', async () => {
    await expect(
      runWebLeagueMock(leagueOptions('invalid_count', 0))
    ).rejects.toThrow('Agent count must be positive');
  });

  test('validates agent configs availability', async () => {
    // Ten agents are requested while only the three fixture configs are offered.
    await expect(
      runWebLeagueMock(leagueOptions('not_enough_agents', 10))
    ).rejects.toThrow('Not enough agent configs');
  });

  test('tracks action logs for each agent', async () => {
    const result = await runWebLeagueMock(
      leagueOptions('action_log_test', 2, mockAgentConfigs.slice(0, 2))
    );

    // Each agent should have an action log
    for (const agent of result.agents) {
      expect(agent.actionLog).toBeDefined();
      expect(Array.isArray(agent.actionLog)).toBe(true);
    }
  });

  test('includes verification proof for winner', async () => {
    const result = await runWebLeagueMock(
      leagueOptions('proof_test', 2, mockAgentConfigs.slice(0, 2))
    );

    // Proof checks only apply when the run produced a winner; a run without
    // a winner passes this test without asserting anything.
    if (result.winnerId) {
      expect(result.winnerProof).toBeDefined();

      const winner = result.agents.find(a => a.agentId === result.winnerId);
      expect(winner?.proof).toBeDefined();
    }
  });
});