outcome-cli 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113):
  1. package/README.md +261 -0
  2. package/package.json +95 -0
  3. package/src/agents/README.md +139 -0
  4. package/src/agents/adapters/anthropic.adapter.ts +166 -0
  5. package/src/agents/adapters/dalle.adapter.ts +145 -0
  6. package/src/agents/adapters/gemini.adapter.ts +134 -0
  7. package/src/agents/adapters/imagen.adapter.ts +106 -0
  8. package/src/agents/adapters/nano-banana.adapter.ts +129 -0
  9. package/src/agents/adapters/openai.adapter.ts +165 -0
  10. package/src/agents/adapters/veo.adapter.ts +130 -0
  11. package/src/agents/agent.schema.property.test.ts +379 -0
  12. package/src/agents/agent.schema.test.ts +148 -0
  13. package/src/agents/agent.schema.ts +263 -0
  14. package/src/agents/index.ts +60 -0
  15. package/src/agents/registered-agent.schema.ts +356 -0
  16. package/src/agents/registry.ts +97 -0
  17. package/src/agents/tournament-configs.property.test.ts +266 -0
  18. package/src/cli/README.md +145 -0
  19. package/src/cli/commands/define.ts +79 -0
  20. package/src/cli/commands/list.ts +46 -0
  21. package/src/cli/commands/logs.ts +83 -0
  22. package/src/cli/commands/run.ts +416 -0
  23. package/src/cli/commands/verify.ts +110 -0
  24. package/src/cli/index.ts +81 -0
  25. package/src/config/README.md +128 -0
  26. package/src/config/env.ts +262 -0
  27. package/src/config/index.ts +19 -0
  28. package/src/eval/README.md +318 -0
  29. package/src/eval/ai-judge.test.ts +435 -0
  30. package/src/eval/ai-judge.ts +368 -0
  31. package/src/eval/code-validators.ts +414 -0
  32. package/src/eval/evaluateOutcome.property.test.ts +1174 -0
  33. package/src/eval/evaluateOutcome.ts +591 -0
  34. package/src/eval/immigration-validators.ts +122 -0
  35. package/src/eval/index.ts +90 -0
  36. package/src/eval/judge-cache.ts +402 -0
  37. package/src/eval/tournament-validators.property.test.ts +439 -0
  38. package/src/eval/validators.property.test.ts +1118 -0
  39. package/src/eval/validators.ts +1199 -0
  40. package/src/eval/weighted-scorer.ts +285 -0
  41. package/src/index.ts +17 -0
  42. package/src/league/README.md +188 -0
  43. package/src/league/health-check.ts +353 -0
  44. package/src/league/index.ts +93 -0
  45. package/src/league/killAgent.ts +151 -0
  46. package/src/league/league.test.ts +1151 -0
  47. package/src/league/runLeague.ts +843 -0
  48. package/src/league/scoreAgent.ts +175 -0
  49. package/src/modules/omnibridge/__tests__/.gitkeep +1 -0
  50. package/src/modules/omnibridge/__tests__/auth-tunnel.property.test.ts +524 -0
  51. package/src/modules/omnibridge/__tests__/deterministic-logger.property.test.ts +965 -0
  52. package/src/modules/omnibridge/__tests__/ghost-api.property.test.ts +461 -0
  53. package/src/modules/omnibridge/__tests__/omnibridge-integration.test.ts +542 -0
  54. package/src/modules/omnibridge/__tests__/parallel-executor.property.test.ts +671 -0
  55. package/src/modules/omnibridge/__tests__/semantic-normalizer.property.test.ts +521 -0
  56. package/src/modules/omnibridge/__tests__/semantic-normalizer.test.ts +254 -0
  57. package/src/modules/omnibridge/__tests__/session-vault.property.test.ts +367 -0
  58. package/src/modules/omnibridge/__tests__/shadow-session.property.test.ts +523 -0
  59. package/src/modules/omnibridge/__tests__/triangulation-engine.property.test.ts +292 -0
  60. package/src/modules/omnibridge/__tests__/verification-engine.property.test.ts +769 -0
  61. package/src/modules/omnibridge/api/.gitkeep +1 -0
  62. package/src/modules/omnibridge/api/ghost-api.ts +1087 -0
  63. package/src/modules/omnibridge/auth/.gitkeep +1 -0
  64. package/src/modules/omnibridge/auth/auth-tunnel.ts +843 -0
  65. package/src/modules/omnibridge/auth/session-vault.ts +577 -0
  66. package/src/modules/omnibridge/core/.gitkeep +1 -0
  67. package/src/modules/omnibridge/core/semantic-normalizer.ts +702 -0
  68. package/src/modules/omnibridge/core/triangulation-engine.ts +530 -0
  69. package/src/modules/omnibridge/core/types.ts +610 -0
  70. package/src/modules/omnibridge/execution/.gitkeep +1 -0
  71. package/src/modules/omnibridge/execution/deterministic-logger.ts +629 -0
  72. package/src/modules/omnibridge/execution/parallel-executor.ts +542 -0
  73. package/src/modules/omnibridge/execution/shadow-session.ts +794 -0
  74. package/src/modules/omnibridge/index.ts +212 -0
  75. package/src/modules/omnibridge/omnibridge.ts +510 -0
  76. package/src/modules/omnibridge/verification/.gitkeep +1 -0
  77. package/src/modules/omnibridge/verification/verification-engine.ts +783 -0
  78. package/src/outcomes/README.md +75 -0
  79. package/src/outcomes/acquire-pilot-customer.ts +297 -0
  80. package/src/outcomes/code-delivery-outcomes.ts +89 -0
  81. package/src/outcomes/code-outcomes.ts +256 -0
  82. package/src/outcomes/code_review_battle.test.ts +135 -0
  83. package/src/outcomes/code_review_battle.ts +135 -0
  84. package/src/outcomes/cold_email_battle.ts +97 -0
  85. package/src/outcomes/content_creation_battle.ts +160 -0
  86. package/src/outcomes/f1_stem_opt_compliance.ts +61 -0
  87. package/src/outcomes/index.ts +107 -0
  88. package/src/outcomes/lead_gen_battle.test.ts +113 -0
  89. package/src/outcomes/lead_gen_battle.ts +99 -0
  90. package/src/outcomes/outcome.schema.property.test.ts +229 -0
  91. package/src/outcomes/outcome.schema.ts +187 -0
  92. package/src/outcomes/qualified_sales_interest.ts +118 -0
  93. package/src/outcomes/swarm_planner.property.test.ts +370 -0
  94. package/src/outcomes/swarm_planner.ts +96 -0
  95. package/src/outcomes/web_extraction.ts +234 -0
  96. package/src/runtime/README.md +220 -0
  97. package/src/runtime/agentRunner.test.ts +341 -0
  98. package/src/runtime/agentRunner.ts +746 -0
  99. package/src/runtime/claudeAdapter.ts +232 -0
  100. package/src/runtime/costTracker.ts +123 -0
  101. package/src/runtime/index.ts +34 -0
  102. package/src/runtime/modelAdapter.property.test.ts +305 -0
  103. package/src/runtime/modelAdapter.ts +144 -0
  104. package/src/runtime/openaiAdapter.ts +235 -0
  105. package/src/utils/README.md +122 -0
  106. package/src/utils/command-runner.ts +134 -0
  107. package/src/utils/cost-guard.ts +379 -0
  108. package/src/utils/errors.test.ts +290 -0
  109. package/src/utils/errors.ts +442 -0
  110. package/src/utils/index.ts +37 -0
  111. package/src/utils/logger.test.ts +361 -0
  112. package/src/utils/logger.ts +419 -0
  113. package/src/utils/output-parsers.ts +216 -0
@@ -0,0 +1,629 @@
1
+ /**
2
+ * Deterministic Logger
3
+ *
4
+ * Records every DOM interaction for verification and audit purposes.
5
+ * Provides cryptographic proofs of action sequences and supports
6
+ * diff analysis between competing agents.
7
+ *
8
+ * Requirements: 7.3, 8.1, 8.2, 8.3, 8.4, 8.5, 8.6
9
+ */
10
+
11
+ import { createHash } from 'node:crypto';
12
+ import type {
13
+ ActionLogEntry,
14
+ VerificationProof,
15
+ DiffAnalysis,
16
+ } from '../core/types.js';
17
+
18
+ // =============================================================================
19
+ // Types
20
+ // =============================================================================
21
+
22
/**
 * Options accepted by the Deterministic Logger.
 *
 * Callers may supply any subset; the logger's constructor fills the gaps
 * from its built-in defaults.
 */
export interface DeterministicLoggerConfig {
  /** Digest algorithm used when hashing an action sequence into a proof. */
  hashAlgorithm: 'sha256' | 'sha384' | 'sha512';
  /**
   * Whether screenshots attached to logged entries are tracked as
   * key-decision-point metadata.
   */
  captureScreenshots: boolean;
  /**
   * Upper bound on tracked screenshots per session. Once reached,
   * `logActionWithScreenshot` logs the action without attaching the image.
   */
  maxScreenshotsPerSession: number;
}
33
+
34
/**
 * Outcome returned by the logger's action-logging methods.
 *
 * Exactly one of `entry` (on success) or `error` (on failure) is populated
 * by the logger.
 */
export interface LogActionResult {
  /** `true` when the action was appended to the session log. */
  success: boolean;
  /** The stored entry, including the timestamp assigned at logging time. */
  entry?: ActionLogEntry;
  /** Human-readable reason the action was rejected or could not be stored. */
  error?: string;
}
45
+
46
/**
 * Outcome returned when a verification proof is requested for a session.
 */
export interface GenerateProofResult {
  /** `true` when a proof was produced. */
  success: boolean;
  /** The proof itself; present only when `success` is `true`. */
  proof?: VerificationProof;
  /** Explanation of why no proof could be produced. */
  error?: string;
}
57
+
58
/**
 * Outcome returned when the action logs of two agents are diffed.
 */
export interface CompareAgentsResult {
  /** `true` when the comparison ran to completion. */
  success: boolean;
  /** Divergence details and recommendation; present only on success. */
  analysis?: DiffAnalysis;
  /** Explanation of why the comparison could not be performed. */
  error?: string;
}
69
+
70
/**
 * Categories of key decision points recorded alongside a screenshot.
 */
export type KeyDecisionPoint =
  | 'authentication'
  | 'mfa_challenge'
  | 'navigation'
  | 'form_submit'
  | 'data_extraction'
  | 'error_recovery';
80
+
81
/**
 * Bookkeeping record kept for each screenshot attached to a logged action.
 * The image itself lives on the action entry; this only describes it.
 */
export interface ScreenshotMetadata {
  /** Timestamp copied from the action entry that carried the screenshot. */
  timestamp: number;
  /** Session the screenshot belongs to. */
  sessionId: string;
  /** Zero-based position of the triggering action within the session log. */
  actionIndex: number;
  /** Decision-point category recorded for the screenshot. */
  decisionPoint: KeyDecisionPoint;
  /** Intent ID of the element the action targeted. */
  intentId: string;
}
96
+
97
+ // =============================================================================
98
+ // Default Configuration
99
+ // =============================================================================
100
+
101
/**
 * Baseline settings merged underneath whatever the caller passes to the
 * logger's constructor.
 */
const DEFAULT_CONFIG: DeterministicLoggerConfig = {
  // Cap on tracked screenshots per session.
  maxScreenshotsPerSession: 50,
  // Screenshot metadata tracking is on unless explicitly disabled.
  captureScreenshots: true,
  // Digest used for action-sequence proofs.
  hashAlgorithm: 'sha256',
};
106
+
107
+ // =============================================================================
108
+ // Deterministic Logger Implementation
109
+ // =============================================================================
110
+
111
/**
 * Deterministic Logger for recording and verifying agent actions.
 *
 * Holds, in memory and keyed by session ID:
 *  - the ordered list of logged action entries, and
 *  - metadata describing each screenshot attached to those entries.
 *
 * On top of that it can hash a session's action sequence into a proof,
 * re-check such a proof against the current log, and diff the logs of two
 * sessions to locate where they diverge.
 */
export class DeterministicLogger {
  private readonly config: DeterministicLoggerConfig;
  private readonly logs: Map<string, ActionLogEntry[]>;
  private readonly screenshots: Map<string, ScreenshotMetadata[]>;

  /**
   * @param config Partial overrides; unspecified options fall back to
   *   `DEFAULT_CONFIG`.
   */
  constructor(config: Partial<DeterministicLoggerConfig> = {}) {
    this.config = { ...DEFAULT_CONFIG, ...config };
    this.logs = new Map();
    this.screenshots = new Map();
  }

  // ===========================================================================
  // Action Logging
  // ===========================================================================

  /**
   * Log a DOM interaction action.
   *
   * The entry is stamped with `Date.now()` and appended to its session's log.
   * If the entry carries a screenshot and `captureScreenshots` is enabled,
   * screenshot metadata is tracked with a decision point inferred from the
   * entry.
   *
   * Requirements: 7.3, 8.1, 8.3
   *
   * @param entry Action to record (without a timestamp).
   * @returns `success: false` with an error message when `sessionId`,
   *   `intentId` or `action` is missing/empty, or when storing throws;
   *   otherwise `success: true` with the stored entry.
   */
  logAction(entry: Omit<ActionLogEntry, 'timestamp'>): LogActionResult {
    return this.appendEntry(entry);
  }

  /**
   * Log an action with a screenshot at a key decision point.
   *
   * When the session already has `maxScreenshotsPerSession` tracked
   * screenshots, the action is logged as given, without attaching
   * `screenshot`. Otherwise the screenshot is attached and its metadata is
   * recorded with the caller-supplied `decisionPoint` (rather than a guess
   * derived from the intent ID).
   *
   * Requirements: 8.3
   *
   * @param entry Action to record (without a timestamp).
   * @param screenshot Screenshot payload to attach to the entry.
   * @param decisionPoint Category of decision point the screenshot documents.
   * @returns Same contract as {@link logAction}.
   */
  logActionWithScreenshot(
    entry: Omit<ActionLogEntry, 'timestamp'>,
    screenshot: string,
    decisionPoint: KeyDecisionPoint
  ): LogActionResult {
    const tracked = this.screenshots.get(entry.sessionId) ?? [];
    if (tracked.length >= this.config.maxScreenshotsPerSession) {
      // Limit reached: still record the action, just without this screenshot.
      return this.appendEntry(entry);
    }

    return this.appendEntry({ ...entry, screenshot }, decisionPoint);
  }

  /**
   * Shared implementation behind the public logging methods.
   *
   * @param entry Action to record (without a timestamp).
   * @param decisionPoint Explicit decision point for screenshot metadata;
   *   when omitted it is inferred from the entry.
   */
  private appendEntry(
    entry: Omit<ActionLogEntry, 'timestamp'>,
    decisionPoint?: KeyDecisionPoint
  ): LogActionResult {
    try {
      // Validate required fields
      if (!entry.sessionId || !entry.intentId || !entry.action) {
        return {
          success: false,
          error: 'Missing required fields: sessionId, intentId, or action',
        };
      }

      const completeEntry: ActionLogEntry = {
        ...entry,
        timestamp: Date.now(),
      };

      // Get or create the log for this session
      const sessionLog = this.logs.get(entry.sessionId) ?? [];
      sessionLog.push(completeEntry);
      this.logs.set(entry.sessionId, sessionLog);

      // Metadata is only tracked when enabled; the screenshot itself stays
      // on the entry either way.
      if (completeEntry.screenshot && this.config.captureScreenshots) {
        this.trackScreenshot(
          completeEntry,
          sessionLog.length - 1,
          decisionPoint
        );
      }

      return { success: true, entry: completeEntry };
    } catch (error) {
      return {
        success: false,
        error: error instanceof Error ? error.message : 'Unknown error',
      };
    }
  }

  /**
   * Track screenshot metadata.
   *
   * @param entry Stored entry that carries the screenshot.
   * @param actionIndex Index of `entry` within its session log.
   * @param decisionPoint Explicit decision point; inferred when omitted.
   */
  private trackScreenshot(
    entry: ActionLogEntry,
    actionIndex: number,
    decisionPoint?: KeyDecisionPoint
  ): void {
    const metadata: ScreenshotMetadata = {
      timestamp: entry.timestamp,
      sessionId: entry.sessionId,
      actionIndex,
      decisionPoint: decisionPoint ?? this.inferDecisionPoint(entry),
      intentId: entry.intentId,
    };

    const sessionScreenshots = this.screenshots.get(entry.sessionId) ?? [];
    sessionScreenshots.push(metadata);
    this.screenshots.set(entry.sessionId, sessionScreenshots);
  }

  /**
   * Infer the decision point type from an action entry.
   *
   * Heuristic based on substrings of the lower-cased intent ID, the action
   * name and the result. Checks run in a fixed order and the first match
   * wins; when nothing matches, `'data_extraction'` is returned.
   */
  private inferDecisionPoint(entry: ActionLogEntry): KeyDecisionPoint {
    const intentLower = entry.intentId.toLowerCase();

    if (intentLower.includes('submit') || intentLower.includes('form')) {
      return 'form_submit';
    }
    if (entry.action === 'navigate' || intentLower.includes('nav')) {
      return 'navigation';
    }
    if (intentLower.includes('login') || intentLower.includes('auth')) {
      return 'authentication';
    }
    if (entry.action === 'extract' || intentLower.includes('data')) {
      return 'data_extraction';
    }
    if (intentLower.includes('mfa') || intentLower.includes('2fa')) {
      return 'mfa_challenge';
    }
    if (entry.result === 'failure') {
      return 'error_recovery';
    }

    return 'data_extraction'; // Default
  }

  /**
   * Get the action log for a session.
   *
   * For a known session this returns the logger's own array (not a copy),
   * so mutating it mutates the stored log. Unknown sessions yield a fresh
   * empty array.
   *
   * Requirements: 7.3, 8.1
   */
  getLog(sessionId: string): ActionLogEntry[] {
    return this.logs.get(sessionId) ?? [];
  }

  /**
   * Get all screenshots for a session, in log order. Entries without a
   * (truthy) screenshot are skipped.
   */
  getScreenshots(sessionId: string): string[] {
    return this.getLog(sessionId).flatMap((entry) =>
      entry.screenshot ? [entry.screenshot] : []
    );
  }

  /**
   * Get screenshot metadata for a session (empty array if none tracked).
   */
  getScreenshotMetadata(sessionId: string): ScreenshotMetadata[] {
    return this.screenshots.get(sessionId) ?? [];
  }

  // ===========================================================================
  // Cryptographic Proof Generation
  // ===========================================================================

  /**
   * Generate a cryptographic proof of the action sequence.
   *
   * Requirements: 8.2
   *
   * @param sessionId Session whose log is hashed.
   * @param claimedResult Result the agent claims; stored on the proof and
   *   checked for plausibility via {@link validateResultAgainstActions}.
   * @returns `success: false` when the session has no logged actions or an
   *   error is thrown; otherwise the proof.
   */
  generateProof(sessionId: string, claimedResult: unknown): GenerateProofResult {
    try {
      const log = this.getLog(sessionId);

      if (log.length === 0) {
        return {
          success: false,
          error: `No action log found for session: ${sessionId}`,
        };
      }

      const proof: VerificationProof = {
        sessionId,
        actionCount: log.length,
        hash: this.hashActionSequence(log),
        screenshots: this.getScreenshots(sessionId),
        claimedResult,
        resultMatchesActions: this.validateResultAgainstActions(
          claimedResult,
          log
        ),
      };

      return { success: true, proof };
    } catch (error) {
      return {
        success: false,
        error: error instanceof Error ? error.message : 'Unknown error',
      };
    }
  }

  /**
   * Hash an action sequence to create a cryptographic proof.
   *
   * Each entry is serialized (see {@link serializeActionEntry}), the pieces
   * are joined with `|`, and the result is digested with the configured
   * algorithm.
   *
   * Requirements: 8.2
   *
   * @returns Hex-encoded digest.
   */
  hashActionSequence(log: ActionLogEntry[]): string {
    const serialized = log
      .map((entry) => this.serializeActionEntry(entry))
      .join('|');

    return createHash(this.config.hashAlgorithm)
      .update(serialized)
      .digest('hex');
  }

  /**
   * Serialize an action entry for hashing.
   * Excludes screenshots and timestamps to keep hash deterministic and
   * focused on actions. The session ID is included.
   */
  private serializeActionEntry(entry: ActionLogEntry): string {
    return JSON.stringify({
      // Note: timestamp excluded for deterministic hashing
      sessionId: entry.sessionId,
      action: entry.action,
      intentId: entry.intentId,
      value: entry.value,
      result: entry.result,
    });
  }

  /**
   * Validate that a claimed result can be derived from the action log.
   *
   * Rules, in order:
   *  1. A `null`/`undefined` claim is always valid (nothing to verify).
   *  2. A claim against an empty log is invalid.
   *  3. An object claim with no `extract` action in the log is invalid if
   *     it has a `data` property or any key other than `metadata`,
   *     `verificationHash` or `confidence`.
   *  4. A claim is invalid if the log has failures and no successes.
   *
   * Requirements: 8.5
   */
  validateResultAgainstActions(
    claimedResult: unknown,
    log: ActionLogEntry[]
  ): boolean {
    if (claimedResult === null || claimedResult === undefined) {
      return true;
    }

    if (log.length === 0) {
      return false;
    }

    const hasExtract = log.some((entry) => entry.action === 'extract');

    // Claiming data without any extract action is treated as a hallucination.
    if (typeof claimedResult === 'object' && !hasExtract) {
      const resultObj = claimedResult as Record<string, unknown>;
      const nonDataKeys = ['metadata', 'verificationHash', 'confidence'];
      const hasDataFields =
        'data' in resultObj ||
        Object.keys(resultObj).some((key) => !nonDataKeys.includes(key));

      if (hasDataFields) {
        return false;
      }
    }

    // If every attempted action failed, the result cannot be valid.
    const anySuccess = log.some((entry) => entry.result === 'success');
    const anyFailure = log.some((entry) => entry.result === 'failure');
    if (!anySuccess && anyFailure) {
      return false;
    }

    return true;
  }

  /**
   * Verify that a proof hash matches the current action log.
   *
   * Requirements: 8.2
   *
   * @returns `true` only when the session's current log has the same length
   *   as `proof.actionCount` and hashes to `proof.hash`.
   */
  verifyProof(proof: VerificationProof): boolean {
    const log = this.getLog(proof.sessionId);

    if (log.length !== proof.actionCount) {
      return false;
    }

    return this.hashActionSequence(log) === proof.hash;
  }

  // ===========================================================================
  // Diff Analysis
  // ===========================================================================

  /**
   * Compare action logs of two agents to find divergence.
   *
   * Requirements: 8.4
   *
   * @returns `success: false` when both logs are empty or an error is
   *   thrown; otherwise the divergence index, each agent's entries from that
   *   index onward, and a recommendation.
   */
  compareAgents(agentASessionId: string, agentBSessionId: string): CompareAgentsResult {
    try {
      const logA = this.getLog(agentASessionId);
      const logB = this.getLog(agentBSessionId);

      if (logA.length === 0 && logB.length === 0) {
        return {
          success: false,
          error: 'Both agents have empty action logs',
        };
      }

      const divergencePoint = this.findDivergencePoint(logA, logB);

      const analysis: DiffAnalysis = {
        divergencePoint,
        agentAPath: logA.slice(divergencePoint),
        agentBPath: logB.slice(divergencePoint),
        recommendation: this.determineRecommendation(
          logA,
          logB,
          divergencePoint
        ),
      };

      return { success: true, analysis };
    } catch (error) {
      return {
        success: false,
        error: error instanceof Error ? error.message : 'Unknown error',
      };
    }
  }

  /**
   * Find the index where two action logs diverge.
   *
   * Returns the first index at which the entries differ. If one log is a
   * prefix of the other (or they are identical), returns the length of the
   * shorter log.
   */
  private findDivergencePoint(
    logA: ActionLogEntry[],
    logB: ActionLogEntry[]
  ): number {
    const sharedLength = Math.min(logA.length, logB.length);

    for (let i = 0; i < sharedLength; i++) {
      const a = logA[i];
      const b = logB[i];
      if (a === undefined || b === undefined || !this.actionsMatch(a, b)) {
        return i;
      }
    }

    return sharedLength;
  }

  /**
   * Check if two action entries match (ignoring timestamps, session IDs and
   * screenshots). Fields are compared with strict equality.
   */
  private actionsMatch(a: ActionLogEntry, b: ActionLogEntry): boolean {
    return (
      a.action === b.action &&
      a.intentId === b.intentId &&
      a.value === b.value &&
      a.result === b.result
    );
  }

  /**
   * Determine recommendation based on action log analysis.
   *
   * Order of checks:
   *  1. Logs consisting solely of failures lose; if both do, both are
   *     invalid. (`every` is vacuously true for an empty log, so an empty
   *     log is treated the same way.)
   *  2. More successful actions after the divergence point wins.
   *  3. Fewer failures over the whole log wins.
   *  4. Otherwise it is a tie.
   */
  private determineRecommendation(
    logA: ActionLogEntry[],
    logB: ActionLogEntry[],
    divergencePoint: number
  ): 'agent_a' | 'agent_b' | 'tie' | 'both_invalid' {
    const isFailure = (e: ActionLogEntry): boolean => e.result === 'failure';
    const isSuccess = (e: ActionLogEntry): boolean => e.result === 'success';

    const aAllFailed = logA.every(isFailure);
    const bAllFailed = logB.every(isFailure);

    if (aAllFailed && bAllFailed) {
      return 'both_invalid';
    }
    if (aAllFailed) {
      return 'agent_b';
    }
    if (bAllFailed) {
      return 'agent_a';
    }

    // Compare successes after the point where the agents diverged.
    const successA = logA.slice(divergencePoint).filter(isSuccess).length;
    const successB = logB.slice(divergencePoint).filter(isSuccess).length;
    if (successA !== successB) {
      return successA > successB ? 'agent_a' : 'agent_b';
    }

    // Equal successes: prefer the agent with fewer failures overall.
    const failuresA = logA.filter(isFailure).length;
    const failuresB = logB.filter(isFailure).length;
    if (failuresA !== failuresB) {
      return failuresA < failuresB ? 'agent_a' : 'agent_b';
    }

    return 'tie';
  }

  // ===========================================================================
  // Utility Methods
  // ===========================================================================

  /**
   * Clear the log and screenshot metadata for a session.
   */
  clearLog(sessionId: string): void {
    this.logs.delete(sessionId);
    this.screenshots.delete(sessionId);
  }

  /**
   * Clear all logs and screenshot metadata.
   */
  clearAll(): void {
    this.logs.clear();
    this.screenshots.clear();
  }

  /**
   * Get the total number of actions logged across all sessions.
   */
  getTotalActionCount(): number {
    let total = 0;
    for (const log of this.logs.values()) {
      total += log.length;
    }
    return total;
  }

  /**
   * Get all session IDs with logs.
   */
  getSessionIds(): string[] {
    return Array.from(this.logs.keys());
  }

  /**
   * Check if a session has any logged actions.
   */
  hasLog(sessionId: string): boolean {
    const log = this.logs.get(sessionId);
    return log !== undefined && log.length > 0;
  }

  /**
   * Get a shallow copy of the effective configuration.
   */
  getConfig(): DeterministicLoggerConfig {
    return { ...this.config };
  }
}
617
+
618
+ // =============================================================================
619
+ // Factory Function
620
+ // =============================================================================
621
+
622
/**
 * Convenience factory: builds a `DeterministicLogger` from the given
 * (optional, partial) configuration.
 *
 * @param config Overrides forwarded unchanged to the logger's constructor.
 * @returns A fresh logger instance.
 */
export function createDeterministicLogger(
  config: Partial<DeterministicLoggerConfig> = {}
): DeterministicLogger {
  const logger = new DeterministicLogger(config);
  return logger;
}