outcome-cli 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113)
  1. package/README.md +261 -0
  2. package/package.json +95 -0
  3. package/src/agents/README.md +139 -0
  4. package/src/agents/adapters/anthropic.adapter.ts +166 -0
  5. package/src/agents/adapters/dalle.adapter.ts +145 -0
  6. package/src/agents/adapters/gemini.adapter.ts +134 -0
  7. package/src/agents/adapters/imagen.adapter.ts +106 -0
  8. package/src/agents/adapters/nano-banana.adapter.ts +129 -0
  9. package/src/agents/adapters/openai.adapter.ts +165 -0
  10. package/src/agents/adapters/veo.adapter.ts +130 -0
  11. package/src/agents/agent.schema.property.test.ts +379 -0
  12. package/src/agents/agent.schema.test.ts +148 -0
  13. package/src/agents/agent.schema.ts +263 -0
  14. package/src/agents/index.ts +60 -0
  15. package/src/agents/registered-agent.schema.ts +356 -0
  16. package/src/agents/registry.ts +97 -0
  17. package/src/agents/tournament-configs.property.test.ts +266 -0
  18. package/src/cli/README.md +145 -0
  19. package/src/cli/commands/define.ts +79 -0
  20. package/src/cli/commands/list.ts +46 -0
  21. package/src/cli/commands/logs.ts +83 -0
  22. package/src/cli/commands/run.ts +416 -0
  23. package/src/cli/commands/verify.ts +110 -0
  24. package/src/cli/index.ts +81 -0
  25. package/src/config/README.md +128 -0
  26. package/src/config/env.ts +262 -0
  27. package/src/config/index.ts +19 -0
  28. package/src/eval/README.md +318 -0
  29. package/src/eval/ai-judge.test.ts +435 -0
  30. package/src/eval/ai-judge.ts +368 -0
  31. package/src/eval/code-validators.ts +414 -0
  32. package/src/eval/evaluateOutcome.property.test.ts +1174 -0
  33. package/src/eval/evaluateOutcome.ts +591 -0
  34. package/src/eval/immigration-validators.ts +122 -0
  35. package/src/eval/index.ts +90 -0
  36. package/src/eval/judge-cache.ts +402 -0
  37. package/src/eval/tournament-validators.property.test.ts +439 -0
  38. package/src/eval/validators.property.test.ts +1118 -0
  39. package/src/eval/validators.ts +1199 -0
  40. package/src/eval/weighted-scorer.ts +285 -0
  41. package/src/index.ts +17 -0
  42. package/src/league/README.md +188 -0
  43. package/src/league/health-check.ts +353 -0
  44. package/src/league/index.ts +93 -0
  45. package/src/league/killAgent.ts +151 -0
  46. package/src/league/league.test.ts +1151 -0
  47. package/src/league/runLeague.ts +843 -0
  48. package/src/league/scoreAgent.ts +175 -0
  49. package/src/modules/omnibridge/__tests__/.gitkeep +1 -0
  50. package/src/modules/omnibridge/__tests__/auth-tunnel.property.test.ts +524 -0
  51. package/src/modules/omnibridge/__tests__/deterministic-logger.property.test.ts +965 -0
  52. package/src/modules/omnibridge/__tests__/ghost-api.property.test.ts +461 -0
  53. package/src/modules/omnibridge/__tests__/omnibridge-integration.test.ts +542 -0
  54. package/src/modules/omnibridge/__tests__/parallel-executor.property.test.ts +671 -0
  55. package/src/modules/omnibridge/__tests__/semantic-normalizer.property.test.ts +521 -0
  56. package/src/modules/omnibridge/__tests__/semantic-normalizer.test.ts +254 -0
  57. package/src/modules/omnibridge/__tests__/session-vault.property.test.ts +367 -0
  58. package/src/modules/omnibridge/__tests__/shadow-session.property.test.ts +523 -0
  59. package/src/modules/omnibridge/__tests__/triangulation-engine.property.test.ts +292 -0
  60. package/src/modules/omnibridge/__tests__/verification-engine.property.test.ts +769 -0
  61. package/src/modules/omnibridge/api/.gitkeep +1 -0
  62. package/src/modules/omnibridge/api/ghost-api.ts +1087 -0
  63. package/src/modules/omnibridge/auth/.gitkeep +1 -0
  64. package/src/modules/omnibridge/auth/auth-tunnel.ts +843 -0
  65. package/src/modules/omnibridge/auth/session-vault.ts +577 -0
  66. package/src/modules/omnibridge/core/.gitkeep +1 -0
  67. package/src/modules/omnibridge/core/semantic-normalizer.ts +702 -0
  68. package/src/modules/omnibridge/core/triangulation-engine.ts +530 -0
  69. package/src/modules/omnibridge/core/types.ts +610 -0
  70. package/src/modules/omnibridge/execution/.gitkeep +1 -0
  71. package/src/modules/omnibridge/execution/deterministic-logger.ts +629 -0
  72. package/src/modules/omnibridge/execution/parallel-executor.ts +542 -0
  73. package/src/modules/omnibridge/execution/shadow-session.ts +794 -0
  74. package/src/modules/omnibridge/index.ts +212 -0
  75. package/src/modules/omnibridge/omnibridge.ts +510 -0
  76. package/src/modules/omnibridge/verification/.gitkeep +1 -0
  77. package/src/modules/omnibridge/verification/verification-engine.ts +783 -0
  78. package/src/outcomes/README.md +75 -0
  79. package/src/outcomes/acquire-pilot-customer.ts +297 -0
  80. package/src/outcomes/code-delivery-outcomes.ts +89 -0
  81. package/src/outcomes/code-outcomes.ts +256 -0
  82. package/src/outcomes/code_review_battle.test.ts +135 -0
  83. package/src/outcomes/code_review_battle.ts +135 -0
  84. package/src/outcomes/cold_email_battle.ts +97 -0
  85. package/src/outcomes/content_creation_battle.ts +160 -0
  86. package/src/outcomes/f1_stem_opt_compliance.ts +61 -0
  87. package/src/outcomes/index.ts +107 -0
  88. package/src/outcomes/lead_gen_battle.test.ts +113 -0
  89. package/src/outcomes/lead_gen_battle.ts +99 -0
  90. package/src/outcomes/outcome.schema.property.test.ts +229 -0
  91. package/src/outcomes/outcome.schema.ts +187 -0
  92. package/src/outcomes/qualified_sales_interest.ts +118 -0
  93. package/src/outcomes/swarm_planner.property.test.ts +370 -0
  94. package/src/outcomes/swarm_planner.ts +96 -0
  95. package/src/outcomes/web_extraction.ts +234 -0
  96. package/src/runtime/README.md +220 -0
  97. package/src/runtime/agentRunner.test.ts +341 -0
  98. package/src/runtime/agentRunner.ts +746 -0
  99. package/src/runtime/claudeAdapter.ts +232 -0
  100. package/src/runtime/costTracker.ts +123 -0
  101. package/src/runtime/index.ts +34 -0
  102. package/src/runtime/modelAdapter.property.test.ts +305 -0
  103. package/src/runtime/modelAdapter.ts +144 -0
  104. package/src/runtime/openaiAdapter.ts +235 -0
  105. package/src/utils/README.md +122 -0
  106. package/src/utils/command-runner.ts +134 -0
  107. package/src/utils/cost-guard.ts +379 -0
  108. package/src/utils/errors.test.ts +290 -0
  109. package/src/utils/errors.ts +442 -0
  110. package/src/utils/index.ts +37 -0
  111. package/src/utils/logger.test.ts +361 -0
  112. package/src/utils/logger.ts +419 -0
  113. package/src/utils/output-parsers.ts +216 -0
@@ -0,0 +1,361 @@
1
+ /**
2
+ * Logger Tests
3
+ *
4
+ * Tests for the structured logging utility including:
5
+ * - Log entry creation and storage
6
+ * - Log retrieval and filtering
7
+ * - CLI formatting
8
+ * - Property tests for log completeness
9
+ *
10
+ * @module utils/logger.test
11
+ */
12
+
13
+ import { describe, it, expect, beforeEach } from 'vitest';
14
+ import * as fc from 'fast-check';
15
+ import {
16
+ log,
17
+ getLogs,
18
+ getAgentLogs,
19
+ clearLogs,
20
+ clearAllLogs,
21
+ getTotalLogCount,
22
+ formatLogsForCli,
23
+ logSuccess,
24
+ logFailure,
25
+ logPending,
26
+ type LogEntry,
27
+ type LogEntryInput,
28
+ type LogResult,
29
+ } from './logger.js';
30
+
31
/**
 * Unit tests for the structured logger: entry creation, per-outcome and
 * per-agent retrieval, clearing, counting, CLI formatting, and the
 * logSuccess / logFailure / logPending shortcuts.
 */
describe('Logger', () => {
  /**
   * Logs one attempt with the fixed prompt version 'v1'.
   * The failureReason key is only added when a reason is supplied, so the
   * object handed to log() has the same keys as a hand-written literal.
   */
  const record = (
    agentId: string,
    outcomeId: string,
    tokensSpent: number,
    result: LogResult = 'SUCCESS',
    failureReason?: string,
  ) =>
    log({
      agentId,
      outcomeId,
      promptVersion: 'v1',
      tokensSpent,
      result,
      ...(failureReason === undefined ? {} : { failureReason }),
    });

  // Every test starts from an empty log store so tests cannot see each other's entries.
  beforeEach(() => {
    clearAllLogs();
  });

  describe('log', () => {
    it('should create a log entry with timestamp', () => {
      const { agentId, outcomeId, promptVersion, tokensSpent, result, timestamp } = log({
        agentId: 'test-agent',
        outcomeId: 'test-outcome',
        promptVersion: 'v1.0.0',
        tokensSpent: 100,
        result: 'SUCCESS',
      });

      expect(agentId).toBe('test-agent');
      expect(outcomeId).toBe('test-outcome');
      expect(promptVersion).toBe('v1.0.0');
      expect(tokensSpent).toBe(100);
      expect(result).toBe('SUCCESS');
      expect(timestamp).toBeDefined();
      // The timestamp must be parseable by Date.
      expect(new Date(timestamp).getTime()).not.toBeNaN();
    });

    it('should store the log entry for retrieval', () => {
      log({
        agentId: 'agent-1',
        outcomeId: 'outcome-1',
        promptVersion: 'v1.0.0',
        tokensSpent: 50,
        result: 'PENDING',
      });

      const stored = getLogs('outcome-1');
      expect(stored).toHaveLength(1);
      expect(stored[0].agentId).toBe('agent-1');
    });

    it('should include failureReason when result is FAILURE', () => {
      const { result, failureReason } = log({
        agentId: 'agent-1',
        outcomeId: 'outcome-1',
        promptVersion: 'v1.0.0',
        tokensSpent: 100,
        result: 'FAILURE',
        failureReason: 'Company too small',
      });

      expect(result).toBe('FAILURE');
      expect(failureReason).toBe('Company too small');
    });

    it('should include metadata when provided', () => {
      const { metadata } = log({
        agentId: 'agent-1',
        outcomeId: 'outcome-1',
        promptVersion: 'v1.0.0',
        tokensSpent: 100,
        result: 'SUCCESS',
        metadata: { attemptNumber: 3, leadId: 'lead-123' },
      });

      expect(metadata).toEqual({ attemptNumber: 3, leadId: 'lead-123' });
    });
  });

  describe('getLogs', () => {
    it('should return empty array for outcome with no logs', () => {
      expect(getLogs('nonexistent-outcome')).toEqual([]);
    });

    it('should return all logs for an outcome in order', () => {
      record('a1', 'o1', 10, 'PENDING');
      record('a2', 'o1', 20);
      record('a1', 'o1', 30, 'FAILURE', 'test');

      const stored = getLogs('o1');
      expect(stored).toHaveLength(3);

      // Entries must come back in the order they were logged.
      const expectedAgents = ['a1', 'a2', 'a1'];
      for (const [position, expectedAgent] of expectedAgents.entries()) {
        expect(stored[position].agentId).toBe(expectedAgent);
      }
    });

    it('should isolate logs by outcome', () => {
      record('a1', 'outcome-A', 10);
      record('a1', 'outcome-B', 20);

      for (const outcomeId of ['outcome-A', 'outcome-B']) {
        expect(getLogs(outcomeId)).toHaveLength(1);
      }
    });
  });

  describe('getAgentLogs', () => {
    it('should filter logs by agent ID', () => {
      record('agent-1', 'o1', 10, 'PENDING');
      record('agent-2', 'o1', 20);
      record('agent-1', 'o1', 30);

      const forAgent1 = getAgentLogs('o1', 'agent-1');
      expect(forAgent1).toHaveLength(2);
      const onlyAgent1 = forAgent1.every((entry) => entry.agentId === 'agent-1');
      expect(onlyAgent1).toBe(true);

      const forAgent2 = getAgentLogs('o1', 'agent-2');
      expect(forAgent2).toHaveLength(1);
    });
  });

  describe('clearLogs', () => {
    it('should clear logs for specific outcome only', () => {
      record('a1', 'o1', 10);
      record('a1', 'o2', 20);

      clearLogs('o1');

      // Only the cleared outcome loses its entries.
      expect(getLogs('o1')).toHaveLength(0);
      expect(getLogs('o2')).toHaveLength(1);
    });
  });

  describe('clearAllLogs', () => {
    it('should clear all logs', () => {
      record('a1', 'o1', 10);
      record('a1', 'o2', 20);

      clearAllLogs();

      for (const outcomeId of ['o1', 'o2']) {
        expect(getLogs(outcomeId)).toHaveLength(0);
      }
      expect(getTotalLogCount()).toBe(0);
    });
  });

  describe('getTotalLogCount', () => {
    it('should return total count across all outcomes', () => {
      record('a1', 'o1', 10);
      record('a1', 'o1', 20);
      record('a1', 'o2', 30);

      expect(getTotalLogCount()).toBe(3);
    });
  });

  describe('formatLogsForCli', () => {
    it('should return message for empty logs', () => {
      expect(formatLogsForCli('empty-outcome')).toContain('No logs found');
    });

    it('should format logs with header and entries', () => {
      record('a1', 'o1', 100);
      record('a2', 'o1', 200, 'FAILURE', 'Test failure');

      const rendered = formatLogsForCli('o1');

      // Outcome id, entry count, both agents, both results and the failure reason must all appear.
      const expectedFragments = ['o1', '2 entries', 'a1', 'a2', 'SUCCESS', 'FAILURE', 'Test failure'];
      for (const fragment of expectedFragments) {
        expect(rendered).toContain(fragment);
      }
    });
  });

  describe('convenience functions', () => {
    it('logSuccess should create SUCCESS entry', () => {
      const { result, failureReason } = logSuccess('agent-1', 'outcome-1', 'v1.0.0', 500);
      expect(result).toBe('SUCCESS');
      expect(failureReason).toBeUndefined();
    });

    it('logFailure should create FAILURE entry with reason', () => {
      const { result, failureReason } = logFailure('agent-1', 'outcome-1', 'v1.0.0', 300, 'Test failure');
      expect(result).toBe('FAILURE');
      expect(failureReason).toBe('Test failure');
    });

    it('logPending should create PENDING entry', () => {
      const { result } = logPending('agent-1', 'outcome-1', 'v1.0.0', 100);
      expect(result).toBe('PENDING');
    });
  });
});
215
+
216
/**
 * Property-based tests (fast-check) for the logger: entry completeness,
 * per-agent persistence counts, and isolation between outcomes.
 */
describe('Logger Property Tests', () => {
  beforeEach(() => {
    clearAllLogs();
  });

  /**
   * **Feature: earnd-bounty-engine, Property 12: Log Entry Completeness**
   * For any agent attempt, the log entry SHALL contain agentId, outcomeId,
   * promptVersion, tokensSpent, result, and failureReason (if result is FAILURE).
   * **Validates: Requirements 6.1**
   */
  it('Property 12: Log Entry Completeness', () => {
    fc.assert(
      fc.property(
        fc.record({
          agentId: fc.string({ minLength: 1 }),
          outcomeId: fc.string({ minLength: 1 }),
          promptVersion: fc.string({ minLength: 1 }),
          tokensSpent: fc.nat(),
          result: fc.constantFrom('SUCCESS', 'FAILURE', 'PENDING') as fc.Arbitrary<LogResult>,
          failureReason: fc.option(fc.string({ minLength: 1 }), { nil: undefined }),
        }),
        (input) => {
          // beforeEach runs once per test, but this body runs numRuns times
          // inside that one test, so the store is reset on every run.
          clearAllLogs();

          const entry = log({
            agentId: input.agentId,
            outcomeId: input.outcomeId,
            promptVersion: input.promptVersion,
            tokensSpent: input.tokensSpent,
            result: input.result,
            failureReason: input.failureReason,
          });

          // Verify all required fields are present
          expect(entry.agentId).toBe(input.agentId);
          expect(entry.outcomeId).toBe(input.outcomeId);
          expect(entry.promptVersion).toBe(input.promptVersion);
          expect(entry.tokensSpent).toBe(input.tokensSpent);
          expect(entry.result).toBe(input.result);
          expect(entry.timestamp).toBeDefined();
          expect(typeof entry.timestamp).toBe('string');

          // Verify the timestamp string parses to a real date
          const parsedTime = new Date(entry.timestamp).getTime();
          expect(Number.isNaN(parsedTime)).toBe(false);

          // If a failure was logged with a reason, the reason must be kept verbatim
          if (input.result === 'FAILURE' && input.failureReason !== undefined) {
            expect(entry.failureReason).toBe(input.failureReason);
          }
        }
      ),
      { numRuns: 100 }
    );
  });

  /**
   * **Feature: earnd-bounty-engine, Property 13: Log Persistence**
   * For any completed agent run with N attempts, querying logs SHALL return
   * exactly N log entries for that agent.
   * **Validates: Requirements 6.3**
   */
  it('Property 13: Log Persistence', () => {
    fc.assert(
      fc.property(
        fc.nat({ max: 20 }), // Number of attempts (0-20)
        fc.string({ minLength: 1 }), // agentId
        fc.string({ minLength: 1 }), // outcomeId
        (attemptCount, agentId, outcomeId) => {
          // Reset per run; see the note in Property 12's body for why beforeEach is not enough.
          clearAllLogs();

          // Log N attempts; only the last one is marked SUCCESS
          for (let i = 0; i < attemptCount; i++) {
            log({
              agentId,
              outcomeId,
              promptVersion: `v1.${i}`,
              tokensSpent: 100 + i * 10,
              result: i === attemptCount - 1 ? 'SUCCESS' : 'PENDING',
            });
          }

          // Verify exactly N entries exist
          const agentEntries = getAgentLogs(outcomeId, agentId);
          expect(agentEntries.length).toBe(attemptCount);

          // Verify all entries belong to the correct agent and outcome.
          // The loop variable is named `entry` so it does not shadow the imported log().
          for (const entry of agentEntries) {
            expect(entry.agentId).toBe(agentId);
            expect(entry.outcomeId).toBe(outcomeId);
          }
        }
      ),
      { numRuns: 100 }
    );
  });

  /**
   * Property: Log isolation between outcomes
   * Logs for different outcomes SHALL be isolated from each other.
   */
  it('Property: Log isolation between outcomes', () => {
    fc.assert(
      fc.property(
        fc.string({ minLength: 1 }), // outcomeId1
        fc.string({ minLength: 1 }), // outcomeId2
        fc.nat({ max: 10 }), // count1
        fc.nat({ max: 10 }), // count2
        (outcomeId1, outcomeId2, count1, count2) => {
          // Discard inputs where both ids are equal. fc.pre makes fast-check
          // skip the run instead of counting it as a pass, which a bare
          // `return` would do.
          fc.pre(outcomeId1 !== outcomeId2);

          clearAllLogs();

          // Log to first outcome
          for (let i = 0; i < count1; i++) {
            log({
              agentId: `agent-${i}`,
              outcomeId: outcomeId1,
              promptVersion: 'v1',
              tokensSpent: 100,
              result: 'SUCCESS',
            });
          }

          // Log to second outcome
          for (let i = 0; i < count2; i++) {
            log({
              agentId: `agent-${i}`,
              outcomeId: outcomeId2,
              promptVersion: 'v1',
              tokensSpent: 100,
              result: 'SUCCESS',
            });
          }

          // Verify isolation: each outcome sees only its own entries
          expect(getLogs(outcomeId1).length).toBe(count1);
          expect(getLogs(outcomeId2).length).toBe(count2);
        }
      ),
      { numRuns: 100 }
    );
  });
});