outcome-cli 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. package/README.md +261 -0
  2. package/package.json +95 -0
  3. package/src/agents/README.md +139 -0
  4. package/src/agents/adapters/anthropic.adapter.ts +166 -0
  5. package/src/agents/adapters/dalle.adapter.ts +145 -0
  6. package/src/agents/adapters/gemini.adapter.ts +134 -0
  7. package/src/agents/adapters/imagen.adapter.ts +106 -0
  8. package/src/agents/adapters/nano-banana.adapter.ts +129 -0
  9. package/src/agents/adapters/openai.adapter.ts +165 -0
  10. package/src/agents/adapters/veo.adapter.ts +130 -0
  11. package/src/agents/agent.schema.property.test.ts +379 -0
  12. package/src/agents/agent.schema.test.ts +148 -0
  13. package/src/agents/agent.schema.ts +263 -0
  14. package/src/agents/index.ts +60 -0
  15. package/src/agents/registered-agent.schema.ts +356 -0
  16. package/src/agents/registry.ts +97 -0
  17. package/src/agents/tournament-configs.property.test.ts +266 -0
  18. package/src/cli/README.md +145 -0
  19. package/src/cli/commands/define.ts +79 -0
  20. package/src/cli/commands/list.ts +46 -0
  21. package/src/cli/commands/logs.ts +83 -0
  22. package/src/cli/commands/run.ts +416 -0
  23. package/src/cli/commands/verify.ts +110 -0
  24. package/src/cli/index.ts +81 -0
  25. package/src/config/README.md +128 -0
  26. package/src/config/env.ts +262 -0
  27. package/src/config/index.ts +19 -0
  28. package/src/eval/README.md +318 -0
  29. package/src/eval/ai-judge.test.ts +435 -0
  30. package/src/eval/ai-judge.ts +368 -0
  31. package/src/eval/code-validators.ts +414 -0
  32. package/src/eval/evaluateOutcome.property.test.ts +1174 -0
  33. package/src/eval/evaluateOutcome.ts +591 -0
  34. package/src/eval/immigration-validators.ts +122 -0
  35. package/src/eval/index.ts +90 -0
  36. package/src/eval/judge-cache.ts +402 -0
  37. package/src/eval/tournament-validators.property.test.ts +439 -0
  38. package/src/eval/validators.property.test.ts +1118 -0
  39. package/src/eval/validators.ts +1199 -0
  40. package/src/eval/weighted-scorer.ts +285 -0
  41. package/src/index.ts +17 -0
  42. package/src/league/README.md +188 -0
  43. package/src/league/health-check.ts +353 -0
  44. package/src/league/index.ts +93 -0
  45. package/src/league/killAgent.ts +151 -0
  46. package/src/league/league.test.ts +1151 -0
  47. package/src/league/runLeague.ts +843 -0
  48. package/src/league/scoreAgent.ts +175 -0
  49. package/src/modules/omnibridge/__tests__/.gitkeep +1 -0
  50. package/src/modules/omnibridge/__tests__/auth-tunnel.property.test.ts +524 -0
  51. package/src/modules/omnibridge/__tests__/deterministic-logger.property.test.ts +965 -0
  52. package/src/modules/omnibridge/__tests__/ghost-api.property.test.ts +461 -0
  53. package/src/modules/omnibridge/__tests__/omnibridge-integration.test.ts +542 -0
  54. package/src/modules/omnibridge/__tests__/parallel-executor.property.test.ts +671 -0
  55. package/src/modules/omnibridge/__tests__/semantic-normalizer.property.test.ts +521 -0
  56. package/src/modules/omnibridge/__tests__/semantic-normalizer.test.ts +254 -0
  57. package/src/modules/omnibridge/__tests__/session-vault.property.test.ts +367 -0
  58. package/src/modules/omnibridge/__tests__/shadow-session.property.test.ts +523 -0
  59. package/src/modules/omnibridge/__tests__/triangulation-engine.property.test.ts +292 -0
  60. package/src/modules/omnibridge/__tests__/verification-engine.property.test.ts +769 -0
  61. package/src/modules/omnibridge/api/.gitkeep +1 -0
  62. package/src/modules/omnibridge/api/ghost-api.ts +1087 -0
  63. package/src/modules/omnibridge/auth/.gitkeep +1 -0
  64. package/src/modules/omnibridge/auth/auth-tunnel.ts +843 -0
  65. package/src/modules/omnibridge/auth/session-vault.ts +577 -0
  66. package/src/modules/omnibridge/core/.gitkeep +1 -0
  67. package/src/modules/omnibridge/core/semantic-normalizer.ts +702 -0
  68. package/src/modules/omnibridge/core/triangulation-engine.ts +530 -0
  69. package/src/modules/omnibridge/core/types.ts +610 -0
  70. package/src/modules/omnibridge/execution/.gitkeep +1 -0
  71. package/src/modules/omnibridge/execution/deterministic-logger.ts +629 -0
  72. package/src/modules/omnibridge/execution/parallel-executor.ts +542 -0
  73. package/src/modules/omnibridge/execution/shadow-session.ts +794 -0
  74. package/src/modules/omnibridge/index.ts +212 -0
  75. package/src/modules/omnibridge/omnibridge.ts +510 -0
  76. package/src/modules/omnibridge/verification/.gitkeep +1 -0
  77. package/src/modules/omnibridge/verification/verification-engine.ts +783 -0
  78. package/src/outcomes/README.md +75 -0
  79. package/src/outcomes/acquire-pilot-customer.ts +297 -0
  80. package/src/outcomes/code-delivery-outcomes.ts +89 -0
  81. package/src/outcomes/code-outcomes.ts +256 -0
  82. package/src/outcomes/code_review_battle.test.ts +135 -0
  83. package/src/outcomes/code_review_battle.ts +135 -0
  84. package/src/outcomes/cold_email_battle.ts +97 -0
  85. package/src/outcomes/content_creation_battle.ts +160 -0
  86. package/src/outcomes/f1_stem_opt_compliance.ts +61 -0
  87. package/src/outcomes/index.ts +107 -0
  88. package/src/outcomes/lead_gen_battle.test.ts +113 -0
  89. package/src/outcomes/lead_gen_battle.ts +99 -0
  90. package/src/outcomes/outcome.schema.property.test.ts +229 -0
  91. package/src/outcomes/outcome.schema.ts +187 -0
  92. package/src/outcomes/qualified_sales_interest.ts +118 -0
  93. package/src/outcomes/swarm_planner.property.test.ts +370 -0
  94. package/src/outcomes/swarm_planner.ts +96 -0
  95. package/src/outcomes/web_extraction.ts +234 -0
  96. package/src/runtime/README.md +220 -0
  97. package/src/runtime/agentRunner.test.ts +341 -0
  98. package/src/runtime/agentRunner.ts +746 -0
  99. package/src/runtime/claudeAdapter.ts +232 -0
  100. package/src/runtime/costTracker.ts +123 -0
  101. package/src/runtime/index.ts +34 -0
  102. package/src/runtime/modelAdapter.property.test.ts +305 -0
  103. package/src/runtime/modelAdapter.ts +144 -0
  104. package/src/runtime/openaiAdapter.ts +235 -0
  105. package/src/utils/README.md +122 -0
  106. package/src/utils/command-runner.ts +134 -0
  107. package/src/utils/cost-guard.ts +379 -0
  108. package/src/utils/errors.test.ts +290 -0
  109. package/src/utils/errors.ts +442 -0
  110. package/src/utils/index.ts +37 -0
  111. package/src/utils/logger.test.ts +361 -0
  112. package/src/utils/logger.ts +419 -0
  113. package/src/utils/output-parsers.ts +216 -0
@@ -0,0 +1,232 @@
1
+ /**
2
+ * Claude Adapter - Anthropic Claude API integration
3
+ *
4
+ * Implements the ModelAdapter interface for Claude models.
5
+ * Uses the official @anthropic-ai/sdk package following Messages API.
6
+ *
7
+ * @module runtime/claudeAdapter
8
+ * @see https://docs.anthropic.com/en/api/getting-started
9
+ */
10
+
11
+ import Anthropic from '@anthropic-ai/sdk';
12
+ import type {
13
+ ModelAdapter,
14
+ ModelOptions,
15
+ ModelResponse,
16
+ ConversationMessage,
17
+ ToolDefinition,
18
+ ToolCall,
19
+ } from './modelAdapter.js';
20
+
21
+ /**
22
+ * Default max tokens for Claude responses.
23
+ */
24
+ const DEFAULT_MAX_TOKENS = 1024;
25
+
26
+ /**
27
+ * Approximate characters per token for estimation.
28
+ * Claude uses ~4 characters per token on average.
29
+ */
30
+ const CHARS_PER_TOKEN = 4;
31
+
32
+ /**
33
+ * Converts our tool definitions to Claude's tool format.
34
+ *
35
+ * @param tools - Our normalized tool definitions
36
+ * @returns Claude-formatted tools
37
+ */
38
+ function toClaudeTools(tools: ToolDefinition[]): Anthropic.Tool[] {
39
+ return tools.map((tool) => ({
40
+ name: tool.name,
41
+ description: tool.description,
42
+ input_schema: {
43
+ type: 'object' as const,
44
+ properties: tool.inputSchema.properties,
45
+ required: tool.inputSchema.required,
46
+ },
47
+ }));
48
+ }
49
+
50
+ /**
51
+ * Converts our conversation messages to Claude's message format.
52
+ *
53
+ * @param messages - Our normalized conversation messages
54
+ * @returns Claude-formatted messages
55
+ */
56
+ function toClaudeMessages(messages: ConversationMessage[]): Anthropic.MessageParam[] {
57
+ const claudeMessages: Anthropic.MessageParam[] = [];
58
+
59
+ for (const msg of messages) {
60
+ if (msg.role === 'user') {
61
+ claudeMessages.push({ role: 'user', content: msg.content });
62
+ } else if (msg.role === 'assistant') {
63
+ // Assistant message with potential tool calls
64
+ const content: Anthropic.ContentBlockParam[] = [];
65
+ if (msg.content) {
66
+ content.push({ type: 'text', text: msg.content });
67
+ }
68
+ if (msg.toolCalls) {
69
+ for (const tc of msg.toolCalls) {
70
+ content.push({
71
+ type: 'tool_use',
72
+ id: tc.id,
73
+ name: tc.name,
74
+ input: tc.arguments,
75
+ });
76
+ }
77
+ }
78
+ claudeMessages.push({ role: 'assistant', content });
79
+ } else if (msg.role === 'tool') {
80
+ // Tool result - Claude expects this as a user message with tool_result content
81
+ claudeMessages.push({
82
+ role: 'user',
83
+ content: [
84
+ {
85
+ type: 'tool_result',
86
+ tool_use_id: msg.toolCallId!,
87
+ content: msg.content,
88
+ },
89
+ ],
90
+ });
91
+ }
92
+ }
93
+
94
+ return claudeMessages;
95
+ }
96
+
97
+ /**
98
+ * Extracts tool calls from Claude's response content.
99
+ *
100
+ * @param content - Claude response content blocks
101
+ * @returns Array of tool calls
102
+ */
103
+ function extractToolCalls(content: Anthropic.ContentBlock[]): ToolCall[] {
104
+ const toolCalls: ToolCall[] = [];
105
+
106
+ for (const block of content) {
107
+ if (block.type === 'tool_use') {
108
+ toolCalls.push({
109
+ id: block.id,
110
+ name: block.name,
111
+ arguments: block.input as Record<string, unknown>,
112
+ });
113
+ }
114
+ }
115
+
116
+ return toolCalls;
117
+ }
118
+
119
+ /**
120
+ * Creates a Claude model adapter.
121
+ *
122
+ * @param apiKey - Anthropic API key
123
+ * @param modelId - Claude model ID (e.g., 'claude-3-sonnet-20240229')
124
+ * @returns ModelAdapter implementation for Claude
125
+ *
126
+ * @example
127
+ * const adapter = createClaudeAdapter(process.env.ANTHROPIC_API_KEY!, 'claude-3-sonnet-20240229');
128
+ * const response = await adapter.complete('Hello, Claude');
129
+ *
130
+ * @see Requirements 11.1, 11.2, 11.4
131
+ */
132
+ export function createClaudeAdapter(apiKey: string, modelId: string): ModelAdapter {
133
+ const client = new Anthropic({ apiKey });
134
+
135
+ return {
136
+ provider: 'claude',
137
+ modelId,
138
+
139
+ async complete(prompt: string, options?: ModelOptions): Promise<ModelResponse> {
140
+ const messages: Anthropic.MessageParam[] = [
141
+ { role: 'user', content: prompt },
142
+ ];
143
+
144
+ const requestParams: Anthropic.MessageCreateParams = {
145
+ model: modelId,
146
+ max_tokens: options?.maxTokens ?? DEFAULT_MAX_TOKENS,
147
+ ...(options?.temperature !== undefined && { temperature: options.temperature }),
148
+ ...(options?.systemPrompt && { system: options.systemPrompt }),
149
+ messages,
150
+ };
151
+
152
+ // Add tools if provided
153
+ if (options?.tools && options.tools.length > 0) {
154
+ requestParams.tools = toClaudeTools(options.tools);
155
+ }
156
+
157
+ const message = await client.messages.create(requestParams);
158
+
159
+ // Extract text content from response
160
+ let content = '';
161
+ for (const block of message.content) {
162
+ if (block.type === 'text') {
163
+ content += block.text;
164
+ }
165
+ }
166
+
167
+ // Extract tool calls if any
168
+ const toolCalls = extractToolCalls(message.content);
169
+
170
+ // Calculate total tokens used (input + output)
171
+ const tokensUsed = message.usage.input_tokens + message.usage.output_tokens;
172
+
173
+ return {
174
+ content,
175
+ tokensUsed,
176
+ model: message.model,
177
+ toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
178
+ requiresToolResponse: message.stop_reason === 'tool_use',
179
+ };
180
+ },
181
+
182
+ async continueWithToolResults(
183
+ messages: ConversationMessage[],
184
+ options?: ModelOptions
185
+ ): Promise<ModelResponse> {
186
+ const claudeMessages = toClaudeMessages(messages);
187
+
188
+ const requestParams: Anthropic.MessageCreateParams = {
189
+ model: modelId,
190
+ max_tokens: options?.maxTokens ?? DEFAULT_MAX_TOKENS,
191
+ ...(options?.temperature !== undefined && { temperature: options.temperature }),
192
+ ...(options?.systemPrompt && { system: options.systemPrompt }),
193
+ messages: claudeMessages,
194
+ };
195
+
196
+ // Add tools if provided
197
+ if (options?.tools && options.tools.length > 0) {
198
+ requestParams.tools = toClaudeTools(options.tools);
199
+ }
200
+
201
+ const message = await client.messages.create(requestParams);
202
+
203
+ // Extract text content from response
204
+ let content = '';
205
+ for (const block of message.content) {
206
+ if (block.type === 'text') {
207
+ content += block.text;
208
+ }
209
+ }
210
+
211
+ // Extract tool calls if any
212
+ const toolCalls = extractToolCalls(message.content);
213
+
214
+ // Calculate total tokens used (input + output)
215
+ const tokensUsed = message.usage.input_tokens + message.usage.output_tokens;
216
+
217
+ return {
218
+ content,
219
+ tokensUsed,
220
+ model: message.model,
221
+ toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
222
+ requiresToolResponse: message.stop_reason === 'tool_use',
223
+ };
224
+ },
225
+
226
+ countTokens(text: string): number {
227
+ // Approximate token count based on character length
228
+ // Claude uses ~4 characters per token on average
229
+ return Math.ceil(text.length / CHARS_PER_TOKEN);
230
+ },
231
+ };
232
+ }
@@ -0,0 +1,123 @@
1
+ /**
2
+ * Cost Tracker - Real-time token and cost tracking per agent
3
+ *
4
+ * Tracks tokens spent per agent in real-time and enforces cost ceilings.
5
+ * Used by the league system to monitor and terminate agents that exceed limits.
6
+ *
7
+ * @module runtime/costTracker
8
+ */
9
+
10
+ /**
11
+ * Default cost per token in USD (approximate for Claude/OpenAI models).
12
+ * This is a simplified rate - actual costs vary by model and token type.
13
+ */
14
+ const DEFAULT_COST_PER_TOKEN_USD = 0.00001;
15
+
16
+ /**
17
+ * Represents a cost tracker for a single agent.
18
+ * Tracks tokens spent and calculates cost in real-time.
19
+ */
20
+ export interface CostTracker {
21
+ /** Unique identifier for the agent being tracked */
22
+ agentId: string;
23
+ /** Total tokens spent by this agent */
24
+ tokensSpent: number;
25
+ /** Estimated cost in USD based on tokens spent */
26
+ costUsd: number;
27
+ /** Maximum token ceiling for this agent */
28
+ ceiling: number;
29
+ }
30
+
31
+ /**
32
+ * Creates a new cost tracker for an agent.
33
+ *
34
+ * @param agentId - Unique identifier for the agent
35
+ * @param ceiling - Maximum token ceiling for this agent
36
+ * @returns A new CostTracker initialized with zero usage
37
+ *
38
+ * @example
39
+ * const tracker = createCostTracker('agent-1', 10000);
40
+ * // { agentId: 'agent-1', tokensSpent: 0, costUsd: 0, ceiling: 10000 }
41
+ *
42
+ * @see Requirements 10.1, 10.5
43
+ */
44
+ export function createCostTracker(agentId: string, ceiling: number): CostTracker {
45
+ return {
46
+ agentId,
47
+ tokensSpent: 0,
48
+ costUsd: 0,
49
+ ceiling,
50
+ };
51
+ }
52
+
53
+ /**
54
+ * Records token usage for an agent.
55
+ *
56
+ * Updates the tracker's tokensSpent and costUsd fields in place.
57
+ * Cost is calculated using a default rate per token.
58
+ *
59
+ * @param tracker - The cost tracker to update
60
+ * @param tokens - Number of tokens to record
61
+ *
62
+ * @example
63
+ * const tracker = createCostTracker('agent-1', 10000);
64
+ * recordUsage(tracker, 500);
65
+ * // tracker.tokensSpent === 500
66
+ * // tracker.costUsd === 0.005
67
+ *
68
+ * @see Requirements 10.5
69
+ */
70
+ export function recordUsage(tracker: CostTracker, tokens: number): void {
71
+ tracker.tokensSpent += tokens;
72
+ tracker.costUsd = tracker.tokensSpent * DEFAULT_COST_PER_TOKEN_USD;
73
+ }
74
+
75
+ /**
76
+ * Checks if an agent has exceeded its token ceiling.
77
+ *
78
+ * @param tracker - The cost tracker to check
79
+ * @returns True if tokensSpent exceeds ceiling
80
+ *
81
+ * @example
82
+ * const tracker = createCostTracker('agent-1', 1000);
83
+ * recordUsage(tracker, 1001);
84
+ * isOverBudget(tracker); // true
85
+ *
86
+ * @see Requirements 10.1
87
+ */
88
+ export function isOverBudget(tracker: CostTracker): boolean {
89
+ return tracker.tokensSpent > tracker.ceiling;
90
+ }
91
+
92
+ /**
93
+ * Gets the remaining token budget for an agent.
94
+ *
95
+ * @param tracker - The cost tracker to check
96
+ * @returns Number of tokens remaining (can be negative if over budget)
97
+ *
98
+ * @example
99
+ * const tracker = createCostTracker('agent-1', 10000);
100
+ * recordUsage(tracker, 3000);
101
+ * getRemainingBudget(tracker); // 7000
102
+ */
103
+ export function getRemainingBudget(tracker: CostTracker): number {
104
+ return tracker.ceiling - tracker.tokensSpent;
105
+ }
106
+
107
+ /**
108
+ * Gets the percentage of budget used.
109
+ *
110
+ * @param tracker - The cost tracker to check
111
+ * @returns Percentage of budget used (0-100+, can exceed 100 if over budget)
112
+ *
113
+ * @example
114
+ * const tracker = createCostTracker('agent-1', 10000);
115
+ * recordUsage(tracker, 5000);
116
+ * getBudgetUsagePercent(tracker); // 50
117
+ */
118
+ export function getBudgetUsagePercent(tracker: CostTracker): number {
119
+ if (tracker.ceiling === 0) {
120
+ return tracker.tokensSpent > 0 ? 100 : 0;
121
+ }
122
+ return (tracker.tokensSpent / tracker.ceiling) * 100;
123
+ }
@@ -0,0 +1,34 @@
1
+ /**
2
+ * Runtime Module - Agent execution, model adapters, and cost tracking
3
+ *
4
+ * @module runtime
5
+ */
6
+
7
+ export {
8
+ CostTracker,
9
+ createCostTracker,
10
+ recordUsage,
11
+ isOverBudget,
12
+ getRemainingBudget,
13
+ getBudgetUsagePercent,
14
+ } from './costTracker.js';
15
+
16
+ export {
17
+ ModelAdapter,
18
+ ModelOptions,
19
+ ModelResponse,
20
+ ModelAdapterConfig,
21
+ createAdapter,
22
+ } from './modelAdapter.js';
23
+
24
+ export { createClaudeAdapter } from './claudeAdapter.js';
25
+ export { createOpenAIAdapter } from './openaiAdapter.js';
26
+
27
+ export {
28
+ type KillReason,
29
+ type AgentRunStatus,
30
+ type AgentRun,
31
+ type AgentRunConfig,
32
+ runAgent,
33
+ runAgentMock,
34
+ } from './agentRunner.js';
@@ -0,0 +1,305 @@
1
+ /**
2
+ * Property-based tests for Model Response Normalization
3
+ *
4
+ * **Feature: earnd-bounty-engine, Property 24: Model Response Normalization**
5
+ * **Validates: Requirements 11.4**
6
+ *
7
+ * Property 24: Model Response Normalization
8
+ * *For any* model response from Claude or OpenAI, the adapter SHALL return
9
+ * a normalized ModelResponse with content, tokensUsed, and model fields.
10
+ *
11
+ * Since we cannot call actual APIs in tests, we test the normalization logic
12
+ * by verifying that:
13
+ * 1. ModelResponse interface structure is correct
14
+ * 2. Adapters expose correct provider and modelId
15
+ * 3. countTokens function returns consistent results
16
+ */
17
+
18
+ import { describe, test, expect } from 'vitest';
19
+ import * as fc from 'fast-check';
20
+ import type { ModelResponse, ModelAdapter } from './modelAdapter.js';
21
+
22
+ /**
23
+ * Arbitrary for generating valid ModelResponse objects.
24
+ * This represents what a normalized response should look like.
25
+ */
26
+ const modelResponseArb = fc.record({
27
+ content: fc.string(),
28
+ tokensUsed: fc.integer({ min: 0, max: 1000000 }),
29
+ model: fc.string({ minLength: 1 }),
30
+ });
31
+
32
+ /**
33
+ * Arbitrary for generating provider types.
34
+ */
35
+ const providerArb = fc.constantFrom('claude', 'openai') as fc.Arbitrary<'claude' | 'openai'>;
36
+
37
+ /**
38
+ * Arbitrary for generating model IDs.
39
+ */
40
+ const modelIdArb = fc.constantFrom(
41
+ 'claude-3-sonnet-20240229',
42
+ 'claude-3-opus-20240229',
43
+ 'claude-3-haiku-20240307',
44
+ 'gpt-4-turbo-preview',
45
+ 'gpt-4',
46
+ 'gpt-3.5-turbo'
47
+ );
48
+
49
+ describe('Model Response Normalization - Property Tests', () => {
50
+ // **Feature: earnd-bounty-engine, Property 24: Model Response Normalization**
51
+ test('ModelResponse has required fields with correct types', () => {
52
+ fc.assert(
53
+ fc.property(modelResponseArb, (response) => {
54
+ // Verify content is a string
55
+ expect(typeof response.content).toBe('string');
56
+
57
+ // Verify tokensUsed is a non-negative number
58
+ expect(typeof response.tokensUsed).toBe('number');
59
+ expect(response.tokensUsed).toBeGreaterThanOrEqual(0);
60
+
61
+ // Verify model is a non-empty string
62
+ expect(typeof response.model).toBe('string');
63
+ expect(response.model.length).toBeGreaterThan(0);
64
+ }),
65
+ { numRuns: 100 }
66
+ );
67
+ });
68
+
69
+ // **Feature: earnd-bounty-engine, Property 24: Model Response Normalization**
70
+ test('ModelResponse structure is consistent across all generated responses', () => {
71
+ fc.assert(
72
+ fc.property(modelResponseArb, (response) => {
73
+ // All required keys must be present
74
+ const requiredKeys = ['content', 'tokensUsed', 'model'];
75
+ const responseKeys = Object.keys(response);
76
+
77
+ for (const key of requiredKeys) {
78
+ expect(responseKeys).toContain(key);
79
+ }
80
+ }),
81
+ { numRuns: 100 }
82
+ );
83
+ });
84
+
85
+ // **Feature: earnd-bounty-engine, Property 24: Model Response Normalization**
86
+ test('tokensUsed is always a non-negative integer', () => {
87
+ fc.assert(
88
+ fc.property(
89
+ fc.integer({ min: 0, max: 1000000 }),
90
+ (tokens) => {
91
+ // Simulating what the adapter should return
92
+ const response: ModelResponse = {
93
+ content: 'test content',
94
+ tokensUsed: tokens,
95
+ model: 'test-model',
96
+ };
97
+
98
+ expect(Number.isInteger(response.tokensUsed)).toBe(true);
99
+ expect(response.tokensUsed).toBeGreaterThanOrEqual(0);
100
+ }
101
+ ),
102
+ { numRuns: 100 }
103
+ );
104
+ });
105
+
106
+ // **Feature: earnd-bounty-engine, Property 24: Model Response Normalization**
107
+ test('content can be empty string but must be defined', () => {
108
+ fc.assert(
109
+ fc.property(fc.string(), (content) => {
110
+ const response: ModelResponse = {
111
+ content,
112
+ tokensUsed: 100,
113
+ model: 'test-model',
114
+ };
115
+
116
+ expect(response.content).toBeDefined();
117
+ expect(typeof response.content).toBe('string');
118
+ }),
119
+ { numRuns: 100 }
120
+ );
121
+ });
122
+
123
+ // **Feature: earnd-bounty-engine, Property 24: Model Response Normalization**
124
+ test('model field preserves the model identifier', () => {
125
+ fc.assert(
126
+ fc.property(modelIdArb, (modelId) => {
127
+ const response: ModelResponse = {
128
+ content: 'test',
129
+ tokensUsed: 50,
130
+ model: modelId,
131
+ };
132
+
133
+ expect(response.model).toBe(modelId);
134
+ expect(response.model.length).toBeGreaterThan(0);
135
+ }),
136
+ { numRuns: 100 }
137
+ );
138
+ });
139
+ });
140
+
141
+ describe('Model Adapter Interface - Property Tests', () => {
142
+ // **Feature: earnd-bounty-engine, Property 24: Model Response Normalization**
143
+ test('countTokens returns consistent results for same input', () => {
144
+ // Test the token counting approximation logic used by both adapters
145
+ const CHARS_PER_TOKEN = 4;
146
+
147
+ const countTokens = (text: string): number => {
148
+ return Math.ceil(text.length / CHARS_PER_TOKEN);
149
+ };
150
+
151
+ fc.assert(
152
+ fc.property(fc.string(), (text) => {
153
+ const count1 = countTokens(text);
154
+ const count2 = countTokens(text);
155
+
156
+ // Same input should always produce same output (determinism)
157
+ expect(count1).toBe(count2);
158
+
159
+ // Token count should be non-negative
160
+ expect(count1).toBeGreaterThanOrEqual(0);
161
+
162
+ // Token count should be an integer
163
+ expect(Number.isInteger(count1)).toBe(true);
164
+ }),
165
+ { numRuns: 100 }
166
+ );
167
+ });
168
+
169
+ // **Feature: earnd-bounty-engine, Property 24: Model Response Normalization**
170
+ test('countTokens scales with text length', () => {
171
+ const CHARS_PER_TOKEN = 4;
172
+
173
+ const countTokens = (text: string): number => {
174
+ return Math.ceil(text.length / CHARS_PER_TOKEN);
175
+ };
176
+
177
+ fc.assert(
178
+ fc.property(
179
+ fc.string({ minLength: 0, maxLength: 100 }),
180
+ fc.string({ minLength: 0, maxLength: 100 }),
181
+ (text1, text2) => {
182
+ const count1 = countTokens(text1);
183
+ const count2 = countTokens(text2);
184
+
185
+ // Longer text should have >= token count
186
+ if (text1.length > text2.length) {
187
+ expect(count1).toBeGreaterThanOrEqual(count2);
188
+ } else if (text2.length > text1.length) {
189
+ expect(count2).toBeGreaterThanOrEqual(count1);
190
+ }
191
+ }
192
+ ),
193
+ { numRuns: 100 }
194
+ );
195
+ });
196
+
197
+ // **Feature: earnd-bounty-engine, Property 24: Model Response Normalization**
198
+ test('empty text returns zero or minimal tokens', () => {
199
+ const CHARS_PER_TOKEN = 4;
200
+
201
+ const countTokens = (text: string): number => {
202
+ return Math.ceil(text.length / CHARS_PER_TOKEN);
203
+ };
204
+
205
+ const count = countTokens('');
206
+ expect(count).toBe(0);
207
+ });
208
+
209
+ // **Feature: earnd-bounty-engine, Property 24: Model Response Normalization**
210
+ test('provider field is valid enum value', () => {
211
+ fc.assert(
212
+ fc.property(providerArb, (provider) => {
213
+ expect(['claude', 'openai']).toContain(provider);
214
+ }),
215
+ { numRuns: 100 }
216
+ );
217
+ });
218
+ });
219
+
220
+ describe('Response Normalization Invariants - Property Tests', () => {
221
+ // **Feature: earnd-bounty-engine, Property 24: Model Response Normalization**
222
+ test('normalized response preserves content integrity', () => {
223
+ fc.assert(
224
+ fc.property(fc.string(), (originalContent) => {
225
+ // Simulate normalization: content should be preserved exactly
226
+ const response: ModelResponse = {
227
+ content: originalContent,
228
+ tokensUsed: 100,
229
+ model: 'test-model',
230
+ };
231
+
232
+ expect(response.content).toBe(originalContent);
233
+ }),
234
+ { numRuns: 100 }
235
+ );
236
+ });
237
+
238
+ // **Feature: earnd-bounty-engine, Property 24: Model Response Normalization**
239
+ test('tokensUsed reflects combined input and output tokens', () => {
240
+ fc.assert(
241
+ fc.property(
242
+ fc.integer({ min: 0, max: 500000 }),
243
+ fc.integer({ min: 0, max: 500000 }),
244
+ (inputTokens, outputTokens) => {
245
+ // Simulate how adapters calculate total tokens
246
+ const totalTokens = inputTokens + outputTokens;
247
+
248
+ const response: ModelResponse = {
249
+ content: 'test',
250
+ tokensUsed: totalTokens,
251
+ model: 'test-model',
252
+ };
253
+
254
+ expect(response.tokensUsed).toBe(inputTokens + outputTokens);
255
+ expect(response.tokensUsed).toBeGreaterThanOrEqual(0);
256
+ }
257
+ ),
258
+ { numRuns: 100 }
259
+ );
260
+ });
261
+
262
+ // **Feature: earnd-bounty-engine, Property 24: Model Response Normalization**
263
+ test('model field matches the configured model ID', () => {
264
+ fc.assert(
265
+ fc.property(modelIdArb, (configuredModelId) => {
266
+ // The response model should match what was configured
267
+ const response: ModelResponse = {
268
+ content: 'test',
269
+ tokensUsed: 100,
270
+ model: configuredModelId,
271
+ };
272
+
273
+ expect(response.model).toBe(configuredModelId);
274
+ }),
275
+ { numRuns: 100 }
276
+ );
277
+ });
278
+
279
+ // **Feature: earnd-bounty-engine, Property 24: Model Response Normalization**
280
+ test('response structure is identical regardless of provider', () => {
281
+ fc.assert(
282
+ fc.property(
283
+ providerArb,
284
+ fc.string(),
285
+ fc.integer({ min: 0, max: 100000 }),
286
+ modelIdArb,
287
+ (provider, content, tokens, modelId) => {
288
+ // Both Claude and OpenAI should produce the same structure
289
+ const response: ModelResponse = {
290
+ content,
291
+ tokensUsed: tokens,
292
+ model: modelId,
293
+ };
294
+
295
+ // Verify structure is consistent
296
+ expect(Object.keys(response).sort()).toEqual(['content', 'model', 'tokensUsed']);
297
+ expect(typeof response.content).toBe('string');
298
+ expect(typeof response.tokensUsed).toBe('number');
299
+ expect(typeof response.model).toBe('string');
300
+ }
301
+ ),
302
+ { numRuns: 100 }
303
+ );
304
+ });
305
+ });