outcome-cli 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113)
  1. package/README.md +261 -0
  2. package/package.json +95 -0
  3. package/src/agents/README.md +139 -0
  4. package/src/agents/adapters/anthropic.adapter.ts +166 -0
  5. package/src/agents/adapters/dalle.adapter.ts +145 -0
  6. package/src/agents/adapters/gemini.adapter.ts +134 -0
  7. package/src/agents/adapters/imagen.adapter.ts +106 -0
  8. package/src/agents/adapters/nano-banana.adapter.ts +129 -0
  9. package/src/agents/adapters/openai.adapter.ts +165 -0
  10. package/src/agents/adapters/veo.adapter.ts +130 -0
  11. package/src/agents/agent.schema.property.test.ts +379 -0
  12. package/src/agents/agent.schema.test.ts +148 -0
  13. package/src/agents/agent.schema.ts +263 -0
  14. package/src/agents/index.ts +60 -0
  15. package/src/agents/registered-agent.schema.ts +356 -0
  16. package/src/agents/registry.ts +97 -0
  17. package/src/agents/tournament-configs.property.test.ts +266 -0
  18. package/src/cli/README.md +145 -0
  19. package/src/cli/commands/define.ts +79 -0
  20. package/src/cli/commands/list.ts +46 -0
  21. package/src/cli/commands/logs.ts +83 -0
  22. package/src/cli/commands/run.ts +416 -0
  23. package/src/cli/commands/verify.ts +110 -0
  24. package/src/cli/index.ts +81 -0
  25. package/src/config/README.md +128 -0
  26. package/src/config/env.ts +262 -0
  27. package/src/config/index.ts +19 -0
  28. package/src/eval/README.md +318 -0
  29. package/src/eval/ai-judge.test.ts +435 -0
  30. package/src/eval/ai-judge.ts +368 -0
  31. package/src/eval/code-validators.ts +414 -0
  32. package/src/eval/evaluateOutcome.property.test.ts +1174 -0
  33. package/src/eval/evaluateOutcome.ts +591 -0
  34. package/src/eval/immigration-validators.ts +122 -0
  35. package/src/eval/index.ts +90 -0
  36. package/src/eval/judge-cache.ts +402 -0
  37. package/src/eval/tournament-validators.property.test.ts +439 -0
  38. package/src/eval/validators.property.test.ts +1118 -0
  39. package/src/eval/validators.ts +1199 -0
  40. package/src/eval/weighted-scorer.ts +285 -0
  41. package/src/index.ts +17 -0
  42. package/src/league/README.md +188 -0
  43. package/src/league/health-check.ts +353 -0
  44. package/src/league/index.ts +93 -0
  45. package/src/league/killAgent.ts +151 -0
  46. package/src/league/league.test.ts +1151 -0
  47. package/src/league/runLeague.ts +843 -0
  48. package/src/league/scoreAgent.ts +175 -0
  49. package/src/modules/omnibridge/__tests__/.gitkeep +1 -0
  50. package/src/modules/omnibridge/__tests__/auth-tunnel.property.test.ts +524 -0
  51. package/src/modules/omnibridge/__tests__/deterministic-logger.property.test.ts +965 -0
  52. package/src/modules/omnibridge/__tests__/ghost-api.property.test.ts +461 -0
  53. package/src/modules/omnibridge/__tests__/omnibridge-integration.test.ts +542 -0
  54. package/src/modules/omnibridge/__tests__/parallel-executor.property.test.ts +671 -0
  55. package/src/modules/omnibridge/__tests__/semantic-normalizer.property.test.ts +521 -0
  56. package/src/modules/omnibridge/__tests__/semantic-normalizer.test.ts +254 -0
  57. package/src/modules/omnibridge/__tests__/session-vault.property.test.ts +367 -0
  58. package/src/modules/omnibridge/__tests__/shadow-session.property.test.ts +523 -0
  59. package/src/modules/omnibridge/__tests__/triangulation-engine.property.test.ts +292 -0
  60. package/src/modules/omnibridge/__tests__/verification-engine.property.test.ts +769 -0
  61. package/src/modules/omnibridge/api/.gitkeep +1 -0
  62. package/src/modules/omnibridge/api/ghost-api.ts +1087 -0
  63. package/src/modules/omnibridge/auth/.gitkeep +1 -0
  64. package/src/modules/omnibridge/auth/auth-tunnel.ts +843 -0
  65. package/src/modules/omnibridge/auth/session-vault.ts +577 -0
  66. package/src/modules/omnibridge/core/.gitkeep +1 -0
  67. package/src/modules/omnibridge/core/semantic-normalizer.ts +702 -0
  68. package/src/modules/omnibridge/core/triangulation-engine.ts +530 -0
  69. package/src/modules/omnibridge/core/types.ts +610 -0
  70. package/src/modules/omnibridge/execution/.gitkeep +1 -0
  71. package/src/modules/omnibridge/execution/deterministic-logger.ts +629 -0
  72. package/src/modules/omnibridge/execution/parallel-executor.ts +542 -0
  73. package/src/modules/omnibridge/execution/shadow-session.ts +794 -0
  74. package/src/modules/omnibridge/index.ts +212 -0
  75. package/src/modules/omnibridge/omnibridge.ts +510 -0
  76. package/src/modules/omnibridge/verification/.gitkeep +1 -0
  77. package/src/modules/omnibridge/verification/verification-engine.ts +783 -0
  78. package/src/outcomes/README.md +75 -0
  79. package/src/outcomes/acquire-pilot-customer.ts +297 -0
  80. package/src/outcomes/code-delivery-outcomes.ts +89 -0
  81. package/src/outcomes/code-outcomes.ts +256 -0
  82. package/src/outcomes/code_review_battle.test.ts +135 -0
  83. package/src/outcomes/code_review_battle.ts +135 -0
  84. package/src/outcomes/cold_email_battle.ts +97 -0
  85. package/src/outcomes/content_creation_battle.ts +160 -0
  86. package/src/outcomes/f1_stem_opt_compliance.ts +61 -0
  87. package/src/outcomes/index.ts +107 -0
  88. package/src/outcomes/lead_gen_battle.test.ts +113 -0
  89. package/src/outcomes/lead_gen_battle.ts +99 -0
  90. package/src/outcomes/outcome.schema.property.test.ts +229 -0
  91. package/src/outcomes/outcome.schema.ts +187 -0
  92. package/src/outcomes/qualified_sales_interest.ts +118 -0
  93. package/src/outcomes/swarm_planner.property.test.ts +370 -0
  94. package/src/outcomes/swarm_planner.ts +96 -0
  95. package/src/outcomes/web_extraction.ts +234 -0
  96. package/src/runtime/README.md +220 -0
  97. package/src/runtime/agentRunner.test.ts +341 -0
  98. package/src/runtime/agentRunner.ts +746 -0
  99. package/src/runtime/claudeAdapter.ts +232 -0
  100. package/src/runtime/costTracker.ts +123 -0
  101. package/src/runtime/index.ts +34 -0
  102. package/src/runtime/modelAdapter.property.test.ts +305 -0
  103. package/src/runtime/modelAdapter.ts +144 -0
  104. package/src/runtime/openaiAdapter.ts +235 -0
  105. package/src/utils/README.md +122 -0
  106. package/src/utils/command-runner.ts +134 -0
  107. package/src/utils/cost-guard.ts +379 -0
  108. package/src/utils/errors.test.ts +290 -0
  109. package/src/utils/errors.ts +442 -0
  110. package/src/utils/index.ts +37 -0
  111. package/src/utils/logger.test.ts +361 -0
  112. package/src/utils/logger.ts +419 -0
  113. package/src/utils/output-parsers.ts +216 -0
@@ -0,0 +1,1174 @@
1
+ /**
2
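+ * Property-based tests for the evaluation system (`evaluateOutcome`). In addition to the properties tagged below, this file also contains tests for Property 11: Fail-Closed Evaluation (Requirements 5.4).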
3
+ *
4
+ * **Feature: earnd-bounty-engine, Property 3: Evaluation Determinism**
5
+ * **Validates: Requirements 1.3**
6
+ *
7
+ * **Feature: earnd-bounty-engine, Property 10: Evaluation Result Structure**
8
+ * **Validates: Requirements 5.1, 5.2, 5.3**
9
+ *
10
+ * **Feature: earnd-bounty-engine, Property 19: Qualified Sales Interest Payout**
11
+ * **Validates: Requirements 8.6**
12
+ *
13
+ * Property 3: Evaluation Determinism
14
+ * *For any* outcome and artifact pair, evaluating the same inputs multiple times
15
+ * SHALL always produce the same result (SUCCESS or FAILURE with identical reason).
16
+ *
17
+ * Property 10: Evaluation Result Structure
18
+ * *For any* evaluation result, the status SHALL be exactly 'SUCCESS' or 'FAILURE',
19
+ * FAILURE results SHALL have a non-empty reason, and SUCCESS results SHALL have
20
+ * verificationDetails populated.
21
+ *
22
+ * Property 19: Qualified Sales Interest Payout
23
+ * *For any* artifact that passes all 5 criteria (buying intent, company size, role,
24
+ * message length, email), the evaluation SHALL return SUCCESS with payoutAmount of 250.
25
+ */
26
+
27
+ import { describe, test, expect } from 'vitest';
28
+ import * as fc from 'fast-check';
29
+ import { evaluateOutcome, type AgentArtifact } from './evaluateOutcome.js';
30
+ import type { Outcome, SuccessCriterion } from '../outcomes/outcome.schema.js';
31
+
32
+ /**
33
+ * Arbitrary that yields a SuccessCriterion drawn from five shapes, one per validator name listed below.
34
+ * Uses validators that exist in the validatorMap (not visible in this file; presumably the evaluator's lookup table - TODO confirm).
35
+ */
36
+ const successCriterionArb = fc.oneof(
37
+ fc.record({
38
+ name: fc.string({ minLength: 1 }),
39
+ validator: fc.constant('validateBuyingIntent'),
40
+ params: fc.record({
41
+ keywords: fc.array(fc.string({ minLength: 1 }), { minLength: 1, maxLength: 5 }), // 1 to 5 non-empty keywords
42
+ }),
43
+ }),
44
+ fc.record({
45
+ name: fc.string({ minLength: 1 }),
46
+ validator: fc.constant('validateCompanySize'),
47
+ params: fc.record({
48
+ minimum: fc.integer({ min: 1, max: 10000 }),
49
+ }),
50
+ }),
51
+ fc.record({
52
+ name: fc.string({ minLength: 1 }),
53
+ validator: fc.constant('validateRole'),
54
+ params: fc.record({
55
+ excludedRoles: fc.array(fc.string({ minLength: 1 }), { minLength: 1, maxLength: 5 }), // 1 to 5 non-empty role names
56
+ }),
57
+ }),
58
+ fc.record({
59
+ name: fc.string({ minLength: 1 }),
60
+ validator: fc.constant('validateMessageLength'),
61
+ params: fc.record({
62
+ minWords: fc.integer({ min: 1, max: 100 }),
63
+ }),
64
+ }),
65
+ fc.record({
66
+ name: fc.string({ minLength: 1 }),
67
+ validator: fc.constant('validateEmail'),
68
+ params: fc.constant({}), // this criterion is always generated with an empty params object
69
+ })
70
+ ) as fc.Arbitrary<SuccessCriterion>; // asserted to the SuccessCriterion type imported from outcome.schema
71
+
72
+ /**
73
+ * Arbitrary for generating Outcome objects: a name that is non-empty after trimming, bounded numeric fields, 1-5 criteria drawn from successCriterionArb and 0-10 failure reasons.
74
+ */
75
+ const outcomeArb: fc.Arbitrary<Outcome> = fc.record({
76
+ name: fc.string({ minLength: 1 }).filter((s) => s.trim().length > 0), // rejects whitespace-only names
77
+ description: fc.string(),
78
+ payoutAmount: fc.double({ min: 0.01, max: 10000, noNaN: true }),
79
+ maxAttempts: fc.integer({ min: 1, max: 100 }),
80
+ timeLimitMs: fc.integer({ min: 1, max: 3600000 }), // upper bound is one hour expressed in milliseconds
81
+ successCriteria: fc.array(successCriterionArb, { minLength: 1, maxLength: 5 }),
82
+ failureReasons: fc.array(fc.string(), { minLength: 0, maxLength: 10 }),
83
+ });
84
+
85
+ /**
86
+ * Arbitrary for the `content` payload of an AgentArtifact: unconstrained strings plus a company size in [0, 1000000]; values are random and not tailored to pass any validator.
87
+ */
88
+ const artifactContentArb = fc.record({
89
+ message: fc.string(),
90
+ targetEmail: fc.string(),
91
+ targetCompany: fc.string(),
92
+ targetCompanySize: fc.integer({ min: 0, max: 1000000 }),
93
+ targetRole: fc.string(),
94
+ });
95
+
96
+ /**
97
+ * Generate valid ISO timestamp strings by mapping integer epoch-millisecond values through Date#toISOString.
98
+ * Avoids invalid date issues with fc.date().
99
+ */
100
+ const validTimestampArb = fc
101
+ .integer({ min: 1577836800000, max: 1924905600000 }) // 2020-01-01 to 2030-12-31 (UTC midnight, epoch milliseconds)
102
+ .map((ts) => new Date(ts).toISOString());
103
+
104
+ const agentArtifactArb = (outcomeName: string): fc.Arbitrary<AgentArtifact> => // artifact generator bound to a single outcome name
105
+ fc.record({
106
+ agentId: fc.string({ minLength: 1 }).filter((s) => s.trim().length > 0), // rejects whitespace-only ids
107
+ outcomeId: fc.constant(outcomeName), // always equals the supplied outcome name
108
+ attemptNumber: fc.integer({ min: 1, max: 100 }),
109
+ content: artifactContentArb, // random content; may or may not satisfy the outcome's criteria
110
+ timestamp: validTimestampArb,
111
+ });
112
+
113
+ /**
114
+ * Arbitrary for generating outcome and matching artifact pairs: the artifact's outcomeId is set to the generated outcome's name.
115
+ */
116
+ const outcomeArtifactPairArb = outcomeArb.chain((outcome) =>
117
+ agentArtifactArb(outcome.name).map((artifact) => ({ outcome, artifact }))
118
+ );
119
+
120
+ describe('Evaluation Determinism - Property Tests', () => {
121
+ // **Feature: earnd-bounty-engine, Property 3: Evaluation Determinism**
122
+ test('same inputs produce identical results on multiple evaluations', async () => {
123
+ await fc.assert(
124
+ fc.asyncProperty(outcomeArtifactPairArb, async ({ outcome, artifact }) => {
125
+ // Evaluate the same inputs three times in sequence, passing the same object references each time
126
+ const result1 = await evaluateOutcome(outcome, artifact);
127
+ const result2 = await evaluateOutcome(outcome, artifact);
128
+ const result3 = await evaluateOutcome(outcome, artifact);
129
+
130
+ // All results must have identical status
131
+ expect(result1.status).toBe(result2.status);
132
+ expect(result2.status).toBe(result3.status);
133
+
134
+ // All results must have identical reason
135
+ expect(result1.reason).toBe(result2.reason);
136
+ expect(result2.reason).toBe(result3.reason);
137
+
138
+ // All results must have identical criteriaResults (compared structurally with toEqual)
139
+ expect(result1.criteriaResults).toEqual(result2.criteriaResults);
140
+ expect(result2.criteriaResults).toEqual(result3.criteriaResults); // NOTE(review): verificationDetails is not compared across runs
141
+ }),
142
+ { numRuns: 100 } // the property is checked against 100 generated pairs
143
+ );
144
+ });
145
+
146
+ // **Feature: earnd-bounty-engine, Property 3: Evaluation Determinism**
147
+ test('evaluation result is always binary (SUCCESS or FAILURE)', async () => {
148
+ await fc.assert(
149
+ fc.asyncProperty(outcomeArtifactPairArb, async ({ outcome, artifact }) => {
150
+ const result = await evaluateOutcome(outcome, artifact);
151
+
152
+ // Status must be exactly SUCCESS or FAILURE, for randomly generated (not pre-qualified) artifacts
153
+ expect(['SUCCESS', 'FAILURE']).toContain(result.status);
154
+ }),
155
+ { numRuns: 100 } // the property is checked against 100 generated pairs
156
+ );
157
+ });
158
+
159
+ // **Feature: earnd-bounty-engine, Property 3: Evaluation Determinism**
160
+ test('evaluation result structure is consistent', async () => {
161
+ await fc.assert(
162
+ fc.asyncProperty(outcomeArtifactPairArb, async ({ outcome, artifact }) => {
163
+ const result = await evaluateOutcome(outcome, artifact);
164
+
165
+ // Result must have required fields: string status, string reason, array criteriaResults
166
+ expect(typeof result.status).toBe('string');
167
+ expect(typeof result.reason).toBe('string');
168
+ expect(Array.isArray(result.criteriaResults)).toBe(true);
169
+
170
+ // Reason must not be empty (asserted for every status here, not only for FAILURE)
171
+ expect(result.reason.length).toBeGreaterThan(0);
172
+ }),
173
+ { numRuns: 100 } // the property is checked against 100 generated pairs
174
+ );
175
+ });
176
+
177
+ // **Feature: earnd-bounty-engine, Property 3: Evaluation Determinism**
178
+ test('SUCCESS results include verificationDetails', async () => {
179
+ // Generate artifacts that are likely (not guaranteed) to pass all criteria of simpleOutcome below
180
+ const passingArtifactArb = (outcomeName: string): fc.Arbitrary<AgentArtifact> =>
181
+ fc.record({
182
+ agentId: fc.string({ minLength: 1 }).filter((s) => s.trim().length > 0),
183
+ outcomeId: fc.constant(outcomeName),
184
+ attemptNumber: fc.integer({ min: 1, max: 100 }),
185
+ content: fc.record({
186
+ // 20-50 whitespace-free random tokens followed by the keywords 'pricing' and 'demo', joined by single spaces
187
+ message: fc
188
+ .array(fc.string({ minLength: 1 }).filter((s) => !/\s/.test(s)), {
189
+ minLength: 20,
190
+ maxLength: 50,
191
+ })
192
+ .map((words) => [...words, 'pricing', 'demo'].join(' ')),
193
+ // Valid email format
194
+ targetEmail: fc.constant('test@example.com'),
195
+ targetCompany: fc.string({ minLength: 1 }),
196
+ // Large company size (at least 100, above the minimum of 50 configured below)
197
+ targetCompanySize: fc.integer({ min: 100, max: 10000 }),
198
+ // Valid role (not intern or student)
199
+ targetRole: fc.constantFrom('Manager', 'Director', 'VP', 'CEO', 'Engineer'),
200
+ }),
201
+ timestamp: validTimestampArb,
202
+ });
203
+
204
+ // Create a simple outcome with known validators (all five validator names, fixed params)
205
+ const simpleOutcome: Outcome = {
206
+ name: 'test_outcome',
207
+ description: 'Test outcome for determinism',
208
+ payoutAmount: 100,
209
+ maxAttempts: 3,
210
+ timeLimitMs: 60000,
211
+ successCriteria: [
212
+ {
213
+ name: 'buying_intent',
214
+ validator: 'validateBuyingIntent',
215
+ params: { keywords: ['pricing', 'demo', 'next steps'] },
216
+ },
217
+ {
218
+ name: 'company_size',
219
+ validator: 'validateCompanySize',
220
+ params: { minimum: 50 },
221
+ },
222
+ {
223
+ name: 'valid_role',
224
+ validator: 'validateRole',
225
+ params: { excludedRoles: ['intern', 'student'] },
226
+ },
227
+ {
228
+ name: 'message_length',
229
+ validator: 'validateMessageLength',
230
+ params: { minWords: 20 },
231
+ },
232
+ {
233
+ name: 'valid_email',
234
+ validator: 'validateEmail',
235
+ params: {},
236
+ },
237
+ ],
238
+ failureReasons: ['No buying intent', 'Company too small'],
239
+ };
240
+
241
+ await fc.assert(
242
+ fc.asyncProperty(passingArtifactArb(simpleOutcome.name), async (artifact) => {
243
+ const result = await evaluateOutcome(simpleOutcome, artifact);
244
+
245
+ if (result.status === 'SUCCESS') { // NOTE(review): conditional assertion - passes vacuously if no generated artifact reaches SUCCESS
246
+ // SUCCESS results must have verificationDetails
247
+ expect(result.verificationDetails).toBeDefined();
248
+ expect(typeof result.verificationDetails).toBe('object');
249
+ }
250
+ }),
251
+ { numRuns: 100 } // the property is checked against 100 generated artifacts
252
+ );
253
+ });
254
+
255
+ // **Feature: earnd-bounty-engine, Property 3: Evaluation Determinism**
256
+ test('FAILURE results do not include verificationDetails', async () => {
257
+ // Generate artifacts intended to fail: the only criterion below is validateEmail and targetEmail is fixed to 'invalid-email'
258
+ const failingArtifactArb = (outcomeName: string): fc.Arbitrary<AgentArtifact> =>
259
+ fc.record({
260
+ agentId: fc.string({ minLength: 1 }).filter((s) => s.trim().length > 0),
261
+ outcomeId: fc.constant(outcomeName),
262
+ attemptNumber: fc.integer({ min: 1, max: 100 }),
263
+ content: fc.record({
264
+ message: fc.constant('short'),
265
+ targetEmail: fc.constant('invalid-email'),
266
+ targetCompany: fc.string({ minLength: 1 }),
267
+ targetCompanySize: fc.integer({ min: 0, max: 10 }),
268
+ targetRole: fc.constant('intern'),
269
+ }),
270
+ timestamp: validTimestampArb,
271
+ });
272
+
273
+ const simpleOutcome: Outcome = {
274
+ name: 'test_outcome',
275
+ description: 'Test outcome',
276
+ payoutAmount: 100,
277
+ maxAttempts: 3,
278
+ timeLimitMs: 60000,
279
+ successCriteria: [
280
+ {
281
+ name: 'valid_email',
282
+ validator: 'validateEmail',
283
+ params: {},
284
+ },
285
+ ],
286
+ failureReasons: ['Invalid email'],
287
+ };
288
+
289
+ await fc.assert(
290
+ fc.asyncProperty(failingArtifactArb(simpleOutcome.name), async (artifact) => {
291
+ const result = await evaluateOutcome(simpleOutcome, artifact);
292
+
293
+ if (result.status === 'FAILURE') { // NOTE(review): conditional assertion - the FAILURE status itself is not asserted in this test
294
+ // FAILURE results should not have verificationDetails
295
+ expect(result.verificationDetails).toBeUndefined();
296
+ }
297
+ }),
298
+ { numRuns: 100 } // the property is checked against 100 generated artifacts
299
+ );
300
+ });
301
+
302
+ // **Feature: earnd-bounty-engine, Property 3: Evaluation Determinism**
303
+ test('criteriaResults count matches successCriteria count for valid artifacts', async () => {
304
+ await fc.assert(
305
+ fc.asyncProperty(outcomeArtifactPairArb, async ({ outcome, artifact }) => {
306
+ const result = await evaluateOutcome(outcome, artifact);
307
+
308
+ // If artifact schema is valid and outcome matches, criteriaResults should have one entry per success criterion
309
+ if (
310
+ result.reason.indexOf('Invalid artifact') === -1 && // NOTE(review): relies on this exact substring appearing in the reason text - confirm against evaluateOutcome
311
+ result.reason.indexOf('outcome mismatch') === -1
312
+ ) {
313
+ expect(result.criteriaResults.length).toBe(outcome.successCriteria.length);
314
+ }
315
+ }),
316
+ { numRuns: 100 } // the property is checked against 100 generated pairs
317
+ );
318
+ });
319
+ });
320
+
321
+
322
+ /**
323
+ * Property 10: Evaluation Result Structure
324
+ *
325
+ * **Feature: earnd-bounty-engine, Property 10: Evaluation Result Structure**
326
+ * **Validates: Requirements 5.1, 5.2, 5.3**
327
+ *
328
+ * *For any* evaluation result, the status SHALL be exactly 'SUCCESS' or 'FAILURE',
329
+ * FAILURE results SHALL have a non-empty reason, and SUCCESS results SHALL have
330
+ * verificationDetails populated.
331
+ */
332
+ describe('Evaluation Result Structure - Property Tests', () => {
333
+ /**
334
+ * Arbitrary for generating artifacts designed to pass all five criteria of the standardOutcome declared in this describe block (depends on validator behaviour that is not visible in this file).
335
+ * Used to test SUCCESS result structure.
336
+ */
337
+ const guaranteedPassingArtifactArb = (outcomeName: string): fc.Arbitrary<AgentArtifact> =>
338
+ fc.record({
339
+ agentId: fc.string({ minLength: 1 }).filter((s) => s.trim().length > 0),
340
+ outcomeId: fc.constant(outcomeName),
341
+ attemptNumber: fc.integer({ min: 1, max: 100 }),
342
+ content: fc.record({
343
+ // Message with 25+ words: 22-30 filler words followed by 'pricing', 'demo' and 'next steps', joined by single spaces
344
+ message: fc
345
+ .array(fc.constantFrom('word', 'test', 'content', 'data', 'info'), {
346
+ minLength: 22,
347
+ maxLength: 30,
348
+ })
349
+ .map((words) => [...words, 'pricing', 'demo', 'next steps'].join(' ')),
350
+ // Valid email format
351
+ targetEmail: fc.constantFrom('user@example.com', 'test@company.org', 'lead@business.net'),
352
+ targetCompany: fc.string({ minLength: 1 }),
353
+ // Company size >= 50
354
+ targetCompanySize: fc.integer({ min: 50, max: 10000 }),
355
+ // Valid role (not intern or student)
356
+ targetRole: fc.constantFrom('Manager', 'Director', 'VP', 'CEO', 'Engineer', 'Developer'),
357
+ }),
358
+ timestamp: validTimestampArb,
359
+ });
360
+
361
+ /**
362
+ * Arbitrary for generating artifacts designed to fail: message, email, company size and role are all chosen to violate the standardOutcome criteria declared in this describe block.
363
+ * Used to test FAILURE result structure.
364
+ */
365
+ const guaranteedFailingArtifactArb = (outcomeName: string): fc.Arbitrary<AgentArtifact> =>
366
+ fc.record({
367
+ agentId: fc.string({ minLength: 1 }).filter((s) => s.trim().length > 0),
368
+ outcomeId: fc.constant(outcomeName),
369
+ attemptNumber: fc.integer({ min: 1, max: 100 }),
370
+ content: fc.record({
371
+ // Short message without keywords - will fail message length and buying intent
372
+ message: fc.constant('short message'),
373
+ // Invalid email format
374
+ targetEmail: fc.constant('not-an-email'),
375
+ targetCompany: fc.string({ minLength: 1 }),
376
+ // Company size < 50
377
+ targetCompanySize: fc.integer({ min: 0, max: 49 }),
378
+ // Invalid role (both values appear in the excludedRoles list of standardOutcome)
379
+ targetRole: fc.constantFrom('intern', 'student'),
380
+ }),
381
+ timestamp: validTimestampArb,
382
+ });
383
+
384
+ /**
385
+ * Standard outcome with all 5 validators (buying intent, company size, role, message length, email) and fixed params, shared by the tests in this describe block.
386
+ */
387
+ const standardOutcome: Outcome = {
388
+ name: 'test_qualified_sales',
389
+ description: 'Test outcome for result structure validation',
390
+ payoutAmount: 250,
391
+ maxAttempts: 5,
392
+ timeLimitMs: 300000,
393
+ successCriteria: [
394
+ {
395
+ name: 'buying_intent',
396
+ validator: 'validateBuyingIntent',
397
+ params: { keywords: ['pricing', 'demo', 'next steps'] },
398
+ },
399
+ {
400
+ name: 'company_size',
401
+ validator: 'validateCompanySize',
402
+ params: { minimum: 50 },
403
+ },
404
+ {
405
+ name: 'valid_role',
406
+ validator: 'validateRole',
407
+ params: { excludedRoles: ['intern', 'student'] },
408
+ },
409
+ {
410
+ name: 'message_length',
411
+ validator: 'validateMessageLength',
412
+ params: { minWords: 20 },
413
+ },
414
+ {
415
+ name: 'valid_email',
416
+ validator: 'validateEmail',
417
+ params: {},
418
+ },
419
+ ],
420
+ failureReasons: ['No buying intent', 'Company too small', 'Invalid role', 'Message too short', 'Invalid email'],
421
+ };
422
+
423
+ // **Feature: earnd-bounty-engine, Property 10: Evaluation Result Structure**
424
+ // Validates: Requirement 5.1 - status SHALL be exactly 'SUCCESS' or 'FAILURE'
425
+ test('status is exactly SUCCESS or FAILURE for any evaluation', async () => {
426
+ await fc.assert(
427
+ fc.asyncProperty(outcomeArtifactPairArb, async ({ outcome, artifact }) => {
428
+ const result = await evaluateOutcome(outcome, artifact);
429
+
430
+ // Status must be exactly one of the two valid values
431
+ expect(result.status === 'SUCCESS' || result.status === 'FAILURE').toBe(true);
432
+ // Same membership check expressed with toContain (redundant with the assertion above)
433
+ expect(['SUCCESS', 'FAILURE']).toContain(result.status);
434
+ }),
435
+ { numRuns: 100 } // the property is checked against 100 generated pairs
436
+ );
437
+ });
438
+
439
+ // **Feature: earnd-bounty-engine, Property 10: Evaluation Result Structure**
440
+ // Validates: Requirement 5.2 - FAILURE results SHALL have a non-empty reason
441
+ test('FAILURE results have non-empty reason', async () => {
442
+ await fc.assert(
443
+ fc.asyncProperty(
444
+ guaranteedFailingArtifactArb(standardOutcome.name),
445
+ async (artifact) => {
446
+ const result = await evaluateOutcome(standardOutcome, artifact);
447
+
448
+ // This artifact is designed to fail, so the status is asserted unconditionally
449
+ expect(result.status).toBe('FAILURE');
450
+
451
+ // FAILURE must have a non-empty reason (also non-empty after trimming whitespace)
452
+ expect(typeof result.reason).toBe('string');
453
+ expect(result.reason.length).toBeGreaterThan(0);
454
+ expect(result.reason.trim().length).toBeGreaterThan(0);
455
+ }
456
+ ),
457
+ { numRuns: 100 } // the property is checked against 100 generated artifacts
458
+ );
459
+ });
460
+
461
+ // **Feature: earnd-bounty-engine, Property 10: Evaluation Result Structure**
462
+ // Validates: Requirement 5.3 - SUCCESS results SHALL have verificationDetails populated
463
+ test('SUCCESS results have verificationDetails populated', async () => {
464
+ await fc.assert(
465
+ fc.asyncProperty(
466
+ guaranteedPassingArtifactArb(standardOutcome.name),
467
+ async (artifact) => {
468
+ const result = await evaluateOutcome(standardOutcome, artifact);
469
+
470
+ // This artifact is designed to pass, so the status is asserted unconditionally
471
+ expect(result.status).toBe('SUCCESS');
472
+
473
+ // SUCCESS must have verificationDetails (defined, non-null, object-typed)
474
+ expect(result.verificationDetails).toBeDefined();
475
+ expect(result.verificationDetails).not.toBeNull();
476
+ expect(typeof result.verificationDetails).toBe('object');
477
+
478
+ // verificationDetails should contain expected fields (presence only; the values are not checked here)
479
+ expect(result.verificationDetails).toHaveProperty('outcomeId');
480
+ expect(result.verificationDetails).toHaveProperty('payoutAmount');
481
+ expect(result.verificationDetails).toHaveProperty('agentId');
482
+ expect(result.verificationDetails).toHaveProperty('attemptNumber');
483
+ expect(result.verificationDetails).toHaveProperty('evaluatedAt');
484
+ expect(result.verificationDetails).toHaveProperty('criteriaCount');
485
+ }
486
+ ),
487
+ { numRuns: 100 } // the property is checked against 100 generated artifacts
488
+ );
489
+ });
490
+
491
+ // **Feature: earnd-bounty-engine, Property 10: Evaluation Result Structure**
492
+ // Combined test: validates all three requirements (5.1, 5.2, 5.3) together on randomly generated pairs
493
+ test('evaluation result structure is complete and correct', async () => {
494
+ await fc.assert(
495
+ fc.asyncProperty(outcomeArtifactPairArb, async ({ outcome, artifact }) => {
496
+ const result = await evaluateOutcome(outcome, artifact);
497
+
498
+ // Requirement 5.1: Status is exactly SUCCESS or FAILURE
499
+ expect(['SUCCESS', 'FAILURE']).toContain(result.status);
500
+
501
+ // Result always has reason and criteriaResults
502
+ expect(typeof result.reason).toBe('string');
503
+ expect(Array.isArray(result.criteriaResults)).toBe(true);
504
+
505
+ if (result.status === 'FAILURE') {
506
+ // Requirement 5.2: FAILURE has non-empty reason
507
+ expect(result.reason.length).toBeGreaterThan(0);
508
+ // FAILURE should not have verificationDetails
509
+ expect(result.verificationDetails).toBeUndefined();
510
+ }
511
+
512
+ if (result.status === 'SUCCESS') { // whichever branch applies depends on the randomly generated artifact
513
+ // Requirement 5.3: SUCCESS has verificationDetails populated
514
+ expect(result.verificationDetails).toBeDefined();
515
+ expect(typeof result.verificationDetails).toBe('object');
516
+ }
517
+ }),
518
+ { numRuns: 100 } // the property is checked against 100 generated pairs
519
+ );
520
+ });
521
+
522
+ // **Feature: earnd-bounty-engine, Property 10: Evaluation Result Structure**
523
+ // Edge case: invalid artifact schema results in FAILURE with reason
524
+ test('invalid artifact schema results in FAILURE with structured reason', async () => {
525
+ const invalidArtifactArb = fc.record({
526
+ agentId: fc.string({ minLength: 1 }).filter((s) => s.trim().length > 0),
527
+ outcomeId: fc.constant(standardOutcome.name),
528
+ attemptNumber: fc.integer({ min: 1, max: 100 }),
529
+ content: fc.record({
530
+ // message is always undefined at runtime; the double cast only silences the compiler so the bad value can be built
531
+ message: fc.constant(undefined as unknown as string),
532
+ targetEmail: fc.string(),
533
+ targetCompany: fc.string(),
534
+ targetCompanySize: fc.integer(), // unbounded here, so negative sizes can be generated too
535
+ targetRole: fc.string(),
536
+ }),
537
+ timestamp: validTimestampArb,
538
+ }) as fc.Arbitrary<AgentArtifact>;
539
+
540
+ await fc.assert(
541
+ fc.asyncProperty(invalidArtifactArb, async (artifact) => {
542
+ const result = await evaluateOutcome(standardOutcome, artifact);
543
+
544
+ // Invalid artifacts should result in FAILURE
545
+ expect(result.status).toBe('FAILURE');
546
+ // Should have a reason explaining the failure (only non-emptiness is checked, not the wording)
547
+ expect(result.reason.length).toBeGreaterThan(0);
548
+ // Should not have verificationDetails
549
+ expect(result.verificationDetails).toBeUndefined();
550
+ }),
551
+ { numRuns: 100 } // the property is checked against 100 generated artifacts
552
+ );
553
+ });
554
+
555
+ // **Feature: earnd-bounty-engine, Property 10: Evaluation Result Structure**
556
+ // Edge case: outcome mismatch results in FAILURE with reason
557
+ test('outcome mismatch results in FAILURE with structured reason', async () => {
558
+ const mismatchedArtifactArb = fc.record({
559
+ agentId: fc.string({ minLength: 1 }).filter((s) => s.trim().length > 0),
560
+ // Different outcome ID than the one being evaluated (standardOutcome.name is 'test_qualified_sales')
561
+ outcomeId: fc.constant('different_outcome_id'),
562
+ attemptNumber: fc.integer({ min: 1, max: 100 }),
563
+ content: fc.record({
564
+ message: fc.string(),
565
+ targetEmail: fc.string(),
566
+ targetCompany: fc.string(),
567
+ targetCompanySize: fc.integer({ min: 0, max: 1000000 }),
568
+ targetRole: fc.string(),
569
+ }),
570
+ timestamp: validTimestampArb,
571
+ });
572
+
573
+ await fc.assert(
574
+ fc.asyncProperty(mismatchedArtifactArb, async (artifact) => {
575
+ const result = await evaluateOutcome(standardOutcome, artifact);
576
+
577
+ // Mismatched outcome should result in FAILURE
578
+ expect(result.status).toBe('FAILURE');
579
+ // Should have a reason mentioning the mismatch (substring match on 'mismatch')
580
+ expect(result.reason).toContain('mismatch');
581
+ // Should not have verificationDetails
582
+ expect(result.verificationDetails).toBeUndefined();
583
+ }),
584
+ { numRuns: 100 } // the property is checked against 100 generated artifacts
585
+ );
586
+ });
587
+ });
588
+
589
+
590
+ /**
591
+ * Property 11: Fail-Closed Evaluation
592
+ *
593
+ * **Feature: earnd-bounty-engine, Property 11: Fail-Closed Evaluation**
594
+ * **Validates: Requirements 5.4**
595
+ *
596
+ * *For any* artifact with invalid or unverifiable data, evaluation SHALL return FAILURE.
597
+ *
598
+ * This property ensures the system fails safely - when data is invalid, malformed,
599
+ * or cannot be verified, the evaluation returns FAILURE rather than SUCCESS or
600
+ * throwing an exception. This prevents accidental payouts on bad data.
601
+ */
602
+ describe('Fail-Closed Evaluation - Property Tests', () => {
603
+ /**
604
+ * Standard outcome with all 5 validators (buying intent, company size, role, message length, email) and fixed params, used by the fail-closed tests in this describe block.
605
+ */
606
+ const standardOutcome: Outcome = {
607
+ name: 'test_qualified_sales',
608
+ description: 'Test outcome for fail-closed validation',
609
+ payoutAmount: 250,
610
+ maxAttempts: 5,
611
+ timeLimitMs: 300000,
612
+ successCriteria: [
613
+ {
614
+ name: 'buying_intent',
615
+ validator: 'validateBuyingIntent',
616
+ params: { keywords: ['pricing', 'demo', 'next steps'] },
617
+ },
618
+ {
619
+ name: 'company_size',
620
+ validator: 'validateCompanySize',
621
+ params: { minimum: 50 },
622
+ },
623
+ {
624
+ name: 'valid_role',
625
+ validator: 'validateRole',
626
+ params: { excludedRoles: ['intern', 'student'] },
627
+ },
628
+ {
629
+ name: 'message_length',
630
+ validator: 'validateMessageLength',
631
+ params: { minWords: 20 },
632
+ },
633
+ {
634
+ name: 'valid_email',
635
+ validator: 'validateEmail',
636
+ params: {},
637
+ },
638
+ ],
639
+ failureReasons: ['No buying intent', 'Company too small', 'Invalid role', 'Message too short', 'Invalid email'],
640
+ };
641
+
642
+ // **Feature: earnd-bounty-engine, Property 11: Fail-Closed Evaluation**
643
+ // Test: Artifacts whose agentId is empty or whitespace-only return FAILURE
644
+ test('artifacts with missing agentId return FAILURE', async () => {
645
+ const missingAgentIdArb = fc.record({
646
+ agentId: fc.constantFrom('', ' ', '\t', '\n'), // Empty or whitespace-only
647
+ outcomeId: fc.constant(standardOutcome.name),
648
+ attemptNumber: fc.integer({ min: 1, max: 100 }),
649
+ content: fc.record({
650
+ message: fc.string(),
651
+ targetEmail: fc.string(),
652
+ targetCompany: fc.string(),
653
+ targetCompanySize: fc.integer({ min: 0, max: 1000000 }),
654
+ targetRole: fc.string(),
655
+ }),
656
+ timestamp: validTimestampArb,
657
+ });
658
+
659
+ await fc.assert(
660
+ fc.asyncProperty(missingAgentIdArb, async (artifact) => {
661
+ const result = await evaluateOutcome(standardOutcome, artifact);
662
+
663
+ // Invalid artifact must return FAILURE (fail closed), with a reason containing 'Invalid artifact'
664
+ expect(result.status).toBe('FAILURE');
665
+ expect(result.reason).toContain('Invalid artifact');
666
+ expect(result.verificationDetails).toBeUndefined();
667
+ }),
668
+ { numRuns: 100 } // the property is checked against 100 generated artifacts
669
+ );
670
+ });
671
+
672
// **Feature: earnd-bounty-engine, Property 11: Fail-Closed Evaluation**
// Test: an empty or whitespace-only outcomeId must yield FAILURE
test('artifacts with missing outcomeId return FAILURE', async () => {
  const isNotBlank = (s: string): boolean => s.trim() !== '';

  // Everything is well-formed except outcomeId, which is always blank.
  const blankOutcomeIdArtifactArb = fc.record({
    agentId: fc.string({ minLength: 1 }).filter(isNotBlank),
    outcomeId: fc.constantFrom('', ' ', '\t', '\n'),
    attemptNumber: fc.integer({ min: 1, max: 100 }),
    content: fc.record({
      message: fc.string(),
      targetEmail: fc.string(),
      targetCompany: fc.string(),
      targetCompanySize: fc.integer({ min: 0, max: 1000000 }),
      targetRole: fc.string(),
    }),
    timestamp: validTimestampArb,
  });

  const failsClosed = fc.asyncProperty(blankOutcomeIdArtifactArb, async (artifact) => {
    const evaluation = await evaluateOutcome(standardOutcome, artifact);

    // Fail closed: FAILURE status and no payout details.
    expect(evaluation.status).toBe('FAILURE');
    expect(evaluation.verificationDetails).toBeUndefined();
  });

  await fc.assert(failsClosed, { numRuns: 100 });
});
700
+
701
// **Feature: earnd-bounty-engine, Property 11: Fail-Closed Evaluation**
// Test: zero, negative, fractional and NaN attempt numbers must yield FAILURE
test('artifacts with invalid attemptNumber return FAILURE', async () => {
  const nonBlankAgentIdArb = fc.string({ minLength: 1 }).filter((s) => s.trim() !== '');
  const badAttemptNumberArb = fc.constantFrom(0, -1, -100, 1.5, NaN);

  const artifactArb = fc.record({
    agentId: nonBlankAgentIdArb,
    outcomeId: fc.constant(standardOutcome.name),
    attemptNumber: badAttemptNumberArb,
    content: fc.record({
      message: fc.string(),
      targetEmail: fc.string(),
      targetCompany: fc.string(),
      targetCompanySize: fc.integer({ min: 0, max: 1000000 }),
      targetRole: fc.string(),
    }),
    timestamp: validTimestampArb,
  });

  await fc.assert(
    fc.asyncProperty(artifactArb, async (artifact) => {
      const { status, verificationDetails } = await evaluateOutcome(standardOutcome, artifact);

      // Fail closed: FAILURE status and no payout details.
      expect(status).toBe('FAILURE');
      expect(verificationDetails).toBeUndefined();
    }),
    { numRuns: 100 }
  );
});
729
+
730
// **Feature: earnd-bounty-engine, Property 11: Fail-Closed Evaluation**
// Test: an artifact whose content is null must yield FAILURE
test('artifacts with null content return FAILURE', async () => {
  // The cast smuggles null past the compiler so the runtime check is exercised.
  const nullContent = null as unknown as AgentArtifact['content'];

  const artifactArb = fc.record({
    agentId: fc.string({ minLength: 1 }).filter((s) => s.trim() !== ''),
    outcomeId: fc.constant(standardOutcome.name),
    attemptNumber: fc.integer({ min: 1, max: 100 }),
    content: fc.constant(nullContent),
    timestamp: validTimestampArb,
  });

  const failsClosed = fc.asyncProperty(artifactArb, async (artifact) => {
    const evaluation = await evaluateOutcome(standardOutcome, artifact);

    // Fail closed: FAILURE status, an "Invalid artifact" reason, no payout details.
    expect(evaluation.status).toBe('FAILURE');
    expect(evaluation.reason).toContain('Invalid artifact');
    expect(evaluation.verificationDetails).toBeUndefined();
  });

  await fc.assert(failsClosed, { numRuns: 100 });
});
753
+
754
// **Feature: earnd-bounty-engine, Property 11: Fail-Closed Evaluation**
// Test: a content field of the wrong runtime type must yield FAILURE
test('artifacts with wrong content field types return FAILURE', async () => {
  // `message` is a number at runtime; the cast only silences the compiler.
  const numericMessage = 123 as unknown as string;

  const mistypedContentArb = fc.record({
    message: fc.constant(numericMessage),
    targetEmail: fc.string(),
    targetCompany: fc.string(),
    targetCompanySize: fc.integer({ min: 0, max: 1000000 }),
    targetRole: fc.string(),
  });

  const artifactArb = fc.record({
    agentId: fc.string({ minLength: 1 }).filter((s) => s.trim() !== ''),
    outcomeId: fc.constant(standardOutcome.name),
    attemptNumber: fc.integer({ min: 1, max: 100 }),
    content: mistypedContentArb,
    timestamp: validTimestampArb,
  });

  await fc.assert(
    fc.asyncProperty(artifactArb, async (artifact) => {
      const { status, verificationDetails } = await evaluateOutcome(standardOutcome, artifact);

      // Fail closed: FAILURE status and no payout details.
      expect(status).toBe('FAILURE');
      expect(verificationDetails).toBeUndefined();
    }),
    { numRuns: 100 }
  );
});
782
+
783
// **Feature: earnd-bounty-engine, Property 11: Fail-Closed Evaluation**
// Test: an outcome whose criterion names an unknown validator must yield FAILURE
test('outcomes with unknown validators return FAILURE for criteria', async () => {
  const criterionName = 'unknown_criterion';

  // Single-criterion outcome pointing at a validator name the test treats as unknown.
  const outcomeWithUnknownValidator: Outcome = {
    name: 'test_unknown_validator',
    description: 'Test outcome with unknown validator',
    payoutAmount: 100,
    maxAttempts: 3,
    timeLimitMs: 60000,
    successCriteria: [{ name: criterionName, validator: 'validateNonExistent', params: {} }],
    failureReasons: ['Unknown validator'],
  };

  // Artifact targeting the outcome above, with non-blank ids and unconstrained content.
  const artifactArb = fc.record({
    agentId: fc.string({ minLength: 1 }).filter((s) => s.trim() !== ''),
    outcomeId: fc.constant(outcomeWithUnknownValidator.name),
    attemptNumber: fc.integer({ min: 1, max: 100 }),
    content: fc.record({
      message: fc.string(),
      targetEmail: fc.string(),
      targetCompany: fc.string(),
      targetCompanySize: fc.integer({ min: 0, max: 1000000 }),
      targetRole: fc.string(),
    }),
    timestamp: validTimestampArb,
  });

  const failsClosed = fc.asyncProperty(artifactArb, async (artifact) => {
    const evaluation = await evaluateOutcome(outcomeWithUnknownValidator, artifact);

    // Fail closed: FAILURE status and no payout details.
    expect(evaluation.status).toBe('FAILURE');
    expect(evaluation.verificationDetails).toBeUndefined();

    // The criterion must be reported, marked as failed, and explain why.
    const reported = evaluation.criteriaResults.find(({ name }) => name === criterionName);
    expect(reported).toBeDefined();
    expect(reported?.passed).toBe(false);
    expect(reported?.reason).toContain('Unknown validator');
  });

  await fc.assert(failsClosed, { numRuns: 100 });
});
834
+
835
// **Feature: earnd-bounty-engine, Property 11: Fail-Closed Evaluation**
// Test: an artifact whose outcomeId differs from the evaluated outcome must yield FAILURE
test('outcome ID mismatch returns FAILURE', async () => {
  const isNotBlank = (s: string): boolean => s.trim() !== '';

  // Any non-blank id except the one the outcome actually carries.
  const foreignOutcomeIdArb = fc
    .string({ minLength: 1 })
    .filter((s) => isNotBlank(s) && s !== standardOutcome.name);

  const artifactArb = fc.record({
    agentId: fc.string({ minLength: 1 }).filter(isNotBlank),
    outcomeId: foreignOutcomeIdArb,
    attemptNumber: fc.integer({ min: 1, max: 100 }),
    content: fc.record({
      message: fc.string(),
      targetEmail: fc.string(),
      targetCompany: fc.string(),
      targetCompanySize: fc.integer({ min: 0, max: 1000000 }),
      targetRole: fc.string(),
    }),
    timestamp: validTimestampArb,
  });

  await fc.assert(
    fc.asyncProperty(artifactArb, async (artifact) => {
      const { status, reason, verificationDetails } = await evaluateOutcome(standardOutcome, artifact);

      // Fail closed: FAILURE status, a reason mentioning the mismatch, no payout details.
      expect(status).toBe('FAILURE');
      expect(reason).toContain('mismatch');
      expect(verificationDetails).toBeUndefined();
    }),
    { numRuns: 100 }
  );
});
866
+
867
// **Feature: earnd-bounty-engine, Property 11: Fail-Closed Evaluation**
// Test: values that are not artifact-shaped at all must yield FAILURE
test('completely malformed artifacts return FAILURE', async () => {
  // null, undefined, empty object, array, string, number - in that order.
  const malformedValues: unknown[] = [null, undefined, {}, [], 'not an artifact', 42];

  // Each value is force-cast so it can be passed where an AgentArtifact is expected.
  const malformedArtifactArb = fc.oneof(
    ...malformedValues.map((value) => fc.constant(value as unknown as AgentArtifact))
  );

  const failsClosed = fc.asyncProperty(malformedArtifactArb, async (artifact) => {
    const evaluation = await evaluateOutcome(standardOutcome, artifact);

    // Fail closed: FAILURE status and no payout details.
    expect(evaluation.status).toBe('FAILURE');
    expect(evaluation.verificationDetails).toBeUndefined();
  });

  await fc.assert(failsClosed, { numRuns: 100 });
});
897
+
898
// **Feature: earnd-bounty-engine, Property 11: Fail-Closed Evaluation**
// Test: Evaluation never throws exceptions - always returns FAILURE for bad data
test('evaluation never throws exceptions for any input', async () => {
  // Completely arbitrary values: primitives, objects, arrays, null, undefined, ...
  const anyInputArb = fc.anything();

  await fc.assert(
    fc.asyncProperty(anyInputArb, async (randomInput) => {
      // No try/catch on purpose: if evaluateOutcome throws or rejects, the
      // property fails and fast-check reports the original error together
      // with the counterexample, instead of a bare "expected false to be true".
      const result = await evaluateOutcome(standardOutcome, randomInput as AgentArtifact);

      // Inputs that are not objects, or that lack agentId/content, are
      // certainly invalid, so they must fail closed with no payout details.
      if (
        typeof randomInput !== 'object' ||
        randomInput === null ||
        !('agentId' in randomInput) ||
        !('content' in randomInput)
      ) {
        expect(result.status).toBe('FAILURE');
        expect(result.verificationDetails).toBeUndefined();
      }
    }),
    { numRuns: 100 }
  );
});
937
+
938
// **Feature: earnd-bounty-engine, Property 11: Fail-Closed Evaluation**
// Test: an empty or whitespace-only timestamp must yield FAILURE
test('artifacts with missing timestamp return FAILURE', async () => {
  const blankTimestampArb = fc.constantFrom('', ' ', '\t', '\n');

  // Everything is well-formed except the timestamp, which is always blank.
  const artifactArb = fc.record({
    agentId: fc.string({ minLength: 1 }).filter((s) => s.trim() !== ''),
    outcomeId: fc.constant(standardOutcome.name),
    attemptNumber: fc.integer({ min: 1, max: 100 }),
    content: fc.record({
      message: fc.string(),
      targetEmail: fc.string(),
      targetCompany: fc.string(),
      targetCompanySize: fc.integer({ min: 0, max: 1000000 }),
      targetRole: fc.string(),
    }),
    timestamp: blankTimestampArb,
  });

  const failsClosed = fc.asyncProperty(artifactArb, async (artifact) => {
    const { status, verificationDetails } = await evaluateOutcome(standardOutcome, artifact);

    // Fail closed: FAILURE status and no payout details.
    expect(status).toBe('FAILURE');
    expect(verificationDetails).toBeUndefined();
  });

  await fc.assert(failsClosed, { numRuns: 100 });
});
966
+ });
967
+
968
+
969
/**
 * Property 19: Qualified Sales Interest Payout
 *
 * **Feature: earnd-bounty-engine, Property 19: Qualified Sales Interest Payout**
 * **Validates: Requirements 8.6**
 *
 * *For any* artifact that passes all 5 criteria (buying intent, company size, role,
 * message length, email), the evaluation SHALL return SUCCESS with payoutAmount of 250.
 */
describe('Qualified Sales Interest Payout - Property Tests', () => {
  /**
   * Inline definition of the qualified_sales_interest outcome used by every
   * test in this suite: payoutAmount 250 and five success criteria.
   */
  const qualifiedSalesInterestOutcome: Outcome = {
    name: 'qualified_sales_interest',
    description: 'Generate a qualified sales lead',
    payoutAmount: 250,
    maxAttempts: 5,
    timeLimitMs: 300000,
    successCriteria: [
      {
        name: 'buying_intent',
        validator: 'validateBuyingIntent',
        params: { keywords: ['pricing', 'demo', 'next steps'] },
      },
      {
        name: 'company_size',
        validator: 'validateCompanySize',
        params: { minimum: 50 },
      },
      {
        name: 'valid_role',
        validator: 'validateRole',
        params: { excludedRoles: ['intern', 'student'] },
      },
      {
        name: 'message_length',
        validator: 'validateMessageLength',
        params: { minWords: 20 },
      },
      {
        name: 'valid_email',
        validator: 'validateEmail',
        params: {},
      },
    ],
    failureReasons: [
      'No buying intent detected',
      'Company too small',
      'Invalid role',
      'Message too short',
      'Invalid email format',
    ],
  };

  /**
   * Wraps a content arbitrary in an otherwise well-formed artifact envelope
   * (non-blank agentId, matching outcomeId, positive attempt number, valid
   * timestamp), so each test only has to describe the content it cares about.
   */
  const artifactWithContent = (
    contentArb: fc.Arbitrary<AgentArtifact['content']>
  ): fc.Arbitrary<AgentArtifact> =>
    fc.record({
      agentId: fc.string({ minLength: 1 }).filter((s) => s.trim().length > 0),
      outcomeId: fc.constant('qualified_sales_interest'),
      attemptNumber: fc.integer({ min: 1, max: 100 }),
      content: contentArb,
      timestamp: validTimestampArb,
    });

  /**
   * Arbitrary for generating artifacts that pass ALL 5 criteria:
   * 1. Message contains buying intent keywords (pricing, demo, next steps)
   * 2. Company size >= 50
   * 3. Role is not intern or student
   * 4. Message length >= 20 words
   * 5. Email is syntactically valid
   */
  const passingAllCriteriaArtifactArb: fc.Arbitrary<AgentArtifact> = artifactWithContent(
    fc.record({
      // 17-30 whitespace-free filler words + one keyword + two fixed words = 20+ words.
      message: fc
        .tuple(
          fc.array(
            // minLength: 1 already rules out empty words; the filter only rejects whitespace.
            fc.string({ minLength: 1, maxLength: 10 }).filter((s) => !/\s/.test(s)),
            { minLength: 17, maxLength: 30 }
          ),
          fc.constantFrom('pricing', 'demo', 'next steps')
        )
        .map(([words, keyword]) => [...words, keyword, 'additional', 'words'].join(' ')),
      // Alphanumeric local part on a fixed, well-formed domain.
      targetEmail: fc
        .tuple(
          fc.string({ minLength: 1, maxLength: 10 }).filter((s) => /^[a-zA-Z0-9]+$/.test(s)),
          fc.constantFrom('example.com', 'company.org', 'business.net')
        )
        .map(([local, domain]) => `${local}@${domain}`),
      targetCompany: fc.string({ minLength: 1 }),
      // Company size >= 50
      targetCompanySize: fc.integer({ min: 50, max: 100000 }),
      // Valid role (not intern or student)
      targetRole: fc.constantFrom(
        'Manager',
        'Director',
        'VP',
        'CEO',
        'CTO',
        'Engineer',
        'Developer',
        'Analyst',
        'Consultant',
        'Lead'
      ),
    })
  );

  // **Feature: earnd-bounty-engine, Property 19: Qualified Sales Interest Payout**
  test('artifacts passing all 5 criteria return SUCCESS with payoutAmount of 250', async () => {
    await fc.assert(
      fc.asyncProperty(passingAllCriteriaArtifactArb, async (artifact) => {
        const result = await evaluateOutcome(qualifiedSalesInterestOutcome, artifact);

        // Must return SUCCESS
        expect(result.status).toBe('SUCCESS');

        // Must have verificationDetails
        expect(result.verificationDetails).toBeDefined();

        // verificationDetails must contain payoutAmount of 250
        expect(result.verificationDetails?.payoutAmount).toBe(250);

        // All 5 criteria must have passed
        expect(result.criteriaResults.length).toBe(5);
        expect(result.criteriaResults.every((c) => c.passed)).toBe(true);
      }),
      { numRuns: 100 }
    );
  });

  // **Feature: earnd-bounty-engine, Property 19: Qualified Sales Interest Payout**
  test('payoutAmount in verificationDetails matches outcome definition', async () => {
    await fc.assert(
      fc.asyncProperty(passingAllCriteriaArtifactArb, async (artifact) => {
        const result = await evaluateOutcome(qualifiedSalesInterestOutcome, artifact);

        // Asserted unconditionally: guarding the checks below with
        // `if (status === 'SUCCESS')` would let this test pass without
        // checking anything whenever evaluation fails.
        expect(result.status).toBe('SUCCESS');

        // The payoutAmount in verificationDetails must match the outcome's payoutAmount
        expect(result.verificationDetails?.payoutAmount).toBe(
          qualifiedSalesInterestOutcome.payoutAmount
        );
        // Specifically for qualified_sales_interest, this must be $250
        expect(result.verificationDetails?.payoutAmount).toBe(250);
      }),
      { numRuns: 100 }
    );
  });

  // **Feature: earnd-bounty-engine, Property 19: Qualified Sales Interest Payout**
  test('SUCCESS result includes all required verification fields', async () => {
    await fc.assert(
      fc.asyncProperty(passingAllCriteriaArtifactArb, async (artifact) => {
        const result = await evaluateOutcome(qualifiedSalesInterestOutcome, artifact);

        expect(result.status).toBe('SUCCESS');
        expect(result.verificationDetails).toBeDefined();

        // Verify all required fields are present
        expect(result.verificationDetails).toHaveProperty('outcomeId', 'qualified_sales_interest');
        expect(result.verificationDetails).toHaveProperty('payoutAmount', 250);
        expect(result.verificationDetails).toHaveProperty('agentId', artifact.agentId);
        expect(result.verificationDetails).toHaveProperty('attemptNumber', artifact.attemptNumber);
        expect(result.verificationDetails).toHaveProperty('evaluatedAt');
        expect(result.verificationDetails).toHaveProperty('criteriaCount', 5);
      }),
      { numRuns: 100 }
    );
  });

  // **Feature: earnd-bounty-engine, Property 19: Qualified Sales Interest Payout**
  test('failing any single criterion prevents payout', async () => {
    const fillerWordArb = fc.constantFrom(
      'hello',
      'world',
      'test',
      'data',
      'content',
      'info',
      'text',
      'word'
    );

    // Field arbitraries intended to satisfy every criterion; each failing
    // variant below overrides exactly one of them.
    const passingFields = {
      // 20-30 filler words followed by a buying-intent keyword.
      message: fc
        .array(fillerWordArb, { minLength: 20, maxLength: 30 })
        .map((words) => [...words, 'pricing'].join(' ')),
      targetEmail: fc.constant('valid@example.com'),
      targetCompany: fc.string({ minLength: 1 }),
      targetCompanySize: fc.integer({ min: 50, max: 100000 }),
      targetRole: fc.constantFrom('Manager', 'Director', 'VP'),
    };

    // One content arbitrary per criterion, each built to violate only that criterion.
    const failingContentArbs = [
      // buying_intent: 20+ words but none of the configured keywords.
      fc.record({
        ...passingFields,
        message: fc
          .array(fillerWordArb, { minLength: 20, maxLength: 30 })
          .map((words) => words.join(' ')),
      }),
      // company_size: below the configured minimum of 50.
      fc.record({ ...passingFields, targetCompanySize: fc.integer({ min: 0, max: 49 }) }),
      // valid_role: one of the configured excluded roles.
      fc.record({ ...passingFields, targetRole: fc.constantFrom('intern', 'student') }),
      // message_length: contains a keyword but is far shorter than 20 words.
      fc.record({
        ...passingFields,
        message: fc
          .constantFrom('pricing', 'demo', 'next steps')
          .map((keyword) => `${keyword} please`),
      }),
      // valid_email: no "@" at all.
      fc.record({
        ...passingFields,
        targetEmail: fc.constantFrom('not-an-email', 'missing-at-sign.example.com'),
      }),
    ];

    for (const contentArb of failingContentArbs) {
      await fc.assert(
        fc.asyncProperty(artifactWithContent(contentArb), async (artifact) => {
          const result = await evaluateOutcome(qualifiedSalesInterestOutcome, artifact);

          // Must return FAILURE when any criterion fails
          expect(result.status).toBe('FAILURE');

          // No payout should be triggered
          expect(result.verificationDetails).toBeUndefined();
        }),
        { numRuns: 100 }
      );
    }
  });
});