outcome-cli 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +261 -0
- package/package.json +95 -0
- package/src/agents/README.md +139 -0
- package/src/agents/adapters/anthropic.adapter.ts +166 -0
- package/src/agents/adapters/dalle.adapter.ts +145 -0
- package/src/agents/adapters/gemini.adapter.ts +134 -0
- package/src/agents/adapters/imagen.adapter.ts +106 -0
- package/src/agents/adapters/nano-banana.adapter.ts +129 -0
- package/src/agents/adapters/openai.adapter.ts +165 -0
- package/src/agents/adapters/veo.adapter.ts +130 -0
- package/src/agents/agent.schema.property.test.ts +379 -0
- package/src/agents/agent.schema.test.ts +148 -0
- package/src/agents/agent.schema.ts +263 -0
- package/src/agents/index.ts +60 -0
- package/src/agents/registered-agent.schema.ts +356 -0
- package/src/agents/registry.ts +97 -0
- package/src/agents/tournament-configs.property.test.ts +266 -0
- package/src/cli/README.md +145 -0
- package/src/cli/commands/define.ts +79 -0
- package/src/cli/commands/list.ts +46 -0
- package/src/cli/commands/logs.ts +83 -0
- package/src/cli/commands/run.ts +416 -0
- package/src/cli/commands/verify.ts +110 -0
- package/src/cli/index.ts +81 -0
- package/src/config/README.md +128 -0
- package/src/config/env.ts +262 -0
- package/src/config/index.ts +19 -0
- package/src/eval/README.md +318 -0
- package/src/eval/ai-judge.test.ts +435 -0
- package/src/eval/ai-judge.ts +368 -0
- package/src/eval/code-validators.ts +414 -0
- package/src/eval/evaluateOutcome.property.test.ts +1174 -0
- package/src/eval/evaluateOutcome.ts +591 -0
- package/src/eval/immigration-validators.ts +122 -0
- package/src/eval/index.ts +90 -0
- package/src/eval/judge-cache.ts +402 -0
- package/src/eval/tournament-validators.property.test.ts +439 -0
- package/src/eval/validators.property.test.ts +1118 -0
- package/src/eval/validators.ts +1199 -0
- package/src/eval/weighted-scorer.ts +285 -0
- package/src/index.ts +17 -0
- package/src/league/README.md +188 -0
- package/src/league/health-check.ts +353 -0
- package/src/league/index.ts +93 -0
- package/src/league/killAgent.ts +151 -0
- package/src/league/league.test.ts +1151 -0
- package/src/league/runLeague.ts +843 -0
- package/src/league/scoreAgent.ts +175 -0
- package/src/modules/omnibridge/__tests__/.gitkeep +1 -0
- package/src/modules/omnibridge/__tests__/auth-tunnel.property.test.ts +524 -0
- package/src/modules/omnibridge/__tests__/deterministic-logger.property.test.ts +965 -0
- package/src/modules/omnibridge/__tests__/ghost-api.property.test.ts +461 -0
- package/src/modules/omnibridge/__tests__/omnibridge-integration.test.ts +542 -0
- package/src/modules/omnibridge/__tests__/parallel-executor.property.test.ts +671 -0
- package/src/modules/omnibridge/__tests__/semantic-normalizer.property.test.ts +521 -0
- package/src/modules/omnibridge/__tests__/semantic-normalizer.test.ts +254 -0
- package/src/modules/omnibridge/__tests__/session-vault.property.test.ts +367 -0
- package/src/modules/omnibridge/__tests__/shadow-session.property.test.ts +523 -0
- package/src/modules/omnibridge/__tests__/triangulation-engine.property.test.ts +292 -0
- package/src/modules/omnibridge/__tests__/verification-engine.property.test.ts +769 -0
- package/src/modules/omnibridge/api/.gitkeep +1 -0
- package/src/modules/omnibridge/api/ghost-api.ts +1087 -0
- package/src/modules/omnibridge/auth/.gitkeep +1 -0
- package/src/modules/omnibridge/auth/auth-tunnel.ts +843 -0
- package/src/modules/omnibridge/auth/session-vault.ts +577 -0
- package/src/modules/omnibridge/core/.gitkeep +1 -0
- package/src/modules/omnibridge/core/semantic-normalizer.ts +702 -0
- package/src/modules/omnibridge/core/triangulation-engine.ts +530 -0
- package/src/modules/omnibridge/core/types.ts +610 -0
- package/src/modules/omnibridge/execution/.gitkeep +1 -0
- package/src/modules/omnibridge/execution/deterministic-logger.ts +629 -0
- package/src/modules/omnibridge/execution/parallel-executor.ts +542 -0
- package/src/modules/omnibridge/execution/shadow-session.ts +794 -0
- package/src/modules/omnibridge/index.ts +212 -0
- package/src/modules/omnibridge/omnibridge.ts +510 -0
- package/src/modules/omnibridge/verification/.gitkeep +1 -0
- package/src/modules/omnibridge/verification/verification-engine.ts +783 -0
- package/src/outcomes/README.md +75 -0
- package/src/outcomes/acquire-pilot-customer.ts +297 -0
- package/src/outcomes/code-delivery-outcomes.ts +89 -0
- package/src/outcomes/code-outcomes.ts +256 -0
- package/src/outcomes/code_review_battle.test.ts +135 -0
- package/src/outcomes/code_review_battle.ts +135 -0
- package/src/outcomes/cold_email_battle.ts +97 -0
- package/src/outcomes/content_creation_battle.ts +160 -0
- package/src/outcomes/f1_stem_opt_compliance.ts +61 -0
- package/src/outcomes/index.ts +107 -0
- package/src/outcomes/lead_gen_battle.test.ts +113 -0
- package/src/outcomes/lead_gen_battle.ts +99 -0
- package/src/outcomes/outcome.schema.property.test.ts +229 -0
- package/src/outcomes/outcome.schema.ts +187 -0
- package/src/outcomes/qualified_sales_interest.ts +118 -0
- package/src/outcomes/swarm_planner.property.test.ts +370 -0
- package/src/outcomes/swarm_planner.ts +96 -0
- package/src/outcomes/web_extraction.ts +234 -0
- package/src/runtime/README.md +220 -0
- package/src/runtime/agentRunner.test.ts +341 -0
- package/src/runtime/agentRunner.ts +746 -0
- package/src/runtime/claudeAdapter.ts +232 -0
- package/src/runtime/costTracker.ts +123 -0
- package/src/runtime/index.ts +34 -0
- package/src/runtime/modelAdapter.property.test.ts +305 -0
- package/src/runtime/modelAdapter.ts +144 -0
- package/src/runtime/openaiAdapter.ts +235 -0
- package/src/utils/README.md +122 -0
- package/src/utils/command-runner.ts +134 -0
- package/src/utils/cost-guard.ts +379 -0
- package/src/utils/errors.test.ts +290 -0
- package/src/utils/errors.ts +442 -0
- package/src/utils/index.ts +37 -0
- package/src/utils/logger.test.ts +361 -0
- package/src/utils/logger.ts +419 -0
- package/src/utils/output-parsers.ts +216 -0
|
@@ -0,0 +1,1174 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Property-based tests for Evaluation System
|
|
3
|
+
*
|
|
4
|
+
* **Feature: earnd-bounty-engine, Property 3: Evaluation Determinism**
|
|
5
|
+
* **Validates: Requirements 1.3**
|
|
6
|
+
*
|
|
7
|
+
* **Feature: earnd-bounty-engine, Property 10: Evaluation Result Structure**
|
|
8
|
+
* **Validates: Requirements 5.1, 5.2, 5.3**
|
|
9
|
+
*
|
|
10
|
+
* **Feature: earnd-bounty-engine, Property 19: Qualified Sales Interest Payout**
|
|
11
|
+
* **Validates: Requirements 8.6**
|
|
12
|
+
*
|
|
13
|
+
* Property 3: Evaluation Determinism
|
|
14
|
+
* *For any* outcome and artifact pair, evaluating the same inputs multiple times
|
|
15
|
+
* SHALL always produce the same result (SUCCESS or FAILURE with identical reason).
|
|
16
|
+
*
|
|
17
|
+
* Property 10: Evaluation Result Structure
|
|
18
|
+
* *For any* evaluation result, the status SHALL be exactly 'SUCCESS' or 'FAILURE',
|
|
19
|
+
* FAILURE results SHALL have a non-empty reason, and SUCCESS results SHALL have
|
|
20
|
+
* verificationDetails populated.
|
|
21
|
+
*
|
|
22
|
+
* Property 19: Qualified Sales Interest Payout
|
|
23
|
+
* *For any* artifact that passes all 5 criteria (buying intent, company size, role,
|
|
24
|
+
* message length, email), the evaluation SHALL return SUCCESS with payoutAmount of 250.
|
|
25
|
+
*/
|
|
26
|
+
|
|
27
|
+
import { describe, test, expect } from 'vitest';
|
|
28
|
+
import * as fc from 'fast-check';
|
|
29
|
+
import { evaluateOutcome, type AgentArtifact } from './evaluateOutcome.js';
|
|
30
|
+
import type { Outcome, SuccessCriterion } from '../outcomes/outcome.schema.js';
|
|
31
|
+
|
|
32
|
+
/**
 * Arbitrary for generating valid SuccessCriterion objects.
 *
 * Picks one of five variants, one per validator name used in this file
 * (validateBuyingIntent, validateCompanySize, validateRole,
 * validateMessageLength, validateEmail). Every variant has a non-empty name
 * and the params object belonging to its validator.
 * Uses validators that exist in the validatorMap.
 */
const successCriterionArb = (() => {
  // Sub-arbitraries shared by several variants.
  const criterionName = () => fc.string({ minLength: 1 });
  const shortStringList = () =>
    fc.array(fc.string({ minLength: 1 }), { minLength: 1, maxLength: 5 });

  const buyingIntentCriterion = fc.record({
    name: criterionName(),
    validator: fc.constant('validateBuyingIntent'),
    params: fc.record({
      keywords: shortStringList(),
    }),
  });

  const companySizeCriterion = fc.record({
    name: criterionName(),
    validator: fc.constant('validateCompanySize'),
    params: fc.record({
      minimum: fc.integer({ min: 1, max: 10000 }),
    }),
  });

  const roleCriterion = fc.record({
    name: criterionName(),
    validator: fc.constant('validateRole'),
    params: fc.record({
      excludedRoles: shortStringList(),
    }),
  });

  const messageLengthCriterion = fc.record({
    name: criterionName(),
    validator: fc.constant('validateMessageLength'),
    params: fc.record({
      minWords: fc.integer({ min: 1, max: 100 }),
    }),
  });

  // validateEmail takes no parameters, so params is always the empty object.
  const emailCriterion = fc.record({
    name: criterionName(),
    validator: fc.constant('validateEmail'),
    params: fc.constant({}),
  });

  return fc.oneof(
    buyingIntentCriterion,
    companySizeCriterion,
    roleCriterion,
    messageLengthCriterion,
    emailCriterion
  ) as fc.Arbitrary<SuccessCriterion>;
})();
|
|
71
|
+
|
|
72
|
+
/**
 * Arbitrary for generating valid Outcome objects.
 *
 * The name is never blank, the payout lies in [0.01, 10000] and is never NaN,
 * attempts are 1-100, the time limit is 1-3600000 ms, and each outcome has
 * 1-5 success criteria plus 0-10 failure reason strings.
 */
const outcomeArb: fc.Arbitrary<Outcome> = ((): fc.Arbitrary<Outcome> => {
  const nonBlankName = fc
    .string({ minLength: 1 })
    .filter((candidate) => candidate.trim() !== '');
  const criteria = fc.array(successCriterionArb, { minLength: 1, maxLength: 5 });
  const reasons = fc.array(fc.string(), { minLength: 0, maxLength: 10 });

  return fc.record({
    name: nonBlankName,
    description: fc.string(),
    payoutAmount: fc.double({ min: 0.01, max: 10000, noNaN: true }),
    maxAttempts: fc.integer({ min: 1, max: 100 }),
    timeLimitMs: fc.integer({ min: 1, max: 3600000 }),
    successCriteria: criteria,
    failureReasons: reasons,
  });
})();
|
|
84
|
+
|
|
85
|
+
/**
 * Arbitrary for the `content` payload of an AgentArtifact.
 *
 * All text fields are unconstrained strings (possibly empty); the company
 * size is an integer in [0, 1000000].
 */
const artifactContentArb = (() => {
  const anyText = () => fc.string();

  return fc.record({
    message: anyText(),
    targetEmail: anyText(),
    targetCompany: anyText(),
    targetCompanySize: fc.integer({ min: 0, max: 1000000 }),
    targetRole: anyText(),
  });
})();
|
|
95
|
+
|
|
96
|
+
/**
 * Generate valid ISO timestamp strings from integer epoch milliseconds.
 *
 * The range runs from 2020-01-01T00:00:00Z to 2030-12-31T00:00:00Z inclusive.
 * Going through integers avoids invalid date issues with fc.date().
 */
const validTimestampArb = fc
  .integer({ min: Date.UTC(2020, 0, 1), max: Date.UTC(2030, 11, 31) })
  .map((epochMs) => {
    const instant = new Date(epochMs);
    return instant.toISOString();
  });
|
|
103
|
+
|
|
104
|
+
/**
 * Arbitrary for generating AgentArtifact objects tied to a given outcome.
 *
 * @param outcomeName - value used verbatim as the artifact's outcomeId
 * @returns an arbitrary whose artifacts have a non-blank agentId, an attempt
 *          number in 1-100, random content and a valid ISO timestamp
 */
const agentArtifactArb = (outcomeName: string): fc.Arbitrary<AgentArtifact> => {
  const nonBlankAgentId = fc
    .string({ minLength: 1 })
    .filter((id) => id.trim() !== '');

  return fc.record({
    agentId: nonBlankAgentId,
    outcomeId: fc.constant(outcomeName),
    attemptNumber: fc.integer({ min: 1, max: 100 }),
    content: artifactContentArb,
    timestamp: validTimestampArb,
  });
};
|
|
112
|
+
|
|
113
|
+
/**
 * Arbitrary for generating an outcome together with an artifact whose
 * outcomeId equals that outcome's name.
 */
const outcomeArtifactPairArb = outcomeArb.chain((outcome) => {
  // The artifact generator depends on the generated outcome, hence chain().
  const matchingArtifactArb = agentArtifactArb(outcome.name);
  return matchingArtifactArb.map((artifact) => ({ outcome, artifact }));
});
|
|
119
|
+
|
|
120
|
+
describe('Evaluation Determinism - Property Tests', () => {
|
|
121
|
+
// **Feature: earnd-bounty-engine, Property 3: Evaluation Determinism**
// Evaluates the same (outcome, artifact) pair three times in sequence and
// requires status, reason and criteriaResults to agree across the runs.
test('same inputs produce identical results on multiple evaluations', async () => {
  const repeatedRunsAgree = fc.asyncProperty(
    outcomeArtifactPairArb,
    async ({ outcome, artifact }) => {
      const first = await evaluateOutcome(outcome, artifact);
      const second = await evaluateOutcome(outcome, artifact);
      const third = await evaluateOutcome(outcome, artifact);

      // Status agrees across runs
      expect(first.status).toBe(second.status);
      expect(second.status).toBe(third.status);

      // Reason agrees across runs
      expect(first.reason).toBe(second.reason);
      expect(second.reason).toBe(third.reason);

      // Per-criterion results agree across runs (deep equality)
      expect(first.criteriaResults).toEqual(second.criteriaResults);
      expect(second.criteriaResults).toEqual(third.criteriaResults);
    }
  );

  await fc.assert(repeatedRunsAgree, { numRuns: 100 });
});
|
|
145
|
+
|
|
146
|
+
// **Feature: earnd-bounty-engine, Property 3: Evaluation Determinism**
// For any generated pair the status must be one of the two allowed values.
test('evaluation result is always binary (SUCCESS or FAILURE)', async () => {
  const statusIsBinary = fc.asyncProperty(
    outcomeArtifactPairArb,
    async ({ outcome, artifact }) => {
      const { status } = await evaluateOutcome(outcome, artifact);

      expect(['SUCCESS', 'FAILURE']).toContain(status);
    }
  );

  await fc.assert(statusIsBinary, { numRuns: 100 });
});
|
|
158
|
+
|
|
159
|
+
// **Feature: earnd-bounty-engine, Property 3: Evaluation Determinism**
// Every result exposes a string status, a non-empty string reason and an
// array of criteriaResults, whatever the inputs were.
test('evaluation result structure is consistent', async () => {
  const hasConsistentShape = fc.asyncProperty(
    outcomeArtifactPairArb,
    async ({ outcome, artifact }) => {
      const { status, reason, criteriaResults } = await evaluateOutcome(outcome, artifact);

      // Required fields and their runtime types
      expect(typeof status).toBe('string');
      expect(typeof reason).toBe('string');
      expect(Array.isArray(criteriaResults)).toBe(true);

      // A reason is always supplied
      expect(reason.length).toBeGreaterThan(0);
    }
  );

  await fc.assert(hasConsistentShape, { numRuns: 100 });
});
|
|
176
|
+
|
|
177
|
+
// **Feature: earnd-bounty-engine, Property 3: Evaluation Determinism**
test('SUCCESS results include verificationDetails', async () => {
  // Outcome wiring up the five validators with fixed parameters.
  const simpleOutcome: Outcome = {
    name: 'test_outcome',
    description: 'Test outcome for determinism',
    payoutAmount: 100,
    maxAttempts: 3,
    timeLimitMs: 60000,
    successCriteria: [
      { name: 'buying_intent', validator: 'validateBuyingIntent', params: { keywords: ['pricing', 'demo', 'next steps'] } },
      { name: 'company_size', validator: 'validateCompanySize', params: { minimum: 50 } },
      { name: 'valid_role', validator: 'validateRole', params: { excludedRoles: ['intern', 'student'] } },
      { name: 'message_length', validator: 'validateMessageLength', params: { minWords: 20 } },
      { name: 'valid_email', validator: 'validateEmail', params: {} },
    ],
    failureReasons: ['No buying intent', 'Company too small'],
  };

  // Artifacts that are likely (not guaranteed) to pass all criteria above.
  const passingArtifactArb = (outcomeName: string): fc.Arbitrary<AgentArtifact> => {
    const whitespace = /\s/;
    // 20-50 whitespace-free tokens used as message filler.
    const fillerTokens = fc.array(
      fc.string({ minLength: 1 }).filter((token) => !whitespace.test(token)),
      { minLength: 20, maxLength: 50 }
    );

    return fc.record({
      agentId: fc.string({ minLength: 1 }).filter((id) => id.trim() !== ''),
      outcomeId: fc.constant(outcomeName),
      attemptNumber: fc.integer({ min: 1, max: 100 }),
      content: fc.record({
        // Filler followed by two of the buying intent keywords
        message: fillerTokens.map((tokens) => [...tokens, 'pricing', 'demo'].join(' ')),
        // Fixed, well-formed email address
        targetEmail: fc.constant('test@example.com'),
        targetCompany: fc.string({ minLength: 1 }),
        // Comfortably above the configured minimum of 50
        targetCompanySize: fc.integer({ min: 100, max: 10000 }),
        // None of these are in the excluded role list
        targetRole: fc.constantFrom('Manager', 'Director', 'VP', 'CEO', 'Engineer'),
      }),
      timestamp: validTimestampArb,
    });
  };

  await fc.assert(
    fc.asyncProperty(passingArtifactArb(simpleOutcome.name), async (artifact) => {
      const { status, verificationDetails } = await evaluateOutcome(simpleOutcome, artifact);

      // NOTE(review): only SUCCESS results are inspected, so a run that never
      // produces SUCCESS passes without asserting anything.
      if (status !== 'SUCCESS') {
        return;
      }

      expect(verificationDetails).toBeDefined();
      expect(typeof verificationDetails).toBe('object');
    }),
    { numRuns: 100 }
  );
});
|
|
254
|
+
|
|
255
|
+
// **Feature: earnd-bounty-engine, Property 3: Evaluation Determinism**
test('FAILURE results do not include verificationDetails', async () => {
  // Outcome with a single email criterion.
  const simpleOutcome: Outcome = {
    name: 'test_outcome',
    description: 'Test outcome',
    payoutAmount: 100,
    maxAttempts: 3,
    timeLimitMs: 60000,
    successCriteria: [{ name: 'valid_email', validator: 'validateEmail', params: {} }],
    failureReasons: ['Invalid email'],
  };

  // Artifacts meant to fail: the email is a fixed malformed string.
  const failingArtifactArb = (outcomeName: string): fc.Arbitrary<AgentArtifact> => {
    const failingContent = fc.record({
      message: fc.constant('short'),
      targetEmail: fc.constant('invalid-email'),
      targetCompany: fc.string({ minLength: 1 }),
      targetCompanySize: fc.integer({ min: 0, max: 10 }),
      targetRole: fc.constant('intern'),
    });

    return fc.record({
      agentId: fc.string({ minLength: 1 }).filter((id) => id.trim() !== ''),
      outcomeId: fc.constant(outcomeName),
      attemptNumber: fc.integer({ min: 1, max: 100 }),
      content: failingContent,
      timestamp: validTimestampArb,
    });
  };

  await fc.assert(
    fc.asyncProperty(failingArtifactArb(simpleOutcome.name), async (artifact) => {
      const { status, verificationDetails } = await evaluateOutcome(simpleOutcome, artifact);

      // NOTE(review): only FAILURE results are inspected; any other status
      // passes without asserting anything.
      if (status !== 'FAILURE') {
        return;
      }

      expect(verificationDetails).toBeUndefined();
    }),
    { numRuns: 100 }
  );
});
|
|
301
|
+
|
|
302
|
+
// **Feature: earnd-bounty-engine, Property 3: Evaluation Determinism**
test('criteriaResults count matches successCriteria count for valid artifacts', async () => {
  const countsMatch = fc.asyncProperty(
    outcomeArtifactPairArb,
    async ({ outcome, artifact }) => {
      const result = await evaluateOutcome(outcome, artifact);

      // Results whose reason reports an invalid artifact or an outcome
      // mismatch are skipped; the count is only compared for the rest.
      const skipped =
        result.reason.includes('Invalid artifact') ||
        result.reason.includes('outcome mismatch');
      if (skipped) {
        return;
      }

      expect(result.criteriaResults.length).toBe(outcome.successCriteria.length);
    }
  );

  await fc.assert(countsMatch, { numRuns: 100 });
});
|
|
319
|
+
});
|
|
320
|
+
|
|
321
|
+
|
|
322
|
+
/**
|
|
323
|
+
* Property 10: Evaluation Result Structure
|
|
324
|
+
*
|
|
325
|
+
* **Feature: earnd-bounty-engine, Property 10: Evaluation Result Structure**
|
|
326
|
+
* **Validates: Requirements 5.1, 5.2, 5.3**
|
|
327
|
+
*
|
|
328
|
+
* *For any* evaluation result, the status SHALL be exactly 'SUCCESS' or 'FAILURE',
|
|
329
|
+
* FAILURE results SHALL have a non-empty reason, and SUCCESS results SHALL have
|
|
330
|
+
* verificationDetails populated.
|
|
331
|
+
*/
|
|
332
|
+
describe('Evaluation Result Structure - Property Tests', () => {
|
|
333
|
+
/**
 * Arbitrary for artifacts intended to pass every criterion of the
 * five-validator outcome used in this suite. Used to test SUCCESS result
 * structure.
 *
 * @param outcomeName - value used verbatim as the artifact's outcomeId
 */
const guaranteedPassingArtifactArb = (outcomeName: string): fc.Arbitrary<AgentArtifact> => {
  // 22-30 filler words; the three keyword phrases are appended afterwards.
  const fillerWords = fc.array(fc.constantFrom('word', 'test', 'content', 'data', 'info'), {
    minLength: 22,
    maxLength: 30,
  });

  const passingContent = fc.record({
    // Filler plus the buying intent keywords, space separated
    message: fillerWords.map((words) => [...words, 'pricing', 'demo', 'next steps'].join(' ')),
    // One of three well-formed addresses
    targetEmail: fc.constantFrom('user@example.com', 'test@company.org', 'lead@business.net'),
    targetCompany: fc.string({ minLength: 1 }),
    // At least 50
    targetCompanySize: fc.integer({ min: 50, max: 10000 }),
    // Neither intern nor student
    targetRole: fc.constantFrom('Manager', 'Director', 'VP', 'CEO', 'Engineer', 'Developer'),
  });

  return fc.record({
    agentId: fc.string({ minLength: 1 }).filter((id) => id.trim() !== ''),
    outcomeId: fc.constant(outcomeName),
    attemptNumber: fc.integer({ min: 1, max: 100 }),
    content: passingContent,
    timestamp: validTimestampArb,
  });
};
|
|
360
|
+
|
|
361
|
+
/**
 * Arbitrary for artifacts intended to fail at least one criterion of the
 * five-validator outcome used in this suite. Used to test FAILURE result
 * structure.
 *
 * @param outcomeName - value used verbatim as the artifact's outcomeId
 */
const guaranteedFailingArtifactArb = (outcomeName: string): fc.Arbitrary<AgentArtifact> => {
  const failingContent = fc.record({
    // Two words, none of them a buying intent keyword
    message: fc.constant('short message'),
    // Not shaped like an email address
    targetEmail: fc.constant('not-an-email'),
    targetCompany: fc.string({ minLength: 1 }),
    // Always below 50
    targetCompanySize: fc.integer({ min: 0, max: 49 }),
    // Always one of the excluded roles
    targetRole: fc.constantFrom('intern', 'student'),
  });

  return fc.record({
    agentId: fc.string({ minLength: 1 }).filter((id) => id.trim() !== ''),
    outcomeId: fc.constant(outcomeName),
    attemptNumber: fc.integer({ min: 1, max: 100 }),
    content: failingContent,
    timestamp: validTimestampArb,
  });
};
|
|
383
|
+
|
|
384
|
+
/**
 * Standard outcome with all 5 validators for comprehensive testing:
 * buying intent keywords, minimum company size of 50, excluded roles,
 * minimum message length of 20 words, and email validation.
 */
const standardOutcome: Outcome = {
  name: 'test_qualified_sales',
  description: 'Test outcome for result structure validation',
  payoutAmount: 250,
  maxAttempts: 5,
  timeLimitMs: 300000,
  successCriteria: [
    { name: 'buying_intent', validator: 'validateBuyingIntent', params: { keywords: ['pricing', 'demo', 'next steps'] } },
    { name: 'company_size', validator: 'validateCompanySize', params: { minimum: 50 } },
    { name: 'valid_role', validator: 'validateRole', params: { excludedRoles: ['intern', 'student'] } },
    { name: 'message_length', validator: 'validateMessageLength', params: { minWords: 20 } },
    { name: 'valid_email', validator: 'validateEmail', params: {} },
  ],
  failureReasons: [
    'No buying intent',
    'Company too small',
    'Invalid role',
    'Message too short',
    'Invalid email',
  ],
};
|
|
422
|
+
|
|
423
|
+
// **Feature: earnd-bounty-engine, Property 10: Evaluation Result Structure**
// Validates: Requirement 5.1 - status SHALL be exactly 'SUCCESS' or 'FAILURE'
test('status is exactly SUCCESS or FAILURE for any evaluation', async () => {
  const statusIsOneOfTwo = fc.asyncProperty(
    outcomeArtifactPairArb,
    async ({ outcome, artifact }) => {
      const { status } = await evaluateOutcome(outcome, artifact);

      // Checked twice on purpose: once by direct comparison, once by membership.
      const isAllowed = status === 'SUCCESS' || status === 'FAILURE';
      expect(isAllowed).toBe(true);
      expect(['SUCCESS', 'FAILURE']).toContain(status);
    }
  );

  await fc.assert(statusIsOneOfTwo, { numRuns: 100 });
});
|
|
438
|
+
|
|
439
|
+
// **Feature: earnd-bounty-engine, Property 10: Evaluation Result Structure**
// Validates: Requirement 5.2 - FAILURE results SHALL have a non-empty reason
test('FAILURE results have non-empty reason', async () => {
  const failingArtifacts = guaranteedFailingArtifactArb(standardOutcome.name);

  const failureCarriesReason = fc.asyncProperty(failingArtifacts, async (artifact) => {
    const { status, reason } = await evaluateOutcome(standardOutcome, artifact);

    // The generator is built to fail, so anything else is an error
    expect(status).toBe('FAILURE');

    // The reason is a string that is non-empty, also after trimming
    expect(typeof reason).toBe('string');
    expect(reason.length).toBeGreaterThan(0);
    expect(reason.trim().length).toBeGreaterThan(0);
  });

  await fc.assert(failureCarriesReason, { numRuns: 100 });
});
|
|
460
|
+
|
|
461
|
+
// **Feature: earnd-bounty-engine, Property 10: Evaluation Result Structure**
// Validates: Requirement 5.3 - SUCCESS results SHALL have verificationDetails populated
test('SUCCESS results have verificationDetails populated', async () => {
  // Fields every verificationDetails object is expected to expose.
  const expectedFields = [
    'outcomeId',
    'payoutAmount',
    'agentId',
    'attemptNumber',
    'evaluatedAt',
    'criteriaCount',
  ];
  const passingArtifacts = guaranteedPassingArtifactArb(standardOutcome.name);

  const successCarriesDetails = fc.asyncProperty(passingArtifacts, async (artifact) => {
    const result = await evaluateOutcome(standardOutcome, artifact);
    const details = result.verificationDetails;

    // The generator is built to pass, so anything else is an error
    expect(result.status).toBe('SUCCESS');

    // Details must be a real object: not undefined, not null
    expect(details).toBeDefined();
    expect(details).not.toBeNull();
    expect(typeof details).toBe('object');

    for (const field of expectedFields) {
      expect(details).toHaveProperty(field);
    }
  });

  await fc.assert(successCarriesDetails, { numRuns: 100 });
});
|
|
490
|
+
|
|
491
|
+
// **Feature: earnd-bounty-engine, Property 10: Evaluation Result Structure**
// Combined test: validates all three requirements together
test('evaluation result structure is complete and correct', async () => {
  const structureIsComplete = fc.asyncProperty(
    outcomeArtifactPairArb,
    async ({ outcome, artifact }) => {
      const result = await evaluateOutcome(outcome, artifact);

      // Requirement 5.1: status is one of the two allowed values
      expect(['SUCCESS', 'FAILURE']).toContain(result.status);

      // reason and criteriaResults are present on every result
      expect(typeof result.reason).toBe('string');
      expect(Array.isArray(result.criteriaResults)).toBe(true);

      switch (result.status) {
        case 'FAILURE':
          // Requirement 5.2: a failure explains itself and carries no details
          expect(result.reason.length).toBeGreaterThan(0);
          expect(result.verificationDetails).toBeUndefined();
          break;
        case 'SUCCESS':
          // Requirement 5.3: a success carries verificationDetails
          expect(result.verificationDetails).toBeDefined();
          expect(typeof result.verificationDetails).toBe('object');
          break;
        default:
          break;
      }
    }
  );

  await fc.assert(structureIsComplete, { numRuns: 100 });
});
|
|
521
|
+
|
|
522
|
+
// **Feature: earnd-bounty-engine, Property 10: Evaluation Result Structure**
// Edge case: invalid artifact schema results in FAILURE with reason
test('invalid artifact schema results in FAILURE with structured reason', async () => {
  // Content whose message is always undefined; the cast hides that from the
  // type checker so the malformed value reaches evaluateOutcome at runtime.
  const brokenContent = fc.record({
    message: fc.constant(undefined as unknown as string),
    targetEmail: fc.string(),
    targetCompany: fc.string(),
    targetCompanySize: fc.integer(),
    targetRole: fc.string(),
  });

  const invalidArtifactArb = fc.record({
    agentId: fc.string({ minLength: 1 }).filter((id) => id.trim() !== ''),
    outcomeId: fc.constant(standardOutcome.name),
    attemptNumber: fc.integer({ min: 1, max: 100 }),
    content: brokenContent,
    timestamp: validTimestampArb,
  }) as fc.Arbitrary<AgentArtifact>;

  const invalidArtifactFails = fc.asyncProperty(invalidArtifactArb, async (artifact) => {
    const { status, reason, verificationDetails } = await evaluateOutcome(standardOutcome, artifact);

    // Fails, explains why, and carries no verification details
    expect(status).toBe('FAILURE');
    expect(reason.length).toBeGreaterThan(0);
    expect(verificationDetails).toBeUndefined();
  });

  await fc.assert(invalidArtifactFails, { numRuns: 100 });
});
|
|
554
|
+
|
|
555
|
+
// **Feature: earnd-bounty-engine, Property 10: Evaluation Result Structure**
// Edge case: outcome mismatch results in FAILURE with reason
test('outcome mismatch results in FAILURE with structured reason', async () => {
  const anyContent = fc.record({
    message: fc.string(),
    targetEmail: fc.string(),
    targetCompany: fc.string(),
    targetCompanySize: fc.integer({ min: 0, max: 1000000 }),
    targetRole: fc.string(),
  });

  const mismatchedArtifactArb = fc.record({
    agentId: fc.string({ minLength: 1 }).filter((id) => id.trim() !== ''),
    // Deliberately not the name of the outcome being evaluated
    outcomeId: fc.constant('different_outcome_id'),
    attemptNumber: fc.integer({ min: 1, max: 100 }),
    content: anyContent,
    timestamp: validTimestampArb,
  });

  const mismatchFails = fc.asyncProperty(mismatchedArtifactArb, async (artifact) => {
    const { status, reason, verificationDetails } = await evaluateOutcome(standardOutcome, artifact);

    // Fails, names the mismatch in its reason, and carries no details
    expect(status).toBe('FAILURE');
    expect(reason).toContain('mismatch');
    expect(verificationDetails).toBeUndefined();
  });

  await fc.assert(mismatchFails, { numRuns: 100 });
});
|
|
587
|
+
});
|
|
588
|
+
|
|
589
|
+
|
|
590
|
+
/**
 * Property 11: Fail-Closed Evaluation
 *
 * **Feature: earnd-bounty-engine, Property 11: Fail-Closed Evaluation**
 * **Validates: Requirements 5.4**
 *
 * *For any* artifact with invalid or unverifiable data, evaluation SHALL return FAILURE.
 *
 * This property ensures the system fails safely: when data is invalid, malformed,
 * or cannot be verified, the evaluation returns FAILURE rather than SUCCESS or
 * throwing an exception. This prevents accidental payouts on bad data.
 */
describe('Fail-Closed Evaluation - Property Tests', () => {
  /** Result type of `evaluateOutcome`, derived so the helpers below stay in sync with it. */
  type EvaluationResult = Awaited<ReturnType<typeof evaluateOutcome>>;

  /**
   * Standard outcome with all 5 validators for comprehensive testing.
   */
  const standardOutcome: Outcome = {
    name: 'test_qualified_sales',
    description: 'Test outcome for fail-closed validation',
    payoutAmount: 250,
    maxAttempts: 5,
    timeLimitMs: 300000,
    successCriteria: [
      {
        name: 'buying_intent',
        validator: 'validateBuyingIntent',
        params: { keywords: ['pricing', 'demo', 'next steps'] },
      },
      {
        name: 'company_size',
        validator: 'validateCompanySize',
        params: { minimum: 50 },
      },
      {
        name: 'valid_role',
        validator: 'validateRole',
        params: { excludedRoles: ['intern', 'student'] },
      },
      {
        name: 'message_length',
        validator: 'validateMessageLength',
        params: { minWords: 20 },
      },
      {
        name: 'valid_email',
        validator: 'validateEmail',
        params: {},
      },
    ],
    failureReasons: ['No buying intent', 'Company too small', 'Invalid role', 'Message too short', 'Invalid email'],
  };

  // ---- Shared building blocks -------------------------------------------
  // Each test below corrupts exactly one field; every other field is drawn
  // from these well-formed arbitraries so the corrupted field is the only
  // reason for the expected FAILURE.

  /** Every property in this suite is checked against 100 generated inputs. */
  const runOptions = { numRuns: 100 };

  /** Agent id that is non-empty after trimming. */
  const validAgentIdArb = fc.string({ minLength: 1 }).filter((s) => s.trim().length > 0);

  /** Positive integer attempt number. */
  const validAttemptNumberArb = fc.integer({ min: 1, max: 100 });

  /** Content record whose fields all have the expected primitive types. */
  const validContentArb = fc.record({
    message: fc.string(),
    targetEmail: fc.string(),
    targetCompany: fc.string(),
    targetCompanySize: fc.integer({ min: 0, max: 1000000 }),
    targetRole: fc.string(),
  });

  /** Empty or whitespace-only string, used to simulate a "missing" field. */
  const blankStringArb = fc.constantFrom('', ' ', '\t', '\n');

  /**
   * Asserts the two invariants every fail-closed result must satisfy:
   * status is FAILURE and no verificationDetails are attached.
   */
  const expectFailClosed = (result: EvaluationResult): void => {
    expect(result.status).toBe('FAILURE');
    expect(result.verificationDetails).toBeUndefined();
  };

  // **Feature: earnd-bounty-engine, Property 11: Fail-Closed Evaluation**
  // Test: Artifacts with an empty or whitespace-only agentId return FAILURE
  test('artifacts with missing agentId return FAILURE', async () => {
    const missingAgentIdArb = fc.record({
      agentId: blankStringArb,
      outcomeId: fc.constant(standardOutcome.name),
      attemptNumber: validAttemptNumberArb,
      content: validContentArb,
      timestamp: validTimestampArb,
    });

    await fc.assert(
      fc.asyncProperty(missingAgentIdArb, async (artifact) => {
        const result = await evaluateOutcome(standardOutcome, artifact);

        // Invalid artifact must return FAILURE (fail closed)
        expectFailClosed(result);
        expect(result.reason).toContain('Invalid artifact');
      }),
      runOptions
    );
  });

  // **Feature: earnd-bounty-engine, Property 11: Fail-Closed Evaluation**
  // Test: Artifacts with an empty or whitespace-only outcomeId return FAILURE
  test('artifacts with missing outcomeId return FAILURE', async () => {
    const missingOutcomeIdArb = fc.record({
      agentId: validAgentIdArb,
      outcomeId: blankStringArb,
      attemptNumber: validAttemptNumberArb,
      content: validContentArb,
      timestamp: validTimestampArb,
    });

    await fc.assert(
      fc.asyncProperty(missingOutcomeIdArb, async (artifact) => {
        const result = await evaluateOutcome(standardOutcome, artifact);

        // Invalid artifact must return FAILURE (fail closed)
        expectFailClosed(result);
      }),
      runOptions
    );
  });

  // **Feature: earnd-bounty-engine, Property 11: Fail-Closed Evaluation**
  // Test: Artifacts with invalid attemptNumber return FAILURE
  test('artifacts with invalid attemptNumber return FAILURE', async () => {
    const invalidAttemptNumberArb = fc.record({
      agentId: validAgentIdArb,
      outcomeId: fc.constant(standardOutcome.name),
      // Zero, negative, fractional and NaN attempt numbers
      attemptNumber: fc.constantFrom(0, -1, -100, 1.5, NaN),
      content: validContentArb,
      timestamp: validTimestampArb,
    });

    await fc.assert(
      fc.asyncProperty(invalidAttemptNumberArb, async (artifact) => {
        const result = await evaluateOutcome(standardOutcome, artifact);

        // Invalid artifact must return FAILURE (fail closed)
        expectFailClosed(result);
      }),
      runOptions
    );
  });

  // **Feature: earnd-bounty-engine, Property 11: Fail-Closed Evaluation**
  // Test: Artifacts whose content is null return FAILURE
  test('artifacts with null content return FAILURE', async () => {
    const nullContentArb = fc.record({
      agentId: validAgentIdArb,
      outcomeId: fc.constant(standardOutcome.name),
      attemptNumber: validAttemptNumberArb,
      // Deliberate type lie: simulates untyped input reaching the evaluator.
      content: fc.constant(null as unknown as AgentArtifact['content']),
      timestamp: validTimestampArb,
    });

    await fc.assert(
      fc.asyncProperty(nullContentArb, async (artifact) => {
        const result = await evaluateOutcome(standardOutcome, artifact);

        // Invalid artifact must return FAILURE (fail closed)
        expectFailClosed(result);
        expect(result.reason).toContain('Invalid artifact');
      }),
      runOptions
    );
  });

  // **Feature: earnd-bounty-engine, Property 11: Fail-Closed Evaluation**
  // Test: Artifacts with wrong content field types return FAILURE
  test('artifacts with wrong content field types return FAILURE', async () => {
    const wrongTypeContentArb = fc.record({
      agentId: validAgentIdArb,
      outcomeId: fc.constant(standardOutcome.name),
      attemptNumber: validAttemptNumberArb,
      content: fc.record({
        // Wrong type: number instead of string (deliberate type lie)
        message: fc.constant(123 as unknown as string),
        targetEmail: fc.string(),
        targetCompany: fc.string(),
        targetCompanySize: fc.integer({ min: 0, max: 1000000 }),
        targetRole: fc.string(),
      }),
      timestamp: validTimestampArb,
    });

    await fc.assert(
      fc.asyncProperty(wrongTypeContentArb, async (artifact) => {
        const result = await evaluateOutcome(standardOutcome, artifact);

        // Invalid artifact must return FAILURE (fail closed)
        expectFailClosed(result);
      }),
      runOptions
    );
  });

  // **Feature: earnd-bounty-engine, Property 11: Fail-Closed Evaluation**
  // Test: Outcomes that reference an unknown validator return FAILURE
  test('outcomes with unknown validators return FAILURE for criteria', async () => {
    const outcomeWithUnknownValidator: Outcome = {
      name: 'test_unknown_validator',
      description: 'Test outcome with unknown validator',
      payoutAmount: 100,
      maxAttempts: 3,
      timeLimitMs: 60000,
      successCriteria: [
        {
          name: 'unknown_criterion',
          validator: 'validateNonExistent', // Unknown validator
          params: {},
        },
      ],
      failureReasons: ['Unknown validator'],
    };

    // The artifact itself is well-formed; only the outcome definition is bad.
    const validArtifactArb = fc.record({
      agentId: validAgentIdArb,
      outcomeId: fc.constant(outcomeWithUnknownValidator.name),
      attemptNumber: validAttemptNumberArb,
      content: validContentArb,
      timestamp: validTimestampArb,
    });

    await fc.assert(
      fc.asyncProperty(validArtifactArb, async (artifact) => {
        const result = await evaluateOutcome(outcomeWithUnknownValidator, artifact);

        // Unknown validator must return FAILURE (fail closed)
        expectFailClosed(result);

        // The failing criterion must be reported and must name the cause
        const unknownCriterion = result.criteriaResults.find(
          (c) => c.name === 'unknown_criterion'
        );
        expect(unknownCriterion).toBeDefined();
        expect(unknownCriterion?.passed).toBe(false);
        expect(unknownCriterion?.reason).toContain('Unknown validator');
      }),
      runOptions
    );
  });

  // **Feature: earnd-bounty-engine, Property 11: Fail-Closed Evaluation**
  // Test: Outcome mismatch returns FAILURE (unverifiable data)
  test('outcome ID mismatch returns FAILURE', async () => {
    const mismatchedOutcomeArb = fc.record({
      agentId: validAgentIdArb,
      // Any non-blank outcome id other than the one being evaluated
      outcomeId: fc.string({ minLength: 1 }).filter(
        (s) => s.trim().length > 0 && s !== standardOutcome.name
      ),
      attemptNumber: validAttemptNumberArb,
      content: validContentArb,
      timestamp: validTimestampArb,
    });

    await fc.assert(
      fc.asyncProperty(mismatchedOutcomeArb, async (artifact) => {
        const result = await evaluateOutcome(standardOutcome, artifact);

        // Mismatched outcome must return FAILURE (fail closed)
        expectFailClosed(result);
        expect(result.reason).toContain('mismatch');
      }),
      runOptions
    );
  });

  // **Feature: earnd-bounty-engine, Property 11: Fail-Closed Evaluation**
  // Test: Completely malformed artifacts return FAILURE
  test('completely malformed artifacts return FAILURE', async () => {
    // Values that are not artifact-shaped at all, cast so they type-check
    const malformedArtifactArb = fc.oneof(
      // Null artifact
      fc.constant(null as unknown as AgentArtifact),
      // Undefined artifact
      fc.constant(undefined as unknown as AgentArtifact),
      // Empty object
      fc.constant({} as unknown as AgentArtifact),
      // Array instead of object
      fc.constant([] as unknown as AgentArtifact),
      // String instead of object
      fc.constant('not an artifact' as unknown as AgentArtifact),
      // Number instead of object
      fc.constant(42 as unknown as AgentArtifact)
    );

    await fc.assert(
      fc.asyncProperty(malformedArtifactArb, async (artifact) => {
        const result = await evaluateOutcome(standardOutcome, artifact);

        // Malformed artifact must return FAILURE (fail closed)
        expectFailClosed(result);
      }),
      runOptions
    );
  });

  // **Feature: earnd-bounty-engine, Property 11: Fail-Closed Evaluation**
  // Test: Evaluation never throws exceptions - always returns FAILURE for bad data
  test('evaluation never throws exceptions for any input', async () => {
    // Generate completely random inputs
    const anyInputArb = fc.anything();

    await fc.assert(
      fc.asyncProperty(anyInputArb, async (randomInput) => {
        // Should never throw, always return a result
        let result: EvaluationResult;
        let threw = false;

        try {
          result = await evaluateOutcome(
            standardOutcome,
            randomInput as AgentArtifact
          );
        } catch {
          threw = true;
          // Placeholder so `result` is definitely assigned; the assertion
          // on `threw` below fails the property before it is inspected.
          result = { status: 'FAILURE', reason: 'Exception thrown', criteriaResults: [] };
        }

        // Evaluation should not throw exceptions
        expect(threw).toBe(false);

        // Inputs that are not objects, or lack agentId/content, are certainly
        // invalid and must fail closed. Other inputs are not checked here.
        if (
          typeof randomInput !== 'object' ||
          randomInput === null ||
          !('agentId' in randomInput) ||
          !('content' in randomInput)
        ) {
          expectFailClosed(result);
        }
      }),
      runOptions
    );
  });

  // **Feature: earnd-bounty-engine, Property 11: Fail-Closed Evaluation**
  // Test: An empty or whitespace-only timestamp returns FAILURE
  test('artifacts with missing timestamp return FAILURE', async () => {
    const missingTimestampArb = fc.record({
      agentId: validAgentIdArb,
      outcomeId: fc.constant(standardOutcome.name),
      attemptNumber: validAttemptNumberArb,
      content: validContentArb,
      timestamp: blankStringArb,
    });

    await fc.assert(
      fc.asyncProperty(missingTimestampArb, async (artifact) => {
        const result = await evaluateOutcome(standardOutcome, artifact);

        // Invalid artifact must return FAILURE (fail closed)
        expectFailClosed(result);
      }),
      runOptions
    );
  });
});
|
|
967
|
+
|
|
968
|
+
|
|
969
|
+
/**
 * Property 19: Qualified Sales Interest Payout
 *
 * **Feature: earnd-bounty-engine, Property 19: Qualified Sales Interest Payout**
 * **Validates: Requirements 8.6**
 *
 * *For any* artifact that passes all 5 criteria (buying intent, company size, role,
 * message length, email), the evaluation SHALL return SUCCESS with payoutAmount of 250.
 */
describe('Qualified Sales Interest Payout - Property Tests', () => {
  /**
   * Inline definition of the qualified_sales_interest outcome ($250 payout)
   * used by every test in this suite.
   * NOTE(review): presumably mirrors the shipped demo outcome — confirm the
   * two stay in sync, since this copy is not imported from it.
   */
  const qualifiedSalesInterestOutcome: Outcome = {
    name: 'qualified_sales_interest',
    description: 'Generate a qualified sales lead',
    payoutAmount: 250,
    maxAttempts: 5,
    timeLimitMs: 300000,
    successCriteria: [
      {
        name: 'buying_intent',
        validator: 'validateBuyingIntent',
        params: { keywords: ['pricing', 'demo', 'next steps'] },
      },
      {
        name: 'company_size',
        validator: 'validateCompanySize',
        params: { minimum: 50 },
      },
      {
        name: 'valid_role',
        validator: 'validateRole',
        params: { excludedRoles: ['intern', 'student'] },
      },
      {
        name: 'message_length',
        validator: 'validateMessageLength',
        params: { minWords: 20 },
      },
      {
        name: 'valid_email',
        validator: 'validateEmail',
        params: {},
      },
    ],
    failureReasons: [
      'No buying intent detected',
      'Company too small',
      'Invalid role',
      'Message too short',
      'Invalid email format',
    ],
  };

  /**
   * Arbitrary for generating artifacts that pass ALL 5 criteria:
   * 1. Message contains buying intent keywords (pricing, demo, next steps)
   * 2. Company size >= 50
   * 3. Role is not intern or student
   * 4. Message length >= 20 words
   * 5. Email is syntactically valid
   */
  const passingAllCriteriaArtifactArb: fc.Arbitrary<AgentArtifact> = fc.record({
    agentId: fc.string({ minLength: 1 }).filter((s) => s.trim().length > 0),
    outcomeId: fc.constant('qualified_sales_interest'),
    attemptNumber: fc.integer({ min: 1, max: 100 }),
    content: fc.record({
      // Message with 20+ words AND at least one buying intent keyword:
      // 17+ fillers + keyword (1-2 words) + 2 trailing words >= 20 words.
      message: fc
        .tuple(
          // 17+ filler words; minLength already guarantees non-empty, the
          // filter keeps each filler a single whitespace-free token
          fc.array(
            fc.string({ minLength: 1, maxLength: 10 }).filter((s) => !/\s/.test(s)),
            { minLength: 17, maxLength: 30 }
          ),
          // Exactly one buying intent keyword
          fc.constantFrom('pricing', 'demo', 'next steps')
        )
        .map(([words, keyword]) => [...words, keyword, 'additional', 'words'].join(' ')),
      // Valid email format: alphanumeric local part @ fixed domain
      targetEmail: fc.tuple(
        fc.string({ minLength: 1, maxLength: 10 }).filter((s) => /^[a-zA-Z0-9]+$/.test(s)),
        fc.constantFrom('example.com', 'company.org', 'business.net')
      ).map(([local, domain]) => `${local}@${domain}`),
      targetCompany: fc.string({ minLength: 1 }),
      // Company size >= 50
      targetCompanySize: fc.integer({ min: 50, max: 100000 }),
      // Valid role (not intern or student)
      targetRole: fc.constantFrom(
        'Manager',
        'Director',
        'VP',
        'CEO',
        'CTO',
        'Engineer',
        'Developer',
        'Analyst',
        'Consultant',
        'Lead'
      ),
    }),
    timestamp: validTimestampArb,
  });

  // **Feature: earnd-bounty-engine, Property 19: Qualified Sales Interest Payout**
  test('artifacts passing all 5 criteria return SUCCESS with payoutAmount of 250', async () => {
    await fc.assert(
      fc.asyncProperty(passingAllCriteriaArtifactArb, async (artifact) => {
        const result = await evaluateOutcome(qualifiedSalesInterestOutcome, artifact);

        // Must return SUCCESS
        expect(result.status).toBe('SUCCESS');

        // Must have verificationDetails
        expect(result.verificationDetails).toBeDefined();

        // verificationDetails must contain payoutAmount of 250
        expect(result.verificationDetails?.payoutAmount).toBe(250);

        // All 5 criteria must have passed
        expect(result.criteriaResults.length).toBe(5);
        expect(result.criteriaResults.every((c) => c.passed)).toBe(true);
      }),
      { numRuns: 100 }
    );
  });

  // **Feature: earnd-bounty-engine, Property 19: Qualified Sales Interest Payout**
  test('payoutAmount in verificationDetails matches outcome definition', async () => {
    await fc.assert(
      fc.asyncProperty(passingAllCriteriaArtifactArb, async (artifact) => {
        const result = await evaluateOutcome(qualifiedSalesInterestOutcome, artifact);

        // Assert SUCCESS unconditionally: guarding the payout checks with an
        // `if` would let this property pass vacuously if evaluation never
        // succeeded. The arbitrary only produces all-passing artifacts.
        expect(result.status).toBe('SUCCESS');

        // The payoutAmount in verificationDetails must match the outcome's payoutAmount
        expect(result.verificationDetails?.payoutAmount).toBe(
          qualifiedSalesInterestOutcome.payoutAmount
        );
        // Specifically for qualified_sales_interest, this must be $250
        expect(result.verificationDetails?.payoutAmount).toBe(250);
      }),
      { numRuns: 100 }
    );
  });

  // **Feature: earnd-bounty-engine, Property 19: Qualified Sales Interest Payout**
  test('SUCCESS result includes all required verification fields', async () => {
    await fc.assert(
      fc.asyncProperty(passingAllCriteriaArtifactArb, async (artifact) => {
        const result = await evaluateOutcome(qualifiedSalesInterestOutcome, artifact);

        expect(result.status).toBe('SUCCESS');
        expect(result.verificationDetails).toBeDefined();

        // Verify all required fields are present
        expect(result.verificationDetails).toHaveProperty('outcomeId', 'qualified_sales_interest');
        expect(result.verificationDetails).toHaveProperty('payoutAmount', 250);
        expect(result.verificationDetails).toHaveProperty('agentId', artifact.agentId);
        expect(result.verificationDetails).toHaveProperty('attemptNumber', artifact.attemptNumber);
        expect(result.verificationDetails).toHaveProperty('evaluatedAt');
        expect(result.verificationDetails).toHaveProperty('criteriaCount', 5);
      }),
      { numRuns: 100 }
    );
  });

  // **Feature: earnd-bounty-engine, Property 19: Qualified Sales Interest Payout**
  test('failing any single criterion prevents payout', async () => {
    // Artifacts built to fail ONLY the buying-intent criterion; every other
    // field is chosen to satisfy its criterion.
    // NOTE(review): despite the test name, the other four criteria are not
    // exercised individually here.
    const failingBuyingIntentArb: fc.Arbitrary<AgentArtifact> = fc.record({
      agentId: fc.string({ minLength: 1 }).filter((s) => s.trim().length > 0),
      outcomeId: fc.constant('qualified_sales_interest'),
      attemptNumber: fc.integer({ min: 1, max: 100 }),
      content: fc.record({
        // Message WITHOUT buying intent keywords but with 20+ words
        message: fc
          .array(
            fc.constantFrom('hello', 'world', 'test', 'data', 'content', 'info', 'text', 'word'),
            { minLength: 20, maxLength: 30 }
          )
          .map((words) => words.join(' ')),
        targetEmail: fc.constant('valid@example.com'),
        targetCompany: fc.string({ minLength: 1 }),
        targetCompanySize: fc.integer({ min: 50, max: 100000 }),
        targetRole: fc.constantFrom('Manager', 'Director', 'VP'),
      }),
      timestamp: validTimestampArb,
    });

    await fc.assert(
      fc.asyncProperty(failingBuyingIntentArb, async (artifact) => {
        const result = await evaluateOutcome(qualifiedSalesInterestOutcome, artifact);

        // Must return FAILURE when any criterion fails
        expect(result.status).toBe('FAILURE');

        // No payout should be triggered
        expect(result.verificationDetails).toBeUndefined();
      }),
      { numRuns: 100 }
    );
  });
});
|