outcome-cli 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +261 -0
- package/package.json +95 -0
- package/src/agents/README.md +139 -0
- package/src/agents/adapters/anthropic.adapter.ts +166 -0
- package/src/agents/adapters/dalle.adapter.ts +145 -0
- package/src/agents/adapters/gemini.adapter.ts +134 -0
- package/src/agents/adapters/imagen.adapter.ts +106 -0
- package/src/agents/adapters/nano-banana.adapter.ts +129 -0
- package/src/agents/adapters/openai.adapter.ts +165 -0
- package/src/agents/adapters/veo.adapter.ts +130 -0
- package/src/agents/agent.schema.property.test.ts +379 -0
- package/src/agents/agent.schema.test.ts +148 -0
- package/src/agents/agent.schema.ts +263 -0
- package/src/agents/index.ts +60 -0
- package/src/agents/registered-agent.schema.ts +356 -0
- package/src/agents/registry.ts +97 -0
- package/src/agents/tournament-configs.property.test.ts +266 -0
- package/src/cli/README.md +145 -0
- package/src/cli/commands/define.ts +79 -0
- package/src/cli/commands/list.ts +46 -0
- package/src/cli/commands/logs.ts +83 -0
- package/src/cli/commands/run.ts +416 -0
- package/src/cli/commands/verify.ts +110 -0
- package/src/cli/index.ts +81 -0
- package/src/config/README.md +128 -0
- package/src/config/env.ts +262 -0
- package/src/config/index.ts +19 -0
- package/src/eval/README.md +318 -0
- package/src/eval/ai-judge.test.ts +435 -0
- package/src/eval/ai-judge.ts +368 -0
- package/src/eval/code-validators.ts +414 -0
- package/src/eval/evaluateOutcome.property.test.ts +1174 -0
- package/src/eval/evaluateOutcome.ts +591 -0
- package/src/eval/immigration-validators.ts +122 -0
- package/src/eval/index.ts +90 -0
- package/src/eval/judge-cache.ts +402 -0
- package/src/eval/tournament-validators.property.test.ts +439 -0
- package/src/eval/validators.property.test.ts +1118 -0
- package/src/eval/validators.ts +1199 -0
- package/src/eval/weighted-scorer.ts +285 -0
- package/src/index.ts +17 -0
- package/src/league/README.md +188 -0
- package/src/league/health-check.ts +353 -0
- package/src/league/index.ts +93 -0
- package/src/league/killAgent.ts +151 -0
- package/src/league/league.test.ts +1151 -0
- package/src/league/runLeague.ts +843 -0
- package/src/league/scoreAgent.ts +175 -0
- package/src/modules/omnibridge/__tests__/.gitkeep +1 -0
- package/src/modules/omnibridge/__tests__/auth-tunnel.property.test.ts +524 -0
- package/src/modules/omnibridge/__tests__/deterministic-logger.property.test.ts +965 -0
- package/src/modules/omnibridge/__tests__/ghost-api.property.test.ts +461 -0
- package/src/modules/omnibridge/__tests__/omnibridge-integration.test.ts +542 -0
- package/src/modules/omnibridge/__tests__/parallel-executor.property.test.ts +671 -0
- package/src/modules/omnibridge/__tests__/semantic-normalizer.property.test.ts +521 -0
- package/src/modules/omnibridge/__tests__/semantic-normalizer.test.ts +254 -0
- package/src/modules/omnibridge/__tests__/session-vault.property.test.ts +367 -0
- package/src/modules/omnibridge/__tests__/shadow-session.property.test.ts +523 -0
- package/src/modules/omnibridge/__tests__/triangulation-engine.property.test.ts +292 -0
- package/src/modules/omnibridge/__tests__/verification-engine.property.test.ts +769 -0
- package/src/modules/omnibridge/api/.gitkeep +1 -0
- package/src/modules/omnibridge/api/ghost-api.ts +1087 -0
- package/src/modules/omnibridge/auth/.gitkeep +1 -0
- package/src/modules/omnibridge/auth/auth-tunnel.ts +843 -0
- package/src/modules/omnibridge/auth/session-vault.ts +577 -0
- package/src/modules/omnibridge/core/.gitkeep +1 -0
- package/src/modules/omnibridge/core/semantic-normalizer.ts +702 -0
- package/src/modules/omnibridge/core/triangulation-engine.ts +530 -0
- package/src/modules/omnibridge/core/types.ts +610 -0
- package/src/modules/omnibridge/execution/.gitkeep +1 -0
- package/src/modules/omnibridge/execution/deterministic-logger.ts +629 -0
- package/src/modules/omnibridge/execution/parallel-executor.ts +542 -0
- package/src/modules/omnibridge/execution/shadow-session.ts +794 -0
- package/src/modules/omnibridge/index.ts +212 -0
- package/src/modules/omnibridge/omnibridge.ts +510 -0
- package/src/modules/omnibridge/verification/.gitkeep +1 -0
- package/src/modules/omnibridge/verification/verification-engine.ts +783 -0
- package/src/outcomes/README.md +75 -0
- package/src/outcomes/acquire-pilot-customer.ts +297 -0
- package/src/outcomes/code-delivery-outcomes.ts +89 -0
- package/src/outcomes/code-outcomes.ts +256 -0
- package/src/outcomes/code_review_battle.test.ts +135 -0
- package/src/outcomes/code_review_battle.ts +135 -0
- package/src/outcomes/cold_email_battle.ts +97 -0
- package/src/outcomes/content_creation_battle.ts +160 -0
- package/src/outcomes/f1_stem_opt_compliance.ts +61 -0
- package/src/outcomes/index.ts +107 -0
- package/src/outcomes/lead_gen_battle.test.ts +113 -0
- package/src/outcomes/lead_gen_battle.ts +99 -0
- package/src/outcomes/outcome.schema.property.test.ts +229 -0
- package/src/outcomes/outcome.schema.ts +187 -0
- package/src/outcomes/qualified_sales_interest.ts +118 -0
- package/src/outcomes/swarm_planner.property.test.ts +370 -0
- package/src/outcomes/swarm_planner.ts +96 -0
- package/src/outcomes/web_extraction.ts +234 -0
- package/src/runtime/README.md +220 -0
- package/src/runtime/agentRunner.test.ts +341 -0
- package/src/runtime/agentRunner.ts +746 -0
- package/src/runtime/claudeAdapter.ts +232 -0
- package/src/runtime/costTracker.ts +123 -0
- package/src/runtime/index.ts +34 -0
- package/src/runtime/modelAdapter.property.test.ts +305 -0
- package/src/runtime/modelAdapter.ts +144 -0
- package/src/runtime/openaiAdapter.ts +235 -0
- package/src/utils/README.md +122 -0
- package/src/utils/command-runner.ts +134 -0
- package/src/utils/cost-guard.ts +379 -0
- package/src/utils/errors.test.ts +290 -0
- package/src/utils/errors.ts +442 -0
- package/src/utils/index.ts +37 -0
- package/src/utils/logger.test.ts +361 -0
- package/src/utils/logger.ts +419 -0
- package/src/utils/output-parsers.ts +216 -0
|
@@ -0,0 +1,1199 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Validators - Pure validation functions for outcome evaluation
|
|
3
|
+
*
|
|
4
|
+
* Each validator is deterministic and side-effect free.
|
|
5
|
+
* Used by the evaluation system to verify success criteria.
|
|
6
|
+
*
|
|
7
|
+
* @module eval/validators
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { CommandResult } from '../utils/command-runner';
|
|
11
|
+
import {
|
|
12
|
+
parseTestOutput,
|
|
13
|
+
parseLintOutput,
|
|
14
|
+
parseBenchmarkOutput,
|
|
15
|
+
parseSecurityScanOutput,
|
|
16
|
+
TestResult,
|
|
17
|
+
LintResult,
|
|
18
|
+
BenchmarkResult,
|
|
19
|
+
SecurityScanResult,
|
|
20
|
+
} from '../utils/output-parsers';
|
|
21
|
+
|
|
22
|
+
/**
 * Outcome object returned by every validator in this module.
 */
export interface ValidationResult {
  /** `true` when the checked input satisfied the validator. */
  valid: boolean;
  /** Human-readable failure reasons; the validators here return `[]` on success. */
  errors: string[];
}
|
|
31
|
+
|
|
32
|
+
/**
 * Checks a message for buying-intent keywords.
 *
 * The message passes when at least one keyword occurs in it as a
 * case-insensitive substring.
 *
 * @param message - Text to inspect
 * @param keywords - Keywords that signal buying intent
 * @returns A passing result on the first match; otherwise a single error
 *          listing the keywords that were looked for
 *
 * @example
 * validateBuyingIntent("I'd like to see a demo", ["pricing", "demo", "next steps"])
 * // { valid: true, errors: [] }
 *
 * @see Requirements 8.1
 */
export function validateBuyingIntent(
  message: string,
  keywords: string[]
): ValidationResult {
  const haystack = message.toLowerCase();
  const mentionsKeyword = (keyword: string): boolean =>
    haystack.includes(keyword.toLowerCase());

  if (!keywords.some(mentionsKeyword)) {
    const expected = keywords.join(', ');
    return {
      valid: false,
      errors: [`No buying intent detected - message must contain one of: ${expected}`],
    };
  }

  return { valid: true, errors: [] };
}
|
|
68
|
+
|
|
69
|
+
/**
 * Checks that a company has at least the required number of employees.
 *
 * @param size - Company size (number of employees)
 * @param minimum - Smallest acceptable company size (inclusive)
 * @returns A passing result when `size >= minimum`; otherwise one error
 *          reporting both numbers
 *
 * @example
 * validateCompanySize(100, 50)
 * // { valid: true, errors: [] }
 *
 * @see Requirements 8.2
 */
export function validateCompanySize(
  size: number,
  minimum: number
): ValidationResult {
  const meetsMinimum = size >= minimum;

  return meetsMinimum
    ? { valid: true, errors: [] }
    : {
        valid: false,
        errors: [`Company too small - must have at least ${minimum} employees, got ${size}`],
      };
}
|
|
95
|
+
|
|
96
|
+
/**
 * Validates that a role is not in the excluded roles list.
 *
 * Both the role and every excluded entry are trimmed and lower-cased before
 * they are compared, so surrounding whitespace and letter case never affect
 * the outcome. The comparison is an exact match, not a substring match.
 *
 * @param role - The role to validate
 * @param excludedRoles - Roles that are not allowed
 * @returns A passing result when the role is not excluded; otherwise one
 *          error quoting the role and listing the excluded roles as given
 *
 * @example
 * validateRole("Engineering Manager", ["intern", "student"])
 * // { valid: true, errors: [] }
 *
 * @see Requirements 8.3
 */
export function validateRole(
  role: string,
  excludedRoles: string[]
): ValidationResult {
  // Apply the same normalisation to both sides; trimming only the candidate
  // would let an exclusion such as " intern " never match anything.
  const normalize = (value: string): string => value.trim().toLowerCase();

  const candidate = normalize(role);
  const isExcluded = excludedRoles.some(
    (excluded) => normalize(excluded) === candidate
  );

  if (!isExcluded) {
    return { valid: true, errors: [] };
  }

  return {
    valid: false,
    errors: [`Invalid role "${role}" - excluded roles: ${excludedRoles.join(', ')}`],
  };
}
|
|
129
|
+
|
|
130
|
+
/**
 * Checks that a message contains at least a minimum number of words.
 *
 * The message is split on runs of whitespace; empty fragments (produced by
 * leading or trailing whitespace) are not counted.
 *
 * @param message - Text to measure
 * @param minWords - Minimum number of words required (inclusive)
 * @returns A passing result when enough words are present; otherwise one
 *          error reporting the required and actual counts
 *
 * @example
 * validateMessageLength("This is a test message with enough words", 5)
 * // { valid: true, errors: [] }
 *
 * @see Requirements 8.4
 */
export function validateMessageLength(
  message: string,
  minWords: number
): ValidationResult {
  let wordCount = 0;
  for (const fragment of message.split(/\s+/)) {
    if (fragment.length > 0) {
      wordCount += 1;
    }
  }

  return wordCount >= minWords
    ? { valid: true, errors: [] }
    : {
        valid: false,
        errors: [`Message too short - must be at least ${minWords} words, got ${wordCount}`],
      };
}
|
|
163
|
+
|
|
164
|
+
/**
 * Checks that a string looks like an email address.
 *
 * The whole string must match `local@domain.tld`, where:
 * - the local part is one or more of letters, digits, `.`, `_`, `%`, `+`, `-`
 * - the domain is one or more of letters, digits, `.`, `-`
 * - the TLD is a final dot followed by at least two letters
 *
 * This is a syntax check only.
 *
 * @param email - The email address to validate
 * @returns A passing result on a match; otherwise one error quoting the input
 *
 * @example
 * validateEmail("user@example.com")
 * // { valid: true, errors: [] }
 *
 * @see Requirements 8.5
 */
export function validateEmail(email: string): ValidationResult {
  const pattern = /^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/;

  const errors: string[] = pattern.test(email)
    ? []
    : [`Invalid email format: "${email}"`];

  return { valid: errors.length === 0, errors };
}
|
|
199
|
+
|
|
200
|
+
// ============================================================================
|
|
201
|
+
// Code Review Validators - Tournament Seed Bounties
|
|
202
|
+
// ============================================================================
|
|
203
|
+
|
|
204
|
+
/**
 * Checks that a code review artifact reports at least one security issue
 * at the required severity.
 *
 * An issue counts when its `type` is exactly `'security'` and its `severity`
 * is exactly equal to `requiredSeverity` (case-sensitive; severities are not
 * ranked, so a different level never satisfies the check).
 *
 * @param artifact - The code review artifact containing issues
 * @param requiredSeverity - Severity an issue must carry (default: CRITICAL)
 * @returns A passing result when such an issue exists; otherwise one error
 *          naming the required severity
 *
 * @example
 * validateSecurityIssue({ issues: [{ type: 'security', severity: 'CRITICAL' }] }, 'CRITICAL')
 * // { valid: true, errors: [] }
 *
 * @see Requirements 1.4, 3.3
 */
export function validateSecurityIssue(
  artifact: { issues: Array<{ type: string; severity: string }> },
  requiredSeverity: string = 'CRITICAL'
): ValidationResult {
  const meetsRequirement = (issue: { type: string; severity: string }): boolean =>
    issue.type === 'security' && issue.severity === requiredSeverity;

  if (!artifact.issues.some(meetsRequirement)) {
    return {
      valid: false,
      errors: [`No security vulnerability with severity ${requiredSeverity} identified`],
    };
  }

  return { valid: true, errors: [] };
}
|
|
235
|
+
|
|
236
|
+
// ============================================================================
|
|
237
|
+
// Code Execution Validators - Outcome-based code delivery
|
|
238
|
+
// ============================================================================
|
|
239
|
+
|
|
240
|
+
/**
 * Checks that a test run reached the required pass rate.
 *
 * Accepts either an already-parsed test summary or a raw command result
 * (recognised by the presence of `stdout`, `stderr` and `exitCode`), which is
 * first run through `parseTestOutput`. No command is executed here.
 *
 * The pass rate is `passedTests / totalTests`; when `totalTests` is not
 * positive it is 1 if the summary's `success` flag is truthy and 0 otherwise.
 *
 * @param testResult - Parsed test summary or raw command result
 * @param params - `minPassRate` is the required fraction in [0, 1] (default 1)
 * @returns A passing result when the pass rate is at least `minPassRate`;
 *          otherwise one error with both percentages and, if any names are
 *          available, the first five entries of `testNames`
 */
export function validateTestsPass(
  testResult: TestResult | CommandResult,
  params: { minPassRate?: number } = {}
): ValidationResult {
  const requiredRate = params.minPassRate === undefined ? 1 : params.minPassRate;

  const isRawCommand =
    'stdout' in testResult && 'stderr' in testResult && 'exitCode' in testResult;
  const summary: TestResult = isRawCommand
    ? parseTestOutput(testResult as CommandResult)
    : (testResult as TestResult);

  let passRate: number;
  if (summary.totalTests > 0) {
    passRate = summary.passedTests / summary.totalTests;
  } else {
    passRate = summary.success ? 1 : 0;
  }

  if (passRate >= requiredRate) {
    return { valid: true, errors: [] };
  }

  // Cap the listing at five names to keep the error message short.
  const sampleNames = summary.testNames.slice(0, 5).join(', ');
  const actualPct = (passRate * 100).toFixed(1);
  const requiredPct = (requiredRate * 100).toFixed(1);

  let detail = `Tests did not meet pass rate (${actualPct}% < ${requiredPct}%)`;
  if (sampleNames) {
    detail += `; recent tests: ${sampleNames}`;
  }

  return { valid: false, errors: [detail] };
}
|
|
275
|
+
|
|
276
|
+
/**
 * Checks that a build succeeded.
 *
 * Two input shapes are handled:
 * - a raw command result (has `stdout`, `stderr` and `exitCode`): succeeds
 *   only when `exitCode` is 0;
 * - the legacy summary: succeeds when `success` is `true` or `exitCode` is 0.
 *
 * @param buildResult - Raw command result or legacy build summary
 * @returns A passing result on success; otherwise one "Build failed" error
 *          that includes the exit code and (legacy shape only) the log path
 *          when they are available
 */
export function validateBuilds(
  buildResult: { success?: boolean; exitCode?: number; logPath?: string } | CommandResult
): ValidationResult {
  if ('stdout' in buildResult && 'stderr' in buildResult && 'exitCode' in buildResult) {
    const { exitCode } = buildResult as CommandResult;
    return exitCode === 0
      ? { valid: true, errors: [] }
      : { valid: false, errors: [`Build failed (exit ${exitCode})`] };
  }

  // Legacy interface
  const { success, exitCode, logPath } = buildResult as {
    success?: boolean;
    exitCode?: number;
    logPath?: string;
  };

  if (success === true || exitCode === 0) {
    return { valid: true, errors: [] };
  }

  const exitPart = exitCode !== undefined ? ` (exit ${exitCode})` : '';
  const logPart = logPath ? `; log: ${logPath}` : '';

  return { valid: false, errors: [`Build failed${exitPart}${logPart}`] };
}
|
|
311
|
+
|
|
312
|
+
/**
 * Validates that linting is clean (no errors, warnings optionally allowed).
 *
 * Three input shapes are handled:
 * - a raw command result (has `stdout`, `stderr` and `exitCode`), which is
 *   first run through `parseLintOutput`;
 * - an already-parsed lint result (has `issues` and `totalIssues`);
 * - the legacy summary `{ errors?, warnings?, exitCode? }`, where missing
 *   counts default to 0 and a defined, non-zero `exitCode` is itself a failure.
 *
 * @param lintResult - Raw command result, parsed lint result, or legacy summary
 * @param params - `allowWarnings` (default `true`); when `false`, any warning fails
 * @returns A passing result when nothing failed; otherwise one error per
 *          failing condition (error count, disallowed warnings, exit code)
 */
export function validateLintClean(
  lintResult: LintResult | CommandResult | { errors?: number; warnings?: number; exitCode?: number; logPath?: string },
  params: { allowWarnings?: boolean } = {}
): ValidationResult {
  const { allowWarnings = true } = params;

  let errorCount: number;
  let warningCount: number;
  // Only the legacy shape treats the exit code as a failure signal of its own.
  let failedExitCode: number | undefined;

  if ('stdout' in lintResult && 'stderr' in lintResult && 'exitCode' in lintResult) {
    const parsed = parseLintOutput(lintResult as CommandResult);
    errorCount = parsed.errorCount;
    warningCount = parsed.warningCount;
  } else if ('issues' in lintResult && 'totalIssues' in lintResult) {
    const parsed = lintResult as LintResult;
    errorCount = parsed.errorCount;
    warningCount = parsed.warningCount;
  } else {
    // Legacy interface
    const legacy = lintResult as { errors?: number; warnings?: number; exitCode?: number; logPath?: string };
    errorCount = legacy.errors ?? 0;
    warningCount = legacy.warnings ?? 0;
    if (legacy.exitCode !== undefined && legacy.exitCode !== 0) {
      failedExitCode = legacy.exitCode;
    }
  }

  const messages: string[] = [];

  // Report the error count only when there are errors; a bad exit code alone
  // used to produce a misleading "lint errors: 0" line.
  if (errorCount > 0) {
    messages.push(`lint errors: ${errorCount}`);
  }
  if (!allowWarnings && warningCount > 0) {
    messages.push(`warnings present (${warningCount}) but allowWarnings=false`);
  }
  if (failedExitCode !== undefined) {
    messages.push(`lint exit code ${failedExitCode}`);
  }

  return { valid: messages.length === 0, errors: messages };
}
|
|
362
|
+
|
|
363
|
+
/**
 * Checks benchmark timings against a latency threshold in milliseconds.
 *
 * Three input shapes are handled:
 * - a raw command result (has `stdout`, `stderr` and `exitCode`), which is
 *   first run through `parseBenchmarkOutput`;
 * - an already-parsed benchmark result (has `metrics` and `totalBenchmarks`);
 * - the legacy summary `{ p95Ms?, maxMs?, meanMs? }`, where `p95Ms` must be a
 *   number and is compared directly against the threshold.
 *
 * For parsed results every metric with a truthy `timeMs` above the threshold
 * counts as failing; a result with no such metric passes.
 *
 * @param benchmarkResult - Raw command result, parsed result, or legacy summary
 * @param params - `p95ThresholdMs` is the largest acceptable time (inclusive)
 * @returns A passing result when within the threshold; otherwise one error
 *          describing the regression (at most three failing metrics are listed)
 */
export function validateBenchmark(
  benchmarkResult: BenchmarkResult | CommandResult | { p95Ms?: number; maxMs?: number; meanMs?: number },
  params: { p95ThresholdMs: number }
): ValidationResult {
  const { p95ThresholdMs } = params;

  const isRawCommand =
    'stdout' in benchmarkResult && 'stderr' in benchmarkResult && 'exitCode' in benchmarkResult;
  const isParsed =
    !isRawCommand && 'metrics' in benchmarkResult && 'totalBenchmarks' in benchmarkResult;

  if (!isRawCommand && !isParsed) {
    // Legacy interface: the caller supplies the p95 figure directly.
    const { p95Ms, maxMs } = benchmarkResult as { p95Ms?: number; maxMs?: number; meanMs?: number };

    if (typeof p95Ms !== 'number') {
      return { valid: false, errors: ['Benchmark result missing p95Ms'] };
    }
    if (p95Ms <= p95ThresholdMs) {
      return { valid: true, errors: [] };
    }

    const maxSuffix = maxMs ? `; max ${maxMs}ms` : '';
    return {
      valid: false,
      errors: [
        `Performance regression: p95 ${p95Ms}ms exceeds threshold ${p95ThresholdMs}ms${maxSuffix}`,
      ],
    };
  }

  const parsedResult: BenchmarkResult = isRawCommand
    ? parseBenchmarkOutput(benchmarkResult as CommandResult)
    : (benchmarkResult as BenchmarkResult);

  const tooSlow = parsedResult.metrics.filter(
    (metric) => metric.timeMs && metric.timeMs > p95ThresholdMs
  );

  if (tooSlow.length === 0) {
    return { valid: true, errors: [] };
  }

  // Keep the message short: name at most three offenders.
  const details = tooSlow
    .slice(0, 3)
    .map((metric) => `${metric.name}: ${metric.timeMs}ms`)
    .join(', ');

  return {
    valid: false,
    errors: [
      `Performance regression: ${tooSlow.length} benchmark(s) exceed ${p95ThresholdMs}ms threshold: ${details}`,
    ],
  };
}
|
|
419
|
+
|
|
420
|
+
/**
 * Validates security scan results against a severity threshold.
 *
 * Severities are ranked low < medium < high < critical. Anything at or above
 * `maxSeverity` is "blocking" and fails the validation.
 *
 * Three input shapes are handled:
 * - a raw command result (has `stdout`, `stderr` and `exitCode`), which is
 *   first run through `parseSecurityScanOutput`;
 * - an already-parsed scan result (has `vulnerabilities` and
 *   `totalVulnerabilities`), judged by its per-severity counters;
 * - the legacy summary `{ findings? }`, where each finding's severity is
 *   compared case-insensitively and findings with an unrecognised or missing
 *   severity are not blocking.
 *
 * @param scanResult - Raw command result, parsed scan result, or legacy summary
 * @param params - `maxSeverity` threshold (default `'high'`); matched
 *                 case-insensitively, and an unrecognised value is treated as
 *                 `'high'` for every input shape
 * @returns A passing result when nothing is blocking; otherwise one error
 *          with the blocking count and up to three example entries
 */
export function validateSecurityScan(
  scanResult: SecurityScanResult | CommandResult | { findings?: Array<{ severity: string; id?: string; description?: string }> },
  params: { maxSeverity?: 'critical' | 'high' | 'medium' | 'low' } = {}
): ValidationResult {
  const severityOrder = ['low', 'medium', 'high', 'critical'];

  // Normalise the threshold once. Without this, an unrecognised value (e.g.
  // from untyped config) gave maxIndex === -1, which made the legacy branch
  // block every finding and crash on findings lacking a severity, while the
  // parsed branch quietly fell back to 'high'.
  const requestedSeverity = String(params.maxSeverity ?? 'high').toLowerCase();
  const maxSeverity = severityOrder.includes(requestedSeverity) ? requestedSeverity : 'high';
  const maxIndex = severityOrder.indexOf(maxSeverity);

  // If it's a CommandResult, parse it first
  let parsedResult: SecurityScanResult;
  if ('stdout' in scanResult && 'stderr' in scanResult && 'exitCode' in scanResult) {
    parsedResult = parseSecurityScanOutput(scanResult as CommandResult);
  } else if ('vulnerabilities' in scanResult && 'totalVulnerabilities' in scanResult) {
    parsedResult = scanResult as SecurityScanResult;
  } else {
    // Legacy interface
    const legacyResult = scanResult as { findings?: Array<{ severity: string; id?: string; description?: string }> };
    const findings = legacyResult.findings || [];

    // maxIndex is always >= 0 here, so every blocking finding is guaranteed
    // to carry a recognised (string) severity.
    const blocking = findings.filter(
      (f) => severityOrder.indexOf(f.severity?.toLowerCase() || '') >= maxIndex
    );

    if (blocking.length === 0) {
      return { valid: true, errors: [] };
    }

    const details = blocking
      .slice(0, 3) // Limit to 3 findings
      .map((f) => `${f.severity.toUpperCase()}: ${f.id ?? 'unknown'}${f.description ? ` - ${f.description}` : ''}`)
      .join('; ');

    return {
      valid: false,
      errors: [`Security scan has ${blocking.length} blocking findings (>= ${maxSeverity}): ${details}`],
    };
  }

  // For parsed results, check severity counts
  const blockingCount = (() => {
    switch (maxSeverity) {
      case 'critical': return parsedResult.criticalCount;
      case 'medium': return parsedResult.criticalCount + parsedResult.highCount + parsedResult.mediumCount;
      case 'low': return parsedResult.totalVulnerabilities;
      case 'high':
      default: return parsedResult.criticalCount + parsedResult.highCount;
    }
  })();

  if (blockingCount === 0) {
    return { valid: true, errors: [] };
  }

  const details = parsedResult.vulnerabilities
    .filter((v) => severityOrder.indexOf(v.severity) >= maxIndex)
    .slice(0, 3) // Limit to 3 vulnerabilities
    .map((v) => `${v.severity.toUpperCase()}: ${v.package ?? 'unknown'}${v.description ? ` - ${v.description}` : ''}`)
    .join('; ');

  return {
    valid: false,
    errors: [`Security scan has ${blockingCount} blocking vulnerabilities (>= ${maxSeverity}): ${details}`],
  };
}
|
|
485
|
+
|
|
486
|
+
/**
 * Checks that a code review artifact reports at least one performance issue.
 *
 * An issue counts when its `type` is exactly `'performance'`.
 *
 * @param artifact - The code review artifact containing issues
 * @returns A passing result when such an issue exists; otherwise a single
 *          fixed error message
 *
 * @example
 * validatePerformanceIssue({ issues: [{ type: 'performance' }] })
 * // { valid: true, errors: [] }
 *
 * @see Requirements 1.5, 3.4
 */
export function validatePerformanceIssue(
  artifact: { issues: Array<{ type: string }> }
): ValidationResult {
  const found = artifact.issues.some(({ type }) => type === 'performance');

  return found
    ? { valid: true, errors: [] }
    : { valid: false, errors: ['No performance bottleneck identified'] };
}
|
|
514
|
+
|
|
515
|
+
/**
 * Checks that every review comment refers to content inside the source diff
 * (Zero-Noise Standard).
 *
 * A comment is in bounds when its `lineContent` occurs verbatim as a
 * substring of `sourceDiff`.
 *
 * @param artifact - The code review artifact containing comments
 * @param sourceDiff - The source diff text defining the valid boundaries
 * @returns A passing result when all comments are in bounds; otherwise one
 *          error reporting how many are not
 *
 * @example
 * validateNoiseFreeness({ comments: [{ lineContent: 'const x = 1;' }] }, 'const x = 1;')
 * // { valid: true, errors: [] }
 *
 * @see Requirements 1.6, 3.5
 */
export function validateNoiseFreeness(
  artifact: { comments: Array<{ lineContent: string }> },
  sourceDiff: string
): ValidationResult {
  const strayCount = artifact.comments.reduce(
    (count, comment) => (sourceDiff.includes(comment.lineContent) ? count : count + 1),
    0
  );

  if (strayCount !== 0) {
    return {
      valid: false,
      errors: [`${strayCount} comment(s) found outside source diff boundaries`],
    };
  }

  return { valid: true, errors: [] };
}
|
|
548
|
+
|
|
549
|
+
/**
 * Checks that a refactor suggestion lowers cyclomatic complexity by at least
 * the required amount.
 *
 * The reduction is `originalComplexity - suggestedComplexity`.
 *
 * @param artifact - The code review artifact with an optional refactor suggestion
 * @param minReduction - Smallest acceptable reduction, inclusive (default: 2)
 * @returns A passing result when the reduction is large enough; otherwise one
 *          error, either for a missing suggestion or for an insufficient reduction
 *
 * @example
 * validateComplexityReduction({ refactorSuggestion: { originalComplexity: 10, suggestedComplexity: 7 } }, 2)
 * // { valid: true, errors: [] }
 *
 * @see Requirements 1.7, 3.7
 */
export function validateComplexityReduction(
  artifact: {
    refactorSuggestion?: {
      originalComplexity: number;
      suggestedComplexity: number;
    };
  },
  minReduction: number = 2
): ValidationResult {
  const suggestion = artifact.refactorSuggestion;
  if (!suggestion) {
    return { valid: false, errors: ['No refactor suggestion provided'] };
  }

  const reduction = suggestion.originalComplexity - suggestion.suggestedComplexity;

  return reduction >= minReduction
    ? { valid: true, errors: [] }
    : {
        valid: false,
        errors: [`Refactor reduces complexity by ${reduction}, minimum required is ${minReduction}`],
      };
}
|
|
593
|
+
|
|
594
|
+
/**
 * Composite validator for expert code review.
 *
 * Runs four independent checks and collects one error per failed check:
 * 1. at least one issue with type `'security'` and severity `'CRITICAL'`;
 * 2. at least one issue with type `'performance'`;
 * 3. every comment's `lineContent` occurs verbatim in `sourceDiff`;
 * 4. a refactor suggestion is present and lowers complexity by at least 2.
 *
 * @param artifact - The code review artifact
 * @param sourceDiff - The source diff used for the noise check
 * @returns ValidationResult with the errors of all failed checks combined;
 *          `valid` is `true` only when there are none
 *
 * @example
 * validateExpertReview(artifact, sourceDiff)
 * // { valid: false, errors: ['No critical security vulnerability identified (SQLi/XSS)', ...] }
 *
 * @see Requirements 3.1, 3.2
 */
export function validateExpertReview(
  artifact: {
    issues: Array<{ type: string; severity: string }>;
    comments: Array<{ lineContent: string }>;
    refactorSuggestion?: {
      originalComplexity: number;
      suggestedComplexity: number;
    };
  },
  sourceDiff: string
): ValidationResult {
  const errors: string[] = [];

  // Security validation
  const hasSecurity = artifact.issues.some(
    (i) => i.type === 'security' && i.severity === 'CRITICAL'
  );
  if (!hasSecurity) {
    errors.push('No critical security vulnerability identified (SQLi/XSS)');
  }

  // Performance validation
  const hasPerformance = artifact.issues.some((i) => i.type === 'performance');
  if (!hasPerformance) {
    errors.push('No performance bottleneck identified (N+1 queries)');
  }

  // Noise-free validation
  const isNoiseFree = artifact.comments.every((c) =>
    sourceDiff.includes(c.lineContent)
  );
  if (!isNoiseFree) {
    errors.push('Comments found outside source diff boundaries (noise)');
  }

  // Complexity reduction validation
  if (!artifact.refactorSuggestion) {
    errors.push('No refactor suggestion provided');
  } else {
    const reduction =
      artifact.refactorSuggestion.originalComplexity -
      artifact.refactorSuggestion.suggestedComplexity;
    // Negated ">=" rather than "<": a NaN reduction (missing or non-numeric
    // complexity values) must fail here, as it does in
    // validateComplexityReduction, instead of slipping through the comparison.
    if (!(reduction >= 2)) {
      errors.push(
        `Refactor reduces complexity by ${reduction}, minimum required is 2`
      );
    }
  }

  return {
    valid: errors.length === 0,
    errors,
  };
}
|
|
662
|
+
|
|
663
|
+
// ============================================================================
|
|
664
|
+
// Lead Gen Validators - Tournament Seed Bounties
|
|
665
|
+
// ============================================================================
|
|
666
|
+
|
|
667
|
+
/**
 * Checks that a LinkedIn profile URL begins with the canonical
 * `https://www.linkedin.com/in/` prefix. Only the prefix is inspected;
 * whatever follows it is accepted as-is.
 *
 * @param linkedIn - The LinkedIn URL to validate
 * @returns ValidationResult with a single error when the prefix is missing
 *
 * @example
 * validateLinkedIn("https://www.linkedin.com/in/johndoe")
 * // { valid: true, errors: [] }
 *
 * @see Requirements 2.7, 4.6
 */
export function validateLinkedIn(linkedIn: string): ValidationResult {
  const requiredPrefix = 'https://www.linkedin.com/in/';

  const errors: string[] = linkedIn.startsWith(requiredPrefix)
    ? []
    : [`Invalid LinkedIn URL - must start with ${requiredPrefix}, got: ${linkedIn}`];

  return { valid: errors.length === 0, errors };
}
|
|
693
|
+
|
|
694
|
+
/**
 * Composite validator for lead generation precision.
 * Validates email, company size, role, and LinkedIn URL in one call and
 * reports every failing check rather than stopping at the first.
 *
 * Rules applied:
 *  - email: one `@` with non-whitespace text on both sides and a `.` in the
 *    domain part;
 *  - companySize: must be a number greater than or equal to 50 (NaN fails);
 *  - role: after trimming and lower-casing, must not be exactly `intern` or
 *    `student`;
 *  - linkedIn: must start with `https://www.linkedin.com/in/`.
 *
 * @param artifact - The lead artifact with all required fields
 * @returns ValidationResult with all errors combined
 *
 * @example
 * validateLeadGenPrecision({ email: 'user@example.com', companySize: 100, role: 'CEO', linkedIn: 'https://www.linkedin.com/in/user' })
 * // { valid: true, errors: [] }
 *
 * @see Requirements 4.1-4.6
 */
export function validateLeadGenPrecision(artifact: {
  email: string;
  companySize: number;
  role: string;
  linkedIn: string;
}): ValidationResult {
  const errors: string[] = [];

  // Email validation
  const emailRegex = /^[^\s@]+@[^\s@]+\.[^\s@]+$/;
  if (!emailRegex.test(artifact.email)) {
    errors.push(`Invalid email format: ${artifact.email}`);
  }

  // Company size validation.
  // Negated `>=` instead of `<`: every comparison with NaN is false, so
  // `companySize < 50` would let an unknown (NaN) size slip through.
  if (!(artifact.companySize >= 50)) {
    errors.push(
      `Company too small - must have at least 50 employees, got ${artifact.companySize}`
    );
  }

  // Role validation (exact match against the exclusion list)
  const excludedRoles = ['intern', 'student'];
  const lowerRole = artifact.role.toLowerCase().trim();
  if (excludedRoles.includes(lowerRole)) {
    errors.push(`Invalid role "${artifact.role}" - excluded roles: ${excludedRoles.join(', ')}`);
  }

  // LinkedIn validation
  if (!artifact.linkedIn.startsWith('https://www.linkedin.com/in/')) {
    errors.push(
      `Invalid LinkedIn URL - must start with https://www.linkedin.com/in/, got: ${artifact.linkedIn}`
    );
  }

  return {
    valid: errors.length === 0,
    errors,
  };
}
|
|
747
|
+
|
|
748
|
+
// ============================================================================
|
|
749
|
+
// Swarm Planner Validators - MVP Unified Engine
|
|
750
|
+
// ============================================================================
|
|
751
|
+
|
|
752
|
+
/**
 * Shape of one task entry inside the Planner output.
 */
export interface PlannerTaskInput {
  /** Task identifier; validateUniqueTaskIds expects it to be distinct within one output. */
  id: string;
  /** Free-text description of the task. */
  description: string;
  /** Arbitrary key/value input for the task; validateTaskSchema requires a non-array object. */
  input: Record<string, unknown>;
  /** Optional expected output, same key/value shape as `input`. */
  expectedOutput?: Record<string, unknown>;
  /** Optional numeric priority; ordering semantics are not defined in this file. */
  priority?: number;
}
|
|
762
|
+
|
|
763
|
+
/**
 * Shape of the Planner agent's output as consumed by the planner validators.
 */
export interface PlannerOutputInput {
  /** The planned tasks; the validators check count bounds, ID uniqueness and per-task schema. */
  tasks: PlannerTaskInput[];
  /** Optional time estimate, in milliseconds per the field name. */
  estimatedTimeMs?: number;
  /** Optional free-text rationale supplied by the planner. */
  reasoning?: string;
}
|
|
771
|
+
|
|
772
|
+
/**
 * Checks that the Planner output carries a `tasks` array whose length lies
 * within the inclusive range [minTasks, maxTasks] (1-100 by default).
 *
 * A missing or non-array `tasks` yields a single structural error; otherwise
 * the lower and upper bound are checked independently.
 *
 * @param output - The Planner agent output to validate
 * @param minTasks - Minimum number of tasks required (default: 1)
 * @param maxTasks - Maximum number of tasks allowed (default: 100)
 * @returns ValidationResult
 *
 * @example
 * validatePlannerOutput({ tasks: [{ id: '1', description: 'Task 1', input: {} }] }, 1, 100)
 * // { valid: true, errors: [] }
 *
 * @see Requirements 3.2, 3.5
 */
export function validatePlannerOutput(
  output: PlannerOutputInput,
  minTasks: number = 1,
  maxTasks: number = 100
): ValidationResult {
  const tasks = output ? output.tasks : undefined;

  // Structural guard: nothing else can be checked without an array
  if (!Array.isArray(tasks)) {
    return {
      valid: false,
      errors: ['Planner output must contain a tasks array'],
    };
  }

  const count = tasks.length;
  const problems: string[] = [];

  // Lower bound
  if (count < minTasks) {
    problems.push(
      `Task array too small - must have at least ${minTasks} task(s), got ${count}`
    );
  }

  // Upper bound
  if (count > maxTasks) {
    problems.push(
      `Task array too large - must have at most ${maxTasks} tasks, got ${count}`
    );
  }

  return { valid: problems.length === 0, errors: problems };
}
|
|
823
|
+
|
|
824
|
+
/**
 * Validates that all task IDs in the Planner output are unique.
 *
 * IDs are compared after `String(...)` coercion, so `1` and `'1'` count as
 * the same ID. Tasks without an ID (undefined/null) are ignored here, as are
 * null/undefined task entries: reporting those is validateTaskSchema's job,
 * and this check must not throw on malformed planner output.
 *
 * @param output - The Planner agent output to validate
 * @returns ValidationResult; on failure a single error lists each duplicated
 *   ID once, in order of first repetition
 *
 * @example
 * validateUniqueTaskIds({ tasks: [{ id: '1', ... }, { id: '2', ... }] })
 * // { valid: true, errors: [] }
 *
 * @see Requirements 3.4
 */
export function validateUniqueTaskIds(
  output: PlannerOutputInput
): ValidationResult {
  if (!output || !Array.isArray(output.tasks)) {
    return {
      valid: false,
      errors: ['Planner output must contain a tasks array'],
    };
  }

  const seenIds = new Set<string>();
  // A Set keeps insertion order and de-duplicates the report in one pass.
  const duplicateIds = new Set<string>();

  for (const task of output.tasks) {
    // The array may contain null/undefined entries; property access on them
    // would throw a TypeError.
    if (task === undefined || task === null) {
      continue;
    }
    if (task.id === undefined || task.id === null) {
      continue;
    }

    const idStr = String(task.id);
    if (seenIds.has(idStr)) {
      duplicateIds.add(idStr);
    } else {
      seenIds.add(idStr);
    }
  }

  if (duplicateIds.size === 0) {
    return { valid: true, errors: [] };
  }

  return {
    valid: false,
    errors: [
      `Duplicate task IDs found: ${[...duplicateIds].join(', ')}`,
    ],
  };
}
|
|
871
|
+
|
|
872
|
+
/**
 * Validates that each task in the Planner output has all required fields.
 *
 * A field counts as missing when it is undefined or null. The three default
 * fields additionally get a type check: `id` and `description` must be
 * strings, and `input` must be a non-array object. Any other field name
 * passed in `requiredFields` is only checked for presence.
 *
 * A null/undefined task entry is reported with every required field missing
 * instead of throwing.
 *
 * @param output - The Planner agent output to validate
 * @param requiredFields - Array of required field names (default: ['id', 'description', 'input'])
 * @returns ValidationResult with one error per offending task
 *
 * @example
 * validateTaskSchema({ tasks: [{ id: '1', description: 'Task', input: {} }] }, ['id', 'description', 'input'])
 * // { valid: true, errors: [] }
 *
 * @see Requirements 3.3
 */
export function validateTaskSchema(
  output: PlannerOutputInput,
  requiredFields: string[] = ['id', 'description', 'input']
): ValidationResult {
  if (!output || !Array.isArray(output.tasks)) {
    return {
      valid: false,
      errors: ['Planner output must contain a tasks array'],
    };
  }

  const errors: string[] = [];

  output.tasks.forEach((task, index) => {
    const missingFields: string[] = [];
    // Fall back to an empty record for null/undefined entries so that each
    // required field is reported as missing rather than raising a TypeError.
    const taskRecord = (task ?? {}) as unknown as Record<string, unknown>;

    for (const field of requiredFields) {
      const value = taskRecord[field];

      if (value === undefined || value === null) {
        missingFields.push(field);
      } else if (field === 'id' && typeof value !== 'string') {
        missingFields.push(`${field} (must be string)`);
      } else if (field === 'description' && typeof value !== 'string') {
        missingFields.push(`${field} (must be string)`);
      } else if (field === 'input' && (typeof value !== 'object' || Array.isArray(value))) {
        missingFields.push(`${field} (must be object)`);
      }
    }

    if (missingFields.length > 0) {
      errors.push(
        `Task at index ${index} missing or invalid fields: ${missingFields.join(', ')}`
      );
    }
  });

  return {
    valid: errors.length === 0,
    errors,
  };
}
|
|
928
|
+
|
|
929
|
+
/**
 * One-call validation of a Planner output: task-count bounds, ID uniqueness
 * and per-task schema, with the errors of all three concatenated in that
 * order.
 *
 * The uniqueness and schema checks only run when `output.tasks` is an array;
 * otherwise the bounds check alone reports the structural problem, so the
 * "must contain a tasks array" error appears once rather than three times.
 *
 * @param output - The Planner agent output to validate
 * @param minTasks - Minimum number of tasks required (default: 1)
 * @param maxTasks - Maximum number of tasks allowed (default: 100)
 * @returns ValidationResult with all errors combined
 *
 * @example
 * validatePlannerOutputComplete({ tasks: [{ id: '1', description: 'Task', input: {} }] })
 * // { valid: true, errors: [] }
 *
 * @see Requirements 3.2, 3.3, 3.4, 3.5
 */
export function validatePlannerOutputComplete(
  output: PlannerOutputInput,
  minTasks: number = 1,
  maxTasks: number = 100
): ValidationResult {
  // Bounds check always runs and also covers the "no tasks array" case
  const collected: string[] = [
    ...validatePlannerOutput(output, minTasks, maxTasks).errors,
  ];

  const hasTaskArray = Boolean(output) && Array.isArray(output.tasks);
  if (hasTaskArray) {
    // Unique IDs first, then schema, matching the reported error order
    collected.push(...validateUniqueTaskIds(output).errors);
    collected.push(...validateTaskSchema(output).errors);
  }

  return { valid: collected.length === 0, errors: collected };
}
|
|
971
|
+
|
|
972
|
+
// ============================================================================
|
|
973
|
+
// Dataset Validators - Outcome-Verified Marketplace
|
|
974
|
+
// ============================================================================
|
|
975
|
+
|
|
976
|
+
/**
 * One company entry (row) of a company dataset.
 */
export interface CompanyRow {
  /** Company name; also used to label rows in validation error messages. */
  name: string;
  /** Company domain; validateDatasetUniqueDomains compares these case-insensitively. */
  domain: string;
  /** Category label for the company. */
  category: string;
  /** Source URLs backing this row; the dataset validators require a non-empty array. */
  sourceUrls: string[];
}
|
|
985
|
+
|
|
986
|
+
/**
 * A company dataset artifact: the rows plus a generation timestamp.
 */
export interface CompanyDataset {
  /** The dataset rows. */
  companies: CompanyRow[];
  /** When the dataset was generated; the string format is not checked in this file. */
  generatedAt: string;
}
|
|
993
|
+
|
|
994
|
+
/**
 * Checks that a dataset holds at least `minRows` company rows.
 *
 * @param dataset - The dataset to validate
 * @param minRows - Minimum number of rows required (default: 25)
 * @returns ValidationResult; a missing or non-array `companies` is reported
 *   as a structural error
 *
 * @example
 * validateDatasetMinRows({ companies: [...25 rows] }, 25)
 * // { valid: true, errors: [] }
 */
export function validateDatasetMinRows(
  dataset: CompanyDataset,
  minRows: number = 25
): ValidationResult {
  const rows = dataset ? dataset.companies : undefined;

  if (!Array.isArray(rows)) {
    return {
      valid: false,
      errors: ['Dataset must contain a companies array'],
    };
  }

  const total = rows.length;

  return total >= minRows
    ? { valid: true, errors: [] }
    : {
        valid: false,
        errors: [
          `Dataset too small - must have at least ${minRows} companies, got ${total}`,
        ],
      };
}
|
|
1029
|
+
|
|
1030
|
+
/**
 * Validates that all domains in a dataset are unique (deduplicated).
 *
 * Domains are compared case-insensitively (lower-cased before comparison).
 * Rows whose `domain` is not a string, and null/undefined rows, are skipped
 * rather than dereferenced: a missing domain is reported by
 * validateDatasetRequiredFields, and this check must not throw before that
 * validator gets to run.
 *
 * @param dataset - The dataset to validate
 * @returns ValidationResult; on failure a single error lists each duplicated
 *   (lower-cased) domain once, in order of first repetition
 *
 * @example
 * validateDatasetUniqueDomains({ companies: [{ domain: 'a.com' }, { domain: 'b.com' }] })
 * // { valid: true, errors: [] }
 */
export function validateDatasetUniqueDomains(
  dataset: CompanyDataset
): ValidationResult {
  if (!dataset || !Array.isArray(dataset.companies)) {
    return {
      valid: false,
      errors: ['Dataset must contain a companies array'],
    };
  }

  const seenDomains = new Set<string>();
  // Insertion-ordered and self-deduplicating, replacing an O(n^2) indexOf scan
  const duplicateDomains = new Set<string>();

  for (const company of dataset.companies) {
    const rawDomain: unknown =
      company === undefined || company === null ? undefined : company.domain;

    // Only strings can be normalised and compared
    if (typeof rawDomain !== 'string') {
      continue;
    }

    const domain = rawDomain.toLowerCase();
    if (seenDomains.has(domain)) {
      duplicateDomains.add(domain);
    } else {
      seenDomains.add(domain);
    }
  }

  if (duplicateDomains.size === 0) {
    return { valid: true, errors: [] };
  }

  return {
    valid: false,
    errors: [
      `Duplicate domains found: ${[...duplicateDomains].join(', ')}`,
    ],
  };
}
|
|
1069
|
+
|
|
1070
|
+
/**
 * Checks every company row for the presence of the required fields.
 *
 * A field is reported as missing when its value is undefined, null or the
 * empty string. `sourceUrls` must additionally be a non-empty array. One
 * error is emitted per offending row, labelled with the row index and the
 * company name (or "unknown" when the name is falsy).
 *
 * @param dataset - The dataset to validate
 * @param requiredFields - Array of required field names
 * @returns ValidationResult
 *
 * @example
 * validateDatasetRequiredFields({ companies: [...] }, ['name', 'domain', 'category', 'sourceUrls'])
 * // { valid: true, errors: [] }
 */
export function validateDatasetRequiredFields(
  dataset: CompanyDataset,
  requiredFields: string[] = ['name', 'domain', 'category', 'sourceUrls']
): ValidationResult {
  if (!(dataset && Array.isArray(dataset.companies))) {
    return {
      valid: false,
      errors: ['Dataset must contain a companies array'],
    };
  }

  const rowErrors = dataset.companies.reduce<string[]>((acc, company, index) => {
    const row = company as unknown as Record<string, unknown>;
    const problems: string[] = [];

    for (const field of requiredFields) {
      const value = row[field];
      const isBlank = value === undefined || value === null || value === '';

      if (isBlank) {
        problems.push(field);
        continue;
      }

      // Present but unusable: sourceUrls has to be an array with content
      if (field === 'sourceUrls' && !(Array.isArray(value) && value.length > 0)) {
        problems.push(`${field} (must be non-empty array)`);
      }
    }

    if (problems.length > 0) {
      acc.push(
        `Company at index ${index} (${company.name || 'unknown'}) missing fields: ${problems.join(', ')}`
      );
    }
    return acc;
  }, []);

  return { valid: rowErrors.length === 0, errors: rowErrors };
}
|
|
1120
|
+
|
|
1121
|
+
/**
 * Checks that every company row carries at least `minSourcesPerRow` source
 * URLs. Only the array's length is examined; individual URLs are not
 * inspected.
 *
 * @param dataset - The dataset to validate
 * @param minSourcesPerRow - Minimum sources required per row (default: 1)
 * @returns ValidationResult with one error per offending row
 *
 * @example
 * validateDatasetSourceUrls({ companies: [{ sourceUrls: ['https://...'] }] }, 1)
 * // { valid: true, errors: [] }
 */
export function validateDatasetSourceUrls(
  dataset: CompanyDataset,
  minSourcesPerRow: number = 1
): ValidationResult {
  if (!(dataset && Array.isArray(dataset.companies))) {
    return {
      valid: false,
      errors: ['Dataset must contain a companies array'],
    };
  }

  const problems: string[] = [];

  dataset.companies.forEach((company, index) => {
    const sources = company.sourceUrls;
    // Built lazily so the label is only computed for rows that fail
    const label = (): string =>
      `Company at index ${index} (${company.name || 'unknown'})`;

    if (!Array.isArray(sources)) {
      problems.push(`${label()} has invalid sourceUrls (must be array)`);
      return;
    }

    if (sources.length < minSourcesPerRow) {
      problems.push(
        `${label()} has ${sources.length} source(s), minimum required is ${minSourcesPerRow}`
      );
    }
  });

  return { valid: problems.length === 0, errors: problems };
}
|
|
1162
|
+
|
|
1163
|
+
/**
 * One-call validation of a company dataset: minimum row count, unique
 * domains, required fields and source URLs, with all errors concatenated in
 * that order.
 *
 * The three per-row checks only run when `companies` is a non-empty array;
 * for a missing or empty dataset the row-count check alone reports the
 * problem.
 *
 * @param dataset - The dataset to validate
 * @param minRows - Minimum number of rows required (default: 25)
 * @returns ValidationResult with all errors combined
 *
 * @example
 * validateCompanyDataset({ companies: [...], generatedAt: '...' }, 25)
 * // { valid: true, errors: [] }
 */
export function validateCompanyDataset(
  dataset: CompanyDataset,
  minRows: number = 25
): ValidationResult {
  // Row count always runs and also covers the "no companies array" case
  const collected: string[] = [...validateDatasetMinRows(dataset, minRows).errors];

  const hasRows =
    Boolean(dataset) &&
    Array.isArray(dataset.companies) &&
    dataset.companies.length > 0;

  if (hasRows) {
    collected.push(...validateDatasetUniqueDomains(dataset).errors);
    collected.push(...validateDatasetRequiredFields(dataset).errors);
    collected.push(...validateDatasetSourceUrls(dataset).errors);
  }

  return { valid: collected.length === 0, errors: collected };
}
|