outcome-cli 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +261 -0
- package/package.json +95 -0
- package/src/agents/README.md +139 -0
- package/src/agents/adapters/anthropic.adapter.ts +166 -0
- package/src/agents/adapters/dalle.adapter.ts +145 -0
- package/src/agents/adapters/gemini.adapter.ts +134 -0
- package/src/agents/adapters/imagen.adapter.ts +106 -0
- package/src/agents/adapters/nano-banana.adapter.ts +129 -0
- package/src/agents/adapters/openai.adapter.ts +165 -0
- package/src/agents/adapters/veo.adapter.ts +130 -0
- package/src/agents/agent.schema.property.test.ts +379 -0
- package/src/agents/agent.schema.test.ts +148 -0
- package/src/agents/agent.schema.ts +263 -0
- package/src/agents/index.ts +60 -0
- package/src/agents/registered-agent.schema.ts +356 -0
- package/src/agents/registry.ts +97 -0
- package/src/agents/tournament-configs.property.test.ts +266 -0
- package/src/cli/README.md +145 -0
- package/src/cli/commands/define.ts +79 -0
- package/src/cli/commands/list.ts +46 -0
- package/src/cli/commands/logs.ts +83 -0
- package/src/cli/commands/run.ts +416 -0
- package/src/cli/commands/verify.ts +110 -0
- package/src/cli/index.ts +81 -0
- package/src/config/README.md +128 -0
- package/src/config/env.ts +262 -0
- package/src/config/index.ts +19 -0
- package/src/eval/README.md +318 -0
- package/src/eval/ai-judge.test.ts +435 -0
- package/src/eval/ai-judge.ts +368 -0
- package/src/eval/code-validators.ts +414 -0
- package/src/eval/evaluateOutcome.property.test.ts +1174 -0
- package/src/eval/evaluateOutcome.ts +591 -0
- package/src/eval/immigration-validators.ts +122 -0
- package/src/eval/index.ts +90 -0
- package/src/eval/judge-cache.ts +402 -0
- package/src/eval/tournament-validators.property.test.ts +439 -0
- package/src/eval/validators.property.test.ts +1118 -0
- package/src/eval/validators.ts +1199 -0
- package/src/eval/weighted-scorer.ts +285 -0
- package/src/index.ts +17 -0
- package/src/league/README.md +188 -0
- package/src/league/health-check.ts +353 -0
- package/src/league/index.ts +93 -0
- package/src/league/killAgent.ts +151 -0
- package/src/league/league.test.ts +1151 -0
- package/src/league/runLeague.ts +843 -0
- package/src/league/scoreAgent.ts +175 -0
- package/src/modules/omnibridge/__tests__/.gitkeep +1 -0
- package/src/modules/omnibridge/__tests__/auth-tunnel.property.test.ts +524 -0
- package/src/modules/omnibridge/__tests__/deterministic-logger.property.test.ts +965 -0
- package/src/modules/omnibridge/__tests__/ghost-api.property.test.ts +461 -0
- package/src/modules/omnibridge/__tests__/omnibridge-integration.test.ts +542 -0
- package/src/modules/omnibridge/__tests__/parallel-executor.property.test.ts +671 -0
- package/src/modules/omnibridge/__tests__/semantic-normalizer.property.test.ts +521 -0
- package/src/modules/omnibridge/__tests__/semantic-normalizer.test.ts +254 -0
- package/src/modules/omnibridge/__tests__/session-vault.property.test.ts +367 -0
- package/src/modules/omnibridge/__tests__/shadow-session.property.test.ts +523 -0
- package/src/modules/omnibridge/__tests__/triangulation-engine.property.test.ts +292 -0
- package/src/modules/omnibridge/__tests__/verification-engine.property.test.ts +769 -0
- package/src/modules/omnibridge/api/.gitkeep +1 -0
- package/src/modules/omnibridge/api/ghost-api.ts +1087 -0
- package/src/modules/omnibridge/auth/.gitkeep +1 -0
- package/src/modules/omnibridge/auth/auth-tunnel.ts +843 -0
- package/src/modules/omnibridge/auth/session-vault.ts +577 -0
- package/src/modules/omnibridge/core/.gitkeep +1 -0
- package/src/modules/omnibridge/core/semantic-normalizer.ts +702 -0
- package/src/modules/omnibridge/core/triangulation-engine.ts +530 -0
- package/src/modules/omnibridge/core/types.ts +610 -0
- package/src/modules/omnibridge/execution/.gitkeep +1 -0
- package/src/modules/omnibridge/execution/deterministic-logger.ts +629 -0
- package/src/modules/omnibridge/execution/parallel-executor.ts +542 -0
- package/src/modules/omnibridge/execution/shadow-session.ts +794 -0
- package/src/modules/omnibridge/index.ts +212 -0
- package/src/modules/omnibridge/omnibridge.ts +510 -0
- package/src/modules/omnibridge/verification/.gitkeep +1 -0
- package/src/modules/omnibridge/verification/verification-engine.ts +783 -0
- package/src/outcomes/README.md +75 -0
- package/src/outcomes/acquire-pilot-customer.ts +297 -0
- package/src/outcomes/code-delivery-outcomes.ts +89 -0
- package/src/outcomes/code-outcomes.ts +256 -0
- package/src/outcomes/code_review_battle.test.ts +135 -0
- package/src/outcomes/code_review_battle.ts +135 -0
- package/src/outcomes/cold_email_battle.ts +97 -0
- package/src/outcomes/content_creation_battle.ts +160 -0
- package/src/outcomes/f1_stem_opt_compliance.ts +61 -0
- package/src/outcomes/index.ts +107 -0
- package/src/outcomes/lead_gen_battle.test.ts +113 -0
- package/src/outcomes/lead_gen_battle.ts +99 -0
- package/src/outcomes/outcome.schema.property.test.ts +229 -0
- package/src/outcomes/outcome.schema.ts +187 -0
- package/src/outcomes/qualified_sales_interest.ts +118 -0
- package/src/outcomes/swarm_planner.property.test.ts +370 -0
- package/src/outcomes/swarm_planner.ts +96 -0
- package/src/outcomes/web_extraction.ts +234 -0
- package/src/runtime/README.md +220 -0
- package/src/runtime/agentRunner.test.ts +341 -0
- package/src/runtime/agentRunner.ts +746 -0
- package/src/runtime/claudeAdapter.ts +232 -0
- package/src/runtime/costTracker.ts +123 -0
- package/src/runtime/index.ts +34 -0
- package/src/runtime/modelAdapter.property.test.ts +305 -0
- package/src/runtime/modelAdapter.ts +144 -0
- package/src/runtime/openaiAdapter.ts +235 -0
- package/src/utils/README.md +122 -0
- package/src/utils/command-runner.ts +134 -0
- package/src/utils/cost-guard.ts +379 -0
- package/src/utils/errors.test.ts +290 -0
- package/src/utils/errors.ts +442 -0
- package/src/utils/index.ts +37 -0
- package/src/utils/logger.test.ts +361 -0
- package/src/utils/logger.ts +419 -0
- package/src/utils/output-parsers.ts +216 -0
|
@@ -0,0 +1,419 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Logger Utility - Structured logging for agent attempts
|
|
3
|
+
*
|
|
4
|
+
* Provides mandatory logging for all agent attempts including:
|
|
5
|
+
* - Agent and outcome identification
|
|
6
|
+
* - Token usage tracking
|
|
7
|
+
* - Result status and failure reasons
|
|
8
|
+
* - Secret redaction and log capping for security and performance
|
|
9
|
+
*
|
|
10
|
+
* @module utils/logger
|
|
11
|
+
* @see Requirements 6.1, 6.2, 6.3
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import { readFileSync, writeFileSync, existsSync } from 'fs';
|
|
15
|
+
import { join } from 'path';
|
|
16
|
+
|
|
17
|
+
/**
 * Upper bound on how many entries are retained for a single outcome.
 * Older entries are dropped first once the bound is reached, which keeps
 * the in-memory store from growing without limit.
 */
const MAX_LOGS_PER_OUTCOME: number = 1000;
|
|
21
|
+
|
|
22
|
+
/**
 * Patterns for detecting and redacting secrets in log messages.
 *
 * The patterns are applied in array order, each one on the output of the
 * previous one, so ORDER MATTERS:
 *  - `Bearer <token>` is handled before the `name: value` patterns. Otherwise
 *    `Authorization: Bearer <token>` would have only the word "Bearer"
 *    consumed as the header value and the token itself would be left behind.
 *  - The `Authorization` pattern additionally swallows a leading auth scheme
 *    (Bearer/Basic/Digest/Negotiate/Token) so the credentials after the scheme
 *    are covered as well.
 *
 * The `name: value` patterns deliberately have no trailing `\b`: a word
 * boundary there forces the match to stop at the last word character, which
 * leaks trailing symbols of a secret (or the entire secret when it consists
 * only of symbols).
 *
 * Add more patterns as needed for different secret types.
 */
const SECRET_PATTERNS = [
  // API keys (common formats)
  /\b(sk-[a-zA-Z0-9]{48})\b/g, // OpenAI
  /\b(xoxb-[0-9]+-[0-9]+-[a-zA-Z0-9]+)\b/g, // Slack
  /\b([a-zA-Z0-9]{32})\b/g, // Generic 32-char keys
  /\b([a-zA-Z0-9]{40})\b/g, // GitHub tokens, etc.
  // Bearer credentials (must precede the name/value patterns, see above)
  /\bBearer\s+([a-zA-Z0-9._-]+)/gi,
  // Password-style `name: value` pairs (the name is redacted together with the value)
  /\bpassword["\s]*:[\s"]*([^\s,"}]+)/gi,
  /\btoken["\s]*:[\s"]*([^\s,"}]+)/gi,
  /\bsecret["\s]*:[\s"]*([^\s,"}]+)/gi,
  /\bkey["\s]*:[\s"]*([^\s,"}]+)/gi,
  // Authorization headers, including an optional scheme before the credentials
  /\bAuthorization["\s]*:[\s"]*(?:(?:Bearer|Basic|Digest|Negotiate|Token)\s+)?([^\s,"}]+)/gi,
];

/**
 * Redacts secrets from a string using the patterns in SECRET_PATTERNS.
 *
 * Every match is replaced as a whole with the literal `[REDACTED]`; for the
 * `name: value` patterns this includes the name, not only the value.
 *
 * @param text - The text to redact secrets from
 * @returns The text with every matched span replaced with [REDACTED]
 */
function redactSecrets(text: string): string {
  return SECRET_PATTERNS.reduce(
    (current, pattern) => current.replace(pattern, '[REDACTED]'),
    text
  );
}
|
|
55
|
+
|
|
56
|
+
/**
 * Redacts secrets from log entry metadata recursively.
 *
 * Strings are passed through `redactSecrets`; arrays and objects are copied
 * with every element / own enumerable property processed the same way; all
 * other primitives are returned unchanged.
 *
 * Two cases are handled explicitly:
 *  - Circular references: a value that is its own ancestor is replaced with
 *    the string `[Circular]` instead of recursing until the stack overflows.
 *    Values that are merely shared (referenced twice, no cycle) are copied
 *    normally at each occurrence.
 *  - `Date` instances are returned as-is; enumerating their (non-existent)
 *    own properties would otherwise turn them into `{}`.
 *
 * @param obj - The value to redact secrets from
 * @returns A redacted copy of the value (the input is not modified)
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- signature kept for existing callers
function redactSecretsInObject(obj: any): any {
  // Objects currently being traversed (the path from the root to the current value).
  const ancestors = new WeakSet<object>();

  const visit = (value: unknown): unknown => {
    if (typeof value === 'string') {
      return redactSecrets(value);
    }
    if (value === null || typeof value !== 'object') {
      return value;
    }
    if (value instanceof Date) {
      return value;
    }
    if (ancestors.has(value)) {
      return '[Circular]';
    }

    ancestors.add(value);
    try {
      if (Array.isArray(value)) {
        return value.map((item) => visit(item));
      }
      const copy: Record<string, unknown> = {};
      for (const [key, inner] of Object.entries(value)) {
        copy[key] = visit(inner);
      }
      return copy;
    } finally {
      // Leaving this subtree: the value is no longer an ancestor.
      ancestors.delete(value);
    }
  };

  return visit(obj);
}
|
|
78
|
+
|
|
79
|
+
/**
 * Outcome status recorded on a log entry: the attempt succeeded, failed,
 * or has not finished yet.
 */
export type LogResult =
  | 'SUCCESS'
  | 'FAILURE'
  | 'PENDING';
|
|
83
|
+
|
|
84
|
+
/**
 * One recorded agent attempt, as kept in the log store.
 *
 * @see Requirements 6.1 - Record agent ID, outcome ID, prompt version, tokens spent, result, failure reason
 */
export interface LogEntry {
  /** Creation time of the entry as an ISO-8601 string. */
  timestamp: string;

  /** Identifier of the agent that made the attempt. */
  agentId: string;

  /** Identifier of the outcome the agent was attempting. */
  outcomeId: string;

  /** Version label of the prompt used for the attempt. */
  promptVersion: string;

  /** Tokens consumed by the attempt. */
  tokensSpent: number;

  /** Status of the attempt. */
  result: LogResult;

  /** Why the attempt failed; expected when `result` is `FAILURE`. */
  failureReason?: string;

  /** Free-form extra data attached to the entry. */
  metadata?: Record<string, unknown>;
}
|
|
109
|
+
|
|
110
|
+
/**
 * Caller-supplied data for a new log entry.
 * Carries the same fields as `LogEntry` except `timestamp`, which `log()`
 * fills in at creation time.
 */
export interface LogEntryInput {
  /** Identifier of the agent that made the attempt. */
  agentId: string;
  /** Identifier of the outcome being attempted. */
  outcomeId: string;
  /** Version label of the prompt used. */
  promptVersion: string;
  /** Tokens consumed by the attempt. */
  tokensSpent: number;
  /** Status of the attempt. */
  result: LogResult;
  /** Reason for the failure, when there is one. */
  failureReason?: string;
  /** Free-form extra data to store with the entry. */
  metadata?: Record<string, unknown>;
}
|
|
123
|
+
|
|
124
|
+
/**
 * Location of the JSON file used for local persistence: `logs.json` in the
 * process's current working directory.
 * In production, this would be replaced with Durable Objects storage.
 */
const LOG_FILE = join(process.cwd(), 'logs.json');

/**
 * In-memory log storage, keyed by storage key, with each value holding the
 * entries for one outcome. Mirrored to LOG_FILE for CLI use.
 */
const logStore = new Map<string, LogEntry[]>();
|
|
134
|
+
|
|
135
|
+
/**
 * Load logs from LOG_FILE into the in-memory store.
 *
 * Loading is best-effort: a missing file, unreadable file, or invalid JSON
 * leaves the store as it was. The parsed document is also checked for the
 * expected shape before anything is stored:
 *  - the top level must be a plain JSON object (not `null`, not an array);
 *  - only properties whose value is an array are loaded, so `log()` can
 *    safely append to whatever it finds in the store.
 *
 * NOTE(review): individual array elements are not validated against
 * `LogEntry`; a hand-edited file can still inject malformed entries.
 */
function loadLogsFromFile(): void {
  try {
    if (!existsSync(LOG_FILE)) {
      return;
    }

    const parsed: unknown = JSON.parse(readFileSync(LOG_FILE, 'utf-8'));
    if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
      return;
    }

    for (const [key, entries] of Object.entries(parsed)) {
      if (Array.isArray(entries)) {
        logStore.set(key, entries as LogEntry[]);
      }
    }
  } catch {
    // Ignore errors, start fresh
  }
}
|
|
151
|
+
|
|
152
|
+
/**
 * Write the whole in-memory store to LOG_FILE as pretty-printed JSON
 * (2-space indent). Any error while serializing or writing is swallowed,
 * so persistence never interrupts logging.
 */
function saveLogsToFile(): void {
  try {
    const snapshot: Record<string, LogEntry[]> = {};
    logStore.forEach((entries, key) => {
      snapshot[key] = entries;
    });
    const serialized = JSON.stringify(snapshot, null, 2);
    writeFileSync(LOG_FILE, serialized);
  } catch {
    // Ignore errors
  }
}
|
|
166
|
+
|
|
167
|
+
// Populate the in-memory store from disk as soon as this module is evaluated.
loadLogsFromFile();
|
|
169
|
+
|
|
170
|
+
/**
 * Builds the store key under which an outcome's entries are kept.
 *
 * @param outcomeId - The outcome ID to build the key for
 * @returns The outcome ID prefixed with `logs:`
 */
function getLogKey(outcomeId: string): string {
  const prefix = 'logs:';
  return prefix + outcomeId;
}
|
|
179
|
+
|
|
180
|
+
/**
 * Records one agent attempt.
 *
 * Steps, in order:
 *  1. Build the entry with the current time as `timestamp`; `failureReason`
 *     and `metadata` are included only when provided (truthy) and are passed
 *     through secret redaction first.
 *  2. Append it to the outcome's list, dropping the oldest entries so the
 *     list never exceeds MAX_LOGS_PER_OUTCOME.
 *  3. Persist the whole store to disk.
 *  4. Print the formatted entry: stderr for FAILURE, stdout otherwise.
 *
 * @param entry - The log entry data
 * @returns The created LogEntry with timestamp
 *
 * @example
 * log({
 *   agentId: 'agent-001',
 *   outcomeId: 'qualified_sales_interest',
 *   promptVersion: 'v1.0.0',
 *   tokensSpent: 500,
 *   result: 'SUCCESS'
 * });
 *
 * @see Requirements 6.1
 */
export function log(entry: LogEntryInput): LogEntry {
  const logEntry: LogEntry = {
    timestamp: new Date().toISOString(),
    agentId: entry.agentId,
    outcomeId: entry.outcomeId,
    promptVersion: entry.promptVersion,
    tokensSpent: entry.tokensSpent,
    result: entry.result,
  };
  if (entry.failureReason) {
    logEntry.failureReason = redactSecrets(entry.failureReason);
  }
  if (entry.metadata) {
    logEntry.metadata = redactSecretsInObject(entry.metadata);
  }

  // Append to this outcome's list, evicting the oldest entries when full.
  const storageKey = getLogKey(entry.outcomeId);
  const bucket = logStore.get(storageKey) ?? [];
  const overflow = bucket.length - MAX_LOGS_PER_OUTCOME + 1;
  if (overflow > 0) {
    bucket.splice(0, overflow);
  }
  bucket.push(logEntry);
  logStore.set(storageKey, bucket);

  // Persist to file for CLI usage
  saveLogsToFile();

  // Echo to the console (the entry is already redacted at this point).
  const line = formatLogEntry(logEntry);
  if (logEntry.result !== 'FAILURE') {
    console.log(line);
  } else {
    console.error(line);
  }

  return logEntry;
}
|
|
234
|
+
|
|
235
|
+
/**
 * Looks up every entry recorded for an outcome.
 *
 * When entries exist, the array held by the store is returned directly (not
 * a copy); when none exist, a new empty array is returned.
 *
 * @param outcomeId - The outcome ID to retrieve logs for
 * @returns Array of log entries for the outcome
 *
 * @example
 * const logs = getLogs('qualified_sales_interest');
 * console.log(`Found ${logs.length} log entries`);
 *
 * @see Requirements 6.2, 6.3
 */
export function getLogs(outcomeId: string): LogEntry[] {
  const stored = logStore.get(getLogKey(outcomeId));
  return stored ?? [];
}
|
|
251
|
+
|
|
252
|
+
/**
 * Returns the entries of one outcome that were produced by a given agent.
 *
 * @param outcomeId - The outcome ID
 * @param agentId - The agent ID to filter by
 * @returns New array with the matching entries, in stored order
 */
export function getAgentLogs(outcomeId: string, agentId: string): LogEntry[] {
  const matching: LogEntry[] = [];
  for (const candidate of getLogs(outcomeId)) {
    if (candidate.agentId === agentId) {
      matching.push(candidate);
    }
  }
  return matching;
}
|
|
263
|
+
|
|
264
|
+
/**
 * Removes every in-memory entry for one outcome.
 * Useful for testing and cleanup. This function itself does not write the
 * log file.
 *
 * @param outcomeId - The outcome ID to clear logs for
 */
export function clearLogs(outcomeId: string): void {
  logStore.delete(getLogKey(outcomeId));
}
|
|
274
|
+
|
|
275
|
+
/**
 * Empties the in-memory store for every outcome.
 * Useful for testing. This function itself does not write the log file.
 */
export function clearAllLogs(): void {
  logStore.clear();
}
|
|
282
|
+
|
|
283
|
+
/**
 * Counts the entries held in the store, summed over all outcomes.
 *
 * @returns Total log count
 */
export function getTotalLogCount(): number {
  let total = 0;
  logStore.forEach((entries) => {
    total += entries.length;
  });
  return total;
}
|
|
295
|
+
|
|
296
|
+
/**
 * Renders one log entry as a single console line:
 * `<icon> [<time>] Agent:<id> | Outcome:<id> | Tokens:<n> | <result>`,
 * followed by ` | Reason: <text>` when a failure reason is present.
 *
 * `<time>` is the part of the timestamp between `T` and the first `.`;
 * if the timestamp contains no `T`, the whole timestamp is shown.
 *
 * @param entry - The log entry to format
 * @returns Formatted string for display
 */
function formatLogEntry(entry: LogEntry): string {
  let statusIcon: string;
  switch (entry.result) {
    case 'SUCCESS':
      statusIcon = '✅';
      break;
    case 'FAILURE':
      statusIcon = '❌';
      break;
    default:
      statusIcon = '⏳';
  }

  const [, timePart] = entry.timestamp.split('T');
  const clock = timePart?.split('.')[0] ?? entry.timestamp;

  const segments = [
    `${statusIcon} [${clock}] Agent:${entry.agentId}`,
    `Outcome:${entry.outcomeId}`,
    `Tokens:${entry.tokensSpent}`,
    `${entry.result}`,
  ];
  if (entry.failureReason) {
    segments.push(`Reason: ${entry.failureReason}`);
  }

  return segments.join(' | ');
}
|
|
314
|
+
|
|
315
|
+
/**
 * Builds the multi-line CLI listing for one outcome: a title line with the
 * entry count, a 60-character rule, one formatted line per entry, and a
 * closing rule. Returns a short "No logs found" message when the outcome has
 * no entries.
 *
 * @param outcomeId - The outcome ID to format logs for
 * @returns Formatted string for CLI output
 *
 * @see Requirements 6.2 - Logs viewable via CLI
 */
export function formatLogsForCli(outcomeId: string): string {
  const entries = getLogs(outcomeId);
  if (entries.length === 0) {
    return `No logs found for outcome: ${outcomeId}`;
  }

  const rule = '─'.repeat(60);
  const lines = [
    '', // leading blank line
    `📋 Logs for outcome: ${outcomeId} (${entries.length} entries)`,
    rule,
    ...entries.map((item) => formatLogEntry(item)),
    rule,
    '', // trailing newline
  ];

  return lines.join('\n');
}
|
|
336
|
+
|
|
337
|
+
/**
 * Convenience wrapper around `log()` that records a SUCCESS entry.
 *
 * @param agentId - Agent ID
 * @param outcomeId - Outcome ID
 * @param promptVersion - Prompt version
 * @param tokensSpent - Tokens spent
 * @param metadata - Optional metadata
 * @returns The created log entry
 */
export function logSuccess(
  agentId: string,
  outcomeId: string,
  promptVersion: string,
  tokensSpent: number,
  metadata?: Record<string, unknown>
): LogEntry {
  const input: LogEntryInput = {
    agentId,
    outcomeId,
    promptVersion,
    tokensSpent,
    result: 'SUCCESS',
    metadata,
  };
  return log(input);
}
|
|
363
|
+
|
|
364
|
+
/**
 * Convenience wrapper around `log()` that records a FAILURE entry together
 * with the reason for the failure.
 *
 * @param agentId - Agent ID
 * @param outcomeId - Outcome ID
 * @param promptVersion - Prompt version
 * @param tokensSpent - Tokens spent
 * @param failureReason - Reason for failure
 * @param metadata - Optional metadata
 * @returns The created log entry
 */
export function logFailure(
  agentId: string,
  outcomeId: string,
  promptVersion: string,
  tokensSpent: number,
  failureReason: string,
  metadata?: Record<string, unknown>
): LogEntry {
  const input: LogEntryInput = {
    agentId,
    outcomeId,
    promptVersion,
    tokensSpent,
    result: 'FAILURE',
    failureReason,
    metadata,
  };
  return log(input);
}
|
|
393
|
+
|
|
394
|
+
/**
 * Convenience wrapper around `log()` that records a PENDING entry for an
 * attempt that has not finished.
 *
 * @param agentId - Agent ID
 * @param outcomeId - Outcome ID
 * @param promptVersion - Prompt version
 * @param tokensSpent - Tokens spent so far
 * @param metadata - Optional metadata
 * @returns The created log entry
 */
export function logPending(
  agentId: string,
  outcomeId: string,
  promptVersion: string,
  tokensSpent: number,
  metadata?: Record<string, unknown>
): LogEntry {
  const input: LogEntryInput = {
    agentId,
    outcomeId,
    promptVersion,
    tokensSpent,
    result: 'PENDING',
    metadata,
  };
  return log(input);
}
|
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Structured Output Parsers for Command Results
|
|
3
|
+
*
|
|
4
|
+
* Parses outputs from tests, linting, benchmarks, and security scans
|
|
5
|
+
* into structured data for evaluation and scoring.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { CommandResult } from './command-runner';
|
|
9
|
+
|
|
10
|
+
/** Structured summary of a test run, as produced by `parseTestOutput`. */
export interface TestResult {
  /** Sum of the passed, failed and skipped counts. */
  totalTests: number;
  /** Number of tests reported as passed. */
  passedTests: number;
  /** Number of tests reported as failed. */
  failedTests: number;
  /** Number of tests reported as skipped. */
  skippedTests: number;
  /** Error excerpts pulled from the output. */
  testErrors: string[];
  /** Names of individual tests/suites found on pass/fail marker lines. */
  testNames: string[];
  /** Whether the command exited with code 0. */
  success: boolean;
}
|
|
19
|
+
|
|
20
|
+
/** Structured summary of a lint run, as produced by `parseLintOutput`. */
export interface LintResult {
  /** Number of parsed issues of any severity. */
  totalIssues: number;
  /** Number of parsed issues with severity `error`. */
  errorCount: number;
  /** Number of parsed issues with severity `warning`. */
  warningCount: number;
  /** The individual issues, in the order they appear in the output. */
  issues: Array<{
    /** File the issue was reported in. */
    file: string;
    /** Line number of the issue. */
    line: number;
    /** Column number of the issue. */
    column: number;
    /** Severity reported by the linter. */
    severity: 'error' | 'warning' | 'info';
    /** Human-readable description of the issue. */
    message: string;
    /** Identifier of the violated rule, when the output included one. */
    rule?: string;
  }>;
  /** Whether the command exited with code 0. */
  success: boolean;
}
|
|
34
|
+
|
|
35
|
+
/** Structured summary of a benchmark run, as produced by `parseBenchmarkOutput`. */
export interface BenchmarkResult {
  /** Number of parsed metrics. */
  totalBenchmarks: number;
  /** One record per benchmark line found in the output. */
  metrics: Array<{
    /** Benchmark name as printed in the output. */
    name: string;
    /** Duration in milliseconds, for time-based lines. */
    timeMs?: number;
    /** Memory figure in megabytes, when available. */
    memoryMb?: number;
    /** Throughput in operations per second, for ops/sec lines. */
    opsPerSec?: number;
    /** Generic score, when available. */
    score?: number;
  }>;
  /** Whether the command exited with code 0. */
  success: boolean;
}
|
|
46
|
+
|
|
47
|
+
/** Structured summary of a security scan, as produced by `parseSecurityScanOutput`. */
export interface SecurityScanResult {
  /** Sum of the four per-severity counts. */
  totalVulnerabilities: number;
  /** Number of critical-severity vulnerabilities. */
  criticalCount: number;
  /** Number of high-severity vulnerabilities. */
  highCount: number;
  /** Number of medium-severity vulnerabilities. */
  mediumCount: number;
  /** Number of low-severity vulnerabilities. */
  lowCount: number;
  /** One record per vulnerability line found in the output. */
  vulnerabilities: Array<{
    /** Severity of the finding. */
    severity: 'critical' | 'high' | 'medium' | 'low';
    /** Affected package, when known. */
    package?: string;
    /** Description of the finding. */
    description: string;
    /** CVE identifier, when known. */
    cve?: string;
  }>;
  /** Whether the command exited with code 0. */
  success: boolean;
}
|
|
61
|
+
|
|
62
|
+
/**
 * Parse test output (Jest- and Vitest-style summaries) into a `TestResult`.
 *
 * Counts are read from the summary line, e.g.
 *   `Tests:       1 failed, 2 skipped, 5 passed, 8 total`   (Jest)
 *   ` Tests  1 failed | 5 passed (6)`                        (Vitest)
 * Each of passed/failed/skipped is looked up anywhere on that line, so the
 * order of the parts and the presence of other parts do not matter. A count
 * that is absent is 0. `totalTests` is the sum of the three counts.
 *
 * `testNames` holds the text following a ✓/✗/✕/❌/PASS/FAIL marker at the
 * start of a line. `testErrors` holds up to 10 excerpts, each starting at an
 * `Error:`, standalone `FAIL`, or `Failed to compile` marker and running up
 * to the next marker (or the end of the output).
 *
 * @param result - Command result whose stdout and stderr are scanned
 * @returns Parsed counts, names and errors; `success` mirrors exit code 0
 */
export function parseTestOutput(result: CommandResult): TestResult {
  const output = result.stdout + '\n' + result.stderr;
  const testResult: TestResult = {
    totalTests: 0,
    passedTests: 0,
    failedTests: 0,
    skippedTests: 0,
    testErrors: [],
    testNames: [],
    success: result.exitCode === 0,
  };

  // Locate the summary line first, then pick the individual counts out of it.
  // "Test Suites:" / "Test Files" lines do not match either alternative.
  const summaryMatch = output.match(/(?:Tests?:[ \t]*|^[ \t]*Tests[ \t]+)(\d.*)$/im);
  const summary = summaryMatch?.[1] ?? '';
  const countOf = (pattern: RegExp): number => {
    const found = summary.match(pattern);
    return found ? parseInt(found[1], 10) : 0;
  };

  testResult.passedTests = countOf(/(\d+)\s*passed\b/i);
  testResult.failedTests = countOf(/(\d+)\s*failed\b/i);
  testResult.skippedTests = countOf(/(\d+)\s*skipped\b/i);
  testResult.totalTests = testResult.passedTests + testResult.failedTests + testResult.skippedTests;

  // Extract test names from passed/failed marker lines
  const testNameRegex = /^\s*(✓|✗|✕|❌|PASS|FAIL)\s+(.+)$/gm;
  let match;
  while ((match = testNameRegex.exec(output)) !== null) {
    testResult.testNames.push(match[2].trim());
  }

  // Extract error excerpts. Splitting on a capturing group yields
  // [text, marker, text, marker, text, ...]; each excerpt is a marker plus
  // the text that follows it. `\bFAIL\b` keeps words such as "failed" in the
  // summary line from being treated as a marker.
  const pieces = output.split(/(Error:|\bFAIL\b|Failed to compile)/gi);
  for (let i = 1; i < pieces.length && testResult.testErrors.length < 10; i += 2) {
    testResult.testErrors.push(`${pieces[i]}${pieces[i + 1] ?? ''}`.trim());
  }

  return testResult;
}
|
|
102
|
+
|
|
103
|
+
/**
 * Parse lint output into a `LintResult`.
 *
 * Recognizes one issue per line in the shape
 *   `<file>: line <n>, col <n>, Error|Warning - <message> (<rule>)`
 * where the trailing `(<rule>)` is optional. Lines in any other shape are
 * ignored. Error and warning totals are counted from the parsed issues.
 *
 * @param result - Command result whose stdout and stderr are scanned
 * @returns Parsed issues and counts; `success` mirrors exit code 0
 */
export function parseLintOutput(result: CommandResult): LintResult {
  const combined = result.stdout + '\n' + result.stderr;
  const issueLine = /^(.+): line (\d+), col (\d+), (Error|Warning) - (.+?)(?:\s+\((.+?)\))?\s*$/gm;

  const issues: LintResult['issues'] = [];
  for (let hit = issueLine.exec(combined); hit !== null; hit = issueLine.exec(combined)) {
    issues.push({
      file: hit[1],
      line: parseInt(hit[2]),
      column: parseInt(hit[3]),
      severity: hit[4].toLowerCase() as 'error' | 'warning',
      message: hit[5],
      rule: hit[6],
    });
  }

  // Tally severities in a single pass over the parsed issues.
  let errors = 0;
  let warnings = 0;
  for (const issue of issues) {
    if (issue.severity === 'error') {
      errors += 1;
    } else if (issue.severity === 'warning') {
      warnings += 1;
    }
  }

  return {
    totalIssues: issues.length,
    errorCount: errors,
    warningCount: warnings,
    issues,
    success: result.exitCode === 0,
  };
}
|
|
138
|
+
|
|
139
|
+
/**
 * Parse benchmark output into a `BenchmarkResult`.
 *
 * Two line shapes are recognized:
 *  - Benchmark.js style: `<name> x 1,234 ops/sec ±1.23% (90 runs sampled)`,
 *    recorded as `opsPerSec` (thousands separators removed);
 *  - time style: `<name>: <number> <unit>` with unit `ms`, `millisecond(s)`,
 *    `s` or `second(s)`, recorded as `timeMs`. Millisecond values are stored
 *    as-is; second values are multiplied by 1000.
 *
 * All ops/sec metrics come first in `metrics`, followed by all time metrics.
 *
 * @param result - Command result whose stdout and stderr are scanned
 * @returns Parsed metrics; `success` mirrors exit code 0
 */
export function parseBenchmarkOutput(result: CommandResult): BenchmarkResult {
  const output = result.stdout + '\n' + result.stderr;
  const benchResult: BenchmarkResult = {
    totalBenchmarks: 0,
    metrics: [],
    success: result.exitCode === 0,
  };

  // Benchmark.js patterns
  const benchRegex = /^(.+?)\s+x\s+([\d,]+)\s+ops\/sec\s+±([\d.]+)%\s+\(([\d]+)\s+runs\s+sampled\)$/gm;
  let match;
  while ((match = benchRegex.exec(output)) !== null) {
    const [, name, opsPerSec] = match;
    benchResult.metrics.push({
      name: name.trim(),
      opsPerSec: parseFloat(opsPerSec.replace(/,/g, '')),
    });
  }

  // Custom benchmark patterns (time-based)
  const timeRegex = /^(.+?):\s*([\d.]+)\s*(ms|milliseconds?|s|seconds?)$/gm;
  while ((match = timeRegex.exec(output)) !== null) {
    const [, name, time, unit] = match;
    // "millisecond(s)" does not start with "ms", so test for it explicitly;
    // everything else the regex admits is a seconds unit.
    const isMilliseconds = unit === 'ms' || unit.startsWith('milli');
    const value = parseFloat(time);
    benchResult.metrics.push({
      name: name.trim(),
      timeMs: isMilliseconds ? value : value * 1000,
    });
  }

  benchResult.totalBenchmarks = benchResult.metrics.length;

  return benchResult;
}
|
|
175
|
+
|
|
176
|
+
/**
 * Parse security scan output into a `SecurityScanResult`.
 *
 * Recognizes lines in the shape
 *   `<count> <low|moderate|high|critical> severity vulnerability in <package>: <description>`
 * (`vulnerabilities` is accepted as well as `vulnerability`). The `moderate`
 * severity is reported as `medium`. Each matching line adds one record to
 * `vulnerabilities` and adds `<count>` to the counter of its severity;
 * `totalVulnerabilities` is the sum of the four counters.
 *
 * @param result - Command result whose stdout and stderr are scanned
 * @returns Parsed findings and counts; `success` mirrors exit code 0
 */
export function parseSecurityScanOutput(result: CommandResult): SecurityScanResult {
  const output = result.stdout + '\n' + result.stderr;
  const scanResult: SecurityScanResult = {
    totalVulnerabilities: 0,
    criticalCount: 0,
    highCount: 0,
    mediumCount: 0,
    lowCount: 0,
    vulnerabilities: [],
    success: result.exitCode === 0,
  };

  // npm audit patterns
  const auditRegex =
    /^(\d+)\s+(low|moderate|high|critical)\s+severity\s+vulnerabilit(?:y|ies)\s+in\s+(.+?):\s*(.+?)\s*$/gm;
  let match;
  while ((match = auditRegex.exec(output)) !== null) {
    // `package` is a reserved word in strict-mode (module) code, so the
    // captured package name is bound to `pkg` instead.
    const [, count, rawSeverity, pkg, description] = match;
    const vulnCount = parseInt(count, 10);
    const severity =
      rawSeverity === 'moderate' ? 'medium' : (rawSeverity as 'critical' | 'high' | 'low');

    scanResult.vulnerabilities.push({
      severity,
      package: pkg,
      description,
    });

    // Update counts
    switch (severity) {
      case 'critical': scanResult.criticalCount += vulnCount; break;
      case 'high': scanResult.highCount += vulnCount; break;
      case 'medium': scanResult.mediumCount += vulnCount; break;
      case 'low': scanResult.lowCount += vulnCount; break;
    }
  }

  scanResult.totalVulnerabilities =
    scanResult.criticalCount + scanResult.highCount + scanResult.mediumCount + scanResult.lowCount;

  return scanResult;
}
|