outcome-cli 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +261 -0
- package/package.json +95 -0
- package/src/agents/README.md +139 -0
- package/src/agents/adapters/anthropic.adapter.ts +166 -0
- package/src/agents/adapters/dalle.adapter.ts +145 -0
- package/src/agents/adapters/gemini.adapter.ts +134 -0
- package/src/agents/adapters/imagen.adapter.ts +106 -0
- package/src/agents/adapters/nano-banana.adapter.ts +129 -0
- package/src/agents/adapters/openai.adapter.ts +165 -0
- package/src/agents/adapters/veo.adapter.ts +130 -0
- package/src/agents/agent.schema.property.test.ts +379 -0
- package/src/agents/agent.schema.test.ts +148 -0
- package/src/agents/agent.schema.ts +263 -0
- package/src/agents/index.ts +60 -0
- package/src/agents/registered-agent.schema.ts +356 -0
- package/src/agents/registry.ts +97 -0
- package/src/agents/tournament-configs.property.test.ts +266 -0
- package/src/cli/README.md +145 -0
- package/src/cli/commands/define.ts +79 -0
- package/src/cli/commands/list.ts +46 -0
- package/src/cli/commands/logs.ts +83 -0
- package/src/cli/commands/run.ts +416 -0
- package/src/cli/commands/verify.ts +110 -0
- package/src/cli/index.ts +81 -0
- package/src/config/README.md +128 -0
- package/src/config/env.ts +262 -0
- package/src/config/index.ts +19 -0
- package/src/eval/README.md +318 -0
- package/src/eval/ai-judge.test.ts +435 -0
- package/src/eval/ai-judge.ts +368 -0
- package/src/eval/code-validators.ts +414 -0
- package/src/eval/evaluateOutcome.property.test.ts +1174 -0
- package/src/eval/evaluateOutcome.ts +591 -0
- package/src/eval/immigration-validators.ts +122 -0
- package/src/eval/index.ts +90 -0
- package/src/eval/judge-cache.ts +402 -0
- package/src/eval/tournament-validators.property.test.ts +439 -0
- package/src/eval/validators.property.test.ts +1118 -0
- package/src/eval/validators.ts +1199 -0
- package/src/eval/weighted-scorer.ts +285 -0
- package/src/index.ts +17 -0
- package/src/league/README.md +188 -0
- package/src/league/health-check.ts +353 -0
- package/src/league/index.ts +93 -0
- package/src/league/killAgent.ts +151 -0
- package/src/league/league.test.ts +1151 -0
- package/src/league/runLeague.ts +843 -0
- package/src/league/scoreAgent.ts +175 -0
- package/src/modules/omnibridge/__tests__/.gitkeep +1 -0
- package/src/modules/omnibridge/__tests__/auth-tunnel.property.test.ts +524 -0
- package/src/modules/omnibridge/__tests__/deterministic-logger.property.test.ts +965 -0
- package/src/modules/omnibridge/__tests__/ghost-api.property.test.ts +461 -0
- package/src/modules/omnibridge/__tests__/omnibridge-integration.test.ts +542 -0
- package/src/modules/omnibridge/__tests__/parallel-executor.property.test.ts +671 -0
- package/src/modules/omnibridge/__tests__/semantic-normalizer.property.test.ts +521 -0
- package/src/modules/omnibridge/__tests__/semantic-normalizer.test.ts +254 -0
- package/src/modules/omnibridge/__tests__/session-vault.property.test.ts +367 -0
- package/src/modules/omnibridge/__tests__/shadow-session.property.test.ts +523 -0
- package/src/modules/omnibridge/__tests__/triangulation-engine.property.test.ts +292 -0
- package/src/modules/omnibridge/__tests__/verification-engine.property.test.ts +769 -0
- package/src/modules/omnibridge/api/.gitkeep +1 -0
- package/src/modules/omnibridge/api/ghost-api.ts +1087 -0
- package/src/modules/omnibridge/auth/.gitkeep +1 -0
- package/src/modules/omnibridge/auth/auth-tunnel.ts +843 -0
- package/src/modules/omnibridge/auth/session-vault.ts +577 -0
- package/src/modules/omnibridge/core/.gitkeep +1 -0
- package/src/modules/omnibridge/core/semantic-normalizer.ts +702 -0
- package/src/modules/omnibridge/core/triangulation-engine.ts +530 -0
- package/src/modules/omnibridge/core/types.ts +610 -0
- package/src/modules/omnibridge/execution/.gitkeep +1 -0
- package/src/modules/omnibridge/execution/deterministic-logger.ts +629 -0
- package/src/modules/omnibridge/execution/parallel-executor.ts +542 -0
- package/src/modules/omnibridge/execution/shadow-session.ts +794 -0
- package/src/modules/omnibridge/index.ts +212 -0
- package/src/modules/omnibridge/omnibridge.ts +510 -0
- package/src/modules/omnibridge/verification/.gitkeep +1 -0
- package/src/modules/omnibridge/verification/verification-engine.ts +783 -0
- package/src/outcomes/README.md +75 -0
- package/src/outcomes/acquire-pilot-customer.ts +297 -0
- package/src/outcomes/code-delivery-outcomes.ts +89 -0
- package/src/outcomes/code-outcomes.ts +256 -0
- package/src/outcomes/code_review_battle.test.ts +135 -0
- package/src/outcomes/code_review_battle.ts +135 -0
- package/src/outcomes/cold_email_battle.ts +97 -0
- package/src/outcomes/content_creation_battle.ts +160 -0
- package/src/outcomes/f1_stem_opt_compliance.ts +61 -0
- package/src/outcomes/index.ts +107 -0
- package/src/outcomes/lead_gen_battle.test.ts +113 -0
- package/src/outcomes/lead_gen_battle.ts +99 -0
- package/src/outcomes/outcome.schema.property.test.ts +229 -0
- package/src/outcomes/outcome.schema.ts +187 -0
- package/src/outcomes/qualified_sales_interest.ts +118 -0
- package/src/outcomes/swarm_planner.property.test.ts +370 -0
- package/src/outcomes/swarm_planner.ts +96 -0
- package/src/outcomes/web_extraction.ts +234 -0
- package/src/runtime/README.md +220 -0
- package/src/runtime/agentRunner.test.ts +341 -0
- package/src/runtime/agentRunner.ts +746 -0
- package/src/runtime/claudeAdapter.ts +232 -0
- package/src/runtime/costTracker.ts +123 -0
- package/src/runtime/index.ts +34 -0
- package/src/runtime/modelAdapter.property.test.ts +305 -0
- package/src/runtime/modelAdapter.ts +144 -0
- package/src/runtime/openaiAdapter.ts +235 -0
- package/src/utils/README.md +122 -0
- package/src/utils/command-runner.ts +134 -0
- package/src/utils/cost-guard.ts +379 -0
- package/src/utils/errors.test.ts +290 -0
- package/src/utils/errors.ts +442 -0
- package/src/utils/index.ts +37 -0
- package/src/utils/logger.test.ts +361 -0
- package/src/utils/logger.ts +419 -0
- package/src/utils/output-parsers.ts +216 -0
|
@@ -0,0 +1,629 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Deterministic Logger
|
|
3
|
+
*
|
|
4
|
+
* Records every DOM interaction for verification and audit purposes.
|
|
5
|
+
* Provides cryptographic proofs of action sequences and supports
|
|
6
|
+
* diff analysis between competing agents.
|
|
7
|
+
*
|
|
8
|
+
* Requirements: 7.3, 8.1, 8.2, 8.3, 8.4, 8.5, 8.6
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { createHash } from 'node:crypto';
|
|
12
|
+
import type {
|
|
13
|
+
ActionLogEntry,
|
|
14
|
+
VerificationProof,
|
|
15
|
+
DiffAnalysis,
|
|
16
|
+
} from '../core/types.js';
|
|
17
|
+
|
|
18
|
+
// =============================================================================
|
|
19
|
+
// Types
|
|
20
|
+
// =============================================================================
|
|
21
|
+
|
|
22
|
+
/**
 * Options controlling how a Deterministic Logger stores screenshots and
 * hashes action sequences.
 */
export interface DeterministicLoggerConfig {
  /** Hash algorithm used when building verification proofs. */
  hashAlgorithm: 'sha256' | 'sha384' | 'sha512';
  /** Whether screenshots are captured at key decision points. */
  captureScreenshots: boolean;
  /** Upper bound on the number of screenshots stored for one session. */
  maxScreenshotsPerSession: number;
}
|
|
33
|
+
|
|
34
|
+
/**
 * Outcome of an attempt to append an action to a session log.
 */
export interface LogActionResult {
  /** `true` when the action was stored. */
  success: boolean;
  /** Reason the action was rejected; set only on failure. */
  error?: string;
  /** The stored entry (including its timestamp); set only on success. */
  entry?: ActionLogEntry;
}
|
|
45
|
+
|
|
46
|
+
/**
 * Outcome of an attempt to build a verification proof for a session.
 */
export interface GenerateProofResult {
  /** `true` when a proof was produced. */
  success: boolean;
  /** Reason no proof could be produced; set only on failure. */
  error?: string;
  /** The proof that was produced; set only on success. */
  proof?: VerificationProof;
}
|
|
57
|
+
|
|
58
|
+
/**
 * Outcome of diffing the action logs of two agents.
 */
export interface CompareAgentsResult {
  /** `true` when the comparison could be carried out. */
  success: boolean;
  /** Reason the comparison failed; set only on failure. */
  error?: string;
  /** The resulting diff analysis; set only on success. */
  analysis?: DiffAnalysis;
}
|
|
69
|
+
|
|
70
|
+
/**
 * Categories of key decision point at which a screenshot may be recorded.
 */
export type KeyDecisionPoint =
  | 'authentication'
  | 'mfa_challenge'
  | 'navigation'
  | 'form_submit'
  | 'data_extraction'
  | 'error_recovery';
|
|
80
|
+
|
|
81
|
+
/**
 * Bookkeeping record kept for each screenshot attached to a logged action.
 */
export interface ScreenshotMetadata {
  /** Session the screenshot belongs to. */
  sessionId: string;
  /** Intent ID of the element the action targeted. */
  intentId: string;
  /** Index, within the session log, of the action that carried the screenshot. */
  actionIndex: number;
  /** Kind of decision point the screenshot documents. */
  decisionPoint: KeyDecisionPoint;
  /** Time the screenshot was taken. */
  timestamp: number;
}
|
|
96
|
+
|
|
97
|
+
// =============================================================================
|
|
98
|
+
// Default Configuration
|
|
99
|
+
// =============================================================================
|
|
100
|
+
|
|
101
|
+
/** Settings applied for every option the caller does not override. */
const DEFAULT_CONFIG: DeterministicLoggerConfig = {
  // Cap on stored screenshots per session.
  maxScreenshotsPerSession: 50,
  // Screenshot capture is on unless explicitly disabled.
  captureScreenshots: true,
  // Digest used for verification proofs.
  hashAlgorithm: 'sha256',
};
|
|
106
|
+
|
|
107
|
+
// =============================================================================
|
|
108
|
+
// Deterministic Logger Implementation
|
|
109
|
+
// =============================================================================
|
|
110
|
+
|
|
111
|
+
/**
 * Deterministic Logger for recording and verifying agent actions.
 *
 * Keeps, in memory and keyed by session ID, the ordered list of logged
 * actions together with metadata for every screenshot attached to one of
 * them. On top of that log it can hash an action sequence, build and re-check
 * verification proofs, and diff the logs of two sessions.
 */
export class DeterministicLogger {
  private config: DeterministicLoggerConfig;
  private logs: Map<string, ActionLogEntry[]>;
  private screenshots: Map<string, ScreenshotMetadata[]>;

  /**
   * @param config - Overrides for the default configuration. Keys that are
   *   omitted, or explicitly set to `undefined`, fall back to the defaults.
   */
  constructor(config: Partial<DeterministicLoggerConfig> = {}) {
    // Resolve each option individually: spreading `config` over the defaults
    // would let an explicit `undefined` erase a default (e.g. leaving no hash
    // algorithm to pass to createHash).
    this.config = {
      maxScreenshotsPerSession:
        config.maxScreenshotsPerSession ?? DEFAULT_CONFIG.maxScreenshotsPerSession,
      captureScreenshots:
        config.captureScreenshots ?? DEFAULT_CONFIG.captureScreenshots,
      hashAlgorithm: config.hashAlgorithm ?? DEFAULT_CONFIG.hashAlgorithm,
    };
    this.logs = new Map();
    this.screenshots = new Map();
  }

  // ===========================================================================
  // Action Logging
  // ===========================================================================

  /**
   * Log a DOM interaction action.
   *
   * The entry is stamped with the current time and appended to its session's
   * log. If it carries a screenshot and screenshot capture is enabled,
   * screenshot metadata is recorded with a decision point inferred from the
   * entry.
   *
   * Requirements: 7.3, 8.1, 8.3
   *
   * @param entry - The action to record, without a timestamp.
   * @returns The stored entry on success; an error message when `sessionId`,
   *   `intentId` or `action` is missing or when recording throws.
   */
  logAction(entry: Omit<ActionLogEntry, 'timestamp'>): LogActionResult {
    return this.recordAction(entry);
  }

  /**
   * Log an action with a screenshot at a key decision point.
   *
   * Once the session already has `maxScreenshotsPerSession` tracked
   * screenshots, the action is logged without any screenshot. Note that
   * screenshots are only tracked (and therefore only counted against the
   * limit) while `captureScreenshots` is enabled.
   *
   * Requirements: 8.3
   *
   * @param entry - The action to record, without a timestamp.
   * @param screenshot - Screenshot to attach to the entry.
   * @param _decisionPoint - Decision point stored in the screenshot metadata.
   * @returns Same contract as {@link DeterministicLogger.logAction}.
   */
  logActionWithScreenshot(
    entry: Omit<ActionLogEntry, 'timestamp'>,
    screenshot: string,
    _decisionPoint: KeyDecisionPoint
  ): LogActionResult {
    const tracked = this.screenshots.get(entry.sessionId) ?? [];

    if (tracked.length >= this.config.maxScreenshotsPerSession) {
      // Limit reached: also drop a screenshot that may already be embedded in
      // the entry, otherwise it would still be stored and tracked.
      const withoutScreenshot = { ...entry };
      delete withoutScreenshot.screenshot;
      return this.recordAction(withoutScreenshot);
    }

    // Hand the caller's decision point through so the metadata reflects it
    // instead of a guess derived from the intent ID.
    return this.recordAction({ ...entry, screenshot }, _decisionPoint);
  }

  /**
   * Shared implementation behind both logging entry points.
   *
   * @param entry - The action to record, without a timestamp.
   * @param decisionPoint - Decision point to store with the screenshot
   *   metadata; inferred from the entry when omitted.
   */
  private recordAction(
    entry: Omit<ActionLogEntry, 'timestamp'>,
    decisionPoint?: KeyDecisionPoint
  ): LogActionResult {
    try {
      // Validate required fields
      if (!entry.sessionId || !entry.intentId || !entry.action) {
        return {
          success: false,
          error: 'Missing required fields: sessionId, intentId, or action',
        };
      }

      // Create the complete entry with timestamp
      const completeEntry: ActionLogEntry = {
        ...entry,
        timestamp: Date.now(),
      };

      // Get or create the log for this session
      const sessionLog = this.logs.get(entry.sessionId) ?? [];
      sessionLog.push(completeEntry);
      this.logs.set(entry.sessionId, sessionLog);

      // Track screenshot if present
      if (completeEntry.screenshot && this.config.captureScreenshots) {
        this.trackScreenshot(completeEntry, sessionLog.length - 1, decisionPoint);
      }

      return { success: true, entry: completeEntry };
    } catch (error) {
      return {
        success: false,
        error: error instanceof Error ? error.message : 'Unknown error',
      };
    }
  }

  /**
   * Track screenshot metadata.
   *
   * @param entry - The stored entry that carries the screenshot.
   * @param actionIndex - Position of `entry` within its session log.
   * @param decisionPoint - Explicit decision point; inferred when omitted.
   */
  private trackScreenshot(
    entry: ActionLogEntry,
    actionIndex: number,
    decisionPoint?: KeyDecisionPoint
  ): void {
    const metadata: ScreenshotMetadata = {
      timestamp: entry.timestamp,
      sessionId: entry.sessionId,
      actionIndex,
      decisionPoint: decisionPoint ?? this.inferDecisionPoint(entry),
      intentId: entry.intentId,
    };

    const sessionScreenshots = this.screenshots.get(entry.sessionId) ?? [];
    sessionScreenshots.push(metadata);
    this.screenshots.set(entry.sessionId, sessionScreenshots);
  }

  /**
   * Infer the decision point type from an action entry.
   *
   * Checks run in a fixed order and the first match wins, so an intent ID
   * matching several keywords resolves to the earliest rule below.
   */
  private inferDecisionPoint(entry: ActionLogEntry): KeyDecisionPoint {
    const intentLower = entry.intentId.toLowerCase();

    if (intentLower.includes('submit') || intentLower.includes('form')) {
      return 'form_submit';
    }
    if (entry.action === 'navigate' || intentLower.includes('nav')) {
      return 'navigation';
    }
    if (intentLower.includes('login') || intentLower.includes('auth')) {
      return 'authentication';
    }
    if (entry.action === 'extract' || intentLower.includes('data')) {
      return 'data_extraction';
    }
    if (intentLower.includes('mfa') || intentLower.includes('2fa')) {
      return 'mfa_challenge';
    }
    if (entry.result === 'failure') {
      return 'error_recovery';
    }

    return 'data_extraction'; // Default
  }

  /**
   * Get the action log for a session.
   *
   * For a known session this is the logger's own array, not a copy; an
   * unknown session yields a fresh empty array.
   *
   * Requirements: 7.3, 8.1
   */
  getLog(sessionId: string): ActionLogEntry[] {
    return this.logs.get(sessionId) ?? [];
  }

  /**
   * Get all screenshots for a session, in log order. Entries without a
   * screenshot (or with an empty one) are skipped.
   */
  getScreenshots(sessionId: string): string[] {
    return this.getLog(sessionId)
      .map((entry) => entry.screenshot)
      .filter((shot): shot is string => Boolean(shot));
  }

  /**
   * Get screenshot metadata for a session.
   */
  getScreenshotMetadata(sessionId: string): ScreenshotMetadata[] {
    return this.screenshots.get(sessionId) ?? [];
  }

  // ===========================================================================
  // Cryptographic Proof Generation
  // ===========================================================================

  /**
   * Generate a cryptographic proof of the action sequence.
   *
   * Requirements: 8.2
   *
   * @param sessionId - Session whose log is hashed.
   * @param claimedResult - Result the agent claims to have produced; it is
   *   stored in the proof and checked against the log.
   * @returns The proof on success; an error when the session has no logged
   *   actions or when proof construction throws.
   */
  generateProof(sessionId: string, claimedResult: unknown): GenerateProofResult {
    try {
      const log = this.getLog(sessionId);

      if (log.length === 0) {
        return {
          success: false,
          error: `No action log found for session: ${sessionId}`,
        };
      }

      const proof: VerificationProof = {
        sessionId,
        actionCount: log.length,
        hash: this.hashActionSequence(log),
        screenshots: this.getScreenshots(sessionId),
        claimedResult,
        resultMatchesActions: this.validateResultAgainstActions(claimedResult, log),
      };

      return { success: true, proof };
    } catch (error) {
      return {
        success: false,
        error: error instanceof Error ? error.message : 'Unknown error',
      };
    }
  }

  /**
   * Hash an action sequence to create a cryptographic proof.
   *
   * Each entry is serialized (without timestamp or screenshot), the pieces are
   * joined with `|`, and the result is digested with the configured algorithm.
   *
   * Requirements: 8.2
   *
   * @returns The digest as a hex string.
   */
  hashActionSequence(log: ActionLogEntry[]): string {
    const serialized = log
      .map((entry) => this.serializeActionEntry(entry))
      .join('|');

    return createHash(this.config.hashAlgorithm).update(serialized).digest('hex');
  }

  /**
   * Serialize an action entry for hashing.
   * Excludes screenshots and timestamps to keep hash deterministic and focused on actions.
   */
  private serializeActionEntry(entry: ActionLogEntry): string {
    // Key order is part of the hash input; do not reorder.
    return JSON.stringify({
      sessionId: entry.sessionId,
      action: entry.action,
      intentId: entry.intentId,
      value: entry.value,
      result: entry.result,
    });
  }

  /**
   * Validate that a claimed result can be derived from the action log.
   *
   * Requirements: 8.5
   *
   * @returns `true` when nothing is claimed (`null`/`undefined`) or when no
   *   rule below rejects the claim; `false` when the log is empty, when an
   *   object result carrying data fields is claimed without any `extract`
   *   action, or when the log has failures and no successes.
   */
  validateResultAgainstActions(
    claimedResult: unknown,
    log: ActionLogEntry[]
  ): boolean {
    // No claim, nothing to verify.
    if (claimedResult === null || claimedResult === undefined) {
      return true;
    }

    // A claim cannot be derived from an empty log.
    if (log.length === 0) {
      return false;
    }

    const hasExtractAction = log.some((entry) => entry.action === 'extract');

    // Claiming data without ever extracting anything is treated as a
    // hallucination.
    if (typeof claimedResult === 'object' && !hasExtractAction) {
      const resultObj = claimedResult as Record<string, unknown>;
      const bookkeepingKeys = ['metadata', 'verificationHash', 'confidence'];
      const hasDataFields =
        'data' in resultObj ||
        Object.keys(resultObj).some((key) => !bookkeepingKeys.includes(key));

      if (hasDataFields) {
        return false;
      }
    }

    // Failures with not a single success cannot back any result.
    const anySuccess = log.some((entry) => entry.result === 'success');
    const anyFailure = log.some((entry) => entry.result === 'failure');
    if (!anySuccess && anyFailure) {
      return false;
    }

    // Basic validation passed
    return true;
  }

  /**
   * Verify that a proof hash matches the current action log.
   *
   * Requirements: 8.2
   *
   * @returns `true` only when the session's current log has the proof's
   *   action count and hashes to the proof's hash.
   */
  verifyProof(proof: VerificationProof): boolean {
    const log = this.getLog(proof.sessionId);

    if (log.length !== proof.actionCount) {
      return false;
    }

    return this.hashActionSequence(log) === proof.hash;
  }

  // ===========================================================================
  // Diff Analysis
  // ===========================================================================

  /**
   * Compare action logs of two agents to find divergence.
   *
   * Requirements: 8.4
   *
   * @returns The divergence index, each agent's path from that index onward
   *   and a recommendation; an error when both logs are empty or when the
   *   comparison throws.
   */
  compareAgents(agentASessionId: string, agentBSessionId: string): CompareAgentsResult {
    try {
      const logA = this.getLog(agentASessionId);
      const logB = this.getLog(agentBSessionId);

      if (logA.length === 0 && logB.length === 0) {
        return {
          success: false,
          error: 'Both agents have empty action logs',
        };
      }

      const divergencePoint = this.findDivergencePoint(logA, logB);

      const analysis: DiffAnalysis = {
        divergencePoint,
        agentAPath: logA.slice(divergencePoint),
        agentBPath: logB.slice(divergencePoint),
        recommendation: this.determineRecommendation(logA, logB, divergencePoint),
      };

      return { success: true, analysis };
    } catch (error) {
      return {
        success: false,
        error: error instanceof Error ? error.message : 'Unknown error',
      };
    }
  }

  /**
   * Find the index where two action logs diverge.
   *
   * @returns The first index at which the entries differ, or the length of
   *   the shorter log when one log is a prefix of (or identical to) the other.
   */
  private findDivergencePoint(
    logA: ActionLogEntry[],
    logB: ActionLogEntry[]
  ): number {
    const sharedLength = Math.min(logA.length, logB.length);

    for (let i = 0; i < sharedLength; i++) {
      if (!this.actionsMatch(logA[i], logB[i])) {
        return i;
      }
    }

    // No mismatch in the shared prefix: divergence starts where the shorter
    // log ends (which is the full length when both logs are identical).
    return sharedLength;
  }

  /**
   * Check if two action entries match (ignoring timestamps).
   */
  private actionsMatch(a: ActionLogEntry, b: ActionLogEntry): boolean {
    return (
      a.action === b.action &&
      a.intentId === b.intentId &&
      a.value === b.value &&
      a.result === b.result
    );
  }

  /**
   * Determine recommendation based on action log analysis.
   *
   * Rules, in order: a log consisting only of failures loses (an empty log
   * counts as such, since `every` is vacuously true); otherwise the agent with
   * more successful actions after the divergence point wins; otherwise the
   * agent with fewer failures over its whole log wins; otherwise it is a tie.
   */
  private determineRecommendation(
    logA: ActionLogEntry[],
    logB: ActionLogEntry[],
    divergencePoint: number
  ): 'agent_a' | 'agent_b' | 'tie' | 'both_invalid' {
    const isFailure = (e: ActionLogEntry): boolean => e.result === 'failure';
    const isSuccess = (e: ActionLogEntry): boolean => e.result === 'success';

    const allFailedA = logA.every(isFailure);
    const allFailedB = logB.every(isFailure);

    if (allFailedA && allFailedB) {
      return 'both_invalid';
    }
    if (allFailedA) {
      return 'agent_b';
    }
    if (allFailedB) {
      return 'agent_a';
    }

    // Compare counts of successful actions after the divergence point.
    const successA = logA.slice(divergencePoint).filter(isSuccess).length;
    const successB = logB.slice(divergencePoint).filter(isSuccess).length;
    if (successA !== successB) {
      return successA > successB ? 'agent_a' : 'agent_b';
    }

    // Equal successes: prefer the agent with fewer failures overall.
    const failuresA = logA.filter(isFailure).length;
    const failuresB = logB.filter(isFailure).length;
    if (failuresA !== failuresB) {
      return failuresA < failuresB ? 'agent_a' : 'agent_b';
    }

    // Truly tied
    return 'tie';
  }

  // ===========================================================================
  // Utility Methods
  // ===========================================================================

  /**
   * Clear the log and the screenshot metadata for a session.
   */
  clearLog(sessionId: string): void {
    this.logs.delete(sessionId);
    this.screenshots.delete(sessionId);
  }

  /**
   * Clear all logs and all screenshot metadata.
   */
  clearAll(): void {
    this.logs.clear();
    this.screenshots.clear();
  }

  /**
   * Get the total number of actions logged across all sessions.
   */
  getTotalActionCount(): number {
    let total = 0;
    for (const log of this.logs.values()) {
      total += log.length;
    }
    return total;
  }

  /**
   * Get all session IDs with logs.
   */
  getSessionIds(): string[] {
    return Array.from(this.logs.keys());
  }

  /**
   * Check if a session has any logged actions.
   */
  hasLog(sessionId: string): boolean {
    const log = this.logs.get(sessionId);
    return log !== undefined && log.length > 0;
  }

  /**
   * Get a copy of the configuration.
   */
  getConfig(): DeterministicLoggerConfig {
    return { ...this.config };
  }
}
|
|
617
|
+
|
|
618
|
+
// =============================================================================
|
|
619
|
+
// Factory Function
|
|
620
|
+
// =============================================================================
|
|
621
|
+
|
|
622
|
+
/**
 * Factory wrapper around the DeterministicLogger constructor.
 *
 * @param config - Optional configuration overrides; defaults to an empty
 *   object, which is passed to the constructor as-is.
 * @returns A newly constructed logger.
 */
export function createDeterministicLogger(
  config: Partial<DeterministicLoggerConfig> = {}
): DeterministicLogger {
  const logger = new DeterministicLogger(config);
  return logger;
}
|