erosolar-cli 1.7.27 → 1.7.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser/BrowserSessionManager.js +1 -1
- package/dist/browser/BrowserSessionManager.js.map +1 -1
- package/dist/core/agentRulebook.js +1 -1
- package/dist/core/agentRulebook.js.map +1 -1
- package/dist/core/contextManager.js +2 -2
- package/dist/core/contextManager.js.map +1 -1
- package/dist/core/isolatedVerifier.d.ts +18 -0
- package/dist/core/isolatedVerifier.d.ts.map +1 -0
- package/dist/core/isolatedVerifier.js +349 -0
- package/dist/core/isolatedVerifier.js.map +1 -0
- package/dist/core/responseVerifier.d.ts +11 -2
- package/dist/core/responseVerifier.d.ts.map +1 -1
- package/dist/core/responseVerifier.js +108 -73
- package/dist/core/responseVerifier.js.map +1 -1
- package/dist/intelligence/testGenerator.js +2 -2
- package/dist/intelligence/testGenerator.js.map +1 -1
- package/dist/providers/openaiChatCompletionsProvider.js +5 -5
- package/dist/providers/openaiChatCompletionsProvider.js.map +1 -1
- package/dist/security/active-stack-security.js +2 -2
- package/dist/security/active-stack-security.js.map +1 -1
- package/dist/shell/bracketedPasteManager.enhanced.d.ts +2 -0
- package/dist/shell/bracketedPasteManager.enhanced.d.ts.map +1 -0
- package/dist/shell/bracketedPasteManager.enhanced.js +4 -0
- package/dist/shell/bracketedPasteManager.enhanced.js.map +1 -0
- package/dist/shell/inputProcessor.js +2 -2
- package/dist/shell/inputProcessor.js.map +1 -1
- package/dist/shell/interactiveShell.d.ts +16 -2
- package/dist/shell/interactiveShell.d.ts.map +1 -1
- package/dist/shell/interactiveShell.js +65 -22
- package/dist/shell/interactiveShell.js.map +1 -1
- package/dist/tools/cloudTools.js +2 -2
- package/dist/tools/cloudTools.js.map +1 -1
- package/dist/tools/devTools.js +2 -2
- package/dist/tools/devTools.js.map +1 -1
- package/dist/tools/diffUtils.js +22 -2
- package/dist/tools/diffUtils.js.map +1 -1
- package/dist/tools/fileTools.d.ts.map +1 -1
- package/dist/tools/fileTools.js +20 -2
- package/dist/tools/fileTools.js.map +1 -1
- package/dist/tools/frontendTestingTools.js +1 -1
- package/dist/tools/frontendTestingTools.js.map +1 -1
- package/dist/tools/globTools.js +1 -1
- package/dist/tools/globTools.js.map +1 -1
- package/dist/tools/grepTools.js +1 -1
- package/dist/tools/grepTools.js.map +1 -1
- package/dist/tools/webTools.js +2 -2
- package/dist/tools/webTools.js.map +1 -1
- package/dist/ui/ShellUIAdapter.d.ts +5 -0
- package/dist/ui/ShellUIAdapter.d.ts.map +1 -1
- package/dist/ui/ShellUIAdapter.js +30 -7
- package/dist/ui/ShellUIAdapter.js.map +1 -1
- package/dist/ui/advancedTheme.js +1 -1
- package/dist/ui/advancedTheme.js.map +1 -1
- package/dist/ui/diffViewer.js +1 -1
- package/dist/ui/diffViewer.js.map +1 -1
- package/dist/ui/display.js +3 -3
- package/dist/ui/display.js.map +1 -1
- package/dist/ui/logFormatter.js +1 -1
- package/dist/ui/logFormatter.js.map +1 -1
- package/dist/ui/outputSummarizer.js +6 -7
- package/dist/ui/outputSummarizer.js.map +1 -1
- package/dist/ui/persistentPrompt.js +4 -4
- package/dist/ui/persistentPrompt.js.map +1 -1
- package/dist/ui/tableFormatter.js +4 -4
- package/dist/ui/tableFormatter.js.map +1 -1
- package/dist/ui/textHighlighter.js +2 -2
- package/dist/ui/textHighlighter.js.map +1 -1
- package/dist/ui/toolDisplay.d.ts +18 -0
- package/dist/ui/toolDisplay.d.ts.map +1 -1
- package/dist/ui/toolDisplay.js +270 -6
- package/dist/ui/toolDisplay.js.map +1 -1
- package/dist/ui/treeVisualizer.js +1 -1
- package/dist/ui/treeVisualizer.js.map +1 -1
- package/package.json +12 -3
- package/scripts/isolated-verification-runner.mjs +364 -0
- package/scripts/isolated-verification-wrapper.mjs +276 -0
|
@@ -0,0 +1,349 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Isolated Verification Runner
|
|
4
|
+
*
|
|
5
|
+
* This script runs in a completely separate Node.js process to verify
|
|
6
|
+
* assistant responses. It receives verification requests via stdin and
|
|
7
|
+
* outputs results via stdout.
|
|
8
|
+
*
|
|
9
|
+
* This ensures verification is completely isolated from the main CLI process:
|
|
10
|
+
* - Separate memory space
|
|
11
|
+
* - Separate event loop
|
|
12
|
+
* - Independent error handling
|
|
13
|
+
* - No shared state with main process
|
|
14
|
+
*
|
|
15
|
+
* @license MIT
|
|
16
|
+
*/
|
|
17
|
+
import { createProvider } from '../providers/providerFactory.js';
|
|
18
|
+
import { exec } from 'node:child_process';
|
|
19
|
+
import { promisify } from 'node:util';
|
|
20
|
+
const execAsync = promisify(exec);
|
|
21
|
+
// ============================================================================
|
|
22
|
+
// CLAIM EXTRACTION
|
|
23
|
+
// ============================================================================
|
|
24
|
+
const EXTRACT_CLAIMS_PROMPT = `Extract ALL verifiable claims from this AI assistant response.
|
|
25
|
+
|
|
26
|
+
RESPONSE:
|
|
27
|
+
---
|
|
28
|
+
{RESPONSE}
|
|
29
|
+
---
|
|
30
|
+
|
|
31
|
+
CONTEXT: {CONTEXT}
|
|
32
|
+
WORKING_DIR: {WORKING_DIR}
|
|
33
|
+
|
|
34
|
+
For each claim, determine:
|
|
35
|
+
1. What specific assertion is being made
|
|
36
|
+
2. Category: file_op (created/modified/deleted files), code (compiles/tests pass), command (executed successfully), state (something changed), behavior (feature works), fact (verifiable truth)
|
|
37
|
+
3. How it can be verified (shell command, file check, etc.)
|
|
38
|
+
4. Priority: critical (must verify), high (should verify), medium (nice to verify), low (optional)
|
|
39
|
+
|
|
40
|
+
Return JSON array:
|
|
41
|
+
[{
|
|
42
|
+
"id": "c1",
|
|
43
|
+
"statement": "the specific claim",
|
|
44
|
+
"category": "file_op|code|command|state|behavior|fact",
|
|
45
|
+
"verifiable": true,
|
|
46
|
+
"priority": "critical|high|medium|low",
|
|
47
|
+
"context": {"path": "/path/if/relevant", "command": "if relevant"}
|
|
48
|
+
}]
|
|
49
|
+
|
|
50
|
+
Output ONLY valid JSON array.`;
|
|
51
|
+
/**
 * Ask the LLM to list the verifiable claims made in an assistant response.
 *
 * Fills EXTRACT_CLAIMS_PROMPT with the response (first 8000 characters), the
 * last three conversation-history entries joined by newlines, and the working
 * directory, then parses the first `[...]` span of the LLM reply as JSON.
 *
 * @param {string} response - Assistant response text to analyse.
 * @param {string} workingDir - Value substituted for {WORKING_DIR}.
 * @param {string[]} [conversationHistory] - Prior messages; may be omitted.
 * @param {(prompt: string) => Promise<string>} llmQuery - Sends a prompt, resolves to the reply text.
 * @returns {Promise<object[]>} Parsed claims, or [] when the query fails, the
 *   reply holds no JSON array, or the array cannot be parsed.
 */
async function extractClaims(response, workingDir, conversationHistory, llmQuery) {
    try {
        const values = {
            RESPONSE: response.slice(0, 8000),
            // History is optional; a missing one simply means "no context".
            CONTEXT: (conversationHistory ?? []).slice(-3).join('\n'),
            WORKING_DIR: workingDir
        };
        // One pass over the template with a replacer function:
        //  - `$&`, `$'`, `$1` ... inside the inserted text stay literal, and
        //  - placeholder-looking text inside the response is never substituted.
        const prompt = EXTRACT_CLAIMS_PROMPT.replace(/\{(RESPONSE|CONTEXT|WORKING_DIR)\}/g, (_placeholder, key) => values[key]);
        const reply = await llmQuery(prompt);
        const match = reply.match(/\[[\s\S]*\]/);
        if (match) {
            return JSON.parse(match[0]);
        }
    }
    catch {
        // Best effort by design: any failure is reported as "no claims".
    }
    return [];
}
|
|
68
|
+
// ============================================================================
|
|
69
|
+
// TEST GENERATION
|
|
70
|
+
// ============================================================================
|
|
71
|
+
const GENERATE_TESTS_PROMPT = `Generate isolated runtime tests for these claims.
|
|
72
|
+
|
|
73
|
+
CLAIMS:
|
|
74
|
+
{CLAIMS}
|
|
75
|
+
|
|
76
|
+
WORKING_DIR: {WORKING_DIR}
|
|
77
|
+
PLATFORM: {PLATFORM}
|
|
78
|
+
|
|
79
|
+
For each claim, generate a test that verifies it using:
|
|
80
|
+
- Shell commands (for file checks, git status, etc.)
|
|
81
|
+
- Expected output patterns
|
|
82
|
+
|
|
83
|
+
Return JSON array:
|
|
84
|
+
[{
|
|
85
|
+
"id": "test-1",
|
|
86
|
+
"description": "what we're testing",
|
|
87
|
+
"shellCommands": ["ls -la path", "cat file"],
|
|
88
|
+
"expectedOutputs": ["pattern1", "pattern2"],
|
|
89
|
+
"expectedBehavior": "description for LLM assessment",
|
|
90
|
+
"timeout": 30000
|
|
91
|
+
}]
|
|
92
|
+
|
|
93
|
+
Use READ-ONLY commands only. No destructive operations.
|
|
94
|
+
Output ONLY valid JSON array.`;
|
|
95
|
+
/**
 * Produce runtime tests for a list of extracted claims.
 *
 * First asks the LLM (GENERATE_TESTS_PROMPT, at most the first 10 claims) and
 * returns the first JSON array found in its reply. If that fails for any
 * reason, falls back to simple built-in tests for verifiable claims with
 * priority `critical` or `high`.
 *
 * @param {object[]} claims - Claims as returned by claim extraction.
 * @param {string} workingDir - Value substituted for {WORKING_DIR}.
 * @param {(prompt: string) => Promise<string>} llmQuery - Sends a prompt, resolves to the reply text.
 * @returns {Promise<object[]>} Test descriptors; [] when there are no claims.
 */
async function generateTests(claims, workingDir, llmQuery) {
    if (claims.length === 0)
        return [];
    try {
        const values = {
            CLAIMS: JSON.stringify(claims.slice(0, 10)),
            WORKING_DIR: workingDir,
            PLATFORM: process.platform
        };
        // Replacer function keeps `$&`, `$'`, `$1` ... in claim text literal and
        // prevents placeholder-like text inside the claims from being substituted.
        const prompt = GENERATE_TESTS_PROMPT.replace(/\{(CLAIMS|WORKING_DIR|PLATFORM)\}/g, (_placeholder, key) => values[key]);
        const reply = await llmQuery(prompt);
        const match = reply.match(/\[[\s\S]*\]/);
        if (match) {
            return JSON.parse(match[0]);
        }
    }
    catch {
        // Fall through to basic tests
    }
    // Fallback: generate basic tests
    return claims
        .filter((c) => c.verifiable && (c.priority === 'critical' || c.priority === 'high'))
        .map((c, i) => {
            const test = {
                id: `test-${i}`,
                description: c.statement,
                shellCommands: [],
                expectedBehavior: c.statement,
                timeout: 30000
            };
            // Claims come from LLM output, so `context` may be absent.
            const claimedPath = c.context?.path;
            if (c.category === 'file_op' && claimedPath) {
                // POSIX single-quoting: nothing inside is expanded by the shell, and an
                // embedded quote is written as '\'' — the path cannot inject commands.
                const quotedPath = `'${String(claimedPath).replace(/'/g, "'\\''")}'`;
                test.shellCommands = [`test -f ${quotedPath} && echo "EXISTS" || echo "NOT_FOUND"`];
                test.expectedOutputs = ['EXISTS'];
            }
            else if (c.category === 'code') {
                test.shellCommands = ['npm run build 2>&1 | tail -5'];
            }
            return test;
        });
}
|
|
132
|
+
// ============================================================================
|
|
133
|
+
// TEST EXECUTION
|
|
134
|
+
// ============================================================================
|
|
135
|
+
/**
 * Run one verification shell command and capture its output.
 *
 * Commands matching the deny-list below are refused without being executed.
 * NOTE(review): the deny-list is a coarse pattern filter, not a sandbox;
 * commands are still executed through a shell.
 *
 * @param {string} cmd - Shell command line to execute.
 * @param {string} cwd - Working directory for the command.
 * @returns {Promise<{ok: boolean, out: string}>} `ok` is true when the command
 *   exited successfully; `out` is stdout followed by stderr on success, or the
 *   failure message followed by any captured stdout on failure.
 */
async function runShellCommand(cmd, cwd) {
    // Safety check - block dangerous commands
    const dangerous = [/\brm\s/i, /rmdir/i, /sudo/i, /chmod\s*7/i, /eval\s*\(/i, /DROP\s+TABLE/i, /DELETE\s+FROM/i];
    const blockedBy = dangerous.find((pattern) => pattern.test(cmd));
    if (blockedBy) {
        return { ok: false, out: `Blocked dangerous command: ${blockedBy.source}` };
    }
    try {
        const { stdout, stderr } = await execAsync(cmd, { cwd, timeout: 30000 });
        return { ok: true, out: stdout + stderr };
    }
    catch (e) {
        const message = e instanceof Error ? e.message : 'Command failed';
        // A rejected exec() error still carries what the command printed. The
        // message already includes stderr, so only stdout needs to be appended;
        // dropping it would hide the evidence of *why* the command failed.
        const captured = typeof e?.stdout === 'string' ? e.stdout : '';
        return { ok: false, out: [message, captured].filter(Boolean).join('\n') };
    }
}
|
|
150
|
+
/**
 * Execute a single generated test and decide whether it passed.
 *
 * Steps: run every shell command (collecting a transcript), check each
 * expected output pattern against that transcript, and - when the test has an
 * `expectedBehavior` - ask the LLM whether the transcript demonstrates it.
 * The test succeeds when nothing was unmatched and either something matched
 * or the test carried no expectations at all.
 *
 * @param {object} test - Test descriptor (`shellCommands`, `expectedOutputs`, `expectedBehavior`, ...).
 * @param {string} cwd - Working directory for shell commands.
 * @param {(prompt: string) => Promise<string>} llmQuery - Sends a prompt, resolves to the reply text.
 * @returns {Promise<object>} Result with `test`, `success`, `output`, `errors`,
 *   `matchedPatterns`, `unmatchedPatterns` and, when assessed, `llmAssessment`.
 */
async function runTest(test, cwd, llmQuery) {
    const result = {
        test,
        success: false,
        output: '',
        errors: '',
        matchedPatterns: [],
        unmatchedPatterns: []
    };
    // Literal containment first, then case-insensitive regex. Patterns come from
    // an LLM and are frequently not valid regexes (e.g. "foo("); such a pattern
    // simply does not match instead of aborting the whole test.
    const appearsInOutput = (pattern) => {
        const needle = String(pattern);
        if (result.output.includes(needle)) {
            return true;
        }
        try {
            return new RegExp(needle, 'i').test(result.output);
        }
        catch {
            return false;
        }
    };
    try {
        // Test descriptors are parsed LLM output: only trust real arrays, otherwise
        // a plain string would be iterated character by character.
        const commands = Array.isArray(test.shellCommands) ? test.shellCommands : [];
        const expectedOutputs = Array.isArray(test.expectedOutputs) ? test.expectedOutputs : [];
        // Run shell commands
        for (const cmd of commands) {
            const shellResult = await runShellCommand(cmd, cwd);
            result.output += `$ ${cmd}\n${shellResult.out}\n`;
            if (!shellResult.ok) {
                result.errors += `${shellResult.out}\n`;
            }
        }
        // Check expected output patterns
        for (const pattern of expectedOutputs) {
            const bucket = appearsInOutput(pattern) ? result.matchedPatterns : result.unmatchedPatterns;
            bucket.push(pattern);
        }
        // LLM assessment of behavior
        if (test.expectedBehavior) {
            const assessPrompt = `Assess if this output demonstrates the expected behavior.

EXPECTED: ${test.expectedBehavior}

OUTPUT:
---
${result.output.slice(0, 4000)}
---

Return JSON: {"matches": true/false, "confidence": 0-100, "reasoning": "explanation"}`;
            try {
                const assessment = await llmQuery(assessPrompt);
                const match = assessment.match(/\{[\s\S]*\}/);
                if (match) {
                    const parsed = JSON.parse(match[0]);
                    result.llmAssessment = `${parsed.matches ? '✅' : '❌'} [${parsed.confidence}%] ${parsed.reasoning}`;
                    // A missing or non-numeric confidence must not count as confident:
                    // `undefined < 70` is false and would otherwise slip through.
                    const confidence = Number(parsed.confidence);
                    const confident = Number.isFinite(confidence) && confidence >= 70;
                    const bucket = parsed.matches && confident ? result.matchedPatterns : result.unmatchedPatterns;
                    bucket.push(`behavior: ${test.expectedBehavior}`);
                }
            }
            catch {
                result.llmAssessment = 'LLM assessment failed';
            }
        }
        // Determine success
        const hasNoExpectations = expectedOutputs.length === 0 && !test.expectedBehavior;
        result.success = result.unmatchedPatterns.length === 0 &&
            (result.matchedPatterns.length > 0 || hasNoExpectations);
    }
    catch (err) {
        // Append so that command errors collected before the failure are kept.
        result.errors += `${err instanceof Error ? err.message : 'Unknown error'}\n`;
    }
    return result;
}
|
|
220
|
+
// ============================================================================
|
|
221
|
+
// MAIN VERIFICATION
|
|
222
|
+
// ============================================================================
|
|
223
|
+
/**
 * Pair each claim (by index) with the test result that covers it.
 *
 * A result whose test description repeats the claim statement is preferred;
 * remaining claims fall back to the result at the same index, provided that
 * result has not already been assigned to another claim.
 *
 * @param {object[]} claims
 * @param {object[]} testResults
 * @returns {Map<number, object>} claim index -> test result
 */
function pairClaimsWithResults(claims, testResults) {
    const pairs = new Map();
    const taken = new Set();
    // Pass 1: explicit link through the description.
    claims.forEach((claim, index) => {
        if (typeof claim.statement !== 'string') {
            return;
        }
        const linked = testResults.find((r) => !taken.has(r) && r.test?.description === claim.statement);
        if (linked) {
            pairs.set(index, linked);
            taken.add(linked);
        }
    });
    // Pass 2: positional pairing for everything still unassigned.
    claims.forEach((_claim, index) => {
        const positional = testResults[index];
        if (!pairs.has(index) && positional && !taken.has(positional)) {
            pairs.set(index, positional);
            taken.add(positional);
        }
    });
    return pairs;
}
/**
 * Verify an assistant response end to end: extract claims, generate and run
 * tests, then summarise the outcome.
 *
 * @param {object} request - Verification request with `provider`, `model`,
 *   `response`, `workingDirectory` and optional `conversationHistory`.
 * @returns {Promise<object>} Report with `responseId`, `timestamp`, `claims`,
 *   `results`, `summary` ({total, verified, failed, inconclusive}),
 *   `overallVerdict` ('verified' | 'partially_verified' | 'contradicted' |
 *   'unverified') and `trustScore` (percentage of verified claims; 50 when
 *   there are no claims).
 */
async function verify(request) {
    const timestamp = new Date().toISOString();
    const id = `verify-${Date.now()}`;
    // Create LLM query function using isolated provider
    const provider = createProvider({
        provider: request.provider,
        model: request.model,
    });
    const llmQuery = async (prompt) => {
        // No tools for verification queries
        const response = await provider.generate([{ role: 'user', content: prompt }], []);
        if (response.type === 'message' && response.content) {
            return response.content;
        }
        return '';
    };
    // Extract claims (conversation history is optional in the request)
    const claims = await extractClaims(request.response, request.workingDirectory, request.conversationHistory ?? [], llmQuery);
    if (claims.length === 0) {
        return {
            responseId: id,
            timestamp,
            claims: [],
            results: [],
            summary: { total: 0, verified: 0, failed: 0, inconclusive: 0 },
            overallVerdict: 'unverified',
            trustScore: 50
        };
    }
    // Generate tests
    const tests = await generateTests(claims, request.workingDirectory, llmQuery);
    // Run tests sequentially so commands cannot interfere with each other
    const testResults = [];
    for (const test of tests) {
        testResults.push(await runTest(test, request.workingDirectory, llmQuery));
    }
    // Map results to claims. Test generation may skip or filter claims, so a
    // result is not necessarily at the same index as the claim it covers.
    const pairs = pairClaimsWithResults(claims, testResults);
    const results = [];
    let verified = 0;
    let failed = 0;
    for (const [index, claim] of claims.entries()) {
        const testResult = pairs.get(index);
        if (!testResult) {
            results.push({
                claim,
                verified: false,
                confidence: 'low',
                evidence: 'No test generated',
                method: 'skip',
                timestamp
            });
            continue;
        }
        if (testResult.success) {
            verified += 1;
        }
        else if (testResult.unmatchedPatterns.length > 0) {
            // A test ran and an explicit expectation was not met: the claim is
            // contradicted, not merely unproven. (Deriving this from
            // `confidence === 'high'` can never fire, because 'high' is only ever
            // assigned to successful results.)
            failed += 1;
        }
        let confidence = 'low';
        if (testResult.success) {
            confidence = 'high';
        }
        else if (testResult.matchedPatterns.length > 0) {
            confidence = 'medium';
        }
        results.push({
            claim,
            verified: testResult.success,
            confidence,
            evidence: testResult.success
                ? `Verified: ${testResult.matchedPatterns.join(', ')}`
                : `Failed: ${testResult.unmatchedPatterns.join(', ')}`,
            method: 'isolated-process',
            reasoning: testResult.llmAssessment,
            executedCode: (testResult.test.shellCommands || []).join('\n'),
            rawOutput: testResult.output.slice(0, 2000),
            error: testResult.errors || undefined,
            timestamp
        });
    }
    // Everything that is neither verified nor contradicted is inconclusive
    const inconclusive = results.length - verified - failed;
    // Determine verdict
    let overallVerdict;
    if (failed > 0) {
        overallVerdict = 'contradicted';
    }
    else if (verified === claims.length && claims.length > 0) {
        overallVerdict = 'verified';
    }
    else if (verified > 0) {
        overallVerdict = 'partially_verified';
    }
    else {
        overallVerdict = 'unverified';
    }
    const trustScore = claims.length > 0
        ? Math.round((verified / claims.length) * 100)
        : 50;
    return {
        responseId: id,
        timestamp,
        claims,
        results,
        summary: { total: claims.length, verified, failed, inconclusive },
        overallVerdict,
        trustScore
    };
}
|
|
319
|
+
// ============================================================================
|
|
320
|
+
// PROCESS ENTRY POINT
|
|
321
|
+
// ============================================================================
|
|
322
|
+
/**
 * Process entry point: read one JSON verification request from stdin, run it,
 * and write exactly one JSON document to stdout.
 *
 * On success the document is the verification report and the exit code is 0.
 * On any failure (unreadable stdin, invalid JSON, unknown request type,
 * verification error) the document is `{ error: true, message }` and the exit
 * code is 1. This function never rejects.
 */
async function main() {
    let exitCode = 0;
    let payload;
    try {
        // Read request from stdin (inside the try so read errors are reported as
        // JSON too, instead of surfacing as an unhandled rejection).
        let input = '';
        process.stdin.setEncoding('utf8');
        for await (const chunk of process.stdin) {
            input += chunk;
        }
        const request = JSON.parse(input);
        if (request.type !== 'verify') {
            throw new Error(`Unknown request type: ${request.type}`);
        }
        const report = await verify(request);
        payload = JSON.stringify(report);
    }
    catch (error) {
        exitCode = 1;
        payload = JSON.stringify({
            error: true,
            message: error instanceof Error ? error.message : 'Unknown error'
        });
    }
    // Exit only after the write has been flushed: process.exit() does not wait
    // for pending stdout writes, so exiting immediately can truncate the JSON
    // when stdout is a pipe that is written asynchronously. The callback also
    // fires on write errors, so the process cannot hang here.
    process.stdout.write(payload, () => {
        process.exit(exitCode);
    });
}
|
|
348
|
+
main();
|
|
349
|
+
//# sourceMappingURL=isolatedVerifier.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"isolatedVerifier.js","sourceRoot":"","sources":["../../src/core/isolatedVerifier.ts"],"names":[],"mappings":";AACA;;;;;;;;;;;;;;GAcG;AAEH,OAAO,EAAE,cAAc,EAAE,MAAM,iCAAiC,CAAC;AACjE,OAAO,EAAE,IAAI,EAAE,MAAM,oBAAoB,CAAC;AAC1C,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAEtC,MAAM,SAAS,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;AA6DlC,+EAA+E;AAC/E,mBAAmB;AACnB,+EAA+E;AAE/E,MAAM,qBAAqB,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;8BA0BA,CAAC;AAE/B,KAAK,UAAU,aAAa,CAC1B,QAAgB,EAChB,UAAkB,EAClB,mBAA6B,EAC7B,QAA6C;IAE7C,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,qBAAqB;aACjC,OAAO,CAAC,YAAY,EAAE,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;aAC9C,OAAO,CAAC,WAAW,EAAE,mBAAmB,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;aACpE,OAAO,CAAC,eAAe,EAAE,UAAU,CAAC,CAAC;QAExC,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,MAAM,CAAC,CAAC;QACtC,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;QAC1C,IAAI,KAAK,EAAE,CAAC;YACV,OAAO,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAY,CAAC;QACzC,CAAC;IACH,CAAC;IAAC,MAAM,CAAC;QACP,eAAe;IACjB,CAAC;IAED,OAAO,EAAE,CAAC;AACZ,CAAC;AAED,+EAA+E;AAC/E,kBAAkB;AAClB,+EAA+E;AAE/E,MAAM,qBAAqB,GAAG;;;;;;;;;;;;;;;;;;;;;;;8BAuBA,CAAC;AAE/B,KAAK,UAAU,aAAa,CAC1B,MAAe,EACf,UAAkB,EAClB,QAA6C;IAE7C,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEnC,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,qBAAqB;aACjC,OAAO,CAAC,UAAU,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC;aACxD,OAAO,CAAC,eAAe,EAAE,UAAU,CAAC;aACpC,OAAO,CAAC,YAAY,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC;QAE3C,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,MAAM,CAAC,CAAC;QACtC,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;QAC1C,IAAI,KAAK,EAAE,CAAC;YACV,OAAO,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAmB,CAAC;QAChD,CAAC;IACH,CAAC;IAAC,MAAM,CAAC;QACP,8BAA8B;IAChC,CAAC;IAED,iCAAiC;IACjC,OAAO,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,UAAU,IAAI,CAAC,CAAC,CAAC,QAAQ,KAAK,UAAU,IAAI,CAAC,CAAC,QAAQ,KAAK,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QAC3G,MAAM,IAAI,GAAiB;YACzB,EAAE,EAAE,QAAQ,CAAC,EAAE;YACf,WAAW,EAAE,CAAC,CAAC,SAAS;
YACxB,aAAa,EAAE,EAAE;YACjB,gBAAgB,EAAE,CAAC,CAAC,SAAS;YAC7B,OAAO,EAAE,KAAK;SACf,CAAC;QAEF,2CAA2C;QAC3C,IAAI,CAAC,CAAC,QAAQ,KAAK,SAAS,IAAI,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;YAClD,IAAI,CAAC,aAAa,GAAG,CAAC,YAAY,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC,wCAAwC,CAAC,CAAC;YAC7F,IAAI,CAAC,eAAe,GAAG,CAAC,QAAQ,CAAC,CAAC;QACpC,CAAC;aAAM,IAAI,CAAC,CAAC,QAAQ,KAAK,MAAM,EAAE,CAAC;YACjC,IAAI,CAAC,aAAa,GAAG,CAAC,8BAA8B,CAAC,CAAC;QACxD,CAAC;QAED,OAAO,IAAI,CAAC;IACd,CAAC,CAAC,CAAC;AACL,CAAC;AAED,+EAA+E;AAC/E,iBAAiB;AACjB,+EAA+E;AAE/E,KAAK,UAAU,eAAe,CAAC,GAAW,EAAE,GAAW;IACrD,0CAA0C;IAC1C,MAAM,SAAS,GAAG,CAAC,SAAS,EAAE,QAAQ,EAAE,OAAO,EAAE,YAAY,EAAE,YAAY,EAAE,eAAe,EAAE,gBAAgB,CAAC,CAAC;IAChH,KAAK,MAAM,CAAC,IAAI,SAAS,EAAE,CAAC;QAC1B,IAAI,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC;YAAE,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,GAAG,EAAE,8BAA8B,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC;IACvF,CAAC;IAED,IAAI,CAAC;QACH,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,SAAS,CAAC,GAAG,EAAE,EAAE,GAAG,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC;QACzE,OAAO,EAAE,EAAE,EAAE,IAAI,EAAE,GAAG,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IAC5C,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,gBAAgB,EAAE,CAAC;IAC/E,CAAC;AACH,CAAC;AAYD,KAAK,UAAU,OAAO,CACpB,IAAkB,EAClB,GAAW,EACX,QAA6C;IAE7C,MAAM,MAAM,GAAe;QACzB,IAAI;QACJ,OAAO,EAAE,KAAK;QACd,MAAM,EAAE,EAAE;QACV,MAAM,EAAE,EAAE;QACV,eAAe,EAAE,EAAE;QACnB,iBAAiB,EAAE,EAAE;KACtB,CAAC;IAEF,IAAI,CAAC;QACH,qBAAqB;QACrB,IAAI,IAAI,CAAC,aAAa,IAAI,IAAI,CAAC,aAAa,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACxD,KAAK,MAAM,GAAG,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;gBACrC,MAAM,WAAW,GAAG,MAAM,eAAe,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;gBACpD,MAAM,CAAC,MAAM,IAAI,KAAK,GAAG,KAAK,WAAW,CAAC,GAAG,IAAI,CAAC;gBAClD,IAAI,CAAC,WAAW,CAAC,EAAE,EAAE,CAAC;oBACpB,MAAM,CAAC,MAAM,IAAI,GAAG,WAAW,CAAC,GAAG,IAAI,CAAC;gBAC1C,CAAC;YACH,CAAC;QACH,CAAC;QAED,iCAAiC;QACjC,IAAI,IAAI,CAAC,eAAe,EAAE,CAAC;YACzB,KAAK,MAAM,OAAO,IAAI,IAAI,CAAC,eAAe,EAAE,CAAC;gBAC3C,IAAI,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,IAAI,MAAM,CAA
C,OAAO,EAAE,GAAG,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC;oBACpF,MAAM,CAAC,eAAe,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;gBACvC,CAAC;qBAAM,CAAC;oBACN,MAAM,CAAC,iBAAiB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;gBACzC,CAAC;YACH,CAAC;QACH,CAAC;QAED,6BAA6B;QAC7B,IAAI,IAAI,CAAC,gBAAgB,EAAE,CAAC;YAC1B,MAAM,YAAY,GAAG;;YAEf,IAAI,CAAC,gBAAgB;;;;EAI/B,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC;;;sFAGwD,CAAC;YAEjF,IAAI,CAAC;gBACH,MAAM,UAAU,GAAG,MAAM,QAAQ,CAAC,YAAY,CAAC,CAAC;gBAChD,MAAM,KAAK,GAAG,UAAU,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;gBAC9C,IAAI,KAAK,EAAE,CAAC;oBACV,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAgE,CAAC;oBACnG,MAAM,CAAC,aAAa,GAAG,GAAG,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,KAAK,MAAM,CAAC,UAAU,MAAM,MAAM,CAAC,SAAS,EAAE,CAAC;oBACnG,IAAI,CAAC,MAAM,CAAC,OAAO,IAAI,MAAM,CAAC,UAAU,GAAG,EAAE,EAAE,CAAC;wBAC9C,MAAM,CAAC,iBAAiB,CAAC,IAAI,CAAC,aAAa,IAAI,CAAC,gBAAgB,EAAE,CAAC,CAAC;oBACtE,CAAC;yBAAM,CAAC;wBACN,MAAM,CAAC,eAAe,CAAC,IAAI,CAAC,aAAa,IAAI,CAAC,gBAAgB,EAAE,CAAC,CAAC;oBACpE,CAAC;gBACH,CAAC;YACH,CAAC;YAAC,MAAM,CAAC;gBACP,MAAM,CAAC,aAAa,GAAG,uBAAuB,CAAC;YACjD,CAAC;QACH,CAAC;QAED,oBAAoB;QACpB,MAAM,CAAC,OAAO,GAAG,MAAM,CAAC,iBAAiB,CAAC,MAAM,KAAK,CAAC;YACrC,CAAC,MAAM,CAAC,eAAe,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,eAAe,EAAE,MAAM,IAAI,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC,CAAC;IAEpH,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,CAAC,MAAM,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC;IACvE,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,+EAA+E;AAC/E,oBAAoB;AACpB,+EAA+E;AAE/E,KAAK,UAAU,MAAM,CAAC,OAA4B;IAChD,MAAM,SAAS,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IAC3C,MAAM,EAAE,GAAG,UAAU,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC;IAElC,oDAAoD;IACpD,MAAM,QAAQ,GAAG,cAAc,CAAC;QAC9B,QAAQ,EAAE,OAAO,CAAC,QAAkF;QACpG,KAAK,EAAE,OAAO,CAAC,KAAK;KACrB,CAAC,CAAC;IAEH,MAAM,QAAQ,GAAG,KAAK,EAAE,MAAc,EAAmB,EAAE;QACzD,MAAM,QAAQ,GAAG,MAAM,QAAQ,CAAC,QAAQ,CACtC,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,EACnC,EAAE,CAAC,oCAAoC;SACxC,CAAC;QAEF,IAAI,QAAQ,CAAC,IAAI,KAAK,SA
AS,IAAI,QAAQ,CAAC,OAAO,EAAE,CAAC;YACpD,OAAO,QAAQ,CAAC,OAAO,CAAC;QAC1B,CAAC;QACD,OAAO,EAAE,CAAC;IACZ,CAAC,CAAC;IAEF,iBAAiB;IACjB,MAAM,MAAM,GAAG,MAAM,aAAa,CAChC,OAAO,CAAC,QAAQ,EAChB,OAAO,CAAC,gBAAgB,EACxB,OAAO,CAAC,mBAAmB,EAC3B,QAAQ,CACT,CAAC;IAEF,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACxB,OAAO;YACL,UAAU,EAAE,EAAE;YACd,SAAS;YACT,MAAM,EAAE,EAAE;YACV,OAAO,EAAE,EAAE;YACX,OAAO,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,QAAQ,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE;YAC9D,cAAc,EAAE,YAAY;YAC5B,UAAU,EAAE,EAAE;SACf,CAAC;IACJ,CAAC;IAED,iBAAiB;IACjB,MAAM,KAAK,GAAG,MAAM,aAAa,CAAC,MAAM,EAAE,OAAO,CAAC,gBAAgB,EAAE,QAAQ,CAAC,CAAC;IAE9E,YAAY;IACZ,MAAM,WAAW,GAAiB,EAAE,CAAC;IACrC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC,gBAAgB,EAAE,QAAQ,CAAC,CAAC;QACvE,WAAW,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAC3B,CAAC;IAED,wBAAwB;IACxB,MAAM,OAAO,GAA8B,MAAM,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,EAAE,EAAE;QACjE,MAAM,UAAU,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;QAClC,IAAI,CAAC,UAAU,EAAE,CAAC;YAChB,OAAO;gBACL,KAAK;gBACL,QAAQ,EAAE,KAAK;gBACf,UAAU,EAAE,KAAc;gBAC1B,QAAQ,EAAE,mBAAmB;gBAC7B,MAAM,EAAE,MAAM;gBACd,SAAS;aACV,CAAC;QACJ,CAAC;QAED,OAAO;YACL,KAAK;YACL,QAAQ,EAAE,UAAU,CAAC,OAAO;YAC5B,UAAU,EAAE,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,MAAe,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,eAAe,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,QAAiB,CAAC,CAAC,CAAC,KAAc,CAAC;YAC/H,QAAQ,EAAE,UAAU,CAAC,OAAO;gBAC1B,CAAC,CAAC,aAAa,UAAU,CAAC,eAAe,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE;gBACtD,CAAC,CAAC,WAAW,UAAU,CAAC,iBAAiB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE;YACxD,MAAM,EAAE,kBAAkB;YAC1B,SAAS,EAAE,UAAU,CAAC,aAAa;YACnC,YAAY,EAAE,CAAC,UAAU,CAAC,IAAI,CAAC,aAAa,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC;YAC9D,SAAS,EAAE,UAAU,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC;YAC3C,KAAK,EAAE,UAAU,CAAC,MAAM,IAAI,SAAS;YACrC,SAAS;SACV,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,oBAAoB;IACpB,MAAM,QAAQ,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC;IACxD,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,IAAI,CAAC,
CAAC,UAAU,KAAK,MAAM,CAAC,CAAC,MAAM,CAAC;IAClF,MAAM,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,IAAI,CAAC,CAAC,UAAU,KAAK,MAAM,CAAC,CAAC,MAAM,CAAC;IAExF,oBAAoB;IACpB,IAAI,cAAoD,CAAC;IACzD,IAAI,MAAM,GAAG,CAAC,EAAE,CAAC;QACf,cAAc,GAAG,cAAc,CAAC;IAClC,CAAC;SAAM,IAAI,QAAQ,KAAK,MAAM,CAAC,MAAM,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC3D,cAAc,GAAG,UAAU,CAAC;IAC9B,CAAC;SAAM,IAAI,QAAQ,GAAG,CAAC,EAAE,CAAC;QACxB,cAAc,GAAG,oBAAoB,CAAC;IACxC,CAAC;SAAM,CAAC;QACN,cAAc,GAAG,YAAY,CAAC;IAChC,CAAC;IAED,MAAM,UAAU,GAAG,MAAM,CAAC,MAAM,GAAG,CAAC;QAClC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,QAAQ,GAAG,MAAM,CAAC,MAAM,CAAC,GAAG,GAAG,CAAC;QAC9C,CAAC,CAAC,EAAE,CAAC;IAEP,OAAO;QACL,UAAU,EAAE,EAAE;QACd,SAAS;QACT,MAAM;QACN,OAAO;QACP,OAAO,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,YAAY,EAAE;QACjE,cAAc;QACd,UAAU;KACX,CAAC;AACJ,CAAC;AAED,+EAA+E;AAC/E,sBAAsB;AACtB,+EAA+E;AAE/E,KAAK,UAAU,IAAI;IACjB,0BAA0B;IAC1B,IAAI,KAAK,GAAG,EAAE,CAAC;IAEf,OAAO,CAAC,KAAK,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC;IAElC,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;QACxC,KAAK,IAAI,KAAK,CAAC;IACjB,CAAC;IAED,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAwB,CAAC;QAEzD,IAAI,OAAO,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;YAC9B,MAAM,IAAI,KAAK,CAAC,yBAAyB,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC;QAC3D,CAAC;QAED,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,OAAO,CAAC,CAAC;QAErC,kCAAkC;QAClC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC;QAC7C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,uBAAuB;QACvB,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC;YAClC,KAAK,EAAE,IAAI;YACX,OAAO,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe;SAClE,CAAC,CAAC,CAAC;QACJ,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC;AAED,IAAI,EAAE,CAAC"}
|
|
@@ -30,6 +30,8 @@ export interface IsolatedRuntimeResult {
|
|
|
30
30
|
export interface VerificationContext {
|
|
31
31
|
workingDirectory: string;
|
|
32
32
|
conversationHistory?: string[];
|
|
33
|
+
provider: string;
|
|
34
|
+
model: string;
|
|
33
35
|
llmVerifier?: (prompt: string) => Promise<string>;
|
|
34
36
|
}
|
|
35
37
|
export interface Claim {
|
|
@@ -71,8 +73,15 @@ export interface VerificationReport {
|
|
|
71
73
|
*/
|
|
72
74
|
export declare function runIsolatedTest(test: IsolatedRuntimeTest, cwd: string, llmVerifier?: (prompt: string) => Promise<string>): Promise<IsolatedRuntimeResult>;
|
|
73
75
|
/**
|
|
74
|
-
* Verify an assistant response using isolated
|
|
75
|
-
*
|
|
76
|
+
* Verify an assistant response using a completely isolated process.
|
|
77
|
+
*
|
|
78
|
+
* This spawns a separate Node.js process to run all verification:
|
|
79
|
+
* - Separate memory space from main CLI
|
|
80
|
+
* - Separate event loop
|
|
81
|
+
* - Independent error handling
|
|
82
|
+
* - No shared state
|
|
83
|
+
*
|
|
84
|
+
* This ensures verification cannot interfere with the main process and vice versa.
|
|
76
85
|
*/
|
|
77
86
|
export declare function verifyResponse(response: string, ctx: VerificationContext, responseId?: string): Promise<VerificationReport>;
|
|
78
87
|
/**
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"responseVerifier.d.ts","sourceRoot":"","sources":["../../src/core/responseVerifier.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAaH,MAAM,WAAW,mBAAmB;IAClC,EAAE,EAAE,MAAM,CAAC;IACX,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;IACzB,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;IAC3B,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,aAAa,CAAC,EAAE,OAAO,CAAC;CACzB;AAED,MAAM,WAAW,qBAAqB;IACpC,IAAI,EAAE,mBAAmB,CAAC;IAC1B,OAAO,EAAE,OAAO,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,QAAQ,EAAE,MAAM,CAAC;IACjB,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,iBAAiB,EAAE,MAAM,EAAE,CAAC;IAC5B,aAAa,CAAC,EAAE,MAAM,CAAC;CACxB;AAED,MAAM,WAAW,mBAAmB;IAClC,gBAAgB,EAAE,MAAM,CAAC;IACzB,mBAAmB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC/B,WAAW,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC;CACnD;AAED,MAAM,WAAW,KAAK;IACpB,EAAE,EAAE,MAAM,CAAC;IACX,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,OAAO,CAAC;IACpB,QAAQ,EAAE,UAAU,GAAG,MAAM,GAAG,QAAQ,GAAG,KAAK,CAAC;IACjD,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CAClC;AAED,MAAM,WAAW,uBAAuB;IACtC,KAAK,EAAE,KAAK,CAAC;IACb,QAAQ,EAAE,OAAO,CAAC;IAClB,UAAU,EAAE,MAAM,GAAG,QAAQ,GAAG,KAAK,CAAC;IACtC,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,kBAAkB;IACjC,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,OAAO,EAAE,uBAAuB,EAAE,CAAC;IACnC,OAAO,EAAE;QACP,KAAK,EAAE,MAAM,CAAC;QACd,QAAQ,EAAE,MAAM,CAAC;QACjB,MAAM,EAAE,MAAM,CAAC;QACf,YAAY,EAAE,MAAM,CAAC;KACtB,CAAC;IACF,cAAc,EAAE,UAAU,GAAG,oBAAoB,GAAG,cAAc,GAAG,YAAY,CAAC;IAClF,UAAU,EAAE,MAAM,CAAC;CACpB;AAgHD;;GAEG;AACH,wBAAsB,eAAe,CACnC,IAAI,EAAE,mBAAmB,EACzB,GAAG,EAAE,MAAM,EACX,WAAW,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,GAChD,OAAO,CAAC,qBAAqB,CAAC,CAwGhC;AA4ID
|
|
1
|
+
{"version":3,"file":"responseVerifier.d.ts","sourceRoot":"","sources":["../../src/core/responseVerifier.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAaH,MAAM,WAAW,mBAAmB;IAClC,EAAE,EAAE,MAAM,CAAC;IACX,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;IACzB,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;IAC3B,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,aAAa,CAAC,EAAE,OAAO,CAAC;CACzB;AAED,MAAM,WAAW,qBAAqB;IACpC,IAAI,EAAE,mBAAmB,CAAC;IAC1B,OAAO,EAAE,OAAO,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,QAAQ,EAAE,MAAM,CAAC;IACjB,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,iBAAiB,EAAE,MAAM,EAAE,CAAC;IAC5B,aAAa,CAAC,EAAE,MAAM,CAAC;CACxB;AAED,MAAM,WAAW,mBAAmB;IAClC,gBAAgB,EAAE,MAAM,CAAC;IACzB,mBAAmB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC/B,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IAEd,WAAW,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC;CACnD;AAED,MAAM,WAAW,KAAK;IACpB,EAAE,EAAE,MAAM,CAAC;IACX,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,OAAO,CAAC;IACpB,QAAQ,EAAE,UAAU,GAAG,MAAM,GAAG,QAAQ,GAAG,KAAK,CAAC;IACjD,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CAClC;AAED,MAAM,WAAW,uBAAuB;IACtC,KAAK,EAAE,KAAK,CAAC;IACb,QAAQ,EAAE,OAAO,CAAC;IAClB,UAAU,EAAE,MAAM,GAAG,QAAQ,GAAG,KAAK,CAAC;IACtC,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,kBAAkB;IACjC,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,OAAO,EAAE,uBAAuB,EAAE,CAAC;IACnC,OAAO,EAAE;QACP,KAAK,EAAE,MAAM,CAAC;QACd,QAAQ,EAAE,MAAM,CAAC;QACjB,MAAM,EAAE,MAAM,CAAC;QACf,YAAY,EAAE,MAAM,CAAC;KACtB,CAAC;IACF,cAAc,EAAE,UAAU,GAAG,oBAAoB,GAAG,cAAc,GAAG,YAAY,CAAC;IAClF,UAAU,EAAE,MAAM,CAAC;CACpB;AAgHD;;GAEG;AACH,wBAAsB,eAAe,CACnC,IAAI,EAAE,mBAAmB,EACzB,GAAG,EAAE,MAAM,EACX,WAAW,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,GAChD,OAAO,CAAC,
qBAAqB,CAAC,CAwGhC;AA4ID;;;;;;;;;;GAUG;AACH,wBAAsB,cAAc,CAClC,QAAQ,EAAE,MAAM,EAChB,GAAG,EAAE,mBAAmB,EACxB,UAAU,CAAC,EAAE,MAAM,GAClB,OAAO,CAAC,kBAAkB,CAAC,CAwH7B;AAED;;GAEG;AACH,wBAAgB,wBAAwB,CAAC,MAAM,EAAE,kBAAkB,GAAG,MAAM,CA+B3E;AAED;;GAEG;AACH,wBAAsB,WAAW,CAC/B,QAAQ,EAAE,MAAM,EAChB,GAAG,EAAE,mBAAmB,GACvB,OAAO,CAAC;IAAE,UAAU,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,CAAC,CAoBlD"}
|
|
@@ -68,7 +68,7 @@ async function spawnIsolatedCLI(cwd, timeout = 60000) {
|
|
|
68
68
|
*/
|
|
69
69
|
async function sendCommand(cli, command, waitMs = 5000) {
|
|
70
70
|
const outputBefore = cli.output.length;
|
|
71
|
-
cli.stdin.write(command
|
|
71
|
+
cli.stdin.write(`${command}\n`);
|
|
72
72
|
await new Promise(resolve => {
|
|
73
73
|
let lastLength = cli.output.length;
|
|
74
74
|
const checkInterval = setInterval(() => {
|
|
@@ -135,7 +135,7 @@ export async function runIsolatedTest(test, cwd, llmVerifier) {
|
|
|
135
135
|
const shellResult = await runShellVerification(cmd, cwd);
|
|
136
136
|
result.output += `$ ${cmd}\n${shellResult.out}\n`;
|
|
137
137
|
if (!shellResult.ok) {
|
|
138
|
-
result.errors += shellResult.out
|
|
138
|
+
result.errors += `${shellResult.out}\n`;
|
|
139
139
|
}
|
|
140
140
|
}
|
|
141
141
|
}
|
|
@@ -333,15 +333,26 @@ async function generateTests(claims, ctx) {
|
|
|
333
333
|
// MAIN VERIFICATION API
|
|
334
334
|
// ============================================================================
|
|
335
335
|
/**
|
|
336
|
-
* Verify an assistant response using isolated
|
|
337
|
-
*
|
|
336
|
+
* Verify an assistant response using a completely isolated process.
|
|
337
|
+
*
|
|
338
|
+
* This spawns a separate Node.js process to run all verification:
|
|
339
|
+
* - Separate memory space from main CLI
|
|
340
|
+
* - Separate event loop
|
|
341
|
+
* - Independent error handling
|
|
342
|
+
* - No shared state
|
|
343
|
+
*
|
|
344
|
+
* This ensures verification cannot interfere with the main process and vice versa.
|
|
338
345
|
*/
|
|
339
346
|
export async function verifyResponse(response, ctx, responseId) {
|
|
340
347
|
const timestamp = new Date().toISOString();
|
|
341
348
|
const id = responseId || `verify-${Date.now()}`;
|
|
342
|
-
//
|
|
343
|
-
const
|
|
344
|
-
|
|
349
|
+
// Find the isolated verifier script
|
|
350
|
+
const verifierPath = path.join(ctx.workingDirectory, 'dist/core/isolatedVerifier.js');
|
|
351
|
+
try {
|
|
352
|
+
await fs.access(verifierPath);
|
|
353
|
+
}
|
|
354
|
+
catch {
|
|
355
|
+
// Fallback: return unverified if script not found
|
|
345
356
|
return {
|
|
346
357
|
responseId: id,
|
|
347
358
|
timestamp,
|
|
@@ -352,73 +363,97 @@ export async function verifyResponse(response, ctx, responseId) {
|
|
|
352
363
|
trustScore: 50
|
|
353
364
|
};
|
|
354
365
|
}
|
|
355
|
-
//
|
|
356
|
-
const
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
// Map test results back to claims
|
|
364
|
-
const results = claims.map((claim, i) => {
|
|
365
|
-
const testResult = testResults[i];
|
|
366
|
-
if (!testResult) {
|
|
367
|
-
return {
|
|
368
|
-
claim,
|
|
369
|
-
verified: false,
|
|
370
|
-
confidence: 'low',
|
|
371
|
-
evidence: 'No test generated',
|
|
372
|
-
method: 'skip',
|
|
373
|
-
timestamp
|
|
374
|
-
};
|
|
375
|
-
}
|
|
376
|
-
return {
|
|
377
|
-
claim,
|
|
378
|
-
verified: testResult.success,
|
|
379
|
-
confidence: testResult.success ? 'high' : (testResult.matchedPatterns.length > 0 ? 'medium' : 'low'),
|
|
380
|
-
evidence: testResult.success
|
|
381
|
-
? `Verified in isolated runtime: ${testResult.matchedPatterns.join(', ')}`
|
|
382
|
-
: `Failed: ${testResult.unmatchedPatterns.join(', ')}`,
|
|
383
|
-
method: 'isolated-runtime',
|
|
384
|
-
reasoning: testResult.llmAssessment,
|
|
385
|
-
executedCode: [...(testResult.test.shellCommands || []), ...(testResult.test.commands || [])].join('\n'),
|
|
386
|
-
rawOutput: testResult.output.slice(0, 2000),
|
|
387
|
-
error: testResult.errors || undefined,
|
|
388
|
-
timestamp
|
|
389
|
-
};
|
|
390
|
-
});
|
|
391
|
-
// Calculate summary
|
|
392
|
-
const verified = results.filter(r => r.verified).length;
|
|
393
|
-
const failed = results.filter(r => !r.verified && r.confidence === 'high').length;
|
|
394
|
-
const inconclusive = results.filter(r => !r.verified && r.confidence !== 'high').length;
|
|
395
|
-
// Determine verdict
|
|
396
|
-
let overallVerdict;
|
|
397
|
-
if (failed > 0) {
|
|
398
|
-
overallVerdict = 'contradicted';
|
|
399
|
-
}
|
|
400
|
-
else if (verified === claims.length && claims.length > 0) {
|
|
401
|
-
overallVerdict = 'verified';
|
|
402
|
-
}
|
|
403
|
-
else if (verified > 0) {
|
|
404
|
-
overallVerdict = 'partially_verified';
|
|
405
|
-
}
|
|
406
|
-
else {
|
|
407
|
-
overallVerdict = 'unverified';
|
|
408
|
-
}
|
|
409
|
-
// Calculate trust score
|
|
410
|
-
const trustScore = claims.length > 0
|
|
411
|
-
? Math.round((verified / claims.length) * 100)
|
|
412
|
-
: 50;
|
|
413
|
-
return {
|
|
414
|
-
responseId: id,
|
|
415
|
-
timestamp,
|
|
416
|
-
claims,
|
|
417
|
-
results,
|
|
418
|
-
summary: { total: claims.length, verified, failed, inconclusive },
|
|
419
|
-
overallVerdict,
|
|
420
|
-
trustScore
|
|
366
|
+
// Build request for isolated process
|
|
367
|
+
const request = {
|
|
368
|
+
type: 'verify',
|
|
369
|
+
response,
|
|
370
|
+
workingDirectory: ctx.workingDirectory,
|
|
371
|
+
conversationHistory: ctx.conversationHistory || [],
|
|
372
|
+
provider: ctx.provider,
|
|
373
|
+
model: ctx.model,
|
|
421
374
|
};
|
|
375
|
+
return new Promise((resolve) => {
|
|
376
|
+
// Spawn completely isolated Node.js process
|
|
377
|
+
const child = spawn('node', [verifierPath], {
|
|
378
|
+
cwd: ctx.workingDirectory,
|
|
379
|
+
env: {
|
|
380
|
+
...process.env,
|
|
381
|
+
EROSOLAR_ISOLATED_VERIFIER: '1',
|
|
382
|
+
NODE_OPTIONS: '--max-old-space-size=512', // Limit memory for safety
|
|
383
|
+
},
|
|
384
|
+
stdio: ['pipe', 'pipe', 'pipe'],
|
|
385
|
+
});
|
|
386
|
+
let stdout = '';
|
|
387
|
+
let stderr = '';
|
|
388
|
+
child.stdout.on('data', (data) => {
|
|
389
|
+
stdout += data.toString();
|
|
390
|
+
});
|
|
391
|
+
child.stderr.on('data', (data) => {
|
|
392
|
+
stderr += data.toString();
|
|
393
|
+
});
|
|
394
|
+
// Set timeout for verification (2 minutes max)
|
|
395
|
+
const timeout = setTimeout(() => {
|
|
396
|
+
child.kill('SIGTERM');
|
|
397
|
+
resolve({
|
|
398
|
+
responseId: id,
|
|
399
|
+
timestamp,
|
|
400
|
+
claims: [],
|
|
401
|
+
results: [],
|
|
402
|
+
summary: { total: 0, verified: 0, failed: 0, inconclusive: 0 },
|
|
403
|
+
overallVerdict: 'unverified',
|
|
404
|
+
trustScore: 50
|
|
405
|
+
});
|
|
406
|
+
}, 120000);
|
|
407
|
+
child.on('close', (_code) => {
|
|
408
|
+
clearTimeout(timeout);
|
|
409
|
+
try {
|
|
410
|
+
// Parse result from isolated process
|
|
411
|
+
const result = JSON.parse(stdout);
|
|
412
|
+
if (result.error) {
|
|
413
|
+
// Process returned error
|
|
414
|
+
resolve({
|
|
415
|
+
responseId: id,
|
|
416
|
+
timestamp,
|
|
417
|
+
claims: [],
|
|
418
|
+
results: [],
|
|
419
|
+
summary: { total: 0, verified: 0, failed: 0, inconclusive: 0 },
|
|
420
|
+
overallVerdict: 'unverified',
|
|
421
|
+
trustScore: 50
|
|
422
|
+
});
|
|
423
|
+
}
|
|
424
|
+
else {
|
|
425
|
+
resolve(result);
|
|
426
|
+
}
|
|
427
|
+
}
|
|
428
|
+
catch {
|
|
429
|
+
// JSON parse failed
|
|
430
|
+
resolve({
|
|
431
|
+
responseId: id,
|
|
432
|
+
timestamp,
|
|
433
|
+
claims: [],
|
|
434
|
+
results: [],
|
|
435
|
+
summary: { total: 0, verified: 0, failed: 0, inconclusive: 0 },
|
|
436
|
+
overallVerdict: 'unverified',
|
|
437
|
+
trustScore: 50
|
|
438
|
+
});
|
|
439
|
+
}
|
|
440
|
+
});
|
|
441
|
+
child.on('error', () => {
|
|
442
|
+
clearTimeout(timeout);
|
|
443
|
+
resolve({
|
|
444
|
+
responseId: id,
|
|
445
|
+
timestamp,
|
|
446
|
+
claims: [],
|
|
447
|
+
results: [],
|
|
448
|
+
summary: { total: 0, verified: 0, failed: 0, inconclusive: 0 },
|
|
449
|
+
overallVerdict: 'unverified',
|
|
450
|
+
trustScore: 50
|
|
451
|
+
});
|
|
452
|
+
});
|
|
453
|
+
// Send request to isolated process via stdin
|
|
454
|
+
child.stdin.write(JSON.stringify(request));
|
|
455
|
+
child.stdin.end();
|
|
456
|
+
});
|
|
422
457
|
}
|
|
423
458
|
/**
|
|
424
459
|
* Format verification report for display
|