erosolar-cli 1.7.27 → 1.7.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. package/dist/browser/BrowserSessionManager.js +1 -1
  2. package/dist/browser/BrowserSessionManager.js.map +1 -1
  3. package/dist/core/agentRulebook.js +1 -1
  4. package/dist/core/agentRulebook.js.map +1 -1
  5. package/dist/core/contextManager.js +2 -2
  6. package/dist/core/contextManager.js.map +1 -1
  7. package/dist/core/isolatedVerifier.d.ts +18 -0
  8. package/dist/core/isolatedVerifier.d.ts.map +1 -0
  9. package/dist/core/isolatedVerifier.js +349 -0
  10. package/dist/core/isolatedVerifier.js.map +1 -0
  11. package/dist/core/responseVerifier.d.ts +11 -2
  12. package/dist/core/responseVerifier.d.ts.map +1 -1
  13. package/dist/core/responseVerifier.js +108 -73
  14. package/dist/core/responseVerifier.js.map +1 -1
  15. package/dist/intelligence/testGenerator.js +2 -2
  16. package/dist/intelligence/testGenerator.js.map +1 -1
  17. package/dist/providers/openaiChatCompletionsProvider.js +5 -5
  18. package/dist/providers/openaiChatCompletionsProvider.js.map +1 -1
  19. package/dist/security/active-stack-security.js +2 -2
  20. package/dist/security/active-stack-security.js.map +1 -1
  21. package/dist/shell/bracketedPasteManager.enhanced.d.ts +2 -0
  22. package/dist/shell/bracketedPasteManager.enhanced.d.ts.map +1 -0
  23. package/dist/shell/bracketedPasteManager.enhanced.js +4 -0
  24. package/dist/shell/bracketedPasteManager.enhanced.js.map +1 -0
  25. package/dist/shell/inputProcessor.js +2 -2
  26. package/dist/shell/inputProcessor.js.map +1 -1
  27. package/dist/shell/interactiveShell.d.ts +16 -2
  28. package/dist/shell/interactiveShell.d.ts.map +1 -1
  29. package/dist/shell/interactiveShell.js +65 -22
  30. package/dist/shell/interactiveShell.js.map +1 -1
  31. package/dist/tools/cloudTools.js +2 -2
  32. package/dist/tools/cloudTools.js.map +1 -1
  33. package/dist/tools/devTools.js +2 -2
  34. package/dist/tools/devTools.js.map +1 -1
  35. package/dist/tools/diffUtils.js +22 -2
  36. package/dist/tools/diffUtils.js.map +1 -1
  37. package/dist/tools/fileTools.d.ts.map +1 -1
  38. package/dist/tools/fileTools.js +20 -2
  39. package/dist/tools/fileTools.js.map +1 -1
  40. package/dist/tools/frontendTestingTools.js +1 -1
  41. package/dist/tools/frontendTestingTools.js.map +1 -1
  42. package/dist/tools/globTools.js +1 -1
  43. package/dist/tools/globTools.js.map +1 -1
  44. package/dist/tools/grepTools.js +1 -1
  45. package/dist/tools/grepTools.js.map +1 -1
  46. package/dist/tools/webTools.js +2 -2
  47. package/dist/tools/webTools.js.map +1 -1
  48. package/dist/ui/ShellUIAdapter.d.ts +5 -0
  49. package/dist/ui/ShellUIAdapter.d.ts.map +1 -1
  50. package/dist/ui/ShellUIAdapter.js +30 -7
  51. package/dist/ui/ShellUIAdapter.js.map +1 -1
  52. package/dist/ui/advancedTheme.js +1 -1
  53. package/dist/ui/advancedTheme.js.map +1 -1
  54. package/dist/ui/diffViewer.js +1 -1
  55. package/dist/ui/diffViewer.js.map +1 -1
  56. package/dist/ui/display.js +3 -3
  57. package/dist/ui/display.js.map +1 -1
  58. package/dist/ui/logFormatter.js +1 -1
  59. package/dist/ui/logFormatter.js.map +1 -1
  60. package/dist/ui/outputSummarizer.js +6 -7
  61. package/dist/ui/outputSummarizer.js.map +1 -1
  62. package/dist/ui/persistentPrompt.js +4 -4
  63. package/dist/ui/persistentPrompt.js.map +1 -1
  64. package/dist/ui/tableFormatter.js +4 -4
  65. package/dist/ui/tableFormatter.js.map +1 -1
  66. package/dist/ui/textHighlighter.js +2 -2
  67. package/dist/ui/textHighlighter.js.map +1 -1
  68. package/dist/ui/toolDisplay.d.ts +18 -0
  69. package/dist/ui/toolDisplay.d.ts.map +1 -1
  70. package/dist/ui/toolDisplay.js +270 -6
  71. package/dist/ui/toolDisplay.js.map +1 -1
  72. package/dist/ui/treeVisualizer.js +1 -1
  73. package/dist/ui/treeVisualizer.js.map +1 -1
  74. package/package.json +12 -3
  75. package/scripts/isolated-verification-runner.mjs +364 -0
  76. package/scripts/isolated-verification-wrapper.mjs +276 -0
@@ -0,0 +1,349 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Isolated Verification Runner
4
+ *
5
+ * This script runs in a completely separate Node.js process to verify
6
+ * assistant responses. It receives verification requests via stdin and
7
+ * outputs results via stdout.
8
+ *
9
+ * This ensures verification is completely isolated from the main CLI process:
10
+ * - Separate memory space
11
+ * - Separate event loop
12
+ * - Independent error handling
13
+ * - No shared state with main process
14
+ *
15
+ * @license MIT
16
+ */
17
+ import { createProvider } from '../providers/providerFactory.js';
18
+ import { exec } from 'node:child_process';
19
+ import { promisify } from 'node:util';
20
+ const execAsync = promisify(exec);
21
+ // ============================================================================
22
+ // CLAIM EXTRACTION
23
+ // ============================================================================
/**
 * Prompt template used to ask the LLM for the verifiable claims contained in
 * an assistant response. `{RESPONSE}`, `{CONTEXT}` and `{WORKING_DIR}` are
 * placeholders filled in by `extractClaims`.
 */
const EXTRACT_CLAIMS_PROMPT = `Extract ALL verifiable claims from this AI assistant response.

RESPONSE:
---
{RESPONSE}
---

CONTEXT: {CONTEXT}
WORKING_DIR: {WORKING_DIR}

For each claim, determine:
1. What specific assertion is being made
2. Category: file_op (created/modified/deleted files), code (compiles/tests pass), command (executed successfully), state (something changed), behavior (feature works), fact (verifiable truth)
3. How it can be verified (shell command, file check, etc.)
4. Priority: critical (must verify), high (should verify), medium (nice to verify), low (optional)

Return JSON array:
[{
"id": "c1",
"statement": "the specific claim",
"category": "file_op|code|command|state|behavior|fact",
"verifiable": true,
"priority": "critical|high|medium|low",
"context": {"path": "/path/if/relevant", "command": "if relevant"}
}]

Output ONLY valid JSON array.`;

/**
 * Ask the LLM to extract verifiable claims from an assistant response.
 *
 * @param {string} response - Assistant response text; only the first 8000 characters are sent.
 * @param {string} workingDir - Working directory reported to the LLM.
 * @param {string[]} [conversationHistory] - Prior messages; the last three are sent as context.
 * @param {(prompt: string) => Promise<string>} llmQuery - Sends a prompt and resolves with the reply text.
 * @returns {Promise<object[]>} Parsed claim objects, or `[]` when the LLM call fails or its
 *   reply contains no parseable JSON array (best-effort: errors are deliberately not propagated).
 */
async function extractClaims(response, workingDir, conversationHistory, llmQuery) {
    try {
        const values = {
            RESPONSE: response.slice(0, 8000),
            // The history is optional for callers; treat a missing one as "no context".
            CONTEXT: (conversationHistory ?? []).slice(-3).join('\n'),
            WORKING_DIR: workingDir,
        };
        // Single pass with a replacer function: the inserted text is never
        // re-scanned for placeholders and `$&`, `$'`, `$1` ... in the response
        // are inserted literally instead of being treated as replacement patterns.
        const prompt = EXTRACT_CLAIMS_PROMPT.replace(
            /\{(RESPONSE|CONTEXT|WORKING_DIR)\}/g,
            (_placeholder, key) => values[key],
        );
        const reply = await llmQuery(prompt);
        // The model may wrap the JSON in prose; grab the outermost [...] span.
        const match = reply.match(/\[[\s\S]*\]/);
        if (match) {
            const parsed = JSON.parse(match[0]);
            // Keep only object entries so later stages can read claim fields safely.
            return parsed.filter((claim) => claim !== null && typeof claim === 'object');
        }
    }
    catch {
        // Best-effort extraction: any failure means "no claims could be extracted".
    }
    return [];
}
68
+ // ============================================================================
69
+ // TEST GENERATION
70
+ // ============================================================================
/**
 * Prompt template used to ask the LLM for read-only runtime tests covering a
 * list of claims. `{CLAIMS}`, `{WORKING_DIR}` and `{PLATFORM}` are placeholders
 * filled in by `generateTests`.
 */
const GENERATE_TESTS_PROMPT = `Generate isolated runtime tests for these claims.

CLAIMS:
{CLAIMS}

WORKING_DIR: {WORKING_DIR}
PLATFORM: {PLATFORM}

For each claim, generate a test that verifies it using:
- Shell commands (for file checks, git status, etc.)
- Expected output patterns

Return JSON array:
[{
"id": "test-1",
"description": "what we're testing",
"shellCommands": ["ls -la path", "cat file"],
"expectedOutputs": ["pattern1", "pattern2"],
"expectedBehavior": "description for LLM assessment",
"timeout": 30000
}]

Use READ-ONLY commands only. No destructive operations.
Output ONLY valid JSON array.`;

/**
 * Quote a value for use as a single POSIX shell word.
 * Inside single quotes nothing is expanded; an embedded `'` is written as `'\''`.
 */
function quoteForShell(value) {
    return `'${String(value).replace(/'/g, `'\\''`)}'`;
}

/**
 * Produce runtime tests for the given claims.
 *
 * The LLM is asked first (with at most the first 10 claims). If that call
 * fails or its reply has no parseable JSON array, basic tests are derived
 * locally for verifiable claims of `critical` or `high` priority.
 *
 * @param {object[]} claims - Claim objects as returned by claim extraction.
 * @param {string} workingDir - Working directory reported to the LLM.
 * @param {(prompt: string) => Promise<string>} llmQuery - Sends a prompt and resolves with the reply text.
 * @returns {Promise<object[]>} Test descriptors (`id`, `description`, `shellCommands`, ...).
 */
async function generateTests(claims, workingDir, llmQuery) {
    if (claims.length === 0)
        return [];
    try {
        const values = {
            CLAIMS: JSON.stringify(claims.slice(0, 10)),
            WORKING_DIR: workingDir,
            PLATFORM: process.platform,
        };
        // Single pass with a replacer function so claim text containing
        // `$&`-style patterns or `{PLATFORM}`-like tokens is inserted verbatim.
        const prompt = GENERATE_TESTS_PROMPT.replace(
            /\{(CLAIMS|WORKING_DIR|PLATFORM)\}/g,
            (_placeholder, key) => values[key],
        );
        const reply = await llmQuery(prompt);
        // The model may wrap the JSON in prose; grab the outermost [...] span.
        const match = reply.match(/\[[\s\S]*\]/);
        if (match) {
            return JSON.parse(match[0]);
        }
    }
    catch {
        // Fall through to basic tests
    }
    // Fallback: generate basic tests
    return claims
        .filter((c) => c.verifiable && (c.priority === 'critical' || c.priority === 'high'))
        .map((c, i) => {
            const test = {
                id: `test-${i}`,
                description: c.statement,
                shellCommands: [],
                expectedBehavior: c.statement,
                timeout: 30000,
            };
            // `context` is optional in LLM output; a missing one must not crash the fallback.
            const claimedPath = c.context?.path;
            // Add basic verification based on category
            if (c.category === 'file_op' && claimedPath) {
                // The path is LLM-provided text: single-quote it so `$(...)`,
                // backticks and quotes cannot be interpreted by the shell.
                test.shellCommands = [`test -f ${quoteForShell(claimedPath)} && echo "EXISTS" || echo "NOT_FOUND"`];
                test.expectedOutputs = ['EXISTS'];
            }
            else if (c.category === 'code') {
                test.shellCommands = ['npm run build 2>&1 | tail -5'];
            }
            return test;
        });
}
132
+ // ============================================================================
133
+ // TEST EXECUTION
134
+ // ============================================================================
/**
 * Run one shell command for verification and capture its output.
 *
 * Commands matching a small deny-list of obviously destructive patterns are
 * refused without being executed. NOTE(review): the deny-list is a coarse
 * pattern filter, not a sandbox; commands originate from LLM output.
 *
 * @param {string} cmd - Shell command line to execute.
 * @param {string} cwd - Working directory for the command.
 * @returns {Promise<{ok: boolean, out: string}>} `ok` is true when the command ran and exited
 *   successfully; `out` holds stdout+stderr on success, or the failure description otherwise.
 */
async function runShellCommand(cmd, cwd) {
    if (typeof cmd !== 'string' || cmd.trim() === '') {
        return { ok: false, out: 'Invalid command' };
    }
    // Safety check - block dangerous commands
    const dangerous = [/\brm\s/i, /rmdir/i, /sudo/i, /chmod\s*7/i, /eval\s*\(/i, /DROP\s+TABLE/i, /DELETE\s+FROM/i];
    const blockedBy = dangerous.find((pattern) => pattern.test(cmd));
    if (blockedBy) {
        return { ok: false, out: `Blocked dangerous command: ${blockedBy.source}` };
    }
    try {
        const { stdout, stderr } = await execAsync(cmd, { cwd, timeout: 30000 });
        return { ok: true, out: stdout + stderr };
    }
    catch (e) {
        const message = e instanceof Error ? e.message : 'Command failed';
        // A non-zero exit still carries whatever the command printed to stdout
        // on the error object; keep it so pattern checks and the LLM assessment
        // can see it (the message itself only reflects the failure).
        const capturedStdout = typeof e?.stdout === 'string' ? e.stdout : '';
        return { ok: false, out: capturedStdout ? `${message}\n${capturedStdout}` : message };
    }
}
/**
 * Check whether command output satisfies one expected-output pattern.
 * The pattern is tried as a literal substring first, then as a
 * case-insensitive regular expression; text that is not a valid regular
 * expression simply does not match.
 */
function outputMatchesPattern(output, pattern) {
    const text = String(pattern);
    if (output.includes(text)) {
        return true;
    }
    try {
        return new RegExp(text, 'i').test(output);
    }
    catch {
        // Patterns come from LLM output and are often plain text such as
        // "foo(" that is not valid regex syntax.
        return false;
    }
}

/**
 * Execute a single verification test: run its shell commands, compare the
 * combined output against the expected patterns, and optionally ask the LLM
 * whether the output demonstrates the expected behavior.
 *
 * @param {object} test - Test descriptor (`shellCommands`, `expectedOutputs`, `expectedBehavior`, ...).
 * @param {string} cwd - Working directory for the shell commands.
 * @param {(prompt: string) => Promise<string>} llmQuery - Sends a prompt and resolves with the reply text.
 * @returns {Promise<object>} Result with `test`, `success`, `output`, `errors`,
 *   `matchedPatterns`, `unmatchedPatterns` and, when assessed, `llmAssessment`.
 *   The function does not reject; unexpected errors are reported in `errors`.
 */
async function runTest(test, cwd, llmQuery) {
    const result = {
        test,
        success: false,
        output: '',
        errors: '',
        matchedPatterns: [],
        unmatchedPatterns: []
    };
    try {
        // LLM-produced descriptors may carry non-array values; treat them as absent.
        const commands = Array.isArray(test.shellCommands) ? test.shellCommands : [];
        const expectedOutputs = Array.isArray(test.expectedOutputs) ? test.expectedOutputs : [];
        // Run shell commands
        for (const cmd of commands) {
            const shellResult = await runShellCommand(cmd, cwd);
            result.output += `$ ${cmd}\n${shellResult.out}\n`;
            if (!shellResult.ok) {
                result.errors += `${shellResult.out}\n`;
            }
        }
        // Check expected output patterns
        for (const pattern of expectedOutputs) {
            if (outputMatchesPattern(result.output, pattern)) {
                result.matchedPatterns.push(pattern);
            }
            else {
                result.unmatchedPatterns.push(pattern);
            }
        }
        // LLM assessment of behavior
        if (test.expectedBehavior) {
            const assessPrompt = `Assess if this output demonstrates the expected behavior.

EXPECTED: ${test.expectedBehavior}

OUTPUT:
---
${result.output.slice(0, 4000)}
---

Return JSON: {"matches": true/false, "confidence": 0-100, "reasoning": "explanation"}`;
            try {
                const assessment = await llmQuery(assessPrompt);
                const match = assessment.match(/\{[\s\S]*\}/);
                if (match) {
                    const parsed = JSON.parse(match[0]);
                    result.llmAssessment = `${parsed.matches ? '✅' : '❌'} [${parsed.confidence}%] ${parsed.reasoning}`;
                    // A positive verdict below 70% confidence counts as unmatched.
                    if (!parsed.matches || parsed.confidence < 70) {
                        result.unmatchedPatterns.push(`behavior: ${test.expectedBehavior}`);
                    }
                    else {
                        result.matchedPatterns.push(`behavior: ${test.expectedBehavior}`);
                    }
                }
            }
            catch {
                result.llmAssessment = 'LLM assessment failed';
            }
        }
        // Determine success: nothing unmatched, and either something matched
        // or the test declared no expectations at all.
        result.success = result.unmatchedPatterns.length === 0 &&
            (result.matchedPatterns.length > 0 || (expectedOutputs.length === 0 && !test.expectedBehavior));
    }
    catch (err) {
        result.errors = err instanceof Error ? err.message : 'Unknown error';
    }
    return result;
}
220
+ // ============================================================================
221
+ // MAIN VERIFICATION
222
+ // ============================================================================
/**
 * Pair each claim with the test result that belongs to it.
 *
 * Tests carry no explicit claim reference, so pairing is heuristic:
 * 1. a result whose test `description` equals the claim `statement` is paired
 *    with that claim (locally generated fallback tests use the statement as
 *    their description);
 * 2. claims still unpaired receive the remaining results in order, which is
 *    plain positional pairing when no descriptions matched.
 *
 * @returns {Array<object|undefined>} One entry per claim; `undefined` when no result is left.
 */
function pairTestResultsWithClaims(claims, testResults) {
    const remaining = [...testResults];
    const paired = claims.map((claim) => {
        if (typeof claim.statement !== 'string') {
            return undefined;
        }
        const at = remaining.findIndex((r) => r?.test?.description === claim.statement);
        return at === -1 ? undefined : remaining.splice(at, 1)[0];
    });
    return paired.map((result) => result ?? remaining.shift());
}

/**
 * Verify an assistant response: extract claims, generate tests, run them and
 * build the verification report.
 *
 * @param {object} request - Verification request (`response`, `workingDirectory`,
 *   optional `conversationHistory`, `provider`, `model`).
 * @returns {Promise<object>} Report with `responseId`, `timestamp`, `claims`, `results`,
 *   `summary` (`total`, `verified`, `failed`, `inconclusive`), `overallVerdict`
 *   (`verified` | `partially_verified` | `contradicted` | `unverified`) and `trustScore` (0-100).
 */
async function verify(request) {
    const timestamp = new Date().toISOString();
    const id = `verify-${Date.now()}`;
    // Create LLM query function using isolated provider
    const provider = createProvider({
        provider: request.provider,
        model: request.model,
    });
    const llmQuery = async (prompt) => {
        const response = await provider.generate([{ role: 'user', content: prompt }], [] // No tools for verification queries
        );
        if (response.type === 'message' && response.content) {
            return response.content;
        }
        return '';
    };
    // Extract claims (the conversation history is optional in the request)
    const claims = await extractClaims(request.response, request.workingDirectory, request.conversationHistory ?? [], llmQuery);
    if (claims.length === 0) {
        return {
            responseId: id,
            timestamp,
            claims: [],
            results: [],
            summary: { total: 0, verified: 0, failed: 0, inconclusive: 0 },
            overallVerdict: 'unverified',
            trustScore: 50
        };
    }
    // Generate tests
    const tests = await generateTests(claims, request.workingDirectory, llmQuery);
    // Run tests one at a time so their shell commands never overlap
    const testResults = [];
    for (const test of tests) {
        testResults.push(await runTest(test, request.workingDirectory, llmQuery));
    }
    // Map results to claims. Generated tests are not guaranteed to line up
    // with the claims index-for-index (the fallback generator only covers a
    // filtered subset), so pair by description first.
    const pairedResults = pairTestResultsWithClaims(claims, testResults);
    const results = claims.map((claim, i) => {
        const testResult = pairedResults[i];
        if (!testResult) {
            return {
                claim,
                verified: false,
                confidence: 'low',
                evidence: 'No test generated',
                method: 'skip',
                timestamp
            };
        }
        return {
            claim,
            verified: testResult.success,
            confidence: testResult.success ? 'high' : (testResult.matchedPatterns.length > 0 ? 'medium' : 'low'),
            evidence: testResult.success
                ? `Verified: ${testResult.matchedPatterns.join(', ')}`
                : `Failed: ${testResult.unmatchedPatterns.join(', ')}`,
            method: 'isolated-process',
            reasoning: testResult.llmAssessment,
            executedCode: (testResult.test.shellCommands || []).join('\n'),
            rawOutput: testResult.output.slice(0, 2000),
            error: testResult.errors || undefined,
            timestamp
        };
    });
    // Calculate summary.
    // A claim counts as failed when its test ran and at least one expectation
    // was checked and not met. (Keying this on `confidence === 'high'` can
    // never be true for an unverified result, because 'high' is only assigned
    // to successful tests, which made the 'contradicted' verdict unreachable.)
    // Everything else that is not verified is inconclusive.
    const verified = results.filter(r => r.verified).length;
    const failed = pairedResults.filter(r => r && !r.success && r.unmatchedPatterns.length > 0).length;
    const inconclusive = results.length - verified - failed;
    // Determine verdict
    let overallVerdict;
    if (failed > 0) {
        overallVerdict = 'contradicted';
    }
    else if (verified === claims.length && claims.length > 0) {
        overallVerdict = 'verified';
    }
    else if (verified > 0) {
        overallVerdict = 'partially_verified';
    }
    else {
        overallVerdict = 'unverified';
    }
    const trustScore = claims.length > 0
        ? Math.round((verified / claims.length) * 100)
        : 50;
    return {
        responseId: id,
        timestamp,
        claims,
        results,
        summary: { total: claims.length, verified, failed, inconclusive },
        overallVerdict,
        trustScore
    };
}
319
+ // ============================================================================
320
+ // PROCESS ENTRY POINT
321
+ // ============================================================================
/**
 * Process entry point: read one JSON verification request from stdin, run the
 * verification and write a single JSON document to stdout.
 *
 * Output is either the verification report (exit code 0) or
 * `{"error": true, "message": "..."}` (exit code 1). This function does not
 * reject: every failure, including a stdin read error, is reported through
 * the JSON error document.
 */
async function main() {
    let exitCode = 0;
    let body;
    try {
        // Read request from stdin
        let input = '';
        process.stdin.setEncoding('utf8');
        for await (const chunk of process.stdin) {
            input += chunk;
        }
        const request = JSON.parse(input);
        if (request.type !== 'verify') {
            throw new Error(`Unknown request type: ${request.type}`);
        }
        const report = await verify(request);
        // Serialize inside the try so a non-serializable report is also
        // reported as a JSON error.
        body = JSON.stringify(report);
    }
    catch (error) {
        exitCode = 1;
        body = JSON.stringify({
            error: true,
            message: error instanceof Error ? error.message : 'Unknown error'
        });
    }
    // Exit only from the write callback: calling process.exit() right after
    // write() can terminate the process before a piped stdout has been fully
    // written, handing the parent process truncated JSON.
    process.stdout.write(body, () => {
        process.exit(exitCode);
    });
}
348
+ main();
349
+ //# sourceMappingURL=isolatedVerifier.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"isolatedVerifier.js","sourceRoot":"","sources":["../../src/core/isolatedVerifier.ts"],"names":[],"mappings":";AACA;;;;;;;;;;;;;;GAcG;AAEH,OAAO,EAAE,cAAc,EAAE,MAAM,iCAAiC,CAAC;AACjE,OAAO,EAAE,IAAI,EAAE,MAAM,oBAAoB,CAAC;AAC1C,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAEtC,MAAM,SAAS,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;AA6DlC,+EAA+E;AAC/E,mBAAmB;AACnB,+EAA+E;AAE/E,MAAM,qBAAqB,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;8BA0BA,CAAC;AAE/B,KAAK,UAAU,aAAa,CAC1B,QAAgB,EAChB,UAAkB,EAClB,mBAA6B,EAC7B,QAA6C;IAE7C,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,qBAAqB;aACjC,OAAO,CAAC,YAAY,EAAE,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;aAC9C,OAAO,CAAC,WAAW,EAAE,mBAAmB,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;aACpE,OAAO,CAAC,eAAe,EAAE,UAAU,CAAC,CAAC;QAExC,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,MAAM,CAAC,CAAC;QACtC,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;QAC1C,IAAI,KAAK,EAAE,CAAC;YACV,OAAO,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAY,CAAC;QACzC,CAAC;IACH,CAAC;IAAC,MAAM,CAAC;QACP,eAAe;IACjB,CAAC;IAED,OAAO,EAAE,CAAC;AACZ,CAAC;AAED,+EAA+E;AAC/E,kBAAkB;AAClB,+EAA+E;AAE/E,MAAM,qBAAqB,GAAG;;;;;;;;;;;;;;;;;;;;;;;8BAuBA,CAAC;AAE/B,KAAK,UAAU,aAAa,CAC1B,MAAe,EACf,UAAkB,EAClB,QAA6C;IAE7C,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEnC,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,qBAAqB;aACjC,OAAO,CAAC,UAAU,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC;aACxD,OAAO,CAAC,eAAe,EAAE,UAAU,CAAC;aACpC,OAAO,CAAC,YAAY,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC;QAE3C,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,MAAM,CAAC,CAAC;QACtC,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;QAC1C,IAAI,KAAK,EAAE,CAAC;YACV,OAAO,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAmB,CAAC;QAChD,CAAC;IACH,CAAC;IAAC,MAAM,CAAC;QACP,8BAA8B;IAChC,CAAC;IAED,iCAAiC;IACjC,OAAO,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,UAAU,IAAI,CAAC,CAAC,CAAC,QAAQ,KAAK,UAAU,IAAI,CAAC,CAAC,QAAQ,KAAK,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QAC3G,MAAM,IAAI,GAAiB;YACzB,EAAE,EAAE,QAAQ,CAAC,EAAE;YACf,WAAW,EAAE,CAAC,CAAC,SAA
S;YACxB,aAAa,EAAE,EAAE;YACjB,gBAAgB,EAAE,CAAC,CAAC,SAAS;YAC7B,OAAO,EAAE,KAAK;SACf,CAAC;QAEF,2CAA2C;QAC3C,IAAI,CAAC,CAAC,QAAQ,KAAK,SAAS,IAAI,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;YAClD,IAAI,CAAC,aAAa,GAAG,CAAC,YAAY,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC,wCAAwC,CAAC,CAAC;YAC7F,IAAI,CAAC,eAAe,GAAG,CAAC,QAAQ,CAAC,CAAC;QACpC,CAAC;aAAM,IAAI,CAAC,CAAC,QAAQ,KAAK,MAAM,EAAE,CAAC;YACjC,IAAI,CAAC,aAAa,GAAG,CAAC,8BAA8B,CAAC,CAAC;QACxD,CAAC;QAED,OAAO,IAAI,CAAC;IACd,CAAC,CAAC,CAAC;AACL,CAAC;AAED,+EAA+E;AAC/E,iBAAiB;AACjB,+EAA+E;AAE/E,KAAK,UAAU,eAAe,CAAC,GAAW,EAAE,GAAW;IACrD,0CAA0C;IAC1C,MAAM,SAAS,GAAG,CAAC,SAAS,EAAE,QAAQ,EAAE,OAAO,EAAE,YAAY,EAAE,YAAY,EAAE,eAAe,EAAE,gBAAgB,CAAC,CAAC;IAChH,KAAK,MAAM,CAAC,IAAI,SAAS,EAAE,CAAC;QAC1B,IAAI,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC;YAAE,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,GAAG,EAAE,8BAA8B,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC;IACvF,CAAC;IAED,IAAI,CAAC;QACH,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,SAAS,CAAC,GAAG,EAAE,EAAE,GAAG,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC;QACzE,OAAO,EAAE,EAAE,EAAE,IAAI,EAAE,GAAG,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IAC5C,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,gBAAgB,EAAE,CAAC;IAC/E,CAAC;AACH,CAAC;AAYD,KAAK,UAAU,OAAO,CACpB,IAAkB,EAClB,GAAW,EACX,QAA6C;IAE7C,MAAM,MAAM,GAAe;QACzB,IAAI;QACJ,OAAO,EAAE,KAAK;QACd,MAAM,EAAE,EAAE;QACV,MAAM,EAAE,EAAE;QACV,eAAe,EAAE,EAAE;QACnB,iBAAiB,EAAE,EAAE;KACtB,CAAC;IAEF,IAAI,CAAC;QACH,qBAAqB;QACrB,IAAI,IAAI,CAAC,aAAa,IAAI,IAAI,CAAC,aAAa,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACxD,KAAK,MAAM,GAAG,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;gBACrC,MAAM,WAAW,GAAG,MAAM,eAAe,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;gBACpD,MAAM,CAAC,MAAM,IAAI,KAAK,GAAG,KAAK,WAAW,CAAC,GAAG,IAAI,CAAC;gBAClD,IAAI,CAAC,WAAW,CAAC,EAAE,EAAE,CAAC;oBACpB,MAAM,CAAC,MAAM,IAAI,GAAG,WAAW,CAAC,GAAG,IAAI,CAAC;gBAC1C,CAAC;YACH,CAAC;QACH,CAAC;QAED,iCAAiC;QACjC,IAAI,IAAI,CAAC,eAAe,EAAE,CAAC;YACzB,KAAK,MAAM,OAAO,IAAI,IAAI,CAAC,eAAe,EAAE,CAAC;gBAC3C,IAAI,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,IAAI,MAAM,C
AAC,OAAO,EAAE,GAAG,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC;oBACpF,MAAM,CAAC,eAAe,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;gBACvC,CAAC;qBAAM,CAAC;oBACN,MAAM,CAAC,iBAAiB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;gBACzC,CAAC;YACH,CAAC;QACH,CAAC;QAED,6BAA6B;QAC7B,IAAI,IAAI,CAAC,gBAAgB,EAAE,CAAC;YAC1B,MAAM,YAAY,GAAG;;YAEf,IAAI,CAAC,gBAAgB;;;;EAI/B,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC;;;sFAGwD,CAAC;YAEjF,IAAI,CAAC;gBACH,MAAM,UAAU,GAAG,MAAM,QAAQ,CAAC,YAAY,CAAC,CAAC;gBAChD,MAAM,KAAK,GAAG,UAAU,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;gBAC9C,IAAI,KAAK,EAAE,CAAC;oBACV,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAgE,CAAC;oBACnG,MAAM,CAAC,aAAa,GAAG,GAAG,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,KAAK,MAAM,CAAC,UAAU,MAAM,MAAM,CAAC,SAAS,EAAE,CAAC;oBACnG,IAAI,CAAC,MAAM,CAAC,OAAO,IAAI,MAAM,CAAC,UAAU,GAAG,EAAE,EAAE,CAAC;wBAC9C,MAAM,CAAC,iBAAiB,CAAC,IAAI,CAAC,aAAa,IAAI,CAAC,gBAAgB,EAAE,CAAC,CAAC;oBACtE,CAAC;yBAAM,CAAC;wBACN,MAAM,CAAC,eAAe,CAAC,IAAI,CAAC,aAAa,IAAI,CAAC,gBAAgB,EAAE,CAAC,CAAC;oBACpE,CAAC;gBACH,CAAC;YACH,CAAC;YAAC,MAAM,CAAC;gBACP,MAAM,CAAC,aAAa,GAAG,uBAAuB,CAAC;YACjD,CAAC;QACH,CAAC;QAED,oBAAoB;QACpB,MAAM,CAAC,OAAO,GAAG,MAAM,CAAC,iBAAiB,CAAC,MAAM,KAAK,CAAC;YACrC,CAAC,MAAM,CAAC,eAAe,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,eAAe,EAAE,MAAM,IAAI,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC,CAAC;IAEpH,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,CAAC,MAAM,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC;IACvE,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,+EAA+E;AAC/E,oBAAoB;AACpB,+EAA+E;AAE/E,KAAK,UAAU,MAAM,CAAC,OAA4B;IAChD,MAAM,SAAS,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IAC3C,MAAM,EAAE,GAAG,UAAU,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC;IAElC,oDAAoD;IACpD,MAAM,QAAQ,GAAG,cAAc,CAAC;QAC9B,QAAQ,EAAE,OAAO,CAAC,QAAkF;QACpG,KAAK,EAAE,OAAO,CAAC,KAAK;KACrB,CAAC,CAAC;IAEH,MAAM,QAAQ,GAAG,KAAK,EAAE,MAAc,EAAmB,EAAE;QACzD,MAAM,QAAQ,GAAG,MAAM,QAAQ,CAAC,QAAQ,CACtC,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,EACnC,EAAE,CAAC,oCAAoC;SACxC,CAAC;QAEF,IAAI,QAAQ,CAAC,IAAI,KAAK,
SAAS,IAAI,QAAQ,CAAC,OAAO,EAAE,CAAC;YACpD,OAAO,QAAQ,CAAC,OAAO,CAAC;QAC1B,CAAC;QACD,OAAO,EAAE,CAAC;IACZ,CAAC,CAAC;IAEF,iBAAiB;IACjB,MAAM,MAAM,GAAG,MAAM,aAAa,CAChC,OAAO,CAAC,QAAQ,EAChB,OAAO,CAAC,gBAAgB,EACxB,OAAO,CAAC,mBAAmB,EAC3B,QAAQ,CACT,CAAC;IAEF,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACxB,OAAO;YACL,UAAU,EAAE,EAAE;YACd,SAAS;YACT,MAAM,EAAE,EAAE;YACV,OAAO,EAAE,EAAE;YACX,OAAO,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,QAAQ,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE;YAC9D,cAAc,EAAE,YAAY;YAC5B,UAAU,EAAE,EAAE;SACf,CAAC;IACJ,CAAC;IAED,iBAAiB;IACjB,MAAM,KAAK,GAAG,MAAM,aAAa,CAAC,MAAM,EAAE,OAAO,CAAC,gBAAgB,EAAE,QAAQ,CAAC,CAAC;IAE9E,YAAY;IACZ,MAAM,WAAW,GAAiB,EAAE,CAAC;IACrC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC,gBAAgB,EAAE,QAAQ,CAAC,CAAC;QACvE,WAAW,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAC3B,CAAC;IAED,wBAAwB;IACxB,MAAM,OAAO,GAA8B,MAAM,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,EAAE,EAAE;QACjE,MAAM,UAAU,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;QAClC,IAAI,CAAC,UAAU,EAAE,CAAC;YAChB,OAAO;gBACL,KAAK;gBACL,QAAQ,EAAE,KAAK;gBACf,UAAU,EAAE,KAAc;gBAC1B,QAAQ,EAAE,mBAAmB;gBAC7B,MAAM,EAAE,MAAM;gBACd,SAAS;aACV,CAAC;QACJ,CAAC;QAED,OAAO;YACL,KAAK;YACL,QAAQ,EAAE,UAAU,CAAC,OAAO;YAC5B,UAAU,EAAE,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,MAAe,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,eAAe,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,QAAiB,CAAC,CAAC,CAAC,KAAc,CAAC;YAC/H,QAAQ,EAAE,UAAU,CAAC,OAAO;gBAC1B,CAAC,CAAC,aAAa,UAAU,CAAC,eAAe,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE;gBACtD,CAAC,CAAC,WAAW,UAAU,CAAC,iBAAiB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE;YACxD,MAAM,EAAE,kBAAkB;YAC1B,SAAS,EAAE,UAAU,CAAC,aAAa;YACnC,YAAY,EAAE,CAAC,UAAU,CAAC,IAAI,CAAC,aAAa,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC;YAC9D,SAAS,EAAE,UAAU,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC;YAC3C,KAAK,EAAE,UAAU,CAAC,MAAM,IAAI,SAAS;YACrC,SAAS;SACV,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,oBAAoB;IACpB,MAAM,QAAQ,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC;IACxD,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,IAAI,CAA
C,CAAC,UAAU,KAAK,MAAM,CAAC,CAAC,MAAM,CAAC;IAClF,MAAM,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,IAAI,CAAC,CAAC,UAAU,KAAK,MAAM,CAAC,CAAC,MAAM,CAAC;IAExF,oBAAoB;IACpB,IAAI,cAAoD,CAAC;IACzD,IAAI,MAAM,GAAG,CAAC,EAAE,CAAC;QACf,cAAc,GAAG,cAAc,CAAC;IAClC,CAAC;SAAM,IAAI,QAAQ,KAAK,MAAM,CAAC,MAAM,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC3D,cAAc,GAAG,UAAU,CAAC;IAC9B,CAAC;SAAM,IAAI,QAAQ,GAAG,CAAC,EAAE,CAAC;QACxB,cAAc,GAAG,oBAAoB,CAAC;IACxC,CAAC;SAAM,CAAC;QACN,cAAc,GAAG,YAAY,CAAC;IAChC,CAAC;IAED,MAAM,UAAU,GAAG,MAAM,CAAC,MAAM,GAAG,CAAC;QAClC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,QAAQ,GAAG,MAAM,CAAC,MAAM,CAAC,GAAG,GAAG,CAAC;QAC9C,CAAC,CAAC,EAAE,CAAC;IAEP,OAAO;QACL,UAAU,EAAE,EAAE;QACd,SAAS;QACT,MAAM;QACN,OAAO;QACP,OAAO,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,YAAY,EAAE;QACjE,cAAc;QACd,UAAU;KACX,CAAC;AACJ,CAAC;AAED,+EAA+E;AAC/E,sBAAsB;AACtB,+EAA+E;AAE/E,KAAK,UAAU,IAAI;IACjB,0BAA0B;IAC1B,IAAI,KAAK,GAAG,EAAE,CAAC;IAEf,OAAO,CAAC,KAAK,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC;IAElC,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;QACxC,KAAK,IAAI,KAAK,CAAC;IACjB,CAAC;IAED,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAwB,CAAC;QAEzD,IAAI,OAAO,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;YAC9B,MAAM,IAAI,KAAK,CAAC,yBAAyB,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC;QAC3D,CAAC;QAED,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,OAAO,CAAC,CAAC;QAErC,kCAAkC;QAClC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC;QAC7C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,uBAAuB;QACvB,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC;YAClC,KAAK,EAAE,IAAI;YACX,OAAO,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe;SAClE,CAAC,CAAC,CAAC;QACJ,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC;AAED,IAAI,EAAE,CAAC"}
@@ -30,6 +30,8 @@ export interface IsolatedRuntimeResult {
30
30
  export interface VerificationContext {
31
31
  workingDirectory: string;
32
32
  conversationHistory?: string[];
33
+ provider: string;
34
+ model: string;
33
35
  llmVerifier?: (prompt: string) => Promise<string>;
34
36
  }
35
37
  export interface Claim {
@@ -71,8 +73,15 @@ export interface VerificationReport {
71
73
  */
72
74
  export declare function runIsolatedTest(test: IsolatedRuntimeTest, cwd: string, llmVerifier?: (prompt: string) => Promise<string>): Promise<IsolatedRuntimeResult>;
73
75
  /**
74
- * Verify an assistant response using isolated runtime tests.
75
- * This is the main entry point for verification.
76
+ * Verify an assistant response using a completely isolated process.
77
+ *
78
+ * This spawns a separate Node.js process to run all verification:
79
+ * - Separate memory space from main CLI
80
+ * - Separate event loop
81
+ * - Independent error handling
82
+ * - No shared state
83
+ *
84
+ * This ensures verification cannot interfere with the main process and vice versa.
76
85
  */
77
86
  export declare function verifyResponse(response: string, ctx: VerificationContext, responseId?: string): Promise<VerificationReport>;
78
87
  /**
@@ -1 +1 @@
1
- {"version":3,"file":"responseVerifier.d.ts","sourceRoot":"","sources":["../../src/core/responseVerifier.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAaH,MAAM,WAAW,mBAAmB;IAClC,EAAE,EAAE,MAAM,CAAC;IACX,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;IACzB,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;IAC3B,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,aAAa,CAAC,EAAE,OAAO,CAAC;CACzB;AAED,MAAM,WAAW,qBAAqB;IACpC,IAAI,EAAE,mBAAmB,CAAC;IAC1B,OAAO,EAAE,OAAO,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,QAAQ,EAAE,MAAM,CAAC;IACjB,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,iBAAiB,EAAE,MAAM,EAAE,CAAC;IAC5B,aAAa,CAAC,EAAE,MAAM,CAAC;CACxB;AAED,MAAM,WAAW,mBAAmB;IAClC,gBAAgB,EAAE,MAAM,CAAC;IACzB,mBAAmB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC/B,WAAW,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC;CACnD;AAED,MAAM,WAAW,KAAK;IACpB,EAAE,EAAE,MAAM,CAAC;IACX,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,OAAO,CAAC;IACpB,QAAQ,EAAE,UAAU,GAAG,MAAM,GAAG,QAAQ,GAAG,KAAK,CAAC;IACjD,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CAClC;AAED,MAAM,WAAW,uBAAuB;IACtC,KAAK,EAAE,KAAK,CAAC;IACb,QAAQ,EAAE,OAAO,CAAC;IAClB,UAAU,EAAE,MAAM,GAAG,QAAQ,GAAG,KAAK,CAAC;IACtC,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,kBAAkB;IACjC,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,OAAO,EAAE,uBAAuB,EAAE,CAAC;IACnC,OAAO,EAAE;QACP,KAAK,EAAE,MAAM,CAAC;QACd,QAAQ,EAAE,MAAM,CAAC;QACjB,MAAM,EAAE,MAAM,CAAC;QACf,YAAY,EAAE,MAAM,CAAC;KACtB,CAAC;IACF,cAAc,EAAE,UAAU,GAAG,oBAAoB,GAAG,cAAc,GAAG,YAAY,CAAC;IAClF,UAAU,EAAE,MAAM,CAAC;CACpB;AAgHD;;GAEG;AACH,wBAAsB,eAAe,CACnC,IAAI,EAAE,mBAAmB,EACzB,GAAG,EAAE,MAAM,EACX,WAAW,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,GAChD,OAAO,CAAC,qBAAqB,CAAC,CAwGhC;AA4ID;;;GAGG;AACH,wBAAsB,cAAc,
CAClC,QAAQ,EAAE,MAAM,EAChB,GAAG,EAAE,mBAAmB,EACxB,UAAU,CAAC,EAAE,MAAM,GAClB,OAAO,CAAC,kBAAkB,CAAC,CA0F7B;AAED;;GAEG;AACH,wBAAgB,wBAAwB,CAAC,MAAM,EAAE,kBAAkB,GAAG,MAAM,CA+B3E;AAED;;GAEG;AACH,wBAAsB,WAAW,CAC/B,QAAQ,EAAE,MAAM,EAChB,GAAG,EAAE,mBAAmB,GACvB,OAAO,CAAC;IAAE,UAAU,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,CAAC,CAoBlD"}
1
+ {"version":3,"file":"responseVerifier.d.ts","sourceRoot":"","sources":["../../src/core/responseVerifier.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAaH,MAAM,WAAW,mBAAmB;IAClC,EAAE,EAAE,MAAM,CAAC;IACX,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;IACzB,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;IAC3B,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,aAAa,CAAC,EAAE,OAAO,CAAC;CACzB;AAED,MAAM,WAAW,qBAAqB;IACpC,IAAI,EAAE,mBAAmB,CAAC;IAC1B,OAAO,EAAE,OAAO,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,QAAQ,EAAE,MAAM,CAAC;IACjB,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,iBAAiB,EAAE,MAAM,EAAE,CAAC;IAC5B,aAAa,CAAC,EAAE,MAAM,CAAC;CACxB;AAED,MAAM,WAAW,mBAAmB;IAClC,gBAAgB,EAAE,MAAM,CAAC;IACzB,mBAAmB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC/B,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IAEd,WAAW,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC;CACnD;AAED,MAAM,WAAW,KAAK;IACpB,EAAE,EAAE,MAAM,CAAC;IACX,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,OAAO,CAAC;IACpB,QAAQ,EAAE,UAAU,GAAG,MAAM,GAAG,QAAQ,GAAG,KAAK,CAAC;IACjD,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CAClC;AAED,MAAM,WAAW,uBAAuB;IACtC,KAAK,EAAE,KAAK,CAAC;IACb,QAAQ,EAAE,OAAO,CAAC;IAClB,UAAU,EAAE,MAAM,GAAG,QAAQ,GAAG,KAAK,CAAC;IACtC,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,kBAAkB;IACjC,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,OAAO,EAAE,uBAAuB,EAAE,CAAC;IACnC,OAAO,EAAE;QACP,KAAK,EAAE,MAAM,CAAC;QACd,QAAQ,EAAE,MAAM,CAAC;QACjB,MAAM,EAAE,MAAM,CAAC;QACf,YAAY,EAAE,MAAM,CAAC;KACtB,CAAC;IACF,cAAc,EAAE,UAAU,GAAG,oBAAoB,GAAG,cAAc,GAAG,YAAY,CAAC;IAClF,UAAU,EAAE,MAAM,CAAC;CACpB;AAgHD;;GAEG;AACH,wBAAsB,eAAe,CACnC,IAAI,EAAE,mBAAmB,EACzB,GAAG,EAAE,MAAM,EACX,WAAW,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,GAChD,OAAO,CAA
C,qBAAqB,CAAC,CAwGhC;AA4ID;;;;;;;;;;GAUG;AACH,wBAAsB,cAAc,CAClC,QAAQ,EAAE,MAAM,EAChB,GAAG,EAAE,mBAAmB,EACxB,UAAU,CAAC,EAAE,MAAM,GAClB,OAAO,CAAC,kBAAkB,CAAC,CAwH7B;AAED;;GAEG;AACH,wBAAgB,wBAAwB,CAAC,MAAM,EAAE,kBAAkB,GAAG,MAAM,CA+B3E;AAED;;GAEG;AACH,wBAAsB,WAAW,CAC/B,QAAQ,EAAE,MAAM,EAChB,GAAG,EAAE,mBAAmB,GACvB,OAAO,CAAC;IAAE,UAAU,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,CAAC,CAoBlD"}
@@ -68,7 +68,7 @@ async function spawnIsolatedCLI(cwd, timeout = 60000) {
68
68
  */
69
69
  async function sendCommand(cli, command, waitMs = 5000) {
70
70
  const outputBefore = cli.output.length;
71
- cli.stdin.write(command + '\n');
71
+ cli.stdin.write(`${command}\n`);
72
72
  await new Promise(resolve => {
73
73
  let lastLength = cli.output.length;
74
74
  const checkInterval = setInterval(() => {
@@ -135,7 +135,7 @@ export async function runIsolatedTest(test, cwd, llmVerifier) {
135
135
  const shellResult = await runShellVerification(cmd, cwd);
136
136
  result.output += `$ ${cmd}\n${shellResult.out}\n`;
137
137
  if (!shellResult.ok) {
138
- result.errors += shellResult.out + '\n';
138
+ result.errors += `${shellResult.out}\n`;
139
139
  }
140
140
  }
141
141
  }
@@ -333,15 +333,26 @@ async function generateTests(claims, ctx) {
333
333
  // MAIN VERIFICATION API
334
334
  // ============================================================================
335
335
  /**
336
- * Verify an assistant response using isolated runtime tests.
337
- * This is the main entry point for verification.
336
+ * Verify an assistant response using a completely isolated process.
337
+ *
338
+ * This spawns a separate Node.js process to run all verification:
339
+ * - Separate memory space from main CLI
340
+ * - Separate event loop
341
+ * - Independent error handling
342
+ * - No shared state
343
+ *
344
+ * This ensures verification cannot interfere with the main process and vice versa.
338
345
  */
339
346
  export async function verifyResponse(response, ctx, responseId) {
340
347
  const timestamp = new Date().toISOString();
341
348
  const id = responseId || `verify-${Date.now()}`;
342
- // Extract claims from response
343
- const claims = await extractClaims(response, ctx);
344
- if (claims.length === 0) {
349
+ // Find the isolated verifier script
350
+ const verifierPath = path.join(ctx.workingDirectory, 'dist/core/isolatedVerifier.js');
351
+ try {
352
+ await fs.access(verifierPath);
353
+ }
354
+ catch {
355
+ // Fallback: return unverified if script not found
345
356
  return {
346
357
  responseId: id,
347
358
  timestamp,
@@ -352,73 +363,97 @@ export async function verifyResponse(response, ctx, responseId) {
352
363
  trustScore: 50
353
364
  };
354
365
  }
355
- // Generate isolated tests for claims
356
- const tests = await generateTests(claims, ctx);
357
- // Run all isolated tests
358
- const testResults = [];
359
- for (const test of tests) {
360
- const result = await runIsolatedTest(test, ctx.workingDirectory, ctx.llmVerifier);
361
- testResults.push(result);
362
- }
363
- // Map test results back to claims
364
- const results = claims.map((claim, i) => {
365
- const testResult = testResults[i];
366
- if (!testResult) {
367
- return {
368
- claim,
369
- verified: false,
370
- confidence: 'low',
371
- evidence: 'No test generated',
372
- method: 'skip',
373
- timestamp
374
- };
375
- }
376
- return {
377
- claim,
378
- verified: testResult.success,
379
- confidence: testResult.success ? 'high' : (testResult.matchedPatterns.length > 0 ? 'medium' : 'low'),
380
- evidence: testResult.success
381
- ? `Verified in isolated runtime: ${testResult.matchedPatterns.join(', ')}`
382
- : `Failed: ${testResult.unmatchedPatterns.join(', ')}`,
383
- method: 'isolated-runtime',
384
- reasoning: testResult.llmAssessment,
385
- executedCode: [...(testResult.test.shellCommands || []), ...(testResult.test.commands || [])].join('\n'),
386
- rawOutput: testResult.output.slice(0, 2000),
387
- error: testResult.errors || undefined,
388
- timestamp
389
- };
390
- });
391
- // Calculate summary
392
- const verified = results.filter(r => r.verified).length;
393
- const failed = results.filter(r => !r.verified && r.confidence === 'high').length;
394
- const inconclusive = results.filter(r => !r.verified && r.confidence !== 'high').length;
395
- // Determine verdict
396
- let overallVerdict;
397
- if (failed > 0) {
398
- overallVerdict = 'contradicted';
399
- }
400
- else if (verified === claims.length && claims.length > 0) {
401
- overallVerdict = 'verified';
402
- }
403
- else if (verified > 0) {
404
- overallVerdict = 'partially_verified';
405
- }
406
- else {
407
- overallVerdict = 'unverified';
408
- }
409
- // Calculate trust score
410
- const trustScore = claims.length > 0
411
- ? Math.round((verified / claims.length) * 100)
412
- : 50;
413
- return {
414
- responseId: id,
415
- timestamp,
416
- claims,
417
- results,
418
- summary: { total: claims.length, verified, failed, inconclusive },
419
- overallVerdict,
420
- trustScore
366
+ // Build request for isolated process
367
+ const request = {
368
+ type: 'verify',
369
+ response,
370
+ workingDirectory: ctx.workingDirectory,
371
+ conversationHistory: ctx.conversationHistory || [],
372
+ provider: ctx.provider,
373
+ model: ctx.model,
421
374
  };
375
+ return new Promise((resolve) => {
376
+ // Spawn completely isolated Node.js process
377
+ const child = spawn('node', [verifierPath], {
378
+ cwd: ctx.workingDirectory,
379
+ env: {
380
+ ...process.env,
381
+ EROSOLAR_ISOLATED_VERIFIER: '1',
382
+ NODE_OPTIONS: '--max-old-space-size=512', // Limit memory for safety
383
+ },
384
+ stdio: ['pipe', 'pipe', 'pipe'],
385
+ });
386
+ let stdout = '';
387
+ let stderr = '';
388
+ child.stdout.on('data', (data) => {
389
+ stdout += data.toString();
390
+ });
391
+ child.stderr.on('data', (data) => {
392
+ stderr += data.toString();
393
+ });
394
+ // Set timeout for verification (2 minutes max)
395
+ const timeout = setTimeout(() => {
396
+ child.kill('SIGTERM');
397
+ resolve({
398
+ responseId: id,
399
+ timestamp,
400
+ claims: [],
401
+ results: [],
402
+ summary: { total: 0, verified: 0, failed: 0, inconclusive: 0 },
403
+ overallVerdict: 'unverified',
404
+ trustScore: 50
405
+ });
406
+ }, 120000);
407
+ child.on('close', (_code) => {
408
+ clearTimeout(timeout);
409
+ try {
410
+ // Parse result from isolated process
411
+ const result = JSON.parse(stdout);
412
+ if (result.error) {
413
+ // Process returned error
414
+ resolve({
415
+ responseId: id,
416
+ timestamp,
417
+ claims: [],
418
+ results: [],
419
+ summary: { total: 0, verified: 0, failed: 0, inconclusive: 0 },
420
+ overallVerdict: 'unverified',
421
+ trustScore: 50
422
+ });
423
+ }
424
+ else {
425
+ resolve(result);
426
+ }
427
+ }
428
+ catch {
429
+ // JSON parse failed
430
+ resolve({
431
+ responseId: id,
432
+ timestamp,
433
+ claims: [],
434
+ results: [],
435
+ summary: { total: 0, verified: 0, failed: 0, inconclusive: 0 },
436
+ overallVerdict: 'unverified',
437
+ trustScore: 50
438
+ });
439
+ }
440
+ });
441
+ child.on('error', () => {
442
+ clearTimeout(timeout);
443
+ resolve({
444
+ responseId: id,
445
+ timestamp,
446
+ claims: [],
447
+ results: [],
448
+ summary: { total: 0, verified: 0, failed: 0, inconclusive: 0 },
449
+ overallVerdict: 'unverified',
450
+ trustScore: 50
451
+ });
452
+ });
453
+ // Send request to isolated process via stdin
454
+ child.stdin.write(JSON.stringify(request));
455
+ child.stdin.end();
456
+ });
422
457
  }
423
458
  /**
424
459
  * Format verification report for display