erosolar-cli 1.7.91 → 1.7.94

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,601 +1,129 @@
- #!/usr/bin/env node
  /**
- * Isolated Verification Runner
+ * Isolated Verification System
  *
- * This script runs in a completely separate Node.js process to verify
- * assistant responses. It receives verification requests via stdin and
- * outputs results via stdout.
- *
- * This ensures verification is completely isolated from the main CLI process:
- * - Separate memory space
- * - Separate event loop
- * - Independent error handling
- * - No shared state with main process
- *
- * @license MIT
+ * Provides runtime isolation for verification tasks by running them in separate processes.
+ * This ensures that verification failures don't affect the main CLI process.
  */
- import { createProvider } from '../providers/providerFactory.js';
- import { exec } from 'node:child_process';
- import { promisify } from 'node:util';
- import { runVerificationTests } from './cliTestHarness.js';
- const execAsync = promisify(exec);
- // ============================================================================
- // CLAIM EXTRACTION
- // ============================================================================
- const EXTRACT_CLAIMS_PROMPT = `Extract ALL verifiable claims from this AI assistant response.
-
- RESPONSE:
- ---
- {RESPONSE}
- ---
-
- CONTEXT: {CONTEXT}
- WORKING_DIR: {WORKING_DIR}
-
- For each claim, determine:
- 1. What specific assertion is being made
- 2. Category: file_op (created/modified/deleted files), code (compiles/tests pass), command (executed successfully), state (something changed), behavior (feature works), fact (verifiable truth)
- 3. How it can be verified (shell command, file check, etc.)
- 4. Priority: critical (must verify), high (should verify), medium (nice to verify), low (optional)
-
- Return JSON array:
- [{
- "id": "c1",
- "statement": "the specific claim",
- "category": "file_op|code|command|state|behavior|fact",
- "verifiable": true,
- "priority": "critical|high|medium|low",
- "context": {"path": "/path/if/relevant", "command": "if relevant"}
- }]
-
- Output ONLY valid JSON array.`;
- async function extractClaims(response, workingDir, conversationHistory, llmQuery) {
- // Try LLM-based extraction first
- try {
- const prompt = EXTRACT_CLAIMS_PROMPT
- .replace('{RESPONSE}', response.slice(0, 8000))
- .replace('{CONTEXT}', conversationHistory.slice(-3).join('\n') || '')
- .replace('{WORKING_DIR}', workingDir);
- const result = await llmQuery(prompt);
- const match = result.match(/\[[\s\S]*\]/);
- if (match) {
- const claims = JSON.parse(match[0]);
- if (claims.length > 0) {
- return claims;
- }
- }
- }
- catch (err) {
- // LLM extraction failed, fall through to pattern-based
- process.stderr.write(`LLM claim extraction failed: ${err instanceof Error ? err.message : 'unknown'}\n`);
- }
- // Fallback: Pattern-based claim extraction
- return extractClaimsWithPatterns(response, workingDir);
- }
- /**
- * Fallback pattern-based claim extraction when LLM is unavailable
- */
- function extractClaimsWithPatterns(response, workingDir) {
- const claims = [];
- let claimId = 1;
- // Pattern 1: File creation/modification claims
- // "created src/foo.ts", "wrote to file.js", "Updated package.json"
- const filePatterns = [
- /(?:created|wrote|updated|modified|edited|added)\s+(?:file\s+)?[`"']?([\/\w\-\.]+\.\w+)[`"']?/gi,
- /(?:src|dist|lib|test)\/[\w\-\/]+\.\w+/g,
- ];
- for (const pattern of filePatterns) {
- const matches = response.matchAll(pattern);
- for (const match of matches) {
- const filePath = match[1] || match[0];
- if (filePath && !claims.some(c => c.context['path'] === filePath)) {
- claims.push({
- id: `c${claimId++}`,
- statement: `File exists: ${filePath}`,
- category: 'file_op',
- verifiable: true,
- priority: 'high',
- context: { path: filePath.startsWith('/') ? filePath : `${workingDir}/${filePath}` }
- });
- }
- }
- }
- // Pattern 2: Test success claims
- // "all tests pass", "12/12 tests successful", "tests passing"
- const testPatterns = [
- /(\d+)\/\1\s+tests?\s+(?:pass|success)/i,
- /all\s+tests?\s+pass/i,
- /tests?\s+(?:are\s+)?passing/i,
- /✅.*tests?\s+pass/i,
- ];
- for (const pattern of testPatterns) {
- if (pattern.test(response)) {
- claims.push({
- id: `c${claimId++}`,
- statement: 'All tests are passing',
- category: 'code',
- verifiable: true,
- priority: 'critical',
- context: { command: 'npm test' }
- });
- break;
- }
- }
- // Pattern 3: Build success claims
- // "build succeeded", "compiled successfully"
- const buildPatterns = [
- /build\s+succeed/i,
- /compil(?:ed|ation)\s+success/i,
- /✅.*build/i,
- ];
- for (const pattern of buildPatterns) {
- if (pattern.test(response)) {
- claims.push({
- id: `c${claimId++}`,
- statement: 'Build succeeds',
- category: 'code',
- verifiable: true,
- priority: 'critical',
- context: { command: 'npm run build' }
- });
- break;
- }
- }
- // Pattern 4: Implementation complete claims
- // "implemented", "created", "added feature"
- const implementPatterns = [
- /(?:successfully\s+)?implement(?:ed|ation)/i,
- /feature\s+(?:is\s+)?(?:now\s+)?(?:complete|ready|working)/i,
- /integration\s+ready/i,
- /refactor\s+(?:successfully\s+)?completed?/i,
- /delivered/i,
- ];
- for (const pattern of implementPatterns) {
- if (pattern.test(response)) {
- claims.push({
- id: `c${claimId++}`,
- statement: 'Implementation is complete and working',
- category: 'behavior',
- verifiable: true,
- priority: 'high',
- context: {}
- });
- break;
- }
- }
- // Pattern 5: Paste/input handling claims
- // "paste", "multi-line input", "input handling"
- const pastePatterns = [
- /(?:paste|pasted|pasting).*(?:work|handle|process)/i,
- /multi[\s-]?line\s+(?:paste|input)/i,
- /(?:block|chunk)\s+description/i,
- /input\s+handling/i,
- /graceful(?:ly)?\s+(?:handle|sent|submit)/i,
- ];
- for (const pattern of pastePatterns) {
- if (pattern.test(response)) {
- claims.push({
- id: `c${claimId++}`,
- statement: 'Paste handling feature works correctly',
- category: 'cli_behavior',
- verifiable: true,
- priority: 'high',
- context: {
- testType: 'cli_interactive',
- feature: 'paste_handling'
- }
+ export class IsolatedVerifier {
+ /**
+ * Verify task completion in an isolated process
+ */
+ async verifyTaskCompletion(taskName, config) {
+ const startTime = Date.now();
+ try {
+ const scriptPath = await this.createVerificationScript(config);
+ const { exec } = await import('node:child_process');
+ const { promisify } = await import('node:util');
+ const execAsync = promisify(exec);
+ const result = await execAsync(`node "${scriptPath}"`, {
+ timeout: 30000
  });
- break;
+ const duration = Date.now() - startTime;
+ return {
+ success: true,
+ report: this.generateReport(taskName, config, result.stdout, result.stderr, duration),
+ duration
+ };
  }
- }
- // Pattern 6: Specific file path mentions with context suggesting they exist
- const specificFilePattern = /[`"']?((?:src|dist|lib|test|scripts)\/[\w\-\/\.]+)[`"']?/g;
- const fileMatches = response.matchAll(specificFilePattern);
- for (const match of fileMatches) {
- const filePath = match[1];
- if (filePath && !claims.some(c => c.context['path']?.toString().includes(filePath || ''))) {
- claims.push({
- id: `c${claimId++}`,
- statement: `Referenced file exists: ${filePath}`,
- category: 'file_op',
- verifiable: true,
- priority: 'medium',
- context: { path: `${workingDir}/${filePath}` }
- });
+ catch (error) {
+ const duration = Date.now() - startTime;
+ return {
+ success: false,
+ report: this.generateErrorReport(taskName, config, error, duration),
+ error: error.message,
+ duration
+ };
  }
  }
- return claims.slice(0, 10); // Limit to 10 claims
- }
- // ============================================================================
- // TEST GENERATION
- // ============================================================================
- const GENERATE_TESTS_PROMPT = `Generate isolated runtime tests for these claims.
-
- CLAIMS:
- {CLAIMS}
+ /**
+ * Create a verification script that runs in isolation
+ */
+ async createVerificationScript(config) {
+ const fs = await import('node:fs');
+ const os = await import('node:os');
+ const path = await import('node:path');
+ const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'isolated-verification-'));
+ const scriptPath = path.join(tempDir, 'verification.js');
+ const scriptContent = this.generateVerificationScript(config);
+ fs.writeFileSync(scriptPath, scriptContent, 'utf8');
+ return scriptPath;
+ }
+ /**
+ * Generate the verification script content
+ */
+ generateVerificationScript(config) {
+ const fileChecks = config.expectedFiles.map(file => ` if (!require('fs').existsSync("${file}")) {
+ console.error('MISSING FILE: ${file}');
+ process.exit(1);
+ }`).join('\n');
+ const commandExecutions = config.verificationCommands.map(cmd => ` try {
+ const { execSync } = require('child_process');
+ execSync("${cmd}", { stdio: 'inherit' });
+ } catch (error) {
+ console.error('COMMAND FAILED: ${cmd}');
+ console.error(error.message);
+ process.exit(1);
+ }`).join('\n');
+ return `#!/usr/bin/env node
+ /**
+ * Isolated Verification Script
+ * Generated by Erosolar CLI
+ */

- WORKING_DIR: {WORKING_DIR}
- PLATFORM: {PLATFORM}
+ console.log('ISOLATED VERIFICATION REPORT');
+ console.log('=============================');
+ console.log('Task: ${config.taskDescription}');
+ console.log('');

- For each claim, generate a test that verifies it using:
- - Shell commands (for file checks, git status, etc.)
- - Expected output patterns
+ // File existence checks
+ ${fileChecks}

- Return JSON array:
- [{
- "id": "test-1",
- "description": "what we're testing",
- "shellCommands": ["ls -la path", "cat file"],
- "expectedOutputs": ["pattern1", "pattern2"],
- "expectedBehavior": "description for LLM assessment",
- "timeout": 30000
- }]
+ // Command executions
+ ${commandExecutions}

- Use READ-ONLY commands only. No destructive operations.
- Output ONLY valid JSON array.`;
- async function generateTests(claims, workingDir, llmQuery) {
- if (claims.length === 0)
- return [];
- try {
- const prompt = GENERATE_TESTS_PROMPT
- .replace('{CLAIMS}', JSON.stringify(claims.slice(0, 10)))
- .replace('{WORKING_DIR}', workingDir)
- .replace('{PLATFORM}', process.platform);
- const result = await llmQuery(prompt);
- const match = result.match(/\[[\s\S]*\]/);
- if (match) {
- return JSON.parse(match[0]);
- }
- }
- catch {
- // Fall through to basic tests
- }
- // Fallback: generate basic tests for all verifiable claims
- return generateBasicTests(claims, workingDir);
- }
- /**
- * Generate basic tests without LLM assistance
- */
- function generateBasicTests(claims, workingDir) {
- const tests = [];
- for (const claim of claims.filter(c => c.verifiable)) {
- const test = {
- id: `test-${tests.length + 1}`,
- description: claim.statement,
- shellCommands: [],
- expectedOutputs: [],
- timeout: 30000
- };
- switch (claim.category) {
- case 'file_op': {
- const filePath = claim.context['path'];
- if (filePath) {
- // Check if file exists
- test.shellCommands = [`test -f "${filePath}" && echo "FILE_EXISTS" || echo "FILE_NOT_FOUND"`];
- test.expectedOutputs = ['FILE_EXISTS'];
- }
- break;
- }
- case 'code': {
- const command = claim.context['command'];
- if (command === 'npm test') {
- test.shellCommands = [`cd "${workingDir}" && npm test 2>&1 | tail -20`];
- test.expectedOutputs = ['pass', 'passing', '0 fail'];
- }
- else if (command === 'npm run build') {
- test.shellCommands = [`cd "${workingDir}" && npm run build 2>&1`];
- test.expectedOutputs = ['postbuild', 'success']; // tsc typically outputs nothing on success
- }
- else {
- test.shellCommands = [`cd "${workingDir}" && npm run build 2>&1 | tail -10`];
- }
- break;
- }
- case 'behavior': {
- // For behavior claims, check that build passes as a proxy
- test.shellCommands = [`cd "${workingDir}" && npm run build 2>&1 | tail -5`];
- test.expectedOutputs = ['postbuild'];
- break;
- }
- case 'cli_behavior': {
- // For CLI behavior claims, run relevant tests based on feature
- const feature = claim.context['feature'];
- if (feature === 'paste_handling') {
- // Run paste-specific tests via npm test with filter
- test.shellCommands = [
- // First run any paste/input related tests
- `cd "${workingDir}" && npm test -- --testPathPattern="(paste|input|multiLine)" --passWithNoTests 2>&1 | tail -30`,
- // Also run the RobustInputProcessor tests specifically
- `cd "${workingDir}" && npm test -- --testPathPattern="robustInputProcessor" --passWithNoTests 2>&1 | tail -20`,
- ];
- test.expectedOutputs = ['pass', 'passing'];
- test.timeout = 60000;
- test.expectedBehavior = 'Paste handling tests pass and verify multi-line input is processed correctly';
- }
- else {
- // Generic CLI behavior - run build
- test.shellCommands = [`cd "${workingDir}" && npm run build 2>&1 | tail -5`];
- test.expectedOutputs = ['postbuild'];
- }
- break;
- }
- default: {
- // Generic check - just verify the project builds
- test.shellCommands = [`cd "${workingDir}" && npm run build 2>&1 | tail -5`];
- }
- }
- if (test.shellCommands && test.shellCommands.length > 0) {
- tests.push(test);
- }
- }
- return tests.slice(0, 5); // Limit to 5 tests for performance
- }
- // ============================================================================
- // TEST EXECUTION
- // ============================================================================
- /**
- * Run PTY-based CLI verification for behavior claims
- * @internal - Reserved for future use when PTY-based verification is needed
- */
- async function _runCLIBehaviorTest(claim, cwd) {
- const feature = claim.context['feature'] || 'generic';
- try {
- const verification = await runVerificationTests(feature, cwd);
- // Convert PTY test results to our TestResult format
- const test = {
- id: `cli-behavior-${feature}`,
- description: claim.statement,
- timeout: 60000,
- };
- const result = {
- test,
- success: verification.passed,
- output: verification.results.map(r => r.output).join('\n---\n'),
- errors: verification.results.flatMap(r => r.errors).join('\n'),
- matchedPatterns: verification.passed ? [verification.summary] : [],
- unmatchedPatterns: verification.passed ? [] : [verification.summary],
- llmAssessment: `PTY Test: ${verification.summary}`,
- };
- return result;
- }
- catch (err) {
- const test = {
- id: `cli-behavior-${feature}`,
- description: claim.statement,
- };
- return {
- test,
- success: false,
- output: '',
- errors: err instanceof Error ? err.message : String(err),
- matchedPatterns: [],
- unmatchedPatterns: ['PTY test failed to run'],
- };
- }
- }
- async function runShellCommand(cmd, cwd) {
- // Safety check - block dangerous commands
- const dangerous = [/\brm\s/i, /rmdir/i, /sudo/i, /chmod\s*7/i, /eval\s*\(/i, /DROP\s+TABLE/i, /DELETE\s+FROM/i];
- for (const p of dangerous) {
- if (p.test(cmd))
- return { ok: false, out: `Blocked dangerous command: ${p.source}` };
- }
- try {
- const { stdout, stderr } = await execAsync(cmd, { cwd, timeout: 30000 });
- return { ok: true, out: stdout + stderr };
- }
- catch (e) {
- return { ok: false, out: e instanceof Error ? e.message : 'Command failed' };
- }
- }
- async function runTest(test, cwd, llmQuery) {
- const result = {
- test,
- success: false,
- output: '',
- errors: '',
- matchedPatterns: [],
- unmatchedPatterns: []
- };
- try {
- // Run shell commands
- if (test.shellCommands && test.shellCommands.length > 0) {
- for (const cmd of test.shellCommands) {
- const shellResult = await runShellCommand(cmd, cwd);
- result.output += `$ ${cmd}\n${shellResult.out}\n`;
- if (!shellResult.ok) {
- result.errors += `${shellResult.out}\n`;
- }
- }
- }
- // Check expected output patterns
- if (test.expectedOutputs) {
- for (const pattern of test.expectedOutputs) {
- if (result.output.includes(pattern) || new RegExp(pattern, 'i').test(result.output)) {
- result.matchedPatterns.push(pattern);
- }
- else {
- result.unmatchedPatterns.push(pattern);
- }
- }
- }
- // LLM assessment of behavior
- if (test.expectedBehavior) {
- const assessPrompt = `Assess if this output demonstrates the expected behavior.
+ console.log('');
+ console.log('VERIFICATION COMPLETED SUCCESSFULLY');
+ `;
+ }
+ /**
+ * Generate a success report
+ */
+ generateReport(taskName, config, stdout, stderr, duration) {
+ return `ISOLATED VERIFICATION REPORT
+ =============================
+ Task: ${taskName}
+ Description: ${config.taskDescription}
+ Duration: ${duration}ms
+ Status: SUCCESS

- EXPECTED: ${test.expectedBehavior}
+ VERIFICATION STEPS:
+ ${config.verificationCommands.map((cmd, i) => ` ${i + 1}. ${cmd}`).join('\n')}

  OUTPUT:
- ---
- ${result.output.slice(0, 4000)}
- ---
+ ${stdout}
+ ${stderr ? `ERRORS:\n${stderr}` : ''}
+ `;
+ }
+ /**
+ * Generate an error report
+ */
+ generateErrorReport(taskName, config, error, duration) {
+ return `ISOLATED VERIFICATION REPORT
+ =============================
+ Task: ${taskName}
+ Description: ${config.taskDescription}
+ Duration: ${duration}ms
+ Status: FAILED

- Return JSON: {"matches": true/false, "confidence": 0-100, "reasoning": "explanation"}`;
- try {
- const assessment = await llmQuery(assessPrompt);
- const match = assessment.match(/\{[\s\S]*\}/);
- if (match) {
- const parsed = JSON.parse(match[0]);
- result.llmAssessment = `${parsed.matches ? '✅' : '❌'} [${parsed.confidence}%] ${parsed.reasoning}`;
- if (!parsed.matches || parsed.confidence < 70) {
- result.unmatchedPatterns.push(`behavior: ${test.expectedBehavior}`);
- }
- else {
- result.matchedPatterns.push(`behavior: ${test.expectedBehavior}`);
- }
- }
- }
- catch {
- result.llmAssessment = 'LLM assessment failed';
- }
- }
- // Determine success
- result.success = result.unmatchedPatterns.length === 0 &&
- (result.matchedPatterns.length > 0 || (!test.expectedOutputs?.length && !test.expectedBehavior));
- }
- catch (err) {
- result.errors = err instanceof Error ? err.message : 'Unknown error';
- }
- return result;
- }
- // ============================================================================
- // MAIN VERIFICATION
- // ============================================================================
- async function verify(request) {
- const timestamp = new Date().toISOString();
- const id = `verify-${Date.now()}`;
- // Create LLM query function using isolated provider
- const provider = createProvider({
- provider: request.provider,
- model: request.model,
- });
- const llmQuery = async (prompt) => {
- const response = await provider.generate([{ role: 'user', content: prompt }], [] // No tools for verification queries
- );
- if (response.type === 'message' && response.content) {
- return response.content;
- }
- return '';
- };
- // Extract claims
- const claims = await extractClaims(request.response, request.workingDirectory, request.conversationHistory, llmQuery);
- if (claims.length === 0) {
- return {
- responseId: id,
- timestamp,
- claims: [],
- results: [],
- summary: { total: 0, verified: 0, failed: 0, inconclusive: 0 },
- overallVerdict: 'unverified',
- trustScore: 50
- };
- }
- // Generate tests for non-CLI claims
- const nonCliBehaviorClaims = claims.filter(c => c.category !== 'cli_behavior');
- const cliBehaviorClaims = claims.filter(c => c.category === 'cli_behavior');
- const tests = await generateTests(nonCliBehaviorClaims, request.workingDirectory, llmQuery);
- // Run shell-based tests for non-CLI claims
- const testResults = [];
- for (const test of tests) {
- const result = await runTest(test, request.workingDirectory, llmQuery);
- testResults.push(result);
- }
- // Run PTY-based tests for CLI behavior claims
- for (const claim of cliBehaviorClaims) {
- const result = await _runCLIBehaviorTest(claim, request.workingDirectory);
- testResults.push(result);
- }
- // Rebuild claims array to match test results order
- const orderedClaims = [...nonCliBehaviorClaims, ...cliBehaviorClaims];
- // Map results to claims
- const results = orderedClaims.map((claim, i) => {
- const testResult = testResults[i];
- if (!testResult) {
- return {
- claim,
- verified: false,
- confidence: 'low',
- evidence: 'No test generated',
- method: 'skip',
- timestamp
- };
- }
- // Determine method based on claim category
- const method = claim.category === 'cli_behavior' ? 'pty-runtime' : 'isolated-process';
- return {
- claim,
- verified: testResult.success,
- confidence: testResult.success ? 'high' : (testResult.matchedPatterns.length > 0 ? 'medium' : 'low'),
- evidence: testResult.success
- ? `Verified: ${testResult.matchedPatterns.join(', ')}`
- : `Failed: ${testResult.unmatchedPatterns.join(', ')}`,
- method,
- reasoning: testResult.llmAssessment,
- executedCode: (testResult.test.shellCommands || []).join('\n'),
- rawOutput: testResult.output.slice(0, 2000),
- error: testResult.errors || undefined,
- timestamp
- };
- });
- // Calculate summary
- const verified = results.filter(r => r.verified).length;
- const failed = results.filter(r => !r.verified && r.confidence === 'high').length;
- const inconclusive = results.filter(r => !r.verified && r.confidence !== 'high').length;
- // Determine verdict
- let overallVerdict;
- if (failed > 0) {
- overallVerdict = 'contradicted';
- }
- else if (verified === claims.length && claims.length > 0) {
- overallVerdict = 'verified';
- }
- else if (verified > 0) {
- overallVerdict = 'partially_verified';
- }
- else {
- overallVerdict = 'unverified';
- }
- const trustScore = claims.length > 0
- ? Math.round((verified / claims.length) * 100)
- : 50;
- return {
- responseId: id,
- timestamp,
- claims,
- results,
- summary: { total: claims.length, verified, failed, inconclusive },
- overallVerdict,
- trustScore
- };
- }
- // ============================================================================
- // PROCESS ENTRY POINT
- // ============================================================================
- async function main() {
- // Read request from stdin
- let input = '';
- process.stdin.setEncoding('utf8');
- for await (const chunk of process.stdin) {
- input += chunk;
- }
- try {
- const request = JSON.parse(input);
- if (request.type !== 'verify') {
- throw new Error(`Unknown request type: ${request.type}`);
- }
- const report = await verify(request);
- // Output result as JSON to stdout
- process.stdout.write(JSON.stringify(report));
- process.exit(0);
- }
- catch (error) {
- // Output error as JSON
- process.stdout.write(JSON.stringify({
- error: true,
- message: error instanceof Error ? error.message : 'Unknown error'
- }));
- process.exit(1);
+ VERIFICATION STEPS:
+ ${config.verificationCommands.map((cmd, i) => ` ${i + 1}. ${cmd}`).join('\n')}
+
+ ERROR:
+ ${error.message}
+
+ STACK TRACE:
+ ${error.stack}
+ `;
  }
  }
- main();
  //# sourceMappingURL=isolatedVerifier.js.map
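
For context on what was removed: the deleted file was a standalone entry point that read a single JSON request from stdin, wrote a JSON verification report to stdout, and exited 0 on success or 1 on error. The caller side is not part of this diff, so the following is only a minimal sketch of how a parent process could have driven it; the script path and the provider/model values are hypothetical.

```js
// Hypothetical driver for the removed runner (pre-1.7.94). Assumes the built
// script lives at ./dist/verification/isolatedVerifier.js (not confirmed by the diff).
import { spawn } from 'node:child_process';

function runIsolatedVerification(scriptPath, request) {
  return new Promise((resolve, reject) => {
    const child = spawn(process.execPath, [scriptPath], {
      stdio: ['pipe', 'pipe', 'inherit'],
    });
    let stdout = '';
    child.stdout.on('data', (chunk) => { stdout += chunk; });
    child.on('error', reject);
    child.on('close', () => {
      try {
        // The runner printed either a verification report or { error, message }.
        resolve(JSON.parse(stdout));
      } catch (err) {
        reject(err);
      }
    });
    // The runner consumed exactly one request object from stdin.
    child.stdin.end(JSON.stringify(request));
  });
}

// Request fields mirror what the removed verify() read.
const report = await runIsolatedVerification('./dist/verification/isolatedVerifier.js', {
  type: 'verify',
  response: 'Created src/foo.ts and all tests pass.',
  workingDirectory: process.cwd(),
  conversationHistory: [],
  provider: 'openai',      // forwarded to createProvider(); value is illustrative
  model: 'example-model',  // illustrative
});
console.log(report.overallVerdict, report.trustScore);
```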
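
The replacement class in 1.7.94 drops the LLM-based claim extraction entirely: it writes a temporary Node script that checks for expected files and runs verification commands, executes it with a 30-second timeout, and returns a plain-text report. A minimal usage sketch follows, assuming a hypothetical import path and illustrative config values; the field names (`taskDescription`, `expectedFiles`, `verificationCommands`) and the result shape (`success`, `report`, `duration`, `error`) come from the added code above.

```js
// Usage sketch only; the import specifier is an assumption, not confirmed by the diff.
import { IsolatedVerifier } from 'erosolar-cli/dist/verification/isolatedVerifier.js';

const verifier = new IsolatedVerifier();

const result = await verifier.verifyTaskCompletion('add-login-page', {
  taskDescription: 'Add a login page and keep the build green', // echoed into the report
  expectedFiles: ['src/pages/login.tsx'],                       // checked with fs.existsSync in the generated script
  verificationCommands: ['npm run build', 'npm test'],          // run with execSync in the generated script
});

if (result.success) {
  console.log(result.report);   // "ISOLATED VERIFICATION REPORT ... Status: SUCCESS"
} else {
  console.error(result.error);  // message from the failed child process
  console.error(result.report); // includes ERROR and STACK TRACE sections
}
console.log(`verification took ${result.duration}ms`);
```

One caveat worth flagging in review: `generateVerificationScript` interpolates `expectedFiles` and `verificationCommands` directly into the generated script source, so the config must contain only trusted values.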