erosolar-cli 1.7.22 → 1.7.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,11 +9,248 @@
9
9
  *
10
10
  * @license MIT
11
11
  */
12
- import { exec } from 'node:child_process';
12
+ import { exec, spawn } from 'node:child_process';
13
13
  import { promisify } from 'node:util';
14
14
  import * as fs from 'node:fs/promises';
15
15
  import * as path from 'node:path';
16
16
  const execAsync = promisify(exec);
17
/**
 * Spawns a fresh isolated erosolar-cli instance for testing.
 *
 * Runs `node <cwd>/dist/bin/erosolar.js --plain` with EROSOLAR_TEST_MODE=1 and
 * NO_COLOR=1, buffers everything the child writes, and waits (at most two
 * seconds) for the first output that looks like a prompt before returning.
 *
 * @param {string} cwd - Directory that contains dist/bin/erosolar.js.
 * @param {number} [timeout=60000] - Milliseconds after which the child is sent SIGTERM.
 * @returns {Promise<object>} Handle with `process`, `stdin`, `exitPromise`
 *   (resolves with the exit code, or 1 on a spawn error) and live `output` /
 *   `errors` accessors.
 * @throws {Error} When the CLI entry point does not exist.
 */
async function spawnIsolatedCLI(cwd, timeout = 60000) {
    const cliPath = path.join(cwd, 'dist/bin/erosolar.js');
    // Verify CLI exists
    try {
        await fs.access(cliPath);
    }
    catch {
        throw new Error(`CLI not found at ${cliPath}. Run build first.`);
    }
    let output = '';
    let errors = '';
    let exitResolve;
    const exitPromise = new Promise(resolve => { exitResolve = resolve; });
    const child = spawn('node', [cliPath, '--plain'], {
        cwd,
        env: { ...process.env, EROSOLAR_TEST_MODE: '1', NO_COLOR: '1' },
        stdio: ['pipe', 'pipe', 'pipe']
    });
    child.stdout.on('data', (data) => { output += data.toString(); });
    child.stderr.on('data', (data) => { errors += data.toString(); });
    // Writing to a child that has already exited emits an error (EPIPE) on its
    // stdin stream; without a listener that would crash the calling process.
    child.stdin.on('error', (err) => { errors += `\nstdin: ${err.message}`; });
    // Overall timeout for the child process
    const timeoutId = setTimeout(() => {
        child.kill('SIGTERM');
        errors += `\nTimeout after ${timeout}ms`;
    }, timeout);
    child.on('close', (code) => {
        clearTimeout(timeoutId);
        exitResolve(code);
    });
    child.on('error', (err) => {
        // 'close' is not guaranteed after a spawn failure, so release the timer here too.
        clearTimeout(timeoutId);
        errors += err.message;
        exitResolve(1);
    });
    // Wait for startup (look for prompt or give it 2 seconds)
    await new Promise(resolve => {
        let poll;
        let giveUp;
        const done = () => {
            clearInterval(poll);
            clearTimeout(giveUp);
            resolve();
        };
        poll = setInterval(() => {
            if (output.includes('erosolar') || output.includes('>') || output.length > 100) {
                done();
            }
        }, 100);
        giveUp = setTimeout(done, 2000);
    });
    return {
        process: child,
        stdin: child.stdin,
        // Accessors instead of plain values: strings are copied by value, so a
        // plain `output` property would stay frozen at its startup contents and
        // callers polling it would never observe later CLI output.
        get output() { return output; },
        get errors() { return errors; },
        exitPromise
    };
}
66
/**
 * Sends a command to the spawned CLI and waits for its response.
 *
 * The response is considered complete once new output has appeared and then
 * stayed unchanged for one polling interval, or when `waitMs` elapses,
 * whichever comes first.
 *
 * @param {object} cli - Handle exposing `output` (string) and `stdin.write`.
 * @param {string} command - Text to send; a newline is appended.
 * @param {number} [waitMs=5000] - Hard deadline for the response, in milliseconds.
 * @param {number} [pollMs=200] - How often the output is sampled, in milliseconds.
 * @returns {Promise<string>} Output produced after the command was written.
 */
async function sendCommand(cli, command, waitMs = 5000, pollMs = 200) {
    const outputBefore = cli.output.length;
    cli.stdin.write(command + '\n');
    // Wait for output to stabilize
    await new Promise(resolve => {
        let lastLength = cli.output.length;
        let poll;
        let deadline;
        // Release both timers together; a leftover deadline timer would keep
        // the event loop alive for up to waitMs after every command.
        const finish = () => {
            clearInterval(poll);
            clearTimeout(deadline);
            resolve();
        };
        poll = setInterval(() => {
            const currentLength = cli.output.length;
            if (currentLength > lastLength) {
                lastLength = currentLength;
            }
            else if (currentLength > outputBefore) {
                finish();
            }
        }, pollMs);
        deadline = setTimeout(finish, waitMs);
    });
    return cli.output.slice(outputBefore);
}
88
/**
 * Checks one expected-output pattern against captured CLI output.
 * A pattern matches as a literal substring or as a case-insensitive regular
 * expression; text that is not a valid regular expression is only compared
 * literally instead of aborting the test run.
 */
function outputMatchesPattern(output, pattern) {
    if (output.includes(pattern)) {
        return true;
    }
    try {
        return new RegExp(pattern, 'i').test(output);
    }
    catch {
        return false;
    }
}
/**
 * Runs an isolated runtime test in a fresh CLI instance.
 *
 * Optionally rebuilds the project, spawns the CLI, sends each command in
 * `test.commands`, asks the CLI to quit, then checks `test.expectedOutputs`
 * and (when a verifier is supplied) asks the LLM whether the output shows
 * `test.expectedBehavior`.
 *
 * @param {object} test - Test definition (`commands`, and optionally
 *   `expectedOutputs`, `expectedBehavior`, `requiresBuild`, `timeout`).
 * @param {string} cwd - Project directory to build and run in.
 * @param {Function} [llmVerifier] - Async function taking a prompt and returning text.
 * @returns {Promise<object>} Result record; failures are reported through
 *   `success: false` and `errors` rather than by throwing.
 */
export async function runIsolatedTest(test, cwd, llmVerifier) {
    const startTime = Date.now();
    const result = {
        test,
        success: false,
        output: '',
        errors: '',
        exitCode: null,
        duration: 0,
        matchedPatterns: [],
        unmatchedPatterns: []
    };
    try {
        // Rebuild if required
        if (test.requiresBuild) {
            try {
                await execAsync('npm run build', { cwd, timeout: 120000 });
            }
            catch (buildErr) {
                result.errors = `Build failed: ${buildErr instanceof Error ? buildErr.message : 'unknown'}`;
                result.duration = Date.now() - startTime;
                return result;
            }
        }
        // Spawn fresh CLI
        const cli = await spawnIsolatedCLI(cwd, test.timeout || 60000);
        try {
            // Execute each command
            for (const cmd of test.commands) {
                const cmdOutput = await sendCommand(cli, cmd);
                result.output += `> ${cmd}\n${cmdOutput}\n`;
            }
            // Gracefully exit, giving the CLI a moment to act on /quit
            cli.stdin.write('/quit\n');
            await new Promise(resolve => setTimeout(resolve, 500));
        }
        finally {
            // Always terminate the child, including when a command exchange
            // throws, so a failed test cannot leave a CLI process behind.
            cli.process.kill('SIGTERM');
        }
        result.exitCode = await cli.exitPromise;
        result.errors = cli.errors;
        // Check expected output patterns
        if (test.expectedOutputs) {
            for (const pattern of test.expectedOutputs) {
                if (outputMatchesPattern(result.output, pattern)) {
                    result.matchedPatterns.push(pattern);
                }
                else {
                    result.unmatchedPatterns.push(pattern);
                }
            }
        }
        // LLM assessment of behavior if specified
        if (test.expectedBehavior && llmVerifier) {
            const assessPrompt = `Assess if this CLI output demonstrates the expected behavior.

EXPECTED BEHAVIOR: ${test.expectedBehavior}

CLI OUTPUT:
---
${result.output.slice(0, 4000)}
---

Return JSON: {"matches": true/false, "confidence": 0-100, "reasoning": "explanation"}`;
            try {
                const assessment = await llmVerifier(assessPrompt);
                const match = assessment.match(/\{[\s\S]*\}/);
                if (match) {
                    const parsed = JSON.parse(match[0]);
                    result.llmAssessment = `${parsed.matches ? '✅' : '❌'} [${parsed.confidence}%] ${parsed.reasoning}`;
                    // A positive verdict only counts when the model is reasonably confident.
                    if (!parsed.matches || parsed.confidence < 70) {
                        result.unmatchedPatterns.push(`behavior: ${test.expectedBehavior}`);
                    }
                    else {
                        result.matchedPatterns.push(`behavior: ${test.expectedBehavior}`);
                    }
                }
            }
            catch {
                result.llmAssessment = 'LLM assessment failed';
            }
        }
        // Success: nothing unmatched, and at least one match unless no output patterns were requested
        result.success = result.unmatchedPatterns.length === 0 &&
            (result.matchedPatterns.length > 0 || !test.expectedOutputs?.length);
    }
    catch (err) {
        result.errors = err instanceof Error ? err.message : 'Unknown error';
    }
    result.duration = Date.now() - startTime;
    return result;
}
179
/**
 * Generates isolated runtime tests for self-referential claims
 * (claims about erosolar-cli's own behavior/features).
 *
 * Claims are selected when their statement mentions "erosolar", "cli",
 * "command" or "feature", or when their category is "behavior" or "feature".
 * The LLM is asked to write one test per selected claim; if it fails or
 * returns nothing usable, a basic `/help` smoke test is produced per claim.
 *
 * @param {Array<object>} claims - Claims with `statement` and optional `category`.
 * @param {Function} llmVerifier - Async function taking a prompt and returning text.
 * @returns {Promise<Array<object>>} Test definitions, each with a `commands` array.
 */
export async function generateIsolatedTests(claims, llmVerifier) {
    const keywords = ['erosolar', 'cli', 'command', 'feature'];
    const selfClaims = claims.filter(c => {
        const statement = c.statement.toLowerCase();
        return keywords.some(word => statement.includes(word)) ||
            c.category === 'behavior' ||
            c.category === 'feature';
    });
    if (selfClaims.length === 0)
        return [];
    const prompt = `Generate isolated CLI tests for these claims about erosolar-cli behavior.

CLAIMS:
${selfClaims.map((c, i) => `${i + 1}. ${c.statement}`).join('\n')}

For each claim, generate a test that:
1. Spawns a fresh CLI instance
2. Sends commands to test the claimed behavior
3. Checks expected output patterns

Return JSON array:
[{
"id": "test-1",
"description": "what we're testing",
"commands": ["command1", "command2"],
"expectedOutputs": ["pattern1", "pattern2"],
"expectedBehavior": "description of expected behavior",
"requiresBuild": false,
"timeout": 30000
}]

Output ONLY valid JSON array.`;
    // Model output is untrusted: a test is only runnable when it is an object
    // carrying an array of string commands.
    const isRunnableTest = (candidate) => candidate !== null &&
        typeof candidate === 'object' &&
        Array.isArray(candidate.commands) &&
        candidate.commands.every(cmd => typeof cmd === 'string');
    try {
        const response = await llmVerifier(prompt);
        const match = response.match(/\[[\s\S]*\]/);
        if (match) {
            const parsed = JSON.parse(match[0]);
            if (Array.isArray(parsed)) {
                const runnable = parsed.filter(isRunnableTest);
                // An explicitly empty array is respected; an array with no
                // runnable entry falls through to the smoke tests below.
                if (runnable.length > 0 || parsed.length === 0) {
                    return runnable;
                }
            }
        }
    }
    catch {
        // Fall through to manual generation
    }
    // Fallback: generate basic tests for self-claims
    return selfClaims.map((c, i) => ({
        id: `iso-${i}`,
        description: c.statement,
        commands: ['/help'], // Basic smoke test
        expectedBehavior: c.statement,
        timeout: 30000
    }));
}
233
/**
 * Generates isolated tests for the given claims, runs them one after another
 * (each in its own CLI instance) and aggregates the outcomes.
 *
 * Without an LLM verifier nothing is run and an empty, passing summary is
 * returned.
 *
 * @param {Array<object>} claims - Claims to derive tests from.
 * @param {string} cwd - Project directory the CLI is run from.
 * @param {Function} [llmVerifier] - Async function taking a prompt and returning text.
 * @returns {Promise<object>} `{ tests, summary: { total, passed, failed }, allPassed }`.
 */
export async function runIsolatedVerification(claims, cwd, llmVerifier) {
    if (!llmVerifier) {
        const summary = { total: 0, passed: 0, failed: 0 };
        return { tests: [], summary, allPassed: true };
    }
    const plannedTests = await generateIsolatedTests(claims, llmVerifier);
    const outcomes = [];
    let passed = 0;
    let failed = 0;
    for (const planned of plannedTests) {
        const outcome = await runIsolatedTest(planned, cwd, llmVerifier);
        outcomes.push(outcome);
        if (outcome.success) {
            passed += 1;
        }
        else {
            failed += 1;
        }
    }
    return {
        tests: outcomes,
        summary: { total: plannedTests.length, passed, failed },
        allPassed: failed === 0
    };
}
17
254
  /**
18
255
  * LLM-based claim extraction prompt.
19
256
  * Used when pattern matching isn't sufficient.
@@ -764,57 +1001,12 @@ export function generateVerificationTest(claim) {
764
1001
  }
765
1002
  }
766
1003
  /**
767
- * Verify all claims in an assistant response
1004
+ * Verify all claims in an assistant response using LLM-based semantic analysis.
1005
+ * Requires a VerificationContext with an llmVerifier function.
1006
+ * All claim extraction and verification is done via LLM.
768
1007
  */
769
- export async function verifyResponse(response, responseId) {
770
- const claims = extractClaims(response);
771
- const results = [];
772
- for (const claim of claims) {
773
- const test = generateVerificationTest(claim);
774
- try {
775
- const result = await test();
776
- results.push(result);
777
- }
778
- catch (err) {
779
- results.push({
780
- claim,
781
- verified: false,
782
- confidence: 'low',
783
- evidence: 'Verification test failed to execute',
784
- error: err instanceof Error ? err.message : 'Unknown error',
785
- timestamp: new Date().toISOString()
786
- });
787
- }
788
- }
789
- const verified = results.filter(r => r.verified).length;
790
- const failed = results.filter(r => !r.verified && r.confidence === 'high').length;
791
- const inconclusive = results.filter(r => !r.verified && r.confidence !== 'high').length;
792
- let overallVerdict;
793
- if (failed > 0) {
794
- overallVerdict = 'contradicted';
795
- }
796
- else if (verified === claims.length && claims.length > 0) {
797
- overallVerdict = 'verified';
798
- }
799
- else if (verified > 0) {
800
- overallVerdict = 'partially_verified';
801
- }
802
- else {
803
- overallVerdict = 'unverified';
804
- }
805
- return {
806
- responseId: responseId || `response-${Date.now()}`,
807
- timestamp: new Date().toISOString(),
808
- claims,
809
- results,
810
- summary: {
811
- total: claims.length,
812
- verified,
813
- failed,
814
- inconclusive
815
- },
816
- overallVerdict
817
- };
1008
/**
 * Verifies all claims in an assistant response via verifyResponseComprehensive.
 *
 * @param {string} response - Assistant response text.
 * @param {object|string} [context] - Verification context carrying `llmVerifier`.
 *   For compatibility with the earlier `(response, responseId)` signature a
 *   string in this position is treated as the response id.
 * @param {string} [responseId] - Identifier for the report.
 * @returns {Promise<object>} Verification report.
 */
export async function verifyResponse(response, context, responseId) {
    // Earlier releases exposed verifyResponse(response, responseId); keep such
    // calls working instead of passing the id along as the context.
    if (typeof context === 'string' && responseId === undefined) {
        return verifyResponseComprehensive(response, {}, context);
    }
    // The comprehensive verifier reads context.llmVerifier, so it must receive an object.
    return verifyResponseComprehensive(response, context ?? {}, responseId);
}
819
1011
  /**
820
1012
  * Format a verification report for display
@@ -854,10 +1046,11 @@ export function formatVerificationReport(report) {
854
1046
  return lines.join('\n');
855
1047
  }
856
1048
  /**
857
- * Quick verification - returns true if response claims are valid
1049
+ * Quick verification - returns true if response claims are valid.
1050
+ * Requires a VerificationContext with llmVerifier for LLM-based semantic analysis.
858
1051
  */
859
- export async function quickVerify(response) {
860
- const report = await verifyResponse(response);
1052
/**
 * Quick verification: resolves to true when the report verdict for the
 * response is 'verified' or 'partially_verified'.
 *
 * @param {string} response - Assistant response text.
 * @param {object} [context] - Verification context carrying `llmVerifier`.
 *   Optional so that the earlier one-argument call form still resolves
 *   (to false) instead of failing on a missing context.
 * @returns {Promise<boolean>}
 */
export async function quickVerify(response, context) {
    const { overallVerdict } = await verifyResponse(response, context ?? {});
    return overallVerdict === 'verified' || overallVerdict === 'partially_verified';
}
863
1056
  /**
@@ -1120,203 +1313,59 @@ export function generateExtendedVerificationTest(claim, context) {
1120
1313
  case 'error_fixed':
1121
1314
  case 'feature_implemented':
1122
1315
  case 'refactor_complete':
1123
- // These require semantic verification - use LLM if available
1316
+ // These require semantic verification - LLM is required
1124
1317
  return async () => {
1125
- if (context.llmVerifier) {
1126
- return verifyClaimWithLLM(claim, context);
1127
- }
1128
- // Fall back to checking if related files were modified
1129
- const relatedPath = claim.params.path;
1130
- if (relatedPath) {
1131
- try {
1132
- const resolvedPath = path.isAbsolute(relatedPath)
1133
- ? relatedPath
1134
- : path.resolve(context.workingDirectory, relatedPath);
1135
- const stats = await fs.stat(resolvedPath);
1136
- const recentlyModified = (Date.now() - stats.mtimeMs) < 10 * 60 * 1000;
1137
- return {
1138
- ...baseResult,
1139
- verified: recentlyModified,
1140
- confidence: 'low',
1141
- evidence: recentlyModified
1142
- ? `Related file ${relatedPath} was recently modified`
1143
- : `Related file ${relatedPath} exists but wasn't recently modified`
1144
- };
1145
- }
1146
- catch {
1147
- return {
1148
- ...baseResult,
1149
- verified: false,
1150
- confidence: 'low',
1151
- evidence: 'Could not verify - no LLM available and related file not found'
1152
- };
1153
- }
1318
+ if (!context.llmVerifier) {
1319
+ return {
1320
+ ...baseResult,
1321
+ verified: false,
1322
+ confidence: 'low',
1323
+ evidence: 'Semantic verification requires LLM verifier'
1324
+ };
1154
1325
  }
1155
- return {
1156
- ...baseResult,
1157
- verified: false,
1158
- confidence: 'low',
1159
- evidence: 'Semantic verification required but no LLM verifier available'
1160
- };
1326
+ return verifyClaimWithLLM(claim, context);
1161
1327
  };
1162
1328
  case 'data_transformed':
1163
- return async () => {
1164
- // Check if we have before/after state to compare
1165
- if (context.previousState && context.currentState) {
1166
- const inputKey = claim.params.input;
1167
- const outputKey = claim.params.output;
1168
- if (inputKey && outputKey) {
1169
- const inputExists = context.previousState[inputKey] !== undefined;
1170
- const outputExists = context.currentState[outputKey] !== undefined;
1171
- return {
1172
- ...baseResult,
1173
- verified: inputExists && outputExists,
1174
- confidence: inputExists && outputExists ? 'medium' : 'low',
1175
- evidence: `Input "${inputKey}" ${inputExists ? 'found' : 'missing'}, Output "${outputKey}" ${outputExists ? 'found' : 'missing'}`
1176
- };
1177
- }
1178
- }
1179
- // Fall back to LLM verification
1180
- if (context.llmVerifier) {
1181
- return verifyClaimWithLLM(claim, context);
1182
- }
1183
- return {
1184
- ...baseResult,
1185
- verified: false,
1186
- confidence: 'low',
1187
- evidence: 'Cannot verify data transformation without state comparison or LLM'
1188
- };
1189
- };
1190
1329
  case 'database_updated':
1191
- return async () => {
1192
- // Can't directly verify database changes without connection info
1193
- // Check if there's a command we can run
1194
- const checkCommand = claim.params.checkCommand;
1195
- if (checkCommand) {
1196
- try {
1197
- const { stdout } = await execAsync(checkCommand, {
1198
- timeout: 10000,
1199
- cwd: context.workingDirectory
1200
- });
1201
- return {
1202
- ...baseResult,
1203
- verified: true,
1204
- confidence: 'medium',
1205
- evidence: `Check command output: ${stdout.slice(0, 200)}`
1206
- };
1207
- }
1208
- catch (err) {
1209
- return {
1210
- ...baseResult,
1211
- verified: false,
1212
- confidence: 'medium',
1213
- evidence: 'Database check command failed',
1214
- error: err instanceof Error ? err.message : 'Unknown error'
1215
- };
1216
- }
1217
- }
1218
- // Fall back to LLM
1219
- if (context.llmVerifier) {
1220
- return verifyClaimWithLLM(claim, context);
1221
- }
1222
- return {
1223
- ...baseResult,
1224
- verified: false,
1225
- confidence: 'low',
1226
- evidence: 'Cannot verify database changes without check command or LLM'
1227
- };
1228
- };
1229
1330
  case 'permission_granted':
1230
- return async () => {
1231
- const targetPath = claim.params.path;
1232
- const expectedMode = claim.params.mode;
1233
- if (targetPath) {
1234
- try {
1235
- const resolvedPath = path.isAbsolute(targetPath)
1236
- ? targetPath
1237
- : path.resolve(context.workingDirectory, targetPath);
1238
- const stats = await fs.stat(resolvedPath);
1239
- const mode = (stats.mode & 0o777).toString(8);
1240
- if (expectedMode) {
1241
- const matches = mode === expectedMode;
1242
- return {
1243
- ...baseResult,
1244
- verified: matches,
1245
- confidence: 'high',
1246
- evidence: matches
1247
- ? `File has expected permissions: ${mode}`
1248
- : `Expected mode ${expectedMode}, got ${mode}`
1249
- };
1250
- }
1251
- return {
1252
- ...baseResult,
1253
- verified: true,
1254
- confidence: 'medium',
1255
- evidence: `File permissions: ${mode}`
1256
- };
1257
- }
1258
- catch (err) {
1259
- return {
1260
- ...baseResult,
1261
- verified: false,
1262
- confidence: 'high',
1263
- evidence: 'Could not check file permissions',
1264
- error: err instanceof Error ? err.message : 'Unknown error'
1265
- };
1266
- }
1267
- }
1268
- // Fall back to LLM
1269
- if (context.llmVerifier) {
1270
- return verifyClaimWithLLM(claim, context);
1271
- }
1272
- return {
1273
- ...baseResult,
1274
- verified: false,
1275
- confidence: 'low',
1276
- evidence: 'Cannot verify permission without file path or LLM'
1277
- };
1278
- };
1279
1331
  case 'generic':
1280
1332
  default:
1281
- // For generic claims, always try LLM verification first
1333
+ // All these claim types require LLM verification
1282
1334
  return async () => {
1283
- if (context.llmVerifier) {
1284
- return verifyClaimWithLLM(claim, context);
1335
+ if (!context.llmVerifier) {
1336
+ return {
1337
+ ...baseResult,
1338
+ verified: false,
1339
+ confidence: 'low',
1340
+ evidence: `${claim.type} verification requires LLM verifier`
1341
+ };
1285
1342
  }
1286
- return {
1287
- ...baseResult,
1288
- verified: false,
1289
- confidence: 'low',
1290
- evidence: 'Generic claim requires LLM verification which is not available'
1291
- };
1343
+ return verifyClaimWithLLM(claim, context);
1292
1344
  };
1293
1345
  }
1294
1346
  }
1295
1347
  /**
1296
- * Comprehensive verification using both runtime and LLM-based strategies
1348
+ * Comprehensive verification using LLM-based semantic analysis.
1349
+ * Requires an LLM verifier - all claims are verified through LLM semantic analysis.
1297
1350
  */
1298
1351
  export async function verifyResponseComprehensive(response, context, responseId) {
1299
- // First extract claims using LLM if available, otherwise pattern matching
1300
- const claims = context.llmVerifier
1301
- ? await extractClaimsWithLLM(response, context.llmVerifier)
1302
- : extractClaims(response);
1352
+ if (!context.llmVerifier) {
1353
+ return {
1354
+ responseId: responseId || `response-${Date.now()}`,
1355
+ timestamp: new Date().toISOString(),
1356
+ claims: [],
1357
+ results: [],
1358
+ summary: { total: 0, verified: 0, failed: 0, inconclusive: 0 },
1359
+ overallVerdict: 'unverified'
1360
+ };
1361
+ }
1362
+ // Extract ALL claims using LLM (required)
1363
+ const claims = await extractClaimsWithLLM(response, context.llmVerifier);
1303
1364
  const results = [];
1304
1365
  for (const claim of claims) {
1305
- // Check if this is a standard claim type that can be runtime-verified
1306
- const standardTypes = [
1307
- 'file_created', 'file_modified', 'file_deleted', 'code_compiles',
1308
- 'tests_pass', 'git_committed', 'package_published', 'command_executed',
1309
- 'dependency_installed', 'service_running', 'url_accessible', 'content_contains'
1310
- ];
1311
- let test;
1312
- if (standardTypes.includes(claim.type)) {
1313
- test = generateVerificationTest(claim);
1314
- }
1315
- else {
1316
- test = generateExtendedVerificationTest(claim, context);
1317
- }
1366
+ // ALL claims are verified via LLM semantic analysis
1318
1367
  try {
1319
- const result = await test();
1368
+ const result = await verifyClaimWithLLM(claim, context);
1320
1369
  results.push(result);
1321
1370
  }
1322
1371
  catch (err) {
@@ -1324,7 +1373,7 @@ export async function verifyResponseComprehensive(response, context, responseId)
1324
1373
  claim,
1325
1374
  verified: false,
1326
1375
  confidence: 'low',
1327
- evidence: 'Verification test failed to execute',
1376
+ evidence: 'LLM verification failed',
1328
1377
  error: err instanceof Error ? err.message : 'Unknown error',
1329
1378
  timestamp: new Date().toISOString()
1330
1379
  });
@@ -1814,4 +1863,205 @@ export async function verifyResponseHybrid(response, context, responseId) {
1814
1863
  overallVerdict
1815
1864
  };
1816
1865
  }
1866
// Prompt templates for the universal verification flow. Tokens in braces
// ({RESPONSE}, {CONTEXT}, ...) are placeholders filled in by the callers.

// Asks the model to list every verifiable claim found in a response.
const UNIVERSAL_EXTRACT = [
    'Extract ALL verifiable claims from this AI response. Include explicit claims, implicit claims, state changes, results, assertions.',
    '',
    'RESPONSE:',
    '---',
    '{RESPONSE}',
    '---',
    'CONTEXT: {CONTEXT}',
    'DIR: {WORKING_DIR}',
    '',
    'Return JSON array: [{"id":"c1","statement":"claim","category":"file_op|code|state|data|behavior|fact|other","verifiable":true/false,"verificationApproach":"how","priority":"critical|high|medium|low","context":{}}]',
    'Output ONLY valid JSON.'
].join('\n');
// Asks the model for a read-only verification plan for a single claim.
const UNIVERSAL_GEN = [
    'Generate verification code for: {STATEMENT}',
    'Category: {CATEGORY} | Approach: {APPROACH} | Context: {CONTEXT} | Dir: {WORKING_DIR} | Platform: {PLATFORM}',
    '',
    'Use shell/javascript/python. READ-ONLY only.',
    'Return JSON: {"steps":[{"type":"shell|javascript|python","code":"code","desc":"what"}],"success":"success criteria","failure":"failure criteria","confPass":0-100,"confFail":0-100,"safe":{"ok":true/false,"why":"reason"}}',
    'Output ONLY valid JSON.'
].join('\n');
// Asks the model for an overall trust score given the claims and results.
const UNIVERSAL_ASSESS = [
    'Assess: RESPONSE:{RESPONSE} CLAIMS:{CLAIMS} RESULTS:{RESULTS}',
    'Return JSON: {"trust":0-100,"summary":"text","concerns":[]}',
    'Output ONLY valid JSON.'
].join('\n');
1886
// Deny-list of patterns that must not appear in generated verification code
// (destructive commands, privilege escalation, dynamic evaluation, process
// spawning, piping downloads into a shell, destructive SQL, killing processes).
const UNSAFE = [/\brm\s/i, /rmdir/i, /sudo/i, /chmod\s*7/i, /eval\s*\(/i, /exec\s*\(/i, /child_process/i, /os\.system/i, /subprocess/i, /curl.*\|.*sh/i, /DROP\s+TABLE/i, /DELETE\s+FROM/i, /kill/i];
/**
 * Screens a generated code snippet before it is executed.
 *
 * @param {string} c - Code to check.
 * @returns {{safe: boolean, reason: string}} `safe: false` with the source of
 *   the matching deny-list pattern, 'too long' for snippets over 5000
 *   characters, or 'not a string' for any non-string input; otherwise
 *   `{ safe: true, reason: 'ok' }`.
 */
export function validateUniversalCode(c) {
    // Generated plans are untrusted JSON: a missing or non-string `code` must
    // be rejected rather than throwing or slipping through as "safe".
    if (typeof c !== 'string') {
        return { safe: false, reason: 'not a string' };
    }
    const offending = UNSAFE.find(pattern => pattern.test(c));
    if (offending) {
        return { safe: false, reason: offending.source };
    }
    if (c.length > 5000) {
        return { safe: false, reason: 'too long' };
    }
    return { safe: true, reason: 'ok' };
}
1893
/**
 * Runs a program directly (no shell) and collects its output.
 * Rejects when the program cannot be started, exits unsuccessfully, exceeds
 * `timeout` milliseconds, or produces more than `maxBuffer` characters.
 */
function spawnCaptured(command, args, { cwd, timeout, maxBuffer = 1024 * 1024 }) {
    return new Promise((resolve, reject) => {
        let stdout = '';
        let stderr = '';
        let settled = false;
        let timer;
        const child = spawn(command, args, { cwd, stdio: ['ignore', 'pipe', 'pipe'] });
        const settle = (err) => {
            if (settled) {
                return;
            }
            settled = true;
            clearTimeout(timer);
            if (err) {
                child.kill('SIGTERM');
                reject(err);
            }
            else {
                resolve({ stdout, stderr });
            }
        };
        timer = setTimeout(() => settle(new Error(`Command timed out after ${timeout}ms: ${command}`)), timeout);
        const enforceLimit = () => {
            if (stdout.length + stderr.length > maxBuffer) {
                settle(new Error(`${command} output exceeded ${maxBuffer} characters`));
            }
        };
        child.stdout.on('data', (chunk) => { stdout += chunk.toString(); enforceLimit(); });
        child.stderr.on('data', (chunk) => { stderr += chunk.toString(); enforceLimit(); });
        child.on('error', settle);
        child.on('close', (code, signal) => {
            if (code === 0) {
                settle();
            }
            else {
                settle(new Error(`Command failed: ${command} (${signal ?? `exit code ${code}`})\n${stderr}`));
            }
        });
    });
}
/**
 * Executes one step of a generated verification plan.
 *
 * The step's code is screened by validateUniversalCode first. Shell steps run
 * through the shell; JavaScript and Python steps are handed to `node -e` /
 * `python3 -c` as a plain argument.
 *
 * @param {{type: string, code: string}} s - Step to run.
 * @param {string} cwd - Working directory for the step.
 * @returns {Promise<{ok: boolean, out: string}>} `ok` is false when the code is
 *   rejected, the type is unknown, or execution fails; `out` holds the output
 *   or the failure reason.
 */
async function runUniversalStep(s, cwd) {
    const v = validateUniversalCode(s.code);
    if (!v.safe)
        return { ok: false, out: v.reason };
    try {
        if (s.type === 'shell') {
            const { stdout, stderr } = await execAsync(s.code, { cwd, timeout: 30000, maxBuffer: 5 * 1024 * 1024 });
            return { ok: true, out: stdout + stderr };
        }
        if (s.type === 'javascript') {
            const w = `(async()=>{try{const fs=require('fs').promises;const r=await(async()=>{${s.code}})();console.log(JSON.stringify({ok:1,r}))}catch(e){console.log(JSON.stringify({ok:0,e:e.message}))}})()`;
            // Passed as an argv entry, not spliced into a shell command line:
            // inside shell double quotes, `$`, backticks and `${...}` in the
            // snippet would be expanded (or executed) before node ever saw it.
            const { stdout } = await spawnCaptured('node', ['-e', w], { cwd, timeout: 30000 });
            return { ok: true, out: stdout };
        }
        if (s.type === 'python') {
            const { stdout, stderr } = await spawnCaptured('python3', ['-c', s.code], { cwd, timeout: 30000 });
            return { ok: true, out: stdout + stderr };
        }
        return { ok: false, out: 'unknown type' };
    }
    catch (e) {
        return { ok: false, out: e instanceof Error ? e.message : 'err' };
    }
}
1917
/**
 * Extracts verifiable claims from a response.
 *
 * With an LLM verifier the model is asked for a JSON array of claims; without
 * one, or when the model's answer cannot be used, the pattern-based
 * extractClaims() result is converted to the same claim shape.
 *
 * @param {string} r - Assistant response text.
 * @param {object} ctx - Context with `workingDirectory` and optionally
 *   `llmVerifier` and `conversationHistory`.
 * @returns {Promise<Array<object>>} Claims, each with a string `statement`.
 */
export async function extractUniversalClaims(r, ctx) {
    const fromPatterns = () => extractClaims(r).map((c, i) => ({ id: `c${i}`, statement: c.description, category: c.type, verifiable: true, verificationApproach: 'runtime', priority: 'medium', context: c.params }));
    if (!ctx.llmVerifier)
        return fromPatterns();
    try {
        const slots = {
            RESPONSE: r.slice(0, 8000),
            CONTEXT: ctx.conversationHistory?.slice(-3).join('\n') || '',
            WORKING_DIR: ctx.workingDirectory
        };
        // Single pass with a replacer function: inserted text is never
        // rescanned for placeholders, and `$&`-style sequences in the response
        // are not interpreted as replacement patterns.
        const p = UNIVERSAL_EXTRACT.replace(/\{(RESPONSE|CONTEXT|WORKING_DIR)\}/g, (_, key) => String(slots[key]));
        const res = await ctx.llmVerifier(p);
        const m = res.match(/\[[\s\S]*\]/);
        if (m) {
            const parsed = JSON.parse(m[0]);
            if (Array.isArray(parsed)) {
                // Keep only entries that carry a claim statement.
                return parsed.filter(c => c !== null && typeof c === 'object' && typeof c.statement === 'string');
            }
        }
    }
    catch { /* fall through */ }
    return fromPatterns();
}
1930
/**
 * Verifies a single claim by asking the LLM for a read-only verification plan
 * and executing its steps.
 *
 * @param {object} claim - Claim with `statement`, `category`,
 *   `verificationApproach`, `context` and `verifiable`.
 * @param {object} ctx - Context with `workingDirectory` and `llmVerifier`.
 * @returns {Promise<object>} Result with `verified`, `confidence`, `method`
 *   ('skip', 'blocked', 'error' or the executed step types), `evidence` and
 *   `reasoning`. Never rejects; failures are reported with method 'error'.
 */
export async function verifyUniversalClaim(claim, ctx) {
    const base = { claim, timestamp: new Date().toISOString() };
    if (!claim.verifiable)
        return { ...base, verified: false, confidence: 0, method: 'skip', evidence: 'Not verifiable', reasoning: 'Cannot verify' };
    if (!ctx.llmVerifier)
        return { ...base, verified: false, confidence: 0, method: 'skip', evidence: 'No LLM', reasoning: 'Needs LLM' };
    try {
        const slots = {
            STATEMENT: claim.statement,
            CATEGORY: claim.category,
            APPROACH: claim.verificationApproach,
            CONTEXT: JSON.stringify(claim.context),
            WORKING_DIR: ctx.workingDirectory,
            PLATFORM: process.platform
        };
        // Single pass with a replacer function so claim text containing `$&`
        // or another placeholder name cannot corrupt the prompt.
        const p = UNIVERSAL_GEN.replace(/\{(STATEMENT|CATEGORY|APPROACH|CONTEXT|WORKING_DIR|PLATFORM)\}/g, (_, key) => String(slots[key]));
        const res = await ctx.llmVerifier(p);
        const m = res.match(/\{[\s\S]*\}/);
        if (!m)
            throw new Error('bad');
        const plan = JSON.parse(m[0]);
        // A plan is only executed when it explicitly declares itself safe.
        if (!plan.safe?.ok)
            return { ...base, verified: false, confidence: 0, method: 'blocked', evidence: plan.safe?.why ?? 'Plan did not declare itself safe', reasoning: 'Unsafe' };
        // Without steps nothing was checked, so the claim must not count as verified.
        if (!Array.isArray(plan.steps) || plan.steps.length === 0)
            throw new Error('Verification plan contained no steps');
        let allOk = true;
        let out = '';
        let code = '';
        for (const s of plan.steps) {
            code += s.code + '\n';
            const r = await runUniversalStep(s, ctx.workingDirectory);
            out += r.out + '\n';
            if (!r.ok)
                allOk = false;
        }
        return { ...base, verified: allOk, confidence: allOk ? plan.confPass : plan.confFail, method: plan.steps.map(s => s.type).join('+'), evidence: allOk ? plan.success : plan.failure, reasoning: allOk ? 'All passed' : 'Some failed', executedCode: code, rawOutput: out.slice(0, 2000) };
    }
    catch (e) {
        return { ...base, verified: false, confidence: 10, method: 'error', evidence: 'Failed', reasoning: e instanceof Error ? e.message : 'err' };
    }
}
1959
/**
 * Verifies every claim in a response and produces a trust report.
 *
 * Claims about erosolar-cli itself (statement mentions "erosolar" or "cli",
 * or category is "behavior"/"feature") are checked with isolated runtime
 * tests when an LLM verifier is available; all other claims, and any
 * self-referential claim for which no isolated test was produced, go through
 * verifyUniversalClaim. An overall assessment is requested from the LLM,
 * with a computed fallback when that is unavailable or unusable.
 *
 * @param {string} response - Assistant response text.
 * @param {object} ctx - Context with `workingDirectory` and optionally `llmVerifier`.
 * @param {string} [id] - Report identifier; generated when omitted.
 * @returns {Promise<object>} Report with `claims`, `results`, `summary`,
 *   `overallAssessment` and `trustScore`.
 */
export async function verifyResponseUniversal(response, ctx, id) {
    const claims = await extractUniversalClaims(response, ctx);
    const results = [];
    // Identify self-referential claims (about erosolar-cli itself)
    const isSelfReferential = (c) => {
        const statement = c.statement.toLowerCase();
        return statement.includes('erosolar') ||
            statement.includes('cli') ||
            c.category === 'behavior' ||
            c.category === 'feature';
    };
    const selfClaims = claims.filter(isSelfReferential);
    const regularClaims = claims.filter(c => !isSelfReferential(c));
    // Run isolated runtime tests for self-referential claims
    let isoTests = [];
    if (selfClaims.length > 0 && ctx.llmVerifier) {
        const isoResults = await runIsolatedVerification(selfClaims.map(c => ({ statement: c.statement, category: c.category, context: c.context })), ctx.workingDirectory, ctx.llmVerifier);
        isoTests = isoResults.tests;
    }
    // Tests are paired with claims by position; there may be fewer tests than claims.
    const pairedCount = Math.min(selfClaims.length, isoTests.length);
    for (let i = 0; i < pairedCount; i++) {
        const claim = selfClaims[i];
        const isoTest = isoTests[i];
        results.push({
            claim,
            verified: isoTest.success,
            confidence: isoTest.success ? 90 : (isoTest.matchedPatterns.length > 0 ? 50 : 20),
            method: 'isolated-runtime',
            evidence: isoTest.success ? `Verified in fresh CLI instance` : `Failed: ${isoTest.unmatchedPatterns.join(', ')}`,
            reasoning: isoTest.llmAssessment || (isoTest.success ? 'All patterns matched in isolated runtime' : 'Patterns not matched'),
            executedCode: isoTest.test.commands.join('\n'),
            rawOutput: isoTest.output.slice(0, 2000),
            timestamp: new Date().toISOString()
        });
    }
    // Self-referential claims that got no isolated test must still appear in
    // the report, so they join the standard verification queue.
    const standardQueue = [...selfClaims.slice(pairedCount), ...regularClaims];
    for (const c of standardQueue) {
        results.push(c.verifiable || c.priority === 'critical' || c.priority === 'high'
            ? await verifyUniversalClaim(c, ctx)
            : { claim: c, verified: false, confidence: 0, method: 'skip', evidence: 'Low priority', reasoning: 'Skipped', timestamp: new Date().toISOString() });
    }
    const vClaims = claims.filter(c => c.verifiable).length;
    const verified = results.filter(r => r.verified).length;
    const failed = results.filter(r => !r.verified && r.confidence > 50).length;
    const inconclusive = results.filter(r => !r.verified && r.confidence <= 50 && r.method !== 'skip').length;
    const avgConf = results.length ? results.reduce((s, r) => s + r.confidence, 0) / results.length : 0;
    // Count isolated tests for assessment
    const isoCount = results.filter(r => r.method === 'isolated-runtime').length;
    const isoVerified = results.filter(r => r.method === 'isolated-runtime' && r.verified).length;
    // Computed fallback, used whenever the LLM gives no usable assessment.
    let trust = Math.round(avgConf * verified / Math.max(vClaims, 1));
    let assessment = `${verified}/${vClaims} verified`;
    if (ctx.llmVerifier) {
        try {
            const isoSummary = isoCount > 0 ? ` Isolated runtime tests: ${isoVerified}/${isoCount} passed.` : '';
            const slots = {
                RESPONSE: response.slice(0, 4000),
                CLAIMS: JSON.stringify(claims.slice(0, 15)),
                RESULTS: JSON.stringify(results.slice(0, 15))
            };
            // Single pass with a replacer function: inserted text is not
            // rescanned and `$&`-style sequences are not interpreted.
            const p = UNIVERSAL_ASSESS.replace(/\{(RESPONSE|CLAIMS|RESULTS)\}/g, (_, key) => slots[key]);
            const r = await ctx.llmVerifier(p);
            const m = r.match(/\{[\s\S]*\}/);
            if (m) {
                const a = JSON.parse(m[0]);
                const llmAssessment = `${a.summary ?? ''}${isoSummary}${a.concerns?.length ? ` Concerns: ${a.concerns.join('; ')}` : ''}`;
                // Only accept a numeric score, kept within the 0-100 scale the report uses.
                if (typeof a.trust === 'number' && Number.isFinite(a.trust)) {
                    trust = Math.min(100, Math.max(0, a.trust));
                }
                assessment = llmAssessment;
            }
        }
        catch {
            // Keep the computed fallback.
        }
    }
    return { responseId: id || `u-${Date.now()}`, originalResponse: response, timestamp: new Date().toISOString(), claims, results, summary: { totalClaims: claims.length, verifiableClaims: vClaims, verified, failed, inconclusive, averageConfidence: Math.round(avgConf) }, overallAssessment: assessment, trustScore: trust };
}
2025
/**
 * Fast trust estimate: verifies at most five verifiable claims of critical or
 * high priority and reports the share that was verified.
 *
 * @param {string} r - Assistant response text.
 * @param {object} ctx - Verification context passed on to the claim helpers.
 * @returns {Promise<{trustScore: number, summary: string}>} A neutral score of
 *   50 when the response has no such claims.
 */
export async function quickUniversalVerify(r, ctx) {
    const extracted = await extractUniversalClaims(r, ctx);
    const important = extracted
        .filter(claim => claim.verifiable && (claim.priority === 'critical' || claim.priority === 'high'))
        .slice(0, 5);
    if (important.length === 0) {
        return { trustScore: 50, summary: 'No critical claims' };
    }
    let confirmed = 0;
    for (const claim of important) {
        const outcome = await verifyUniversalClaim(claim, ctx);
        if (outcome.verified) {
            confirmed += 1;
        }
    }
    const trustScore = Math.round(confirmed / important.length * 100);
    return { trustScore, summary: `${confirmed}/${important.length} critical verified` };
}
2036
/**
 * Renders a universal verification report as text.
 *
 * Shows the trust score with a ten-segment bar, the overall assessment, claim
 * counts, then up to four isolated runtime results and up to six other results.
 *
 * @param {object} r - Report with `trustScore`, `overallAssessment`,
 *   `summary` and `results`.
 * @returns {string} Multi-line report.
 */
export function formatUniversalReport(r) {
    // The score may come straight from a model reply; keep the bar within its
    // ten segments so String.prototype.repeat never receives a negative count.
    const score = Number.isFinite(r.trustScore) ? r.trustScore : 0;
    const filled = Math.min(10, Math.max(0, Math.round(score / 10)));
    const bar = '█'.repeat(filled) + '░'.repeat(10 - filled);
    const icon = r.trustScore >= 80 ? '✅' : r.trustScore >= 50 ? '⚠️' : '❌';
    let out = `╔════════════════════════════════════════════════════════════╗\n║ UNIVERSAL VERIFICATION REPORT ║\n╚════════════════════════════════════════════════════════════╝\n\n`;
    out += `Trust: ${icon} ${r.trustScore}/100 [${bar}]\n${r.overallAssessment}\n\nClaims: ${r.summary.totalClaims} | ✅ ${r.summary.verified} | ❌ ${r.summary.failed} | ❓ ${r.summary.inconclusive}\n\n`;
    // Group results by method
    const isoResults = r.results.filter(x => x.method === 'isolated-runtime');
    const otherResults = r.results.filter(x => x.method !== 'isolated-runtime');
    // Show isolated runtime tests first (most robust verification)
    if (isoResults.length > 0) {
        out += `🔬 ISOLATED RUNTIME TESTS (fresh CLI instance):\n`;
        for (const x of isoResults.slice(0, 4)) {
            out += ` ${x.verified ? '✅' : '❌'} [${x.confidence}%] ${x.claim.statement.slice(0, 50)}...\n`;
            if (x.reasoning)
                out += ` └─ ${x.reasoning.slice(0, 60)}\n`;
        }
        if (isoResults.length > 4)
            out += ` ... +${isoResults.length - 4} more isolated tests\n`;
        out += '\n';
    }
    // Show other verification results
    if (otherResults.length > 0) {
        out += `📋 STANDARD VERIFICATION:\n`;
        for (const x of otherResults.slice(0, 6)) {
            // Unverified with confidence above 50 counts as failed, otherwise inconclusive.
            out += ` ${x.verified ? '✅' : x.confidence > 50 ? '❌' : '❓'} [${x.confidence}%] ${x.claim.statement.slice(0, 50)}...\n`;
        }
        if (otherResults.length > 6)
            out += ` ... +${otherResults.length - 6} more\n`;
    }
    return out;
}
1817
2067
  //# sourceMappingURL=responseVerifier.js.map