@goldensheepai/toknxr-cli 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/cli.js +182 -23
- package/lib/commands/hallucination-commands.js +453 -0
- package/lib/enhanced-hallucination-detector.js +622 -0
- package/lib/execution-based-detector.js +538 -0
- package/lib/execution-sandbox.js +602 -0
- package/lib/hallucination-database-service.js +447 -0
- package/lib/hallucination-patterns.js +490 -0
- package/lib/types/database-types.js +5 -0
- package/lib/types/hallucination-types.js +74 -0
- package/lib/types/index.js +8 -0
- package/lib/ui.js +73 -6
- package/package.json +1 -1
- package/lib/auth.js +0 -73
- package/lib/cli.test.js +0 -49
- package/lib/code-review.js +0 -319
- package/lib/config.js +0 -7
- package/lib/sync.js +0 -117
@@ -0,0 +1,602 @@
|
|
1
|
+
/**
|
2
|
+
* Secure Execution Sandbox
|
3
|
+
* Safe code execution environment for hallucination detection
|
4
|
+
*
|
5
|
+
* Provides isolated, resource-limited execution for Python code analysis
|
6
|
+
*/
|
7
|
+
import { spawn } from 'child_process';
|
8
|
+
import { writeFile, unlink, mkdir } from 'fs/promises';
|
9
|
+
import { join } from 'path';
|
10
|
+
import { tmpdir } from 'os';
|
11
|
+
import { randomBytes } from 'crypto';
|
12
|
+
/**
 * Default resource limits for safe execution.
 *
 * These values are merged under any caller-supplied overrides by the
 * ExecutionSandbox constructor (`{ ...DEFAULT_RESOURCE_LIMITS, ...resourceLimits }`).
 *
 * NOTE(review): within this file only `maxExecutionTimeMs` (used as the kill
 * timeout) and `maxMemoryMB` are ever read, and nothing visible here applies
 * `maxMemoryMB` to the spawned process. `maxCpuCores`, `maxFileOperations`,
 * `maxNetworkOperations` and `allowedSystemCalls` are not read by any code in
 * this file - confirm whether another module enforces them before relying on them.
 */
const DEFAULT_RESOURCE_LIMITS = {
    // Memory budget in megabytes.
    maxMemoryMB: 128,
    // Wall-clock budget in milliseconds; the default timeout for a run.
    maxExecutionTimeMs: 5000,
    maxCpuCores: 1,
    maxFileOperations: 10,
    maxNetworkOperations: 0,
    // Syscall names. NOTE(review): the list includes networking, process-creation
    // and administrative calls ('socket', 'fork', 'ptrace', 'mount', 'reboot', ...),
    // so it reads more like a syscall catalogue than a restrictive allow-list - confirm intent.
    allowedSystemCalls: [
        'read', 'write', 'open', 'close', 'stat', 'fstat', 'lstat',
        'poll', 'lseek', 'mmap', 'mprotect', 'munmap', 'brk', 'rt_sigaction',
        'rt_sigprocmask', 'rt_sigreturn', 'ioctl', 'pread64', 'pwrite64',
        'readv', 'writev', 'access', 'pipe', 'select', 'sched_yield',
        'mremap', 'msync', 'mincore', 'madvise', 'shmget', 'shmat', 'shmctl',
        'dup', 'dup2', 'pause', 'nanosleep', 'getitimer', 'alarm', 'setitimer',
        'getpid', 'sendfile', 'socket', 'connect', 'accept', 'sendto', 'recvfrom',
        'sendmsg', 'recvmsg', 'shutdown', 'bind', 'listen', 'getsockname',
        'getpeername', 'socketpair', 'setsockopt', 'getsockopt', 'clone', 'fork',
        'vfork', 'execve', 'exit', 'wait4', 'kill', 'uname', 'semget', 'semop',
        'semctl', 'shmdt', 'msgget', 'msgsnd', 'msgrcv', 'msgctl', 'fcntl',
        'flock', 'fsync', 'fdatasync', 'truncate', 'ftruncate', 'getdents',
        'getcwd', 'chdir', 'fchdir', 'rename', 'mkdir', 'rmdir', 'creat',
        'link', 'unlink', 'symlink', 'readlink', 'chmod', 'fchmod', 'chown',
        'fchown', 'lchown', 'umask', 'gettimeofday', 'getrlimit', 'getrusage',
        'sysinfo', 'times', 'ptrace', 'getuid', 'syslog', 'getgid', 'setuid',
        'setgid', 'geteuid', 'getegid', 'setpgid', 'getppid', 'getpgrp',
        'setsid', 'setreuid', 'setregid', 'getgroups', 'setgroups', 'setresuid',
        'getresuid', 'setresgid', 'getresgid', 'getpgid', 'setfsuid', 'setfsgid',
        'getsid', 'capget', 'capset', 'rt_sigpending', 'rt_sigtimedwait',
        'rt_sigqueueinfo', 'rt_sigsuspend', 'sigaltstack', 'utime', 'mknod',
        'uselib', 'personality', 'ustat', 'statfs', 'fstatfs', 'sysfs',
        'getpriority', 'setpriority', 'sched_setparam', 'sched_getparam',
        'sched_setscheduler', 'sched_getscheduler', 'sched_get_priority_max',
        'sched_get_priority_min', 'sched_rr_get_interval', 'mlock', 'munlock',
        'mlockall', 'munlockall', 'vhangup', 'modify_ldt', 'pivot_root',
        '_sysctl', 'prctl', 'arch_prctl', 'adjtimex', 'setrlimit', 'chroot',
        'sync', 'acct', 'settimeofday', 'mount', 'umount2', 'swapon', 'swapoff',
        'reboot', 'sethostname', 'setdomainname', 'iopl', 'ioperm',
        'create_module', 'init_module', 'delete_module', 'get_kernel_syms',
        'query_module', 'quotactl', 'nfsservctl', 'getpmsg', 'putpmsg',
        'afs_syscall', 'tuxcall', 'security', 'gettid', 'readahead', 'setxattr',
        'lsetxattr', 'fsetxattr', 'getxattr', 'lgetxattr', 'fgetxattr',
        'listxattr', 'llistxattr', 'flistxattr', 'removexattr', 'lremovexattr',
        'fremovexattr', 'tkill', 'time', 'futex', 'sched_setaffinity',
        'sched_getaffinity', 'set_thread_area', 'io_setup', 'io_destroy',
        'io_getevents', 'io_submit', 'io_cancel', 'get_thread_area',
        'lookup_dcookie', 'epoll_create', 'epoll_ctl_old', 'epoll_wait_old',
        'remap_file_pages', 'getdents64', 'set_tid_address', 'restart_syscall',
        'semtimedop', 'fadvise64', 'timer_create', 'timer_settime',
        'timer_gettime', 'timer_getoverrun', 'timer_delete', 'clock_settime',
        'clock_gettime', 'clock_getres', 'clock_nanosleep', 'exit_group',
        'epoll_wait', 'epoll_ctl', 'tgkill', 'utimes', 'vserver', 'mbind',
        'set_mempolicy', 'get_mempolicy', 'mq_open', 'mq_unlink', 'mq_timedsend',
        'mq_timedreceive', 'mq_notify', 'mq_getsetattr', 'kexec_load',
        'waitid', 'add_key', 'request_key', 'keyctl', 'ioprio_set', 'ioprio_get',
        'inotify_init', 'inotify_add_watch', 'inotify_rm_watch', 'migrate_pages',
        'openat', 'mkdirat', 'mknodat', 'fchownat', 'futimesat', 'newfstatat',
        'unlinkat', 'renameat', 'linkat', 'symlinkat', 'readlinkat', 'fchmodat',
        'faccessat', 'pselect6', 'ppoll', 'unshare', 'set_robust_list',
        'get_robust_list', 'splice', 'tee', 'sync_file_range', 'vmsplice',
        'move_pages', 'utimensat', 'epoll_pwait', 'signalfd', 'timerfd_create',
        'eventfd', 'fallocate', 'timerfd_settime', 'timerfd_gettime', 'accept4',
        'signalfd4', 'eventfd2', 'epoll_create1', 'dup3', 'pipe2', 'inotify_init1',
        'preadv', 'pwritev', 'rt_tgsigqueueinfo', 'perf_event_open', 'recvmmsg',
        'fanotify_init', 'fanotify_mark', 'prlimit64', 'name_to_handle_at',
        'open_by_handle_at', 'clock_adjtime', 'syncfs', 'sendmmsg', 'setns',
        'getcpu', 'process_vm_readv', 'process_vm_writev', 'kcmp',
        'finit_module', 'sched_setattr', 'sched_getattr', 'renameat2',
        'seccomp', 'getrandom', 'memfd_create', 'kexec_file_load', 'bpf',
        'execveat', 'userfaultfd', 'membarrier', 'mlock2', 'copy_file_range',
        'preadv2', 'pwritev2'
    ],
};
|
86
|
+
/**
 * Security patterns that indicate potentially dangerous code.
 *
 * Each entry is tested against the raw submitted source text by
 * `ExecutionSandbox.validateSafety`; every pattern that matches is recorded
 * as one risk.
 *
 * All patterns are case-insensitive and unanchored, so they match anywhere in
 * the text - including inside longer identifiers, comments and string
 * literals. For example `/file\s*\(/i` also matches `profile(` and
 * `/dir\s*\(/i` also matches `mkdir(`. Treat a hit as a heuristic signal,
 * not as proof that the named builtin or module is actually used.
 */
const SECURITY_PATTERNS = [
    // System access
    /import\s+os/i,
    /import\s+subprocess/i,
    /import\s+sys/i,
    /from\s+os\s+import/i,
    /from\s+subprocess\s+import/i,
    /from\s+sys\s+import/i,
    // Code execution
    /exec\s*\(/i,
    /eval\s*\(/i,
    /__import__\s*\(/i,
    /compile\s*\(/i,
    // File system access
    /open\s*\(/i,
    /file\s*\(/i,
    /\.read\s*\(/i,
    /\.write\s*\(/i,
    /\.remove\s*\(/i,
    /\.delete\s*\(/i,
    // Network access (prefix matches: `import http` also hits e.g. `import httplib`)
    /import\s+socket/i,
    /import\s+urllib/i,
    /import\s+requests/i,
    /import\s+http/i,
    // Process control
    /\.kill\s*\(/i,
    /\.terminate\s*\(/i,
    /\.exit\s*\(/i,
    /quit\s*\(/i,
    // Dangerous builtins (introspection / dynamic attribute access)
    /globals\s*\(/i,
    /locals\s*\(/i,
    /vars\s*\(/i,
    /dir\s*\(/i,
    /getattr\s*\(/i,
    /setattr\s*\(/i,
    /delattr\s*\(/i,
    /hasattr\s*\(/i,
];
|
129
|
+
/**
 * Execution sandbox for running Python snippets in a child process.
 *
 * A run consists of: input validation, a regex-based safety screen, writing
 * the snippet (wrapped in a small Python monitoring harness) to a temp file,
 * spawning `python3` on it with a wall-clock timeout, and parsing the JSON
 * report the harness prints.
 *
 * NOTE(review): isolation here is limited to the safety screen and the
 * timeout. The child is an ordinary `python3` process: no memory, CPU,
 * filesystem or syscall restriction is applied by this class.
 */
export class ExecutionSandbox {
    /**
     * @param {object} [resourceLimits] - Overrides merged over DEFAULT_RESOURCE_LIMITS.
     */
    constructor(resourceLimits) {
        this.resourceLimits = { ...DEFAULT_RESOURCE_LIMITS, ...resourceLimits };
        this.tempDir = join(tmpdir(), 'toknxr-sandbox');
    }
    /**
     * Execute Python code in the sandbox.
     *
     * Never rejects: validation failures, blocked code and internal errors are
     * all reported through the returned result object (`success: false`).
     *
     * @param {string} code - Python source to run.
     * @param {string} [language='python'] - Only 'python' is accepted.
     * @param {{timeoutMs?: number, memoryLimitMB?: number}} [options] - Per-run overrides.
     * @returns {Promise<object>} Execution result (success, output, errors, resourceUsage, ...).
     */
    async execute(code, language = 'python', options) {
        const startTime = Date.now();
        try {
            this.validateInput(code, language);
            const securityAssessment = this.validateSafety(code);
            if (!securityAssessment.isSafe) {
                return this.createSecurityErrorResult(securityAssessment, startTime);
            }
            const executionId = this.generateExecutionId();
            const codeFile = await this.prepareCodeFile(code, executionId);
            try {
                return await this.executeWithLimits(codeFile, options, startTime);
            }
            finally {
                // Always remove the temp file, whether the run resolved or threw.
                await this.cleanup(codeFile);
            }
        }
        catch (error) {
            return this.createErrorResult(error, startTime);
        }
    }
    /**
     * Screen code against SECURITY_PATTERNS and a few size/loop heuristics.
     *
     * Confidence starts at 1.0 and drops by 0.1 per matched pattern, 0.05 for
     * code longer than 10000 characters and 0.2 for a `while True` with no
     * `break` anywhere in the text.
     *
     * NOTE(review): code is considered safe while confidence stays above 0.5
     * and fewer than five risks were found, so a snippet that trips only a few
     * patterns (for example a lone `import os`) is still allowed to run.
     *
     * @param {string} code - Python source to assess.
     * @returns {{isSafe: boolean, risks: string[], confidence: number, recommendations: string[], allowExecution: boolean}}
     */
    validateSafety(code) {
        const risks = [];
        const recommendations = [];
        let confidence = 1.0;
        for (const pattern of SECURITY_PATTERNS) {
            if (pattern.test(code)) {
                risks.push(`Potentially dangerous pattern detected: ${pattern.toString()}`);
                confidence -= 0.1;
            }
        }
        if (code.length > 10000) {
            risks.push('Code is unusually long');
            confidence -= 0.05;
        }
        if (code.includes('while True') && !code.includes('break')) {
            risks.push('Potential infinite loop detected');
            confidence -= 0.2;
        }
        if (risks.length > 0) {
            recommendations.push('Review code for security implications');
            recommendations.push('Consider running in isolated environment');
            recommendations.push('Monitor resource usage during execution');
        }
        const isSafe = confidence > 0.5 && risks.length < 5;
        return {
            isSafe,
            risks,
            confidence: Math.max(0, confidence),
            recommendations,
            allowExecution: isSafe,
        };
    }
    /**
     * Get a shallow copy of the current resource limits.
     * @returns {object}
     */
    getResourceLimits() {
        return { ...this.resourceLimits };
    }
    /**
     * Merge the given fields into the current resource limits.
     * @param {object} limits - Partial limits; unspecified fields are kept.
     */
    setResourceLimits(limits) {
        this.resourceLimits = { ...this.resourceLimits, ...limits };
    }
    /**
     * Run the code once per test case, sequentially.
     *
     * Stops early when a test case marked `critical` does not succeed.
     *
     * @param {string} code - Python source under test.
     * @param {Array<object>} testCases - Each may carry description, input, expectedOutput, timeoutMs, critical.
     * @param {string} [language='python']
     * @returns {Promise<object[]>} One execution result per test case that was run.
     */
    async executeWithTests(code, testCases, language = 'python') {
        const results = [];
        for (const testCase of testCases) {
            const testCode = this.prepareTestCode(code, testCase);
            const options = {
                timeoutMs: testCase.timeoutMs || this.resourceLimits.maxExecutionTimeMs,
                memoryLimitMB: this.resourceLimits.maxMemoryMB,
            };
            const result = await this.execute(testCode, language, options);
            results.push(result);
            if (testCase.critical && !result.success) {
                break;
            }
        }
        return results;
    }
    /**
     * Validate input parameters.
     * @param {string} code
     * @param {string} language
     * @throws {Error} If the code is empty, the language is not 'python', or the code exceeds 100000 characters.
     */
    validateInput(code, language) {
        if (!code || code.trim().length === 0) {
            throw new Error('Code cannot be empty');
        }
        if (language !== 'python') {
            throw new Error(`Language '${language}' not supported. Only Python is currently supported.`);
        }
        if (code.length > 100000) {
            throw new Error('Code is too long (max 100KB)');
        }
    }
    /**
     * Write the wrapped code to `<tempDir>/exec_<executionId>.py`.
     *
     * @param {string} code - Python source to wrap and persist.
     * @param {string} executionId - Unique suffix for the file name.
     * @returns {Promise<string>} Path of the file that was written.
     * @throws If the temp directory or the file cannot be created.
     */
    async prepareCodeFile(code, executionId) {
        // `recursive: true` already tolerates an existing directory, so any error
        // raised here is a real one (permissions, disk, ...) and should surface
        // instead of being replaced by a less specific writeFile failure.
        await mkdir(this.tempDir, { recursive: true });
        const codeFile = join(this.tempDir, `exec_${executionId}.py`);
        await writeFile(codeFile, this.wrapCodeWithMonitoring(code), 'utf8');
        return codeFile;
    }
    /**
     * Wrap user code in a Python harness that captures stdout, records
     * exceptions and resource usage, and prints a JSON report after the
     * `__EXECUTION_RESULT__` marker line.
     *
     * @param {string} code - Python source to embed.
     * @returns {string} Complete Python program text.
     */
    wrapCodeWithMonitoring(code) {
        // The snippet is placed inside the harness's `try:` block, so every line
        // is shifted by one indentation level.
        const indentedCode = code
            .split('\n')
            .map((line) => `    ${line}`)
            .join('\n');
        return `
import sys
import time
import traceback
import resource
import gc
import json

# Resource monitoring setup
start_time = time.time()
# ru_maxrss is reported in bytes on macOS and in kilobytes on Linux
maxrss_units_per_mb = 1024 * 1024 if sys.platform == "darwin" else 1024
start_memory = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss

# Execution result tracking
execution_result = {
    "success": False,
    "output": "",
    "errors": [],
    "resource_usage": {},
    "security_flags": []
}

try:
    # Redirect stdout to capture output
    import io
    old_stdout = sys.stdout
    sys.stdout = captured_output = io.StringIO()

    # Execute user code
${indentedCode}

    # Capture output
    execution_result["output"] = captured_output.getvalue()
    execution_result["success"] = True

except Exception as e:
    execution_result["errors"].append({
        "type": type(e).__name__,
        "message": str(e),
        "traceback": traceback.format_exc()
    })

finally:
    # Restore stdout
    if 'old_stdout' in locals():
        sys.stdout = old_stdout

    # Calculate resource usage
    end_time = time.time()
    end_memory = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss

    execution_result["resource_usage"] = {
        "execution_time_ms": int((end_time - start_time) * 1000),
        "memory_mb": max(0, (end_memory - start_memory) / maxrss_units_per_mb),
        "peak_memory_mb": end_memory / maxrss_units_per_mb
    }

    # Output result as JSON
    print("__EXECUTION_RESULT__")
    print(json.dumps(execution_result))
`;
    }
    /**
     * Spawn `python3` on the prepared file and collect its result.
     *
     * The child is killed with SIGKILL once the timeout elapses. The returned
     * promise always resolves (spawn errors are converted to an error result).
     *
     * @param {string} codeFile - Path to the wrapped Python file.
     * @param {{timeoutMs?: number, memoryLimitMB?: number}} [options]
     * @param {number} [startTime=Date.now()] - Epoch ms used to compute elapsed time.
     * @returns {Promise<object>} Parsed execution result.
     */
    async executeWithLimits(codeFile, options, startTime = Date.now()) {
        const timeout = options?.timeoutMs || this.resourceLimits.maxExecutionTimeMs;
        // NOTE(review): options.memoryLimitMB / resourceLimits.maxMemoryMB are not
        // enforced on the child; only the wall-clock timeout below is applied.
        return new Promise((resolve) => {
            let stdout = '';
            let stderr = '';
            let timedOut = false;
            // NOTE(review): the child inherits the parent's full environment
            // (including any secrets present in process.env).
            const childProcess = spawn('python3', [codeFile], {
                stdio: ['pipe', 'pipe', 'pipe'],
                env: {
                    ...process.env,
                    PYTHONPATH: '',
                    PYTHONHOME: '',
                    MALLOC_ARENA_MAX: '1',
                },
            });
            // Armed after spawn so a synchronous spawn failure leaves no live timer.
            const timeoutHandle = setTimeout(() => {
                timedOut = true;
                if (!childProcess.killed) {
                    childProcess.kill('SIGKILL');
                }
            }, timeout);
            // Nothing is ever written to the child, so close stdin right away:
            // a snippet that reads input then gets EOF instead of blocking until
            // the timeout. Stream errors on this unused pipe are irrelevant.
            childProcess.stdin?.on('error', () => { });
            childProcess.stdin?.end();
            childProcess.stdout?.on('data', (data) => {
                stdout += data.toString();
            });
            childProcess.stderr?.on('data', (data) => {
                stderr += data.toString();
            });
            childProcess.on('close', (code, signal) => {
                clearTimeout(timeoutHandle);
                resolve(this.parseExecutionResult(stdout, stderr, {
                    exitCode: code,
                    signal,
                    timedOut,
                    executionTime: Date.now() - startTime,
                }));
            });
            childProcess.on('error', (error) => {
                clearTimeout(timeoutHandle);
                resolve(this.createErrorResult(error, startTime));
            });
        });
    }
    /**
     * Build an execution result from the child's output.
     *
     * If stdout contains the `__EXECUTION_RESULT__` marker, the text after its
     * first occurrence is parsed as the harness's JSON report. Otherwise raw
     * stdout is used and success is derived from the exit code. Non-empty
     * stderr, a timeout and a non-zero exit code are each appended as errors;
     * the latter two also force `success` to false.
     *
     * @param {string} stdout
     * @param {string} stderr
     * @param {{exitCode: number|null, signal?: string|null, timedOut: boolean, executionTime: number}} processInfo
     * @returns {object} Execution result.
     */
    parseExecutionResult(stdout, stderr, processInfo) {
        const resultMarker = '__EXECUTION_RESULT__';
        let success = false;
        let output = '';
        let errors = [];
        let resourceUsage = {
            memoryMB: 0,
            executionTimeMs: processInfo.executionTime,
            cpuUsage: 0,
        };
        let securityFlags = [];
        try {
            const markerIndex = stdout.indexOf(resultMarker);
            if (markerIndex === -1) {
                // No report from the harness: fall back to raw output.
                output = stdout;
                success = processInfo.exitCode === 0 && !processInfo.timedOut;
            }
            else {
                const jsonStr = stdout.substring(markerIndex + resultMarker.length).trim();
                try {
                    const executionResult = JSON.parse(jsonStr);
                    success = executionResult.success;
                    output = executionResult.output || '';
                    if (Array.isArray(executionResult.errors)) {
                        errors = executionResult.errors.map((err) => ({
                            type: err.type || 'UnknownError',
                            message: err.message || 'Unknown error',
                            stackTrace: err.traceback,
                        }));
                    }
                    const usage = executionResult.resource_usage;
                    if (usage) {
                        resourceUsage = {
                            memoryMB: usage.memory_mb || 0,
                            // `??` so a genuinely measured 0 ms is kept rather than
                            // replaced by the (larger) process-level elapsed time.
                            executionTimeMs: usage.execution_time_ms ?? processInfo.executionTime,
                            cpuUsage: 0, // Not easily measurable from Python
                            peakMemoryMB: usage.peak_memory_mb,
                        };
                    }
                    if (Array.isArray(executionResult.security_flags)) {
                        securityFlags = executionResult.security_flags;
                    }
                }
                catch (parseError) {
                    errors.push({
                        type: 'ParseError',
                        message: 'Failed to parse execution result',
                        stackTrace: parseError instanceof Error ? parseError.stack : undefined,
                    });
                }
            }
            if (stderr.trim()) {
                errors.push({
                    type: 'StderrOutput',
                    message: stderr.trim(),
                });
            }
            if (processInfo.timedOut) {
                errors.push({
                    type: 'TimeoutError',
                    message: `Execution timed out after ${resourceUsage.executionTimeMs}ms`,
                });
                success = false;
            }
            if (processInfo.exitCode !== 0 && processInfo.exitCode !== null) {
                errors.push({
                    type: 'ProcessExitError',
                    message: `Process exited with code ${processInfo.exitCode}`,
                });
                success = false;
            }
        }
        catch (error) {
            success = false;
            errors = [{
                    type: 'ExecutionError',
                    message: error instanceof Error ? error.message : 'Unknown execution error',
                    stackTrace: error instanceof Error ? error.stack : undefined,
                }];
        }
        return {
            success,
            output,
            stderr: stderr || undefined,
            errors,
            resourceUsage,
            securityFlags,
            // `??` keeps a clean exit code of 0; only a null code (process
            // ended by a signal) is reported as undefined.
            exitCode: processInfo.exitCode ?? undefined,
            timedOut: processInfo.timedOut,
        };
    }
    /**
     * Build the Python source for one test case.
     *
     * Prepends a description comment and, when `testCase.input` is defined, a
     * `test_input` assignment; appends a check of the snippet's `result`
     * variable against `testCase.expectedOutput` when that is defined.
     *
     * @param {string} code - Python source under test.
     * @param {{description?: string, input?: *, expectedOutput?: *}} testCase
     * @returns {string} Python source for this test case.
     */
    prepareTestCode(code, testCase) {
        // A line break inside the description would end the `#` comment and turn
        // the remainder into executable Python, so collapse it onto one line.
        const description = String(testCase.description).replace(/[\r\n]+/g, ' ');
        let testCode = `# Test case: ${description}\n`;
        // Values are shipped as JSON text and decoded by Python: a raw JSON
        // literal is not valid Python whenever it contains true/false/null.
        // Stringifying the JSON text once more yields a double-quoted,
        // backslash-escaped literal that Python reads as an ordinary string.
        testCode += 'import json\n';
        if (testCase.input !== undefined) {
            testCode += `test_input = json.loads(${JSON.stringify(JSON.stringify(testCase.input))})\n`;
        }
        testCode += code;
        if (testCase.expectedOutput !== undefined) {
            testCode += `\n# Validate output\n`;
            testCode += `expected_output = json.loads(${JSON.stringify(JSON.stringify(testCase.expectedOutput))})\n`;
            testCode += `if 'result' in locals():\n`;
            testCode += `    if result != expected_output:\n`;
            testCode += `        raise AssertionError(f"Expected {expected_output}, got {result}")\n`;
        }
        return testCode;
    }
    /**
     * Create the result returned when the safety screen blocks execution.
     * @param {{risks: string[]}} assessment - Output of validateSafety.
     * @param {number} startTime - Epoch ms when the run started.
     * @returns {object} Failed execution result carrying the risks as securityFlags.
     */
    createSecurityErrorResult(assessment, startTime) {
        return {
            success: false,
            output: '',
            errors: [{
                    type: 'SecurityError',
                    message: `Code execution blocked due to security concerns: ${assessment.risks.join(', ')}`,
                }],
            resourceUsage: {
                memoryMB: 0,
                executionTimeMs: Date.now() - startTime,
                cpuUsage: 0,
            },
            securityFlags: assessment.risks,
            timedOut: false,
        };
    }
    /**
     * Create a failed result for an unexpected error.
     * @param {*} error - Thrown value; message and stack are used when it is an Error.
     * @param {number} startTime - Epoch ms when the run started.
     * @returns {object} Failed execution result with a single ExecutionError.
     */
    createErrorResult(error, startTime) {
        const isError = error instanceof Error;
        return {
            success: false,
            output: '',
            errors: [{
                    type: 'ExecutionError',
                    message: isError ? error.message : 'Unknown error',
                    stackTrace: isError ? error.stack : undefined,
                }],
            resourceUsage: {
                memoryMB: 0,
                executionTimeMs: Date.now() - startTime,
                cpuUsage: 0,
            },
            securityFlags: [],
            timedOut: false,
        };
    }
    /**
     * Generate a unique execution ID (16 hex characters from 8 random bytes).
     * @returns {string}
     */
    generateExecutionId() {
        return randomBytes(8).toString('hex');
    }
    /**
     * Delete a temporary code file. Best effort: a failure is logged as a
     * warning and never thrown.
     * @param {string} codeFile - Path to remove.
     * @returns {Promise<void>}
     */
    async cleanup(codeFile) {
        try {
            await unlink(codeFile);
        }
        catch (error) {
            // File might not exist or already deleted
            console.warn('Failed to cleanup code file:', error);
        }
    }
}
|
583
|
+
/**
 * Factory for ExecutionSandbox instances.
 *
 * @param {object} [resourceLimits] - Passed unchanged to the ExecutionSandbox constructor.
 * @returns {ExecutionSandbox} A newly constructed sandbox.
 */
export function createExecutionSandbox(resourceLimits) {
    const sandbox = new ExecutionSandbox(resourceLimits);
    return sandbox;
}
|
589
|
+
/**
 * Check whether a `python3` executable can be launched.
 *
 * Runs `python3 --version` and resolves `true` only if it exits with code 0.
 * Resolves `false` when the process cannot be spawned or exits non-zero;
 * the returned promise never rejects.
 *
 * @returns {Promise<boolean>}
 */
export async function checkPythonAvailability() {
    return new Promise((resolve) => {
        // Named `child` so the Node.js global `process` is not shadowed.
        // The version text is not needed, so stdio is ignored rather than
        // allocating pipes that nothing reads.
        const child = spawn('python3', ['--version'], { stdio: 'ignore' });
        child.once('error', () => {
            resolve(false);
        });
        child.once('close', (code) => {
            resolve(code === 0);
        });
    });
}
|