@goldensheepai/toknxr-cli 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/cli.js +182 -23
- package/lib/commands/hallucination-commands.js +453 -0
- package/lib/enhanced-hallucination-detector.js +622 -0
- package/lib/execution-based-detector.js +538 -0
- package/lib/execution-sandbox.js +602 -0
- package/lib/hallucination-database-service.js +447 -0
- package/lib/hallucination-patterns.js +490 -0
- package/lib/types/database-types.js +5 -0
- package/lib/types/hallucination-types.js +74 -0
- package/lib/types/index.js +8 -0
- package/lib/ui.js +73 -6
- package/package.json +1 -1
- package/lib/auth.js +0 -73
- package/lib/cli.test.js +0 -49
- package/lib/code-review.js +0 -319
- package/lib/config.js +0 -7
- package/lib/sync.js +0 -117
@@ -0,0 +1,538 @@
|
|
1
|
+
/**
|
2
|
+
* Execution-Based Hallucination Detection
|
3
|
+
* Integrates execution sandbox with hallucination detection for runtime analysis
|
4
|
+
*/
|
5
|
+
import { ExecutionSandbox } from './execution-sandbox.js';
|
6
|
+
/**
 * Default execution analysis configuration.
 *
 * The three `enable*` flags switch individual analyses on or off; the three
 * numeric values are the limits above which a measured value is reported as
 * excessive. The object is frozen so that an accidental write cannot change
 * the defaults seen by detectors created afterwards; callers customise
 * behaviour by passing overrides, which are merged over a copy.
 */
const DEFAULT_EXECUTION_CONFIG = Object.freeze({
    enableResourceMonitoring: true,
    enableLogicValidation: true,
    enablePerformanceAnalysis: true,
    memoryThresholdMB: 64,
    executionTimeThresholdMs: 3000,
    cpuUsageThreshold: 80,
});
|
17
|
+
/**
 * Execution-based hallucination detector.
 *
 * Turns the result of running a piece of code (resource usage, output,
 * errors, timeout flag) plus a static scan of the code text into a list of
 * "hallucination category" records. Each record carries a type, subtype,
 * severity, confidence, description, evidence list, suggested fix and a
 * business-impact estimate.
 */
export class ExecutionBasedDetector {
    /**
     * @param {object} [config] - Overrides merged over DEFAULT_EXECUTION_CONFIG.
     */
    constructor(config = {}) {
        this.config = { ...DEFAULT_EXECUTION_CONFIG, ...config };
        this.sandbox = new ExecutionSandbox({
            maxMemoryMB: this.config.memoryThresholdMB * 2, // Allow some headroom
            maxExecutionTimeMs: this.config.executionTimeThresholdMs * 2,
        });
    }

    /**
     * Detect resource-related hallucinations from execution results.
     *
     * Reports excessive memory, execution time and CPU usage (as judged by
     * analyzeResourceUsage) and a timed-out execution. Any exception raised
     * during analysis is logged with console.warn and the categories gathered
     * so far are returned.
     *
     * @param {object} executionResult - Reads `resourceUsage` and `timedOut`.
     * @returns {Promise<object[]>} Detected categories; empty when resource
     *   monitoring is disabled in the configuration.
     */
    async detectResourceHallucinations(executionResult) {
        const categories = [];
        if (!this.config.enableResourceMonitoring) {
            return categories;
        }
        try {
            const { memoryUsage, executionTime, cpuUsage } = this.analyzeResourceUsage(executionResult.resourceUsage);
            // Memory usage hallucinations
            if (memoryUsage.isExcessive) {
                const memoryImpact = this.calculateMemoryImpact(memoryUsage.current);
                categories.push({
                    type: 'resource',
                    subtype: 'physical_constraint',
                    severity: this.getMemorySeverity(memoryUsage.current),
                    confidence: 0.9,
                    description: `Excessive memory usage detected: ${memoryUsage.current.toFixed(2)}MB`,
                    evidence: [
                        {
                            type: 'resource_usage',
                            content: `Memory: ${memoryUsage.current}MB (threshold: ${memoryUsage.threshold}MB)`,
                            confidence: 1.0,
                        },
                        {
                            type: 'resource_usage',
                            content: `Peak memory: ${memoryUsage.peak}MB`,
                            confidence: 0.9,
                        },
                    ],
                    suggestedFix: 'Optimize data structures, use generators, or implement memory-efficient algorithms',
                    businessImpact: {
                        estimatedDevTimeWasted: memoryImpact,
                        costMultiplier: 1.5,
                        qualityImpact: 30,
                        costOfHallucinations: memoryImpact * 100,
                    },
                });
            }
            // Execution time hallucinations
            if (executionTime.isExcessive) {
                const timeImpact = this.calculateTimeImpact(executionTime.actual);
                categories.push({
                    type: 'resource',
                    subtype: 'computational_boundary',
                    severity: this.getTimeSeverity(executionTime.actual),
                    confidence: 0.85,
                    description: `Excessive execution time: ${executionTime.actual}ms`,
                    evidence: [
                        {
                            type: 'performance_metric',
                            content: `Execution time: ${executionTime.actual}ms (threshold: ${executionTime.threshold}ms)`,
                            confidence: 1.0,
                        },
                    ],
                    suggestedFix: 'Optimize algorithm complexity, add caching, or use more efficient data structures',
                    businessImpact: {
                        estimatedDevTimeWasted: timeImpact,
                        costMultiplier: 1.3,
                        qualityImpact: 20,
                        costOfHallucinations: timeImpact * 100,
                    },
                });
            }
            // CPU usage hallucinations
            if (cpuUsage.isExcessive) {
                categories.push({
                    type: 'resource',
                    subtype: 'computational_boundary',
                    severity: 'medium',
                    confidence: 0.7,
                    description: `High CPU usage detected: ${cpuUsage.percentage}%`,
                    evidence: [
                        {
                            type: 'resource_usage',
                            content: `CPU usage: ${cpuUsage.percentage}% (threshold: ${cpuUsage.threshold}%)`,
                            confidence: 0.8,
                        },
                    ],
                    suggestedFix: 'Optimize computational complexity or add CPU usage limits',
                    businessImpact: {
                        estimatedDevTimeWasted: 1.5,
                        costMultiplier: 1.2,
                        qualityImpact: 15,
                        costOfHallucinations: 150.0,
                    },
                });
            }
            // Timeout hallucinations
            if (executionResult.timedOut) {
                categories.push({
                    type: 'resource',
                    subtype: 'computational_boundary',
                    severity: 'high',
                    confidence: 1.0,
                    description: 'Code execution timed out, indicating potential infinite loop or excessive computation',
                    evidence: [
                        {
                            type: 'timeout',
                            content: 'Execution exceeded maximum allowed time',
                            confidence: 1.0,
                        },
                    ],
                    suggestedFix: 'Add proper termination conditions, optimize loops, or reduce computational complexity',
                    businessImpact: {
                        estimatedDevTimeWasted: 3.0,
                        costMultiplier: 1.6,
                        qualityImpact: 35,
                        costOfHallucinations: 300.0,
                    },
                });
            }
        }
        catch (error) {
            console.warn('Resource analysis failed:', error);
        }
        return categories;
    }

    /**
     * Detect logic-related hallucinations from code and execution results.
     *
     * Runs three checks: a static scan for loops without termination, a
     * comparison of the produced output with `expectedOutput` (skipped when
     * `expectedOutput` is undefined), and a classification of reported
     * execution errors. Any exception raised during analysis is logged with
     * console.warn and the categories gathered so far are returned.
     *
     * @param {string} code - Source text that was executed.
     * @param {object} executionResult - Reads `output` and `errors`.
     * @param {*} [expectedOutput] - Value the output is compared against.
     * @returns {Promise<object[]>} Detected categories; empty when logic
     *   validation is disabled in the configuration.
     */
    async detectLogicHallucinations(code, executionResult, expectedOutput) {
        const categories = [];
        if (!this.config.enableLogicValidation) {
            return categories;
        }
        try {
            // 1. Analyze infinite loop risks
            const loopAnalysis = this.detectInfiniteLoops(code);
            if (loopAnalysis.hasInfiniteLoopRisk) {
                categories.push({
                    type: 'logic',
                    subtype: 'logic_deviation',
                    severity: loopAnalysis.confidence > 0.8 ? 'high' : 'medium',
                    confidence: loopAnalysis.confidence,
                    description: 'Potential infinite loop or inadequate termination conditions detected',
                    evidence: [
                        {
                            type: 'code_pattern',
                            content: `Loop complexity: ${loopAnalysis.loopComplexity}`,
                            confidence: 0.8,
                        },
                        {
                            type: 'code_pattern',
                            content: `Termination conditions: ${loopAnalysis.terminationConditions.join(', ')}`,
                            confidence: 0.7,
                        },
                    ],
                    suggestedFix: 'Add proper loop termination conditions and bounds checking',
                    businessImpact: {
                        estimatedDevTimeWasted: 2.5,
                        costMultiplier: 1.4,
                        qualityImpact: 30,
                        costOfHallucinations: 250.0,
                    },
                });
            }
            // 2. Validate output correctness
            if (expectedOutput !== undefined) {
                const correctnessAnalysis = this.validateOutputCorrectness(executionResult, expectedOutput);
                if (!correctnessAnalysis.outputMatches && correctnessAnalysis.confidence > 0.7) {
                    const correctnessImpact = this.calculateCorrectnessImpact(correctnessAnalysis.similarity);
                    categories.push({
                        type: 'logic',
                        subtype: 'logic_breakdown',
                        severity: this.getCorrectnessSeverity(correctnessAnalysis.similarity),
                        confidence: correctnessAnalysis.confidence,
                        description: 'Output does not match expected result, indicating logic errors',
                        evidence: [
                            {
                                type: 'output_comparison',
                                content: `Expected type: ${correctnessAnalysis.expectedType}, Actual type: ${correctnessAnalysis.actualType}`,
                                confidence: 0.9,
                            },
                            {
                                type: 'output_comparison',
                                content: `Similarity score: ${correctnessAnalysis.similarity.toFixed(2)}`,
                                confidence: 0.8,
                            },
                        ],
                        suggestedFix: 'Review algorithm logic and test with various input scenarios',
                        businessImpact: {
                            estimatedDevTimeWasted: correctnessImpact,
                            costMultiplier: 1.3,
                            qualityImpact: 25,
                            costOfHallucinations: correctnessImpact * 100,
                        },
                    });
                }
            }
            // 3. Analyze execution errors for logic issues.
            // A result without an `errors` array is treated as "no errors"
            // instead of aborting the whole analysis.
            if (executionResult.errors?.length > 0) {
                const logicErrors = this.analyzeExecutionErrorsForLogic(executionResult.errors);
                categories.push(...logicErrors);
            }
        }
        catch (error) {
            console.warn('Logic analysis failed:', error);
        }
        return categories;
    }

    /**
     * Analyze resource usage patterns.
     *
     * Compares each measured value with its configured threshold. A missing
     * `resourceUsage` object is treated as "nothing measured": every
     * `isExcessive` flag is then false.
     *
     * @param {object} [resourceUsage] - Reads `memoryMB`, `peakMemoryMB`,
     *   `executionTimeMs` and `cpuUsage`.
     * @returns {{memoryUsage: object, executionTime: object, cpuUsage: object}}
     */
    analyzeResourceUsage(resourceUsage) {
        const usage = resourceUsage ?? {};
        return {
            memoryUsage: {
                current: usage.memoryMB,
                peak: usage.peakMemoryMB || usage.memoryMB,
                threshold: this.config.memoryThresholdMB,
                isExcessive: usage.memoryMB > this.config.memoryThresholdMB,
            },
            executionTime: {
                actual: usage.executionTimeMs,
                threshold: this.config.executionTimeThresholdMs,
                isExcessive: usage.executionTimeMs > this.config.executionTimeThresholdMs,
            },
            cpuUsage: {
                percentage: usage.cpuUsage,
                threshold: this.config.cpuUsageThreshold,
                isExcessive: usage.cpuUsage > this.config.cpuUsageThreshold,
            },
        };
    }

    /**
     * Detect potential infinite loops in code.
     *
     * The patterns look for colon-terminated `while ...:` / `for ...:`
     * headers. Only a `while True:` / `while 1:` loop whose block contains
     * no `break` or `return` statement sets `hasInfiniteLoopRisk`; comparison
     * conditions and multiple loops only raise `confidence`.
     *
     * @param {string} code - Source text to scan.
     * @returns {{hasInfiniteLoopRisk: boolean, loopComplexity: number,
     *   terminationConditions: string[], confidence: number}}
     */
    detectInfiniteLoops(code) {
        const analysis = {
            hasInfiniteLoopRisk: false,
            loopComplexity: 0,
            terminationConditions: [],
            confidence: 0,
        };
        // matchAll keeps each match's offset, so textually identical loops
        // are analysed separately instead of all resolving to the first one.
        const whileLoops = [...code.matchAll(/while\s+([^:]+):/g)];
        const forLoops = code.match(/for\s+[^:]+:/g) || [];
        analysis.loopComplexity = whileLoops.length + forLoops.length;
        // Check for dangerous while patterns
        for (const loopMatch of whileLoops) {
            const condition = loopMatch[1] || '';
            const trimmedCondition = condition.trim();
            // Check for "while True" without break
            if (trimmedCondition === 'True' || trimmedCondition === '1') {
                const loopBlock = this.extractLoopBlock(code, loopMatch[0], loopMatch.index);
                // Whole-word match: `breakpoint()` or `return_value` is not
                // a termination statement.
                if (/\b(?:break|return)\b/.test(loopBlock)) {
                    analysis.terminationConditions.push('break/return statement');
                }
                else {
                    analysis.hasInfiniteLoopRisk = true;
                    analysis.confidence = Math.max(analysis.confidence, 0.9);
                }
            }
            // Check for complex conditions that might not terminate
            if (condition.includes('!=') || condition.includes('>=') || condition.includes('<=')) {
                analysis.confidence = Math.max(analysis.confidence, 0.6);
                analysis.terminationConditions.push(`condition: ${condition}`);
            }
        }
        // Check for nested loops (higher complexity risk)
        const nestedLoopPattern = /(for|while)[^:]*:[\s\S]*?(for|while)[^:]*:/g;
        const nestedLoops = code.match(nestedLoopPattern) || [];
        if (nestedLoops.length > 0) {
            analysis.loopComplexity += nestedLoops.length;
            analysis.confidence = Math.max(analysis.confidence, 0.5);
        }
        return analysis;
    }

    /**
     * Validate output correctness against expected results.
     *
     * The last line of `executionResult.output` is taken as the result and
     * JSON-parsed when possible. Similarity is scored in [0, 1]; the output
     * counts as matching when similarity exceeds 0.9.
     *
     * @param {object} executionResult - Reads `output`.
     * @param {*} expectedOutput - Value to compare against.
     * @returns {{outputMatches: boolean, similarity: number,
     *   expectedType: string, actualType: string, confidence: number}}
     */
    validateOutputCorrectness(executionResult, expectedOutput) {
        const analysis = {
            outputMatches: false,
            similarity: 0,
            expectedType: typeof expectedOutput,
            actualType: 'undefined',
            confidence: 0.8,
        };
        if (!executionResult.output) {
            analysis.actualType = 'null';
            analysis.similarity = 0;
            return analysis;
        }
        // Try to parse the output
        let actualOutput;
        try {
            // Try to extract the last line as the result
            const outputLines = executionResult.output.trim().split('\n');
            const lastLine = outputLines[outputLines.length - 1];
            // Try to parse as JSON first
            try {
                actualOutput = JSON.parse(lastLine);
            }
            catch {
                // If not JSON, use as string
                actualOutput = lastLine;
            }
            // An expected string such as "42" must be compared with the
            // printed text, not with the number/boolean JSON parsing yields.
            if (analysis.expectedType === 'string' && typeof actualOutput !== 'string') {
                actualOutput = lastLine;
            }
        }
        catch {
            actualOutput = executionResult.output;
        }
        analysis.actualType = typeof actualOutput;
        // Type comparison
        if (analysis.expectedType === analysis.actualType) {
            analysis.similarity += 0.3;
        }
        // Value comparison
        if (analysis.expectedType === 'string' && analysis.actualType === 'string') {
            analysis.similarity += this.calculateStringSimilarity(String(expectedOutput), String(actualOutput)) * 0.7;
        }
        else if (analysis.expectedType === 'number' && analysis.actualType === 'number') {
            const diff = Math.abs(expectedOutput - actualOutput);
            const maxValue = Math.max(Math.abs(expectedOutput), Math.abs(actualOutput), 1);
            analysis.similarity += Math.max(0, 1 - (diff / maxValue)) * 0.7;
        }
        else if (expectedOutput === actualOutput || this.deepEqual(expectedOutput, actualOutput)) {
            // Arrays/objects parsed from the output are never reference-equal
            // to the expected value, so they are compared structurally.
            analysis.similarity = 1.0;
        }
        else {
            // Try string comparison as fallback. Serialising (rather than
            // calling toString) is safe for null and distinguishes objects,
            // which would otherwise all read "[object Object]".
            analysis.similarity += this.calculateStringSimilarity(this.serializeForComparison(expectedOutput), this.serializeForComparison(actualOutput)) * 0.5;
        }
        analysis.outputMatches = analysis.similarity > 0.9;
        return analysis;
    }

    /**
     * Analyze execution errors for logic-related issues.
     *
     * @param {object[]} errors - Each entry is read for `type`, `message`
     *   and `lineNumber`.
     * @returns {object[]} One category per error whose `type` is one of the
     *   recognised logic error names; other errors are ignored.
     */
    analyzeExecutionErrorsForLogic(errors) {
        const categories = [];
        const logicErrorTypes = [
            'ZeroDivisionError',
            'ValueError',
            'AssertionError',
            'LogicError',
            'RuntimeError',
        ];
        for (const error of errors) {
            if (!logicErrorTypes.includes(error.type)) {
                continue;
            }
            const severity = this.getLogicErrorSeverity(error.type);
            const devTimeWasted = severity === 'critical' ? 4.0 : severity === 'high' ? 2.5 : 1.5;
            categories.push({
                type: 'logic',
                subtype: error.type === 'ZeroDivisionError' ? 'logic_deviation' : 'logic_breakdown',
                severity,
                confidence: 0.9,
                description: `Logic error detected: ${error.type}`,
                evidence: [
                    {
                        type: 'execution_error',
                        content: error.message,
                        lineNumber: error.lineNumber,
                        confidence: 1.0,
                    },
                ],
                suggestedFix: this.getLogicErrorFix(error.type),
                businessImpact: {
                    estimatedDevTimeWasted: devTimeWasted,
                    costMultiplier: severity === 'critical' ? 2.0 : severity === 'high' ? 1.5 : 1.2,
                    qualityImpact: severity === 'critical' ? 50 : severity === 'high' ? 35 : 20,
                    costOfHallucinations: devTimeWasted * 100,
                },
            });
        }
        return categories;
    }

    /**
     * Helper methods
     */

    /**
     * Extract the text of an indentation-delimited loop block.
     *
     * The block is the header line plus every following non-blank line that
     * is indented deeper than the header; blank lines are skipped.
     *
     * @param {string} code - Full source text.
     * @param {string} loopStatement - Header text to locate.
     * @param {number} [startIndex] - Offset of the header in `code`. When
     *   omitted, the first occurrence of `loopStatement` is used.
     * @returns {string} The block text, or '' when the header is not found.
     */
    extractLoopBlock(code, loopStatement, startIndex) {
        const loopIndex = Number.isInteger(startIndex) && startIndex >= 0
            ? startIndex
            : code.indexOf(loopStatement);
        if (loopIndex === -1) {
            return '';
        }
        // Indentation of the line holding the header. The body ends at the
        // first non-blank line that is not indented deeper than this.
        const lineStart = loopIndex === 0 ? 0 : code.lastIndexOf('\n', loopIndex - 1) + 1;
        const headerPrefix = code.slice(lineStart, loopIndex);
        const headerIndent = headerPrefix.length - headerPrefix.trimStart().length;
        const lines = code.substring(loopIndex).split('\n');
        const loopBlock = [lines[0]]; // Include the loop statement
        for (let i = 1; i < lines.length; i++) {
            const line = lines[i];
            if (line.trim() === '') {
                continue;
            }
            const currentIndent = line.length - line.trimStart().length;
            if (currentIndent <= headerIndent) {
                break; // End of loop block
            }
            loopBlock.push(line);
        }
        return loopBlock.join('\n');
    }

    /**
     * Structural equality for plain values, arrays and objects.
     *
     * @param {*} a
     * @param {*} b
     * @returns {boolean} True when both are strictly equal, or are both
     *   arrays / both non-array objects with the same own enumerable keys
     *   and recursively equal values.
     */
    deepEqual(a, b) {
        if (a === b) {
            return true;
        }
        if (typeof a !== 'object' || typeof b !== 'object' || a === null || b === null) {
            return false;
        }
        if (Array.isArray(a) !== Array.isArray(b)) {
            return false;
        }
        const keysA = Object.keys(a);
        if (keysA.length !== Object.keys(b).length) {
            return false;
        }
        return keysA.every((key) => Object.prototype.hasOwnProperty.call(b, key) && this.deepEqual(a[key], b[key]));
    }

    /**
     * Convert any value to text for similarity scoring.
     *
     * @param {*} value
     * @returns {string} JSON text for non-null objects when serialisable,
     *   otherwise the String() conversion.
     */
    serializeForComparison(value) {
        if (typeof value === 'object' && value !== null) {
            try {
                return JSON.stringify(value) ?? String(value);
            }
            catch {
                return String(value);
            }
        }
        return String(value);
    }

    /**
     * Similarity of two strings in [0, 1], based on edit distance relative
     * to the longer string. Two empty strings score 1.
     */
    calculateStringSimilarity(str1, str2) {
        if (str1 === str2) {
            return 1.0;
        }
        const longer = str1.length > str2.length ? str1 : str2;
        const shorter = str1.length > str2.length ? str2 : str1;
        if (longer.length === 0) {
            return 1.0;
        }
        const distance = this.levenshteinDistance(longer, shorter);
        return (longer.length - distance) / longer.length;
    }

    /**
     * Levenshtein edit distance between two strings (insert, delete and
     * substitute each cost 1), computed with a full dynamic-programming table.
     */
    levenshteinDistance(str1, str2) {
        const matrix = Array(str2.length + 1).fill(null).map(() => Array(str1.length + 1).fill(null));
        for (let i = 0; i <= str1.length; i++) {
            matrix[0][i] = i;
        }
        for (let j = 0; j <= str2.length; j++) {
            matrix[j][0] = j;
        }
        for (let j = 1; j <= str2.length; j++) {
            for (let i = 1; i <= str1.length; i++) {
                const indicator = str1[i - 1] === str2[j - 1] ? 0 : 1;
                matrix[j][i] = Math.min(matrix[j][i - 1] + 1, matrix[j - 1][i] + 1, matrix[j - 1][i - 1] + indicator);
            }
        }
        return matrix[str2.length][str1.length];
    }

    /** Map a memory figure in MB to a severity label (fixed 64/128/200 steps). */
    getMemorySeverity(memoryMB) {
        if (memoryMB > 200) {
            return 'critical';
        }
        if (memoryMB > 128) {
            return 'high';
        }
        if (memoryMB > 64) {
            return 'medium';
        }
        return 'low';
    }

    /** Map an execution time in ms to a severity label (fixed 3000/5000/10000 steps). */
    getTimeSeverity(timeMs) {
        if (timeMs > 10000) {
            return 'critical';
        }
        if (timeMs > 5000) {
            return 'high';
        }
        if (timeMs > 3000) {
            return 'medium';
        }
        return 'low';
    }

    /** Map an output similarity score to a severity label; lower similarity is more severe. */
    getCorrectnessSeverity(similarity) {
        if (similarity < 0.3) {
            return 'critical';
        }
        if (similarity < 0.5) {
            return 'high';
        }
        if (similarity < 0.7) {
            return 'medium';
        }
        return 'low';
    }

    /** Severity label for a recognised error type name; unknown names map to 'medium'. */
    getLogicErrorSeverity(errorType) {
        const severityMap = {
            'ZeroDivisionError': 'high',
            'ValueError': 'medium',
            'AssertionError': 'high',
            'LogicError': 'critical',
            'RuntimeError': 'medium',
        };
        return severityMap[errorType] || 'medium';
    }

    /** Suggested-fix text for a recognised error type name, with a generic fallback. */
    getLogicErrorFix(errorType) {
        const fixes = {
            'ZeroDivisionError': 'Add zero division check before division operations',
            'ValueError': 'Validate input values and add proper error handling',
            'AssertionError': 'Review assertion conditions and fix logic',
            'LogicError': 'Restructure the algorithm logic',
            'RuntimeError': 'Add proper error handling and resource management',
        };
        return fixes[errorType] || 'Review and fix the logic error';
    }

    /** Impact figure for a memory reading in MB (same steps as getMemorySeverity). */
    calculateMemoryImpact(memoryMB) {
        if (memoryMB > 200) {
            return 4.0;
        }
        if (memoryMB > 128) {
            return 2.5;
        }
        if (memoryMB > 64) {
            return 1.5;
        }
        return 0.5;
    }

    /** Impact figure for an execution time in ms (same steps as getTimeSeverity). */
    calculateTimeImpact(timeMs) {
        if (timeMs > 10000) {
            return 3.5;
        }
        if (timeMs > 5000) {
            return 2.0;
        }
        if (timeMs > 3000) {
            return 1.0;
        }
        return 0.5;
    }

    /** Impact figure for an output similarity score (same steps as getCorrectnessSeverity). */
    calculateCorrectnessImpact(similarity) {
        if (similarity < 0.3) {
            return 4.0;
        }
        if (similarity < 0.5) {
            return 3.0;
        }
        if (similarity < 0.7) {
            return 2.0;
        }
        return 1.0;
    }
}
|
518
|
+
/**
 * Build an ExecutionBasedDetector.
 *
 * @param {object} [config] - Passed unchanged to the ExecutionBasedDetector
 *   constructor.
 * @returns {ExecutionBasedDetector} The newly constructed detector.
 */
export function createExecutionBasedDetector(config) {
    const detector = new ExecutionBasedDetector(config);
    return detector;
}
|
524
|
+
/**
 * Utility function for quick execution-based analysis.
 *
 * Creates a detector from `config`, executes `code` once in that detector's
 * sandbox, and runs both the resource and the logic detection over the
 * result.
 *
 * @param {string} code - Source text to execute and analyse.
 * @param {*} [expectedOutput] - Forwarded to detectLogicHallucinations.
 * @param {object} [config] - Detector configuration overrides.
 * @returns {Promise<{executionResult: object, resourceHallucinations: object[],
 *   logicHallucinations: object[]}>}
 */
export async function analyzeExecutionForHallucinations(code, expectedOutput, config) {
    const detector = createExecutionBasedDetector(config);
    // Execute in the detector's own sandbox: its limits are derived from the
    // same configuration as the thresholds used by the analysis below. A
    // separately constructed default sandbox would ignore `config`.
    const executionResult = await detector.sandbox.execute(code);
    // The two passes only read the execution result, so they can run together.
    const [resourceHallucinations, logicHallucinations] = await Promise.all([
        detector.detectResourceHallucinations(executionResult),
        detector.detectLogicHallucinations(code, executionResult, expectedOutput),
    ]);
    return {
        executionResult,
        resourceHallucinations,
        logicHallucinations,
    };
}
|