@goldensheepai/toknxr-cli 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +270 -9
- package/lib/audit-logger.js +500 -0
- package/lib/cli.js +1850 -129
- package/lib/cli.test.js +49 -0
- package/lib/code-analysis.js +349 -4
- package/lib/dashboard.js +4 -17
- package/lib/fixtures/canary-interaction.js +18 -0
- package/lib/plugin-system.js +266 -0
- package/lib/sync.js +27 -5
- package/lib/ui.js +129 -0
- package/lib/utils.js +117 -0
- package/package.json +51 -18
- package/.env +0 -21
- package/.env.example +0 -21
- package/interactions.log +0 -8
- package/src/ai-analytics.ts +0 -418
- package/src/auth.ts +0 -80
- package/src/cli.ts +0 -447
- package/src/code-analysis.ts +0 -365
- package/src/config.ts +0 -10
- package/src/dashboard.tsx +0 -391
- package/src/hallucination-detector.ts +0 -368
- package/src/policy.ts +0 -55
- package/src/pricing.ts +0 -21
- package/src/proxy.ts +0 -438
- package/src/sync.ts +0 -129
- package/start.sh +0 -56
- package/test-analysis.mjs +0 -77
- package/test-coding.mjs +0 -27
- package/test-generate-sample-data.js +0 -118
- package/test-proxy.mjs +0 -25
- package/toknxr.config.json +0 -63
- package/toknxr.policy.json +0 -18
- package/tsconfig.json +0 -19
@@ -1,368 +0,0 @@
|
|
1
|
-
/** Severity bucket attached to a detection result. */
export type HallucinationSeverityLevel = 'low' | 'medium' | 'high' | 'critical';

/** Kinds of hallucination a category entry can describe. */
export type HallucinationCategoryType =
  | 'factual'
  | 'contextual'
  | 'technical'
  | 'logical'
  | 'citation';

/** Kinds of signal an evidence entry can record. */
export type HallucinationEvidenceType =
  | 'contradiction'
  | 'overconfidence'
  | 'fabrication'
  | 'context_drift'
  | 'invalid_reference';

/**
 * One concrete signal found in a response that supports a hallucination verdict.
 */
export interface HallucinationEvidence {
  /** Which kind of signal this is. */
  type: HallucinationEvidenceType;
  /** Human-readable explanation of the signal. */
  description: string;
  /** Strength of the signal on a 1-10 scale. */
  severity: number;
  /** Optional excerpt of the text that produced the signal. */
  context?: string;
}

/**
 * A hallucination category together with a confidence value for it.
 */
export interface HallucinationCategory {
  /** Which kind of hallucination this entry describes. */
  type: HallucinationCategoryType;
  /** Human-readable explanation of the category. */
  description: string;
  /** Confidence value for this category. */
  confidence: number;
}

/**
 * Result of analysing a single AI response for hallucinations.
 */
export interface HallucinationDetection {
  /** Overall verdict for the response. */
  isLikelyHallucination: boolean;
  /** Score from 0 to 100; higher means a hallucination is more likely. */
  confidence: number;
  /** Severity bucket for the response. */
  severity: HallucinationSeverityLevel;
  /** Category entries associated with the response. */
  categories: HallucinationCategory[];
  /** Human-readable summaries of the problems found. */
  issues: string[];
  /** Individual signals backing the verdict. */
  evidence: HallucinationEvidence[];
}
|
22
|
-
|
23
|
-
/**
 * Estimated business cost attributed to hallucinated responses.
 */
export interface BusinessImpactMetrics {
  /** Developer time lost, in hours. */
  estimatedDevTimeWasted: number;
  /** Quality degradation score from 0 to 100. */
  qualityDegradationScore: number;
  /** Percentage reduction in ROI. */
  roiImpact: number;
  /** Monetary cost of hallucinations, in USD. */
  costOfHallucinations: number;
}

/**
 * Aggregate hallucination statistics over a set of analyses.
 */
export interface HallucinationMetrics {
  /** Number of analyses included in the aggregate. */
  totalAnalyses: number;
  /** Number of analyses counted as hallucinations. */
  hallucinationCount: number;
  /** Hallucination rate expressed as a percentage. */
  hallucinationRate: number;
  /** Average confidence value across the analyses. */
  avgConfidence: number;
  /** Numeric value per category name (presumably a count; confirm against the code that fills it). */
  byCategory: Record<string, number>;
  /** Numeric value per provider name (presumably a count; confirm against the code that fills it). */
  byProvider: Record<string, number>;
  /** Estimated business cost of the observed hallucinations. */
  businessImpact: BusinessImpactMetrics;
}
|
39
|
-
|
40
|
-
/**
 * Heuristic, regex-based hallucination detection engine.
 *
 * Every check is a plain text pattern match over the response; nothing is
 * verified against an external source. The resulting score is deterministic:
 * the same inputs always produce the same `HallucinationDetection`.
 */
export class HallucinationDetector {
  /** Lower-cased generic technical words that are never reported as suspicious. */
  private readonly technicalTerms = new Set([
    'api', 'endpoint', 'function', 'method', 'class', 'interface', 'module',
    'library', 'framework', 'database', 'server', 'client', 'request', 'response',
    'parameter', 'argument', 'variable', 'constant', 'algorithm', 'data structure'
  ]);

  /** Lower-cased names of well-known libraries that are never reported as suspicious. */
  private readonly commonLibraries = new Set([
    'react', 'express', 'axios', 'lodash', 'jquery', 'bootstrap', 'tailwind',
    'tensorflow', 'pytorch', 'pandas', 'numpy', 'requests', 'flask', 'django'
  ]);

  /**
   * Analyze a response for potential hallucinations.
   *
   * Runs five independent checks (overconfidence, contradictions, technical
   * fabrication, context drift, citations), collects their evidence and
   * derives a 0-100 confidence score plus a severity bucket from it.
   *
   * @param userPrompt - The prompt the response is answering.
   * @param aiResponse - The response text to analyse.
   * @param context - Optional earlier conversation turns; the drift check only runs when this is non-empty.
   * @returns The verdict, score, severity, issue summaries and evidence.
   *   `isLikelyHallucination` is true when the score exceeds 60 or at least two checks reported an issue.
   */
  detectHallucination(
    userPrompt: string,
    aiResponse: string,
    context?: string[]
  ): HallucinationDetection {
    const issues: string[] = [];
    const evidence: HallucinationEvidence[] = [];
    // NOTE(review): nothing in this class ever adds to `categories`, so the
    // returned list is always empty. Confirm whether it was meant to be
    // derived from the evidence.
    const categories: HallucinationCategory[] = [];

    // 1. Overconfidence indicators
    const overconfidenceEvidence = this.detectOverconfidence(aiResponse);
    if (overconfidenceEvidence) {
      evidence.push(overconfidenceEvidence);
      issues.push('Response shows signs of overconfidence without sufficient evidence');
    }

    // 2. Contradictions inside the response
    const contradictions = this.detectContradictions(aiResponse, context);
    evidence.push(...contradictions);
    if (contradictions.length > 0) {
      issues.push('Internal contradictions detected in response');
    }

    // 3. Technical hallucinations (made-up APIs, libraries, etc.)
    const technicalHallucinations = this.detectTechnicalHallucinations(aiResponse);
    evidence.push(...technicalHallucinations);
    if (technicalHallucinations.length > 0) {
      issues.push('Potential technical hallucinations detected');
    }

    // 4. Context drift
    const contextDrift = this.detectContextDrift(userPrompt, aiResponse, context);
    if (contextDrift) {
      evidence.push(contextDrift);
      issues.push('Response may have drifted from original context');
    }

    // 5. Citation/reference issues
    const citationIssues = this.detectCitationIssues(aiResponse);
    evidence.push(...citationIssues);
    if (citationIssues.length > 0) {
      issues.push('Questionable citations or references detected');
    }

    const overallConfidence = this.calculateOverallConfidence(evidence, categories);
    const severity = this.determineSeverity(overallConfidence);
    const isLikelyHallucination = overallConfidence > 60 || issues.length >= 2;

    return {
      isLikelyHallucination,
      confidence: overallConfidence,
      severity,
      categories,
      issues,
      evidence
    };
  }

  /**
   * Detect overconfident phrasing.
   *
   * Each phrase occurrence adds 15 points; evidence is produced only when the
   * total exceeds 30 (i.e. three or more occurrences).
   *
   * @returns Overconfidence evidence, or `null` when the threshold is not exceeded.
   */
  private detectOverconfidence(response: string): HallucinationEvidence | null {
    const overconfidencePatterns = [
      /definitely\s+(correct|right|accurate)/gi,
      /absolutely\s+(certain|sure|positive)/gi,
      /without\s+(a\s+)?doubt/gi,
      /everyone\s+knows/gi,
      /obviously/gi,
      /clearly/gi
    ];

    const rawScore = overconfidencePatterns.reduce((score, pattern) => {
      const matches = response.match(pattern);
      return score + (matches ? matches.length * 15 : 0);
    }, 0);

    if (rawScore <= 30) return null;

    // The score is reported as a percentage, so it must not exceed 100.
    const confidence = Math.min(rawScore, 100);
    return {
      type: 'overconfidence',
      description: `Response shows ${confidence}% overconfidence indicators`,
      severity: Math.min(confidence / 10, 10)
    };
  }

  /**
   * Detect contradictory wording within a single line of the response.
   *
   * Each rule contributes at most one evidence entry (severity 8) whose
   * `context` is the first matching excerpt.
   *
   * @param response - The response text to scan.
   * @param _context - Unused; kept for signature compatibility.
   */
  private detectContradictions(response: string, _context?: string[]): HallucinationEvidence[] {
    const evidence: HallucinationEvidence[] = [];

    // Word boundaries keep the keywords from matching inside longer words
    // (e.g. "no" inside "know", "all" inside "Actually").
    const contradictions = [
      { pattern: /\b(yes|correct|true)\b.*?\b(no|incorrect|false)\b/gi, description: 'Direct yes/no contradiction' },
      { pattern: /\b(always)\b.*?\b(never)\b/gi, description: 'Always/never contradiction' },
      { pattern: /\b(all|every)\b.*?\b(none|no)\b/gi, description: 'All/none contradiction' },
      // NOTE(review): this rule fires whenever a line contains two digits, so
      // almost any numeric text is reported as a contradiction. Left unchanged
      // because no regex-only replacement is obviously correct; revisit.
      { pattern: /(\d+).*?(\d+)/g, description: 'Numerical contradictions' }
    ];

    for (const { pattern, description } of contradictions) {
      const matches = response.match(pattern);
      if (matches) {
        evidence.push({
          type: 'contradiction',
          description: `${description} detected`,
          severity: 8,
          context: matches[0]
        });
      }
    }

    return evidence;
  }

  /**
   * Detect identifiers that look like invented classes, APIs or methods.
   *
   * Flags identifiers starting with an upper-case letter and containing a
   * second one (longer than 6 characters, not in the known word lists), and
   * camelCase method calls whose name is longer than 10 characters.
   */
  private detectTechnicalHallucinations(response: string): HallucinationEvidence[] {
    const evidence: HallucinationEvidence[] = [];

    const candidateTerms = response.match(/\b[A-Z][a-zA-Z]*[A-Z]\w*\b/g) ?? [];
    const suspiciousTerms = candidateTerms.filter(term => {
      const lowered = term.toLowerCase();
      return term.length > 6 &&
        !this.technicalTerms.has(lowered) &&
        !this.commonLibraries.has(lowered);
    });

    if (suspiciousTerms.length > 0) {
      evidence.push({
        type: 'fabrication',
        description: `Suspicious technical terms detected: ${suspiciousTerms.join(', ')}`,
        severity: 7
      });
    }

    // Only camelCase method calls are inspected. The previous snake_case
    // pattern could never report anything because its lower-case-only matches
    // were filtered by an "contains an upper-case letter" test.
    const camelCaseCall = /\.([a-z][a-zA-Z]*[A-Z]\w*)\(/g;
    const suspiciousMethods = Array.from(response.matchAll(camelCaseCall))
      .map(match => match[1] ?? '')
      .filter(name => name.length > 10 && !this.technicalTerms.has(name.toLowerCase()));

    if (suspiciousMethods.length > 0) {
      evidence.push({
        type: 'fabrication',
        description: `Potentially fabricated method names: ${suspiciousMethods.join(', ')}`,
        severity: 6
      });
    }

    return evidence;
  }

  /**
   * Detect a response that shares few keywords with the prompt.
   *
   * The drift score is the percentage of prompt keywords that have no
   * substring overlap with any response keyword; evidence is produced when
   * it exceeds 60.
   *
   * @returns Drift evidence, or `null` when there is no context, the prompt
   *   yields no keywords, or the score is 60 or below.
   */
  private detectContextDrift(
    userPrompt: string,
    response: string,
    context?: string[]
  ): HallucinationEvidence | null {
    if (!context || context.length === 0) return null;

    const promptKeywords = this.extractKeywords(userPrompt);
    // Without prompt keywords the ratio below would be 0/0 (NaN).
    if (promptKeywords.length === 0) return null;

    const responseKeywords = this.extractKeywords(response);

    const contextOverlap = promptKeywords.filter(keyword =>
      responseKeywords.some(respKeyword =>
        respKeyword.includes(keyword) || keyword.includes(respKeyword)
      )
    ).length;

    const driftScore = Math.max(0, (promptKeywords.length - contextOverlap) / promptKeywords.length * 100);

    if (driftScore > 60) {
      return {
        type: 'context_drift',
        description: `High context drift detected (${driftScore.toFixed(1)}% deviation from prompt)`,
        severity: Math.min(driftScore / 10, 10)
      };
    }

    return null;
  }

  /**
   * Detect citations whose cited text is unusually long (more than 50 characters).
   *
   * Every pattern captures the cited text as group 1, so the length check is
   * applied to the citation itself rather than to a connecting word.
   */
  private detectCitationIssues(response: string): HallucinationEvidence[] {
    const evidence: HallucinationEvidence[] = [];

    const citationPatterns = [
      /according\s+to\s+([^,\.]+)/gi,
      /as\s+stated\s+(?:in|by)\s+([^,\.]+)/gi,
      /\[([^\]]+)\]/g, // Reference brackets
      /source[s]?:\s*([^,\.]+)/gi
    ];

    for (const pattern of citationPatterns) {
      for (const match of response.matchAll(pattern)) {
        const citation = match[1] ?? match[0];
        if (citation.length > 50) {
          evidence.push({
            type: 'invalid_reference',
            description: `Suspiciously long or complex citation: ${citation.substring(0, 50)}...`,
            severity: 5
          });
        }
      }
    }

    return evidence;
  }

  /**
   * Combine evidence into a 0-100 score.
   *
   * The score is the mean of `severity * weight` over all evidence entries,
   * scaled by 10 and clamped to [0, 100]. No random jitter is applied, so
   * threshold decisions are reproducible.
   *
   * @param evidence - Evidence entries collected by the individual checks.
   * @param _categories - Unused; kept for signature compatibility.
   * @returns 0 when there is no evidence, otherwise the clamped score.
   */
  private calculateOverallConfidence(
    evidence: HallucinationEvidence[],
    _categories: HallucinationCategory[]
  ): number {
    if (evidence.length === 0) return 0;

    const weights: Record<string, number> = {
      contradiction: 1.0,
      overconfidence: 0.8,
      fabrication: 0.9,
      context_drift: 0.7,
      invalid_reference: 0.6
    };

    const totalWeightedScore = evidence.reduce(
      (sum, ev) => sum + ev.severity * (weights[ev.type] ?? 0.5),
      0
    );
    const avgScore = totalWeightedScore / evidence.length;

    return Math.min(100, Math.max(0, avgScore * 10));
  }

  /**
   * Map a 0-100 score to a severity bucket:
   * 80 and above is critical, 60 and above high, 40 and above medium, otherwise low.
   */
  private determineSeverity(confidence: number): 'low' | 'medium' | 'high' | 'critical' {
    if (confidence >= 80) return 'critical';
    if (confidence >= 60) return 'high';
    if (confidence >= 40) return 'medium';
    return 'low';
  }

  /**
   * Extract up to 10 keywords from text: lower-cased, whitespace-separated
   * words longer than 4 characters, minus a small stop-word list, in order
   * of appearance. Punctuation is not stripped.
   */
  private extractKeywords(text: string): string[] {
    const stopWords = ['that', 'with', 'from', 'this', 'will', 'should', 'would', 'could'];
    return text
      .toLowerCase()
      .split(/\s+/)
      .filter(word => word.length > 4)
      .filter(word => !stopWords.includes(word))
      .slice(0, 10);
  }

  /**
   * Estimate the business impact of a given hallucination rate.
   *
   * @param hallucinationRate - Hallucination rate as a percentage.
   * @param totalInteractions - Number of interactions the rate applies to.
   * @param avgCostPerInteraction - Average cost of one interaction, in USD.
   * @param avgDevTimePerFix - Hours spent per hallucinated interaction (default 0.5).
   * @returns Rounded estimates: hours to 1 decimal, quality score to an
   *   integer (capped at 100), ROI impact to 1 decimal, cost to 2 decimals.
   */
  calculateBusinessImpact(
    hallucinationRate: number,
    totalInteractions: number,
    avgCostPerInteraction: number,
    avgDevTimePerFix: number = 0.5 // hours
  ): BusinessImpactMetrics {
    const affectedInteractions = (hallucinationRate / 100) * totalInteractions;
    const devTimeWasted = affectedInteractions * avgDevTimePerFix;
    const qualityDegradationScore = Math.min(100, hallucinationRate * 1.5);
    const roiImpact = hallucinationRate * 0.8; // 0.8% ROI reduction per 1% hallucination rate
    const costOfHallucinations = affectedInteractions * avgCostPerInteraction * 2; // 2x multiplier for debugging cost

    return {
      estimatedDevTimeWasted: Math.round(devTimeWasted * 10) / 10,
      qualityDegradationScore: Math.round(qualityDegradationScore),
      roiImpact: Math.round(roiImpact * 10) / 10,
      costOfHallucinations: Math.round(costOfHallucinations * 100) / 100
    };
  }
}

/**
 * Shared module-level detector instance.
 */
export const hallucinationDetector = new HallucinationDetector();
|
package/src/policy.ts
DELETED
@@ -1,55 +0,0 @@
|
|
1
|
-
import * as fs from 'node:fs';
|
2
|
-
import * as path from 'node:path';
|
3
|
-
import axios from 'axios';
|
4
|
-
|
5
|
-
/**
 * Spending policy read from `toknxr.policy.json`. Every field is optional.
 */
export interface BudgetsPolicy {
  /** Policy version string. */
  version?: string;
  /** Global monthly spending cap, in USD. */
  monthlyUSD?: number;
  /** Monthly spending caps in USD, keyed by provider name. */
  perProviderMonthlyUSD?: { [provider: string]: number };
  /** Optional webhook URL that receives budget alerts. */
  webhookUrl?: string;
}
|
11
|
-
|
12
|
-
/**
 * Load the budget policy from `toknxr.policy.json` in the given directory.
 *
 * @param cwd - Directory to look in; defaults to the current working directory.
 * @returns The parsed policy, or `null` when the file does not exist, cannot
 *   be read or parsed, or does not contain a JSON object. Failures other than
 *   a missing file are logged with `console.error`.
 */
export function loadPolicy(cwd: string = process.cwd()): BudgetsPolicy | null {
  const policyPath = path.resolve(cwd, 'toknxr.policy.json');
  if (!fs.existsSync(policyPath)) return null;
  try {
    const parsed: unknown = JSON.parse(fs.readFileSync(policyPath, 'utf8'));
    // A policy must be a JSON object; arrays and primitives parse fine but
    // would be handed to callers as if they were a policy.
    if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
      console.error(
        'Error loading policy file:',
        new Error(`${policyPath} must contain a JSON object`)
      );
      return null;
    }
    return parsed as BudgetsPolicy;
  } catch (error) {
    console.error('Error loading policy file:', error);
    return null;
  }
}
|
23
|
-
|
24
|
-
/**
 * Format a date as a `YYYY-MM` month key using its UTC year and month.
 *
 * @param date - Date to format; defaults to the current time.
 */
export function currentMonthKey(date = new Date()): string {
  const month = String(date.getUTCMonth() + 1).padStart(2, '0');
  return `${date.getUTCFullYear()}-${month}`;
}

/**
 * Sum the logged cost for one month from a JSON-lines log file.
 *
 * Each non-blank line is parsed as JSON and is expected to carry `timestamp`,
 * `provider` and `costUSD`. Lines that fail to parse, or whose `costUSD` is
 * not a finite number, are skipped with a warning so that one bad entry
 * cannot turn the totals into NaN. A missing `costUSD` counts as 0 and a
 * missing provider is accumulated under `'unknown'`.
 *
 * @param logFilePath - Path of the log file; a missing file yields zero totals.
 * @param monthKey - Month to include, in the format produced by `currentMonthKey`.
 * @returns The overall total and the per-provider totals.
 */
export function computeMonthlySpend(logFilePath: string, monthKey: string) {
  const sums = { total: 0, byProvider: {} as Record<string, number> };
  if (!fs.existsSync(logFilePath)) return sums;

  const lines = fs
    .readFileSync(logFilePath, 'utf8')
    .split('\n')
    .filter(line => line.trim() !== '');

  for (const line of lines) {
    try {
      const entry: unknown = JSON.parse(line);
      if (typeof entry !== 'object' || entry === null) {
        throw new TypeError('log entry is not a JSON object');
      }
      const { timestamp, costUSD, provider } = entry as {
        timestamp?: unknown;
        costUSD?: unknown;
        provider?: unknown;
      };

      // An unparsable timestamp produces a key that will not equal monthKey.
      if (currentMonthKey(new Date(timestamp as string | number)) !== monthKey) continue;

      const cost = Number(costUSD || 0);
      if (!Number.isFinite(cost)) {
        console.warn('Skipping log entry with non-numeric costUSD in policy check');
        continue;
      }

      const providerName = typeof provider === 'string' && provider !== '' ? provider : 'unknown';
      sums.total += cost;
      sums.byProvider[providerName] = (sums.byProvider[providerName] || 0) + cost;
    } catch (error) {
      console.warn('Skipping invalid log entry in policy check', error);
    }
  }
  return sums;
}
|
47
|
-
|
48
|
-
/**
 * POST a budget alert payload to a webhook, best-effort.
 *
 * The request uses a 5000 ms timeout. Failures are logged with
 * `console.error` and never propagate to the caller.
 *
 * @param webhookUrl - URL the payload is posted to.
 * @param payload - Request body, passed to `axios.post` as-is.
 */
export async function sendBudgetAlert(webhookUrl: string, payload: unknown): Promise<void> {
  try {
    await axios.post(webhookUrl, payload, { timeout: 5000 });
  } catch (error) {
    // Deliberately swallowed: a failed alert must not interrupt the caller.
    console.error('Error sending budget alert:', error);
  }
}
|
55
|
-
|
package/src/pricing.ts
DELETED
@@ -1,21 +0,0 @@
|
|
1
|
-
/**
 * Price table keyed by model name. `promptPer1k` and `completionPer1k` are
 * multiplied by (tokens / 1000) in `estimateCostUSD`.
 *
 * NOTE(review): several of these figures look like published per-1M-token
 * prices rather than per-1K prices. Confirm against the providers' price
 * lists before relying on absolute cost figures.
 */
export const modelToPricing = {
  // Gemini (Free tier available)
  'gemini-2.5-flash': { promptPer1k: 0.15, completionPer1k: 0.60 },
  'gemini-2.5-pro': { promptPer1k: 0.50, completionPer1k: 1.50 },
  'gemini-flash-latest': { promptPer1k: 0.15, completionPer1k: 0.60 },
  'gemini-pro-latest': { promptPer1k: 0.50, completionPer1k: 1.50 },
  // OpenAI (Free tier available for some models)
  'gpt-4o-mini': { promptPer1k: 0.15, completionPer1k: 0.60 },
  'gpt-4o': { promptPer1k: 5.00, completionPer1k: 15.00 },
  // Free tier models (zero cost)
  'ollama-llama3': { promptPer1k: 0.00, completionPer1k: 0.00 },
  'local-model': { promptPer1k: 0.00, completionPer1k: 0.00 },
};

/**
 * Estimate the USD cost of one interaction.
 *
 * Only the table's own keys count as known models, so names that collide
 * with inherited object members (`toString`, `constructor`, ...) fall back
 * to the default pricing instead of producing NaN.
 *
 * @param model - Model name; unknown names use the `gemini-2.5-flash` pricing.
 * @param promptTokens - Number of prompt tokens.
 * @param completionTokens - Number of completion tokens.
 * @returns The estimated cost rounded to 6 decimal places.
 */
export function estimateCostUSD(model: string, promptTokens: number, completionTokens: number): number {
  const isKnownModel = Object.prototype.hasOwnProperty.call(modelToPricing, model);
  const pricing = isKnownModel
    ? modelToPricing[model as keyof typeof modelToPricing]
    : modelToPricing['gemini-2.5-flash'];
  const promptK = promptTokens / 1000;
  const completionK = completionTokens / 1000;
  const cost = promptK * pricing.promptPer1k + completionK * pricing.completionPer1k;
  return Number(cost.toFixed(6));
}
|