@goldensheepai/toknxr-cli 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,368 +0,0 @@
1
/**
 * Result of analysing a single AI response for hallucination signals.
 */
export interface HallucinationDetection {
  /** Overall verdict for the analysed response. */
  isLikelyHallucination: boolean;

  /** 0-100, higher = more likely hallucination. */
  confidence: number;

  /** Coarse severity bucket for the response. */
  severity: 'low' | 'medium' | 'high' | 'critical';

  /** Categories assigned to the response. */
  categories: HallucinationCategory[];

  /** Human-readable summaries of the problems found. */
  issues: string[];

  /** Individual pieces of evidence backing the verdict. */
  evidence: HallucinationEvidence[];
}
9
-
10
/**
 * A category of hallucination together with how confident the classification is.
 */
export interface HallucinationCategory {
  /** Which family of hallucination this category describes. */
  type:
    | 'factual'
    | 'contextual'
    | 'technical'
    | 'logical'
    | 'citation';

  /** Free-text explanation of the category. */
  description: string;

  /** Confidence attached to this category. */
  confidence: number;
}
15
-
16
/**
 * One concrete signal that contributed to a hallucination verdict.
 */
export interface HallucinationEvidence {
  /** Kind of signal that was observed. */
  type:
    | 'contradiction'
    | 'overconfidence'
    | 'fabrication'
    | 'context_drift'
    | 'invalid_reference';

  /** Human-readable explanation of the signal. */
  description: string;

  /** 1-10 */
  severity: number;

  /** Optional excerpt or surrounding text for the signal. */
  context?: string;
}
22
-
23
/**
 * Aggregated hallucination statistics across many analyses.
 */
export interface HallucinationMetrics {
  /** Number of analyses included in the aggregate. */
  totalAnalyses: number;

  /** Number of analyses counted as hallucinations. */
  hallucinationCount: number;

  /** Percentage. */
  hallucinationRate: number;

  /** Average confidence across the analyses. */
  avgConfidence: number;

  /** Numeric value per category name. */
  byCategory: Record<string, number>;

  /** Numeric value per provider name. */
  byProvider: Record<string, number>;

  /** Estimated business cost of the observed hallucinations. */
  businessImpact: BusinessImpactMetrics;
}
32
-
33
/**
 * Estimated business cost attributed to hallucinated responses.
 */
export interface BusinessImpactMetrics {
  /** Hours. */
  estimatedDevTimeWasted: number;

  /** 0-100 */
  qualityDegradationScore: number;

  /** Percentage reduction in ROI. */
  roiImpact: number;

  /** USD. */
  costOfHallucinations: number;
}
39
-
40
/**
 * Heuristic hallucination detection engine.
 *
 * Runs a handful of regex/keyword based checks over an AI response
 * (overconfidence wording, self-contradiction, suspicious identifiers,
 * drift away from the prompt, over-long citations) and folds the resulting
 * evidence into a 0-100 confidence score. All checks are textual heuristics;
 * none of them verifies facts against an external source.
 *
 * The score is deterministic: the same input always yields the same result.
 */
export class HallucinationDetector {
  private technicalTerms = new Set([
    'api', 'endpoint', 'function', 'method', 'class', 'interface', 'module',
    'library', 'framework', 'database', 'server', 'client', 'request', 'response',
    'parameter', 'argument', 'variable', 'constant', 'algorithm', 'data structure'
  ]);

  private commonLibraries = new Set([
    'react', 'express', 'axios', 'lodash', 'jquery', 'bootstrap', 'tailwind',
    'tensorflow', 'pytorch', 'pandas', 'numpy', 'requests', 'flask', 'django'
  ]);

  /**
   * Analyze a response for potential hallucinations.
   *
   * @param userPrompt - The prompt the response is answering.
   * @param aiResponse - The response text to analyse.
   * @param context - Optional earlier conversation turns; context drift is
   *   only evaluated when at least one entry is supplied.
   * @returns The verdict, the 0-100 confidence, the severity bucket and the
   *   evidence collected. `categories` is currently always empty because no
   *   check populates it.
   */
  detectHallucination(
    userPrompt: string,
    aiResponse: string,
    context?: string[]
  ): HallucinationDetection {
    const issues: string[] = [];
    const evidence: HallucinationEvidence[] = [];
    const categories: HallucinationCategory[] = [];

    // 1. Check for overconfidence indicators
    const overconfidenceEvidence = this.detectOverconfidence(aiResponse);
    if (overconfidenceEvidence) {
      evidence.push(overconfidenceEvidence);
      issues.push('Response shows signs of overconfidence without sufficient evidence');
    }

    // 2. Check for contradictions inside the response
    const contradictions = this.detectContradictions(aiResponse, context);
    evidence.push(...contradictions);
    if (contradictions.length > 0) {
      issues.push('Internal contradictions detected in response');
    }

    // 3. Check for technical hallucinations (made-up APIs, libraries, etc.)
    const technicalHallucinations = this.detectTechnicalHallucinations(aiResponse);
    evidence.push(...technicalHallucinations);
    if (technicalHallucinations.length > 0) {
      issues.push('Potential technical hallucinations detected');
    }

    // 4. Check for context drift
    const contextDrift = this.detectContextDrift(userPrompt, aiResponse, context);
    if (contextDrift) {
      evidence.push(contextDrift);
      issues.push('Response may have drifted from original context');
    }

    // 5. Check for citation/reference issues
    const citationIssues = this.detectCitationIssues(aiResponse);
    evidence.push(...citationIssues);
    if (citationIssues.length > 0) {
      issues.push('Questionable citations or references detected');
    }

    const overallConfidence = this.calculateOverallConfidence(evidence, categories);
    const severity = this.determineSeverity(overallConfidence);

    // Either a high score or two independent kinds of issue is enough.
    const isLikelyHallucination = overallConfidence > 60 || issues.length >= 2;

    return {
      isLikelyHallucination,
      confidence: overallConfidence,
      severity,
      categories,
      issues,
      evidence
    };
  }

  /**
   * Detect overconfidence wording ("definitely correct", "obviously", ...).
   *
   * Each phrase occurrence adds 15 points; evidence is reported only once the
   * total exceeds 30, i.e. from the third occurrence onwards.
   *
   * @returns Evidence with severity capped at 10, or `null` below the threshold.
   */
  private detectOverconfidence(response: string): HallucinationEvidence | null {
    const overconfidencePatterns = [
      /definitely\s+(correct|right|accurate)/gi,
      /absolutely\s+(certain|sure|positive)/gi,
      /without\s+(a\s+)?doubt/gi,
      /everyone\s+knows/gi,
      /obviously/gi,
      /clearly/gi
    ];

    const confidence = overconfidencePatterns.reduce((score, pattern) => {
      const matches = response.match(pattern);
      return score + (matches ? matches.length * 15 : 0);
    }, 0);

    if (confidence > 30) {
      return {
        type: 'overconfidence',
        description: `Response shows ${confidence}% overconfidence indicators`,
        severity: Math.min(confidence / 10, 10)
      };
    }

    return null;
  }

  /**
   * Detect opposing keywords that appear on the same line of the response.
   *
   * Keywords are matched as whole words so that, for example, "no" is not
   * found inside "know" or "not". This remains a coarse textual heuristic:
   * two opposing words on one line do not prove a logical contradiction.
   *
   * There is deliberately no numeric check. A pattern that merely looks for
   * two digit runs fires on any number with two digits and therefore carries
   * no signal.
   *
   * @param response - Response text to scan.
   * @param _context - Unused; kept for signature compatibility.
   * @returns One evidence entry (severity 8) per pattern that matched.
   */
  private detectContradictions(response: string, _context?: string[]): HallucinationEvidence[] {
    const evidence: HallucinationEvidence[] = [];

    const contradictions = [
      { pattern: /\b(yes|correct|true)\b.*?\b(no|incorrect|false)\b/gi, description: 'Direct yes/no contradiction' },
      { pattern: /\b(always)\b.*?\b(never)\b/gi, description: 'Always/never contradiction' },
      { pattern: /\b(all|every)\b.*?\b(none|no)\b/gi, description: 'All/none contradiction' }
    ];

    for (const { pattern, description } of contradictions) {
      const matches = response.match(pattern);
      if (matches) {
        evidence.push({
          type: 'contradiction',
          description: `${description} detected`,
          severity: 8,
          context: matches[0]
        });
      }
    }

    return evidence;
  }

  /**
   * Detect identifiers that look like class/API or method names but are not
   * in the small built-in vocabularies.
   *
   * The check is purely lexical (length plus casing), so genuine but
   * uncommon identifiers are reported as well.
   */
  private detectTechnicalHallucinations(response: string): HallucinationEvidence[] {
    const evidence: HallucinationEvidence[] = [];

    // Words starting with an uppercase letter and containing a second one.
    const technicalTerms = response.match(/\b[A-Z][a-zA-Z]*[A-Z]\w*\b/g) || [];
    const suspiciousTerms = technicalTerms.filter(term => {
      const lowered = term.toLowerCase();
      return term.length > 6 &&
        !this.technicalTerms.has(lowered) &&
        !this.commonLibraries.has(lowered);
    });

    if (suspiciousTerms.length > 0) {
      evidence.push({
        type: 'fabrication',
        description: `Suspicious technical terms detected: ${suspiciousTerms.join(', ')}`,
        severity: 7
      });
    }

    const methodPatterns = [
      /\.([a-z][a-zA-Z]*[A-Z]\w*)\(/g, // camelCase methods
      /\b([a-z]+_[a-z_]*)\(/g // snake_case functions
    ];

    for (const pattern of methodPatterns) {
      const matches = Array.from(response.matchAll(pattern));
      const suspiciousMethods = matches.filter(match => {
        const methodName = match[1];
        // NOTE(review): the uppercase requirement means names captured by the
        // snake_case pattern (lowercase letters and underscores only) never
        // pass this filter. Behaviour is kept as-is; confirm intent.
        return methodName.length > 10 &&
          !this.technicalTerms.has(methodName.toLowerCase()) &&
          /[A-Z]/.test(methodName);
      });

      if (suspiciousMethods.length > 0) {
        evidence.push({
          type: 'fabrication',
          description: `Potentially fabricated method names: ${suspiciousMethods.map(m => m[1]).join(', ')}`,
          severity: 6
        });
      }
    }

    return evidence;
  }

  /**
   * Detect a response that shares few keywords with the prompt.
   *
   * Only evaluated when conversation context is supplied. The drift score is
   * the percentage of prompt keywords with no overlapping response keyword;
   * evidence is produced above 60%.
   *
   * @returns Drift evidence, or `null` when there is no context, the prompt
   *   yields no keywords, or the drift is at most 60%.
   */
  private detectContextDrift(
    userPrompt: string,
    response: string,
    context?: string[]
  ): HallucinationEvidence | null {
    if (!context || context.length === 0) return null;

    const promptKeywords = this.extractKeywords(userPrompt);
    // No keywords means nothing to compare; also avoids dividing by zero.
    if (promptKeywords.length === 0) return null;

    const responseKeywords = this.extractKeywords(response);

    const contextOverlap = promptKeywords.filter(keyword =>
      responseKeywords.some(respKeyword =>
        respKeyword.includes(keyword) || keyword.includes(respKeyword)
      )
    ).length;

    const driftScore = Math.max(0, (promptKeywords.length - contextOverlap) / promptKeywords.length * 100);

    if (driftScore > 60) {
      return {
        type: 'context_drift',
        description: `High context drift detected (${driftScore.toFixed(1)}% deviation from prompt)`,
        severity: Math.min(driftScore / 10, 10)
      };
    }

    return null;
  }

  /**
   * Detect citation-like phrases whose cited text is unusually long
   * (more than 50 characters).
   */
  private detectCitationIssues(response: string): HallucinationEvidence[] {
    const evidence: HallucinationEvidence[] = [];

    // In every pattern the LAST capture group holds the cited text.
    const citationPatterns = [
      /according\s+to\s+([^,\.]+)/gi,
      /as\s+stated\s+(in|by)\s+([^,\.]+)/gi,
      /\[([^\]]+)\]/g, // Reference brackets
      /source[s]?:\s*([^,\.]+)/gi
    ];

    for (const pattern of citationPatterns) {
      for (const match of response.matchAll(pattern)) {
        // Use the last group: for "as stated in|by X" group 1 is only the
        // preposition and the citation itself is group 2.
        const citation = match[match.length - 1] || match[0];
        if (citation && citation.length > 50) {
          evidence.push({
            type: 'invalid_reference',
            description: `Suspiciously long or complex citation: ${citation.substring(0, 50)}...`,
            severity: 5
          });
        }
      }
    }

    return evidence;
  }

  /**
   * Combine evidence into a single 0-100 confidence score.
   *
   * The score is the mean of `severity * weight` over all evidence, scaled by
   * 10 and clamped to [0, 100]. No random jitter is applied, so identical
   * evidence always produces an identical score and severity bucket.
   *
   * @param evidence - Evidence gathered by the individual checks.
   * @param _categories - Unused; kept for signature compatibility.
   */
  private calculateOverallConfidence(
    evidence: HallucinationEvidence[],
    _categories: HallucinationCategory[]
  ): number {
    if (evidence.length === 0) return 0;

    const weights: Record<string, number> = {
      contradiction: 1.0,
      overconfidence: 0.8,
      fabrication: 0.9,
      context_drift: 0.7,
      invalid_reference: 0.6
    };

    const totalWeightedScore = evidence.reduce((sum, ev) => {
      return sum + (ev.severity * (weights[ev.type] || 0.5));
    }, 0);

    const avgScore = totalWeightedScore / evidence.length;

    return Math.min(100, Math.max(0, avgScore * 10));
  }

  /**
   * Map a 0-100 confidence score onto a severity bucket
   * (>= 80 critical, >= 60 high, >= 40 medium, otherwise low).
   */
  private determineSeverity(confidence: number): 'low' | 'medium' | 'high' | 'critical' {
    if (confidence >= 80) return 'critical';
    if (confidence >= 60) return 'high';
    if (confidence >= 40) return 'medium';
    return 'low';
  }

  /**
   * Extract up to ten lowercase keywords: whitespace-separated words longer
   * than four characters, excluding a few modal verbs. Punctuation attached
   * to a word is kept.
   */
  private extractKeywords(text: string): string[] {
    // Shorter stop words need no entry here; the length filter drops them.
    const stopWords = ['should', 'would', 'could'];
    return text
      .toLowerCase()
      .split(/\s+/)
      .filter(word => word.length > 4 && !stopWords.includes(word))
      .slice(0, 10);
  }

  /**
   * Estimate the business impact of a given hallucination rate.
   *
   * @param hallucinationRate - Percentage (0-100) of interactions that hallucinated.
   * @param totalInteractions - Number of interactions in the period.
   * @param avgCostPerInteraction - Average cost of one interaction, in USD.
   * @param avgDevTimePerFix - Hours spent fixing one hallucination (default 0.5).
   * @returns Rounded estimates: hours to one decimal, quality score to an
   *   integer (capped at 100), ROI impact to one decimal, cost to cents.
   */
  calculateBusinessImpact(
    hallucinationRate: number,
    totalInteractions: number,
    avgCostPerInteraction: number,
    avgDevTimePerFix: number = 0.5 // hours
  ): BusinessImpactMetrics {
    const devTimeWasted = (hallucinationRate / 100) * totalInteractions * avgDevTimePerFix;
    const qualityDegradationScore = Math.min(100, hallucinationRate * 1.5);
    const roiImpact = hallucinationRate * 0.8; // 0.8% ROI reduction per 1% hallucination rate
    const costOfHallucinations = (hallucinationRate / 100) * totalInteractions * avgCostPerInteraction * 2; // 2x multiplier for debugging cost

    return {
      estimatedDevTimeWasted: Math.round(devTimeWasted * 10) / 10,
      qualityDegradationScore: Math.round(qualityDegradationScore),
      roiImpact: Math.round(roiImpact * 10) / 10,
      costOfHallucinations: Math.round(costOfHallucinations * 100) / 100
    };
  }
}
364
-
365
/**
 * Shared, module-level detector instance.
 */
export const hallucinationDetector: HallucinationDetector =
  new HallucinationDetector();
package/src/policy.ts DELETED
@@ -1,55 +0,0 @@
1
- import * as fs from 'node:fs';
2
- import * as path from 'node:path';
3
- import axios from 'axios';
4
-
5
/**
 * Budget policy; every field is optional.
 */
export interface BudgetsPolicy {
  /** Policy version string. */
  version?: string;

  /** Global monthly cap, in USD. */
  monthlyUSD?: number;

  /** Monthly caps in USD, keyed by provider name. */
  perProviderMonthlyUSD?: Record<string, number>;

  /** Optional webhook for alerts. */
  webhookUrl?: string;
}
11
-
12
/**
 * Load `toknxr.policy.json` from the given directory.
 *
 * @param cwd - Directory to look in; defaults to the process working directory.
 * @returns The parsed policy, or `null` when the file does not exist, cannot
 *   be read or parsed, or does not contain a JSON object. Failures other than
 *   a missing file are reported on stderr.
 */
export function loadPolicy(cwd: string = process.cwd()): BudgetsPolicy | null {
  const policyPath = path.resolve(cwd, 'toknxr.policy.json');
  if (!fs.existsSync(policyPath)) return null;
  try {
    const parsed: unknown = JSON.parse(fs.readFileSync(policyPath, 'utf8'));
    // Valid JSON is not necessarily a policy: `null`, arrays and scalars
    // parse fine but would break callers that read policy fields.
    if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
      console.error(
        'Error loading policy file:',
        new Error('toknxr.policy.json must contain a JSON object')
      );
      return null;
    }
    return parsed as BudgetsPolicy;
  } catch (error) {
    console.error('Error loading policy file:', error);
    return null;
  }
}
23
-
24
/**
 * Build the `YYYY-MM` key for a date, using its UTC year and month.
 *
 * @param date - Date to convert; defaults to the current time.
 */
export function currentMonthKey(date = new Date()): string {
  const year = date.getUTCFullYear();
  const month = date.getUTCMonth() + 1; // getUTCMonth() is zero-based
  const paddedMonth = month < 10 ? `0${month}` : `${month}`;
  return `${year}-${paddedMonth}`;
}
27
-
28
/**
 * Sum the cost of all log entries that fall in the given month.
 *
 * The log file is read as newline-delimited JSON. Each entry is expected to
 * carry `timestamp`, `costUSD` and `provider`. Entries from other months are
 * ignored. Entries that cannot be parsed, or whose cost is not a finite
 * number, are skipped with a warning so that one bad line cannot turn the
 * totals into NaN.
 *
 * @param logFilePath - Path to the interaction log.
 * @param monthKey - Month to aggregate, in the format produced by `currentMonthKey`.
 * @returns The overall total and a per-provider breakdown. Entries without a
 *   usable provider name are counted under `'unknown'`. Both are zero/empty
 *   when the file does not exist.
 */
export function computeMonthlySpend(
  logFilePath: string,
  monthKey: string
): { total: number; byProvider: Record<string, number> } {
  const sums = { total: 0, byProvider: {} as Record<string, number> };
  if (!fs.existsSync(logFilePath)) return sums;

  // Drop blank / whitespace-only lines (including stray "\r") up front.
  const lines = fs
    .readFileSync(logFilePath, 'utf8')
    .split('\n')
    .filter(line => line.trim() !== '');

  for (const line of lines) {
    try {
      const entry = JSON.parse(line);
      if (currentMonthKey(new Date(entry.timestamp)) !== monthKey) continue;

      const cost = Number(entry.costUSD || 0);
      if (!Number.isFinite(cost)) {
        // Adding NaN/Infinity would corrupt every later comparison against a cap.
        console.warn(
          'Skipping invalid log entry in policy check',
          new Error(`non-numeric costUSD: ${String(entry.costUSD)}`)
        );
        continue;
      }

      const provider =
        typeof entry.provider === 'string' && entry.provider !== ''
          ? entry.provider
          : 'unknown';

      sums.total += cost;
      sums.byProvider[provider] = (sums.byProvider[provider] || 0) + cost;
    } catch (error) {
      console.warn('Skipping invalid log entry in policy check', error);
    }
  }
  return sums;
}
47
-
48
/**
 * POST a budget alert payload to a webhook.
 *
 * Best-effort: the request uses a 5 second timeout, and any failure is
 * logged rather than thrown, so the returned promise does not reject.
 *
 * @param webhookUrl - Destination URL.
 * @param payload - Request body passed to axios.
 */
export async function sendBudgetAlert(webhookUrl: string, payload: any) {
  const requestOptions = { timeout: 5000 };
  try {
    await axios.post(webhookUrl, payload, requestOptions);
  } catch (error) {
    console.error('Error sending budget alert:', error);
  }
}
55
-
package/src/pricing.ts DELETED
@@ -1,21 +0,0 @@
1
/**
 * Pricing table keyed by model name. Each entry gives the price applied per
 * 1,000 prompt tokens and per 1,000 completion tokens.
 *
 * NOTE(review): several figures (e.g. 0.15 / 0.60) resemble published
 * per-1M-token list prices rather than per-1k prices. Confirm the unit.
 */
export const modelToPricing = {
  // Gemini (Free tier available)
  'gemini-2.5-flash': {
    promptPer1k: 0.15,
    completionPer1k: 0.6,
  },
  'gemini-2.5-pro': {
    promptPer1k: 0.5,
    completionPer1k: 1.5,
  },
  'gemini-flash-latest': {
    promptPer1k: 0.15,
    completionPer1k: 0.6,
  },
  'gemini-pro-latest': {
    promptPer1k: 0.5,
    completionPer1k: 1.5,
  },

  // OpenAI (Free tier available for some models)
  'gpt-4o-mini': {
    promptPer1k: 0.15,
    completionPer1k: 0.6,
  },
  'gpt-4o': {
    promptPer1k: 5,
    completionPer1k: 15,
  },

  // Free tier models (zero cost)
  'ollama-llama3': {
    promptPer1k: 0,
    completionPer1k: 0,
  },
  'local-model': {
    promptPer1k: 0,
    completionPer1k: 0,
  },
};
14
-
15
/**
 * Estimate the USD cost of one request from its token counts.
 *
 * @param model - Model name; names missing from `modelToPricing` are priced
 *   as `'gemini-2.5-flash'`.
 * @param promptTokens - Number of prompt tokens.
 * @param completionTokens - Number of completion tokens.
 * @returns The cost rounded to six decimal places.
 */
export function estimateCostUSD(model: string, promptTokens: number, completionTokens: number): number {
  // Look up own keys only. A plain `table[model] || fallback` also resolves
  // inherited members such as "toString" or "constructor", which are truthy
  // and would make the result NaN instead of using the fallback pricing.
  const isKnownModel = Object.prototype.hasOwnProperty.call(modelToPricing, model);
  const pricingKey = (isKnownModel ? model : 'gemini-2.5-flash') as keyof typeof modelToPricing;
  const pricing = modelToPricing[pricingKey];

  const promptK = promptTokens / 1000;
  const completionK = completionTokens / 1000;
  const cost = promptK * pricing.promptPer1k + completionK * pricing.completionPer1k;
  return Number(cost.toFixed(6));
}