@kernel.chat/kbot 3.69.1 → 3.71.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,677 @@
1
+ // kbot AI Analysis & Interpretability Tools
2
+ //
3
+ // Inspired by Anthropic's circuit tracing research and Google's Bayesian teaching paper.
4
+ // Five tools for understanding, comparing, and reasoning about AI models and prompts:
5
+ //
6
+ // 1. model_compare — Side-by-side comparison of two AI models on the same prompt
7
+ // 2. prompt_analyze — Evaluate prompt clarity, ambiguity, and effectiveness
8
+ // 3. reasoning_chain — Break complex questions into explicit chain-of-thought steps
9
+ // 4. bayesian_update — Compute posterior probabilities via Bayes' theorem
10
+ // 5. bias_check — Detect cognitive biases in text or arguments
11
+ //
12
+ // model_compare uses kbot's multi-provider auth system (20 providers).
13
+ // prompt_analyze, reasoning_chain, and bias_check use local Ollama ($0 cost).
14
+ // bayesian_update is pure math — no API calls.
15
+ import { registerTool } from './index.js';
16
+ import { getByokKey, getByokProvider, getProvider, estimateCost, isOllamaRunning, PROVIDERS, } from '../auth.js';
17
+ // ══════════════════════════════════════════════════════════════════════════════
18
+ // SHARED HELPERS
19
+ // ══════════════════════════════════════════════════════════════════════════════
20
// Base URL of the local Ollama daemon; override via the OLLAMA_HOST env var.
// ('||' is deliberate: an empty-string override also falls back to the default.)
const OLLAMA_HOST = process.env.OLLAMA_HOST || 'http://localhost:11434';
21
/**
 * Call a provider's chat completions API and return the raw response text + metadata.
 *
 * Dispatches on the provider's `apiStyle` ('anthropic', 'google', or the
 * OpenAI-compatible default) and normalizes the three response shapes into a
 * single record: { content, model, inputTokens, outputTokens, durationMs }.
 *
 * @param {string} provider - Provider id understood by getProvider().
 * @param {string} model - Model name to request.
 * @param {string} systemPrompt - System prompt (may be empty).
 * @param {string} userPrompt - User message content.
 * @param {number} [timeout=120_000] - Per-request timeout in milliseconds.
 * @returns {Promise<{content: string, model: string, inputTokens: number, outputTokens: number, durationMs: number}>}
 * @throws {Error} When the HTTP response is not ok (message includes provider name + body).
 */
async function callProviderRaw(provider, model, systemPrompt, userPrompt, timeout = 120_000) {
    const p = getProvider(provider);
    const apiKey = getByokKey() || 'local';
    const start = Date.now();
    // Shared error path (previously copy-pasted three times): surface the
    // response body — or just the status when the body can't be read — in the
    // thrown message.
    const throwApiError = async (res) => {
        const err = await res.text().catch(() => `HTTP ${res.status}`);
        throw new Error(`${p.name} API error: ${err}`);
    };
    if (p.apiStyle === 'anthropic') {
        const res = await fetch(p.apiUrl, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                'x-api-key': apiKey,
                'anthropic-version': '2023-06-01',
            },
            body: JSON.stringify({
                model,
                max_tokens: 4096,
                system: systemPrompt,
                messages: [{ role: 'user', content: userPrompt }],
            }),
            signal: AbortSignal.timeout(timeout),
        });
        if (!res.ok)
            await throwApiError(res);
        const data = await res.json();
        // Anthropic returns an array of content blocks; keep only text blocks.
        const text = (data.content || []).filter((b) => b.type === 'text').map((b) => b.text).join('');
        const u = data.usage || {};
        return {
            content: text,
            model: data.model || model,
            inputTokens: u.input_tokens || 0,
            outputTokens: u.output_tokens || 0,
            durationMs: Date.now() - start,
        };
    }
    if (p.apiStyle === 'google') {
        const url = `${p.apiUrl}/${model}:generateContent`;
        const res = await fetch(url, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json', 'x-goog-api-key': apiKey },
            body: JSON.stringify({
                systemInstruction: { parts: [{ text: systemPrompt }] },
                contents: [{ parts: [{ text: userPrompt }] }],
                generationConfig: { maxOutputTokens: 4096 },
            }),
            signal: AbortSignal.timeout(timeout),
        });
        if (!res.ok)
            await throwApiError(res);
        const data = await res.json();
        // `part`, not `p` — the old callback parameter shadowed the provider config.
        const text = data.candidates?.[0]?.content?.parts?.map((part) => part.text).join('') || '';
        const u = data.usageMetadata || {};
        return {
            content: text,
            model,
            inputTokens: u.promptTokenCount || 0,
            outputTokens: u.candidatesTokenCount || 0,
            durationMs: Date.now() - start,
        };
    }
    // OpenAI-compatible (works for OpenAI, Mistral, Groq, DeepSeek, Ollama, etc.)
    const messages = [];
    if (systemPrompt)
        messages.push({ role: 'system', content: systemPrompt });
    messages.push({ role: 'user', content: userPrompt });
    const headers = { 'Content-Type': 'application/json' };
    // Local endpoints (e.g. Ollama) use the 'local' sentinel key and need no auth header.
    if (apiKey && apiKey !== 'local') {
        headers['Authorization'] = `Bearer ${apiKey}`;
    }
    const res = await fetch(p.apiUrl, {
        method: 'POST',
        headers,
        body: JSON.stringify({ model, max_tokens: 4096, messages }),
        signal: AbortSignal.timeout(timeout),
    });
    if (!res.ok)
        await throwApiError(res);
    const data = await res.json();
    const text = data.choices?.[0]?.message?.content || '';
    const u = data.usage || {};
    return {
        content: text,
        model: data.model || model,
        inputTokens: u.prompt_tokens || 0,
        outputTokens: u.completion_tokens || 0,
        durationMs: Date.now() - start,
    };
}
114
/**
 * Resolve a model spec like "openai:gpt-4.1" or "ollama:gemma3:12b" into provider + model.
 * Resolution order: explicit "provider:" prefix, then a scan of every known
 * provider's model lists, then the currently configured BYOK provider.
 */
function resolveModelSpec(spec) {
    // "provider:model" — only the first colon separates provider from model,
    // so nested names like "ollama:gemma3:12b" keep their inner colons.
    const sep = spec.indexOf(':');
    if (sep > 0) {
        const maybeProvider = spec.slice(0, sep);
        if (maybeProvider in PROVIDERS) {
            return { provider: maybeProvider, model: spec.slice(sep + 1) };
        }
    }
    // Bare model name: claim it for the first provider that lists it.
    for (const [id, cfg] of Object.entries(PROVIDERS)) {
        const known = cfg.models?.includes(spec) || cfg.defaultModel === spec || cfg.fastModel === spec;
        if (known) {
            return { provider: id, model: spec };
        }
    }
    // Unknown everywhere — assume the active BYOK provider can serve it.
    return { provider: getByokProvider(), model: spec };
}
133
/**
 * Call local Ollama for analysis tasks (zero cost).
 *
 * Never throws: returns the assistant's text on success, or a human-readable
 * "Error: ..." string (callers check for the "Error:" prefix).
 *
 * @param {string} prompt - User prompt.
 * @param {string} [systemPrompt] - Optional system prompt.
 * @param {string} [model] - Ollama model name; defaults to 'gemma3:12b'.
 * @param {number} [timeout=120_000] - Request timeout in milliseconds.
 * @returns {Promise<string>}
 */
async function callOllama(prompt, systemPrompt, model, timeout = 120_000) {
    const running = await isOllamaRunning();
    if (!running) {
        return 'Error: Ollama is not running. Install and start Ollama: https://ollama.com';
    }
    const ollamaModel = model || 'gemma3:12b';
    const messages = [];
    if (systemPrompt)
        messages.push({ role: 'system', content: systemPrompt });
    messages.push({ role: 'user', content: prompt });
    try {
        const res = await fetch(`${OLLAMA_HOST}/v1/chat/completions`, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ model: ollamaModel, messages, stream: false }),
            signal: AbortSignal.timeout(timeout),
        });
        if (!res.ok) {
            const err = await res.text().catch(() => `HTTP ${res.status}`);
            return `Error: Ollama request failed — ${err}`;
        }
        const data = await res.json();
        return data.choices?.[0]?.message?.content || 'No response from Ollama.';
    }
    catch (err) {
        // FIX: AbortSignal.timeout() aborts with a DOMException named
        // 'TimeoutError', which is NOT an instanceof Error — the old
        // `err instanceof Error && err.name === 'AbortError'` check never
        // matched, so timeouts were misreported as connection failures.
        // Check the name directly and accept both spellings.
        const name = err?.name;
        if (name === 'TimeoutError' || name === 'AbortError') {
            return `Error: Ollama timed out after ${timeout / 1000}s. Try a smaller model.`;
        }
        return `Error: Could not reach Ollama at ${OLLAMA_HOST}. Is it running?`;
    }
}
165
/** Format a duration in ms to human-readable: "Nms" under one second, "X.XXs" above. */
function fmtDuration(ms) {
    return ms < 1000 ? `${ms}ms` : `${(ms / 1000).toFixed(2)}s`;
}
171
/**
 * Format a cost in USD.
 * Zero is labelled as local (free); sub-tenth-of-a-cent costs get 6 decimals
 * so they stay visible; everything else gets 4 decimals.
 * NOTE: the previous `usd < 0.01` branch returned the same 4-decimal format
 * as the fallthrough — it was dead code and has been removed (output is
 * unchanged for every input).
 */
function fmtCost(usd) {
    if (usd === 0)
        return '$0.00 (local)';
    if (usd < 0.001)
        return `$${usd.toFixed(6)}`;
    return `$${usd.toFixed(4)}`;
}
181
+ // ══════════════════════════════════════════════════════════════════════════════
182
+ // 1. MODEL COMPARE
183
+ // ══════════════════════════════════════════════════════════════════════════════
184
/**
 * Register the `model_compare` tool: runs the same prompt against two models
 * in parallel and renders a markdown report with metrics (latency, tokens,
 * estimated cost), both raw responses, and a crude 3-gram overlap diff.
 */
function registerModelCompare() {
    registerTool({
        name: 'model_compare',
        description: 'Compare two AI models side-by-side on the same prompt. ' +
            'Shows response quality, speed, token usage, and cost. ' +
            'Model format: "provider:model" (e.g., "openai:gpt-4.1", "ollama:gemma3:12b") ' +
            'or just a model name if unambiguous.',
        parameters: {
            prompt: { type: 'string', description: 'The prompt to send to both models', required: true },
            model_a: { type: 'string', description: 'First model (e.g., "openai:gpt-4.1" or "ollama:gemma3:12b")', required: true },
            model_b: { type: 'string', description: 'Second model (e.g., "anthropic:claude-sonnet-4-6")', required: true },
            system_prompt: { type: 'string', description: 'Optional system prompt for both models' },
        },
        tier: 'free',
        timeout: 300_000, // 5 min — models may be slow
        async execute(args) {
            const prompt = String(args.prompt);
            const specA = resolveModelSpec(String(args.model_a));
            const specB = resolveModelSpec(String(args.model_b));
            const systemPrompt = args.system_prompt ? String(args.system_prompt) : 'You are a helpful assistant. Answer concisely and accurately.';
            // Run both models in parallel; allSettled so one failure still lets
            // the other side of the comparison render.
            const [resultA, resultB] = await Promise.allSettled([
                callProviderRaw(specA.provider, specA.model, systemPrompt, prompt),
                callProviderRaw(specB.provider, specB.model, systemPrompt, prompt),
            ]);
            const a = resultA.status === 'fulfilled' ? resultA.value : null;
            const b = resultB.status === 'fulfilled' ? resultB.value : null;
            const errA = resultA.status === 'rejected' ? resultA.reason.message : null;
            const errB = resultB.status === 'rejected' ? resultB.reason.message : null;
            const providerA = getProvider(specA.provider);
            const providerB = getProvider(specB.provider);
            const costA = a ? estimateCost(specA.provider, a.inputTokens, a.outputTokens) : 0;
            const costB = b ? estimateCost(specB.provider, b.inputTokens, b.outputTokens) : 0;
            // Build comparison output
            const lines = [
                '# Model Comparison',
                '',
                `**Prompt:** ${prompt.length > 200 ? prompt.slice(0, 200) + '...' : prompt}`,
                '',
                '---',
                '',
                '## Metrics',
                '',
                '| Metric | Model A | Model B |',
                '|--------|---------|---------|',
                `| Provider | ${providerA.name} | ${providerB.name} |`,
                `| Model | ${a?.model || specA.model} | ${b?.model || specB.model} |`,
                `| Status | ${a ? 'Success' : `Error: ${errA}`} | ${b ? 'Success' : `Error: ${errB}`} |`,
                `| Latency | ${a ? fmtDuration(a.durationMs) : 'N/A'} | ${b ? fmtDuration(b.durationMs) : 'N/A'} |`,
                `| Input tokens | ${a?.inputTokens ?? 'N/A'} | ${b?.inputTokens ?? 'N/A'} |`,
                `| Output tokens | ${a?.outputTokens ?? 'N/A'} | ${b?.outputTokens ?? 'N/A'} |`,
                `| Est. cost | ${a ? fmtCost(costA) : 'N/A'} | ${b ? fmtCost(costB) : 'N/A'} |`,
                `| Response length | ${a ? `${a.content.length} chars` : 'N/A'} | ${b ? `${b.content.length} chars` : 'N/A'} |`,
            ];
            // Speed/cost comparison rows (only when both calls succeeded).
            if (a && b) {
                const faster = a.durationMs < b.durationMs ? 'A' : 'B';
                const speedRatio = Math.max(a.durationMs, b.durationMs) / Math.max(1, Math.min(a.durationMs, b.durationMs));
                // FIX: the old code appended `| (Nx faster)` as a dangling fourth
                // column, which broke the 3-column markdown table. Keep the ratio
                // inside the winner's cell instead.
                const speedNote = `<-- (${speedRatio.toFixed(1)}x faster)`;
                lines.push(`| Speed winner | ${faster === 'A' ? speedNote : ''} | ${faster === 'B' ? speedNote : ''} |`);
                const cheaper = costA < costB ? 'A' : costA > costB ? 'B' : 'tie';
                if (cheaper !== 'tie') {
                    lines.push(`| Cost winner | ${cheaper === 'A' ? '<--' : ''} | ${cheaper === 'B' ? '<--' : ''} |`);
                }
            }
            lines.push('', '---', '');
            // Model A response
            lines.push('## Model A Response', '');
            if (a) {
                lines.push(a.content);
            }
            else {
                lines.push(`*Error:* ${errA}`);
            }
            lines.push('', '---', '');
            // Model B response
            lines.push('## Model B Response', '');
            if (b) {
                lines.push(b.content);
            }
            else {
                lines.push(`*Error:* ${errB}`);
            }
            // Simple diff summary
            if (a && b) {
                lines.push('', '---', '', '## Quick Diff Summary', '');
                const wordsA = a.content.split(/\s+/).length;
                const wordsB = b.content.split(/\s+/).length;
                lines.push(`- Model A: ${wordsA} words | Model B: ${wordsB} words`);
                lines.push(`- Length ratio: ${(Math.max(wordsA, wordsB) / Math.max(1, Math.min(wordsA, wordsB))).toFixed(1)}x`);
                // Shared-content estimate: fraction of B's word 3-grams that also
                // occur somewhere in A.
                const ngramsA = new Set();
                const tokensA = a.content.toLowerCase().split(/\s+/);
                for (let i = 0; i < tokensA.length - 2; i++) {
                    ngramsA.add(tokensA.slice(i, i + 3).join(' '));
                }
                const tokensB = b.content.toLowerCase().split(/\s+/);
                let shared = 0;
                for (let i = 0; i < tokensB.length - 2; i++) {
                    if (ngramsA.has(tokensB.slice(i, i + 3).join(' ')))
                        shared++;
                }
                const overlap = tokensB.length > 2 ? (shared / (tokensB.length - 2) * 100) : 0;
                lines.push(`- Content overlap (3-gram): ${overlap.toFixed(1)}%`);
            }
            return lines.join('\n');
        },
    });
}
292
+ // ══════════════════════════════════════════════════════════════════════════════
293
+ // 2. PROMPT ANALYZE
294
+ // ══════════════════════════════════════════════════════════════════════════════
295
/**
 * Register the `prompt_analyze` tool: asks a local Ollama model to score a
 * prompt (clarity / specificity / completeness / overall), list ambiguities
 * and improvements, and then renders the parsed scores as text bar charts
 * above the model's full analysis.
 */
function registerPromptAnalyze() {
    registerTool({
        name: 'prompt_analyze',
        description: 'Analyze a prompt for clarity, ambiguity, and effectiveness. ' +
            'Uses local Ollama to evaluate instruction quality, identify potential misinterpretations, ' +
            'suggest improvements, and estimate token efficiency. Helps write better prompts.',
        parameters: {
            prompt: { type: 'string', description: 'The prompt to analyze', required: true },
            context: { type: 'string', description: 'Optional context about the intended use case' },
        },
        tier: 'free',
        timeout: 120_000,
        async execute(args) {
            const prompt = String(args.prompt);
            const context = args.context ? String(args.context) : '';
            const systemPrompt = 'You are a prompt engineering expert. Analyze the given prompt and return a structured evaluation. ' +
                'Be specific and actionable. Return your analysis in exactly this format:\n\n' +
                'CLARITY_SCORE: <1-10>\n' +
                'SPECIFICITY_SCORE: <1-10>\n' +
                'COMPLETENESS_SCORE: <1-10>\n' +
                'OVERALL_SCORE: <1-10>\n\n' +
                'AMBIGUITIES:\n- <list each ambiguity on its own line>\n\n' +
                'MISINTERPRETATIONS:\n- <list each potential misinterpretation>\n\n' +
                'IMPROVEMENTS:\n- <list each suggested improvement>\n\n' +
                'TOKEN_ANALYSIS:\n' +
                '- Estimated tokens: <number>\n' +
                '- Redundant tokens: <number>\n' +
                '- Efficiency: <percentage>\n\n' +
                'REWRITTEN_PROMPT:\n<the improved version of the prompt>';
            const userPrompt = context
                ? `Analyze this prompt (context: ${context}):\n\n---\n${prompt}\n---`
                : `Analyze this prompt:\n\n---\n${prompt}\n---`;
            const analysis = await callOllama(userPrompt, systemPrompt);
            if (analysis.startsWith('Error:'))
                return analysis;
            // Pull each numeric score back out of the model's structured reply
            // (labels contain no regex metacharacters, so interpolation is safe).
            const parseScore = (label) => {
                const m = analysis.match(new RegExp(`${label}:\\s*(\\d+)`, 'i'));
                return m ? parseInt(m[1], 10) : null;
            };
            const clarity = parseScore('CLARITY_SCORE');
            const specificity = parseScore('SPECIFICITY_SCORE');
            const completeness = parseScore('COMPLETENESS_SCORE');
            const overall = parseScore('OVERALL_SCORE');
            // Assemble the markdown report.
            const out = [
                '# Prompt Analysis',
                '',
                `**Original prompt** (${prompt.length} chars, ~${Math.ceil(prompt.length / 4)} tokens):`,
                `> ${prompt.length > 300 ? prompt.slice(0, 300) + '...' : prompt}`,
                '',
            ];
            if (clarity !== null || overall !== null) {
                out.push('## Scores', '');
                // One "Label ████░░ n/10" bar per score the model actually gave.
                const bar = (score, label) => {
                    if (score === null)
                        return;
                    const filled = '\u2588'.repeat(score);
                    const empty = '\u2591'.repeat(10 - score);
                    out.push(`${label.padEnd(16)} ${filled}${empty} ${score}/10`);
                };
                bar(clarity, 'Clarity');
                bar(specificity, 'Specificity');
                bar(completeness, 'Completeness');
                bar(overall, 'Overall');
                out.push('');
            }
            out.push('## Detailed Analysis', '', analysis);
            return out.join('\n');
        },
    });
}
367
+ // ══════════════════════════════════════════════════════════════════════════════
368
+ // 3. REASONING CHAIN
369
+ // ══════════════════════════════════════════════════════════════════════════════
370
/**
 * Register the `reasoning_chain` tool: has a local Ollama model decompose a
 * question into numbered chain-of-thought steps (each with a confidence level
 * and alternatives), then prefixes a parsed summary — complexity, step count,
 * and overall confidence — to the model's full output.
 */
function registerReasoningChain() {
    registerTool({
        name: 'reasoning_chain',
        description: 'Break a complex question into explicit reasoning steps using chain-of-thought. ' +
            'Shows each step with a confidence level and alternative paths. ' +
            'Uses local Ollama to generate a reasoning tree. ' +
            'Output as a structured chain with numbered steps.',
        parameters: {
            question: { type: 'string', description: 'The complex question to reason about', required: true },
            depth: { type: 'string', description: 'Reasoning depth: "shallow" (3-5 steps), "medium" (5-8 steps), or "deep" (8-12 steps). Default: medium' },
            domain: { type: 'string', description: 'Optional domain hint (e.g., "math", "code", "science", "business")' },
        },
        tier: 'free',
        timeout: 180_000,
        async execute(args) {
            const question = String(args.question);
            const depth = String(args.depth || 'medium');
            const domain = args.domain ? String(args.domain) : '';
            // Map the depth setting onto the step-count range fed to the model;
            // anything unrecognized falls back to "medium".
            let depthRange;
            if (depth === 'shallow') {
                depthRange = '3-5';
            }
            else if (depth === 'deep') {
                depthRange = '8-12';
            }
            else {
                depthRange = '5-8';
            }
            const systemPrompt = 'You are an expert reasoning engine. Given a question, decompose it into a chain of explicit reasoning steps. ' +
                'For each step, assign a confidence level (high/medium/low) and note any alternative reasoning paths.\n\n' +
                'Return your analysis in exactly this format:\n\n' +
                'QUESTION_TYPE: <classification of the question type>\n' +
                'COMPLEXITY: <1-10>\n\n' +
                'REASONING_CHAIN:\n\n' +
                'STEP 1: <description>\n' +
                'CONFIDENCE: <high|medium|low> (<0-100>%)\n' +
                'REASONING: <why this step follows>\n' +
                'ALTERNATIVES: <other approaches at this step, or "none">\n\n' +
                'STEP 2: ...\n' +
                '(continue for all steps)\n\n' +
                'CONCLUSION:\n<final answer or synthesis>\n' +
                'OVERALL_CONFIDENCE: <0-100>%\n\n' +
                'ASSUMPTIONS:\n- <list key assumptions made>\n\n' +
                'WEAKNESSES:\n- <list weaknesses in the reasoning chain>';
            const domainHint = domain ? ` (domain: ${domain})` : '';
            const userPrompt = `Decompose this question into ${depthRange} explicit reasoning steps${domainHint}:\n\n${question}`;
            const analysis = await callOllama(userPrompt, systemPrompt);
            if (analysis.startsWith('Error:'))
                return analysis;
            // Count how many STEP headers the model actually emitted.
            const stepCount = (analysis.match(/STEP\s+\d+/gi) || []).length;
            // Shared helper for the two single-integer markers.
            const grabInt = (re) => {
                const m = analysis.match(re);
                return m ? parseInt(m[1], 10) : null;
            };
            const overallConf = grabInt(/OVERALL_CONFIDENCE:\s*(\d+)/i);
            const complexity = grabInt(/COMPLEXITY:\s*(\d+)/i);
            const report = [
                '# Reasoning Chain Analysis',
                '',
                `**Question:** ${question.length > 200 ? question.slice(0, 200) + '...' : question}`,
                '',
            ];
            if (stepCount > 0 || overallConf !== null || complexity !== null) {
                report.push('## Summary', '');
                if (complexity !== null)
                    report.push(`- **Complexity:** ${complexity}/10`);
                report.push(`- **Steps:** ${stepCount}`);
                report.push(`- **Depth setting:** ${depth} (${depthRange} steps)`);
                if (overallConf !== null)
                    report.push(`- **Overall confidence:** ${overallConf}%`);
                report.push('');
            }
            report.push('## Chain of Thought', '', analysis);
            return report.join('\n');
        },
    });
}
440
// ══════════════════════════════════════════════════════════════════════════════
// 4. BAYESIAN UPDATE
// ══════════════════════════════════════════════════════════════════════════════
/**
 * Register the `bayesian_update` tool: applies Bayes' theorem sequentially
 * over a list of evidence items — each update's posterior becomes the next
 * update's prior — and renders the full arithmetic as markdown.
 * Pure math, no model/API calls, hence the short timeout.
 */
function registerBayesianUpdate() {
    registerTool({
        name: 'bayesian_update',
        description: 'Compute posterior probability using Bayes\' theorem. ' +
            'Given a hypothesis, prior probability, and one or more pieces of evidence, ' +
            'computes the posterior step by step. Shows the full math. ' +
            'Handles sequential evidence updates. Pure math — no API calls.',
        parameters: {
            hypothesis: { type: 'string', description: 'The hypothesis being evaluated', required: true },
            prior: { type: 'string', description: 'Prior probability P(H), a number between 0 and 1', required: true },
            evidence: {
                type: 'array',
                description: 'Array of evidence objects. Each has: description (string), likelihood (number 0-1 = P(E|H)), likelihood_not (number 0-1 = P(E|~H))',
                required: true,
                items: {
                    type: 'object',
                    properties: {
                        description: { type: 'string' },
                        likelihood: { type: 'number' },
                        likelihood_not: { type: 'number' },
                    },
                },
            },
        },
        tier: 'free',
        timeout: 10_000,
        async execute(args) {
            const hypothesis = String(args.hypothesis);
            // Validate the prior is a probability in [0, 1]; errors are returned
            // as strings (tool convention), not thrown.
            const prior = parseFloat(String(args.prior));
            if (isNaN(prior) || prior < 0 || prior > 1) {
                return 'Error: prior must be a number between 0 and 1.';
            }
            // Parse evidence array
            const rawEvidence = args.evidence;
            if (!rawEvidence || !Array.isArray(rawEvidence) || rawEvidence.length === 0) {
                return 'Error: evidence must be a non-empty array of { description, likelihood, likelihood_not } objects.';
            }
            // Normalize and validate every item up front so a bad entry fails
            // fast with a message naming the offending evidence.
            const evidence = [];
            for (const e of rawEvidence) {
                const desc = String(e.description || 'Evidence');
                const lik = parseFloat(String(e.likelihood));
                const likNot = parseFloat(String(e.likelihood_not));
                if (isNaN(lik) || lik < 0 || lik > 1) {
                    return `Error: likelihood for "${desc}" must be between 0 and 1. Got: ${e.likelihood}`;
                }
                if (isNaN(likNot) || likNot < 0 || likNot > 1) {
                    return `Error: likelihood_not for "${desc}" must be between 0 and 1. Got: ${e.likelihood_not}`;
                }
                evidence.push({ description: desc, likelihood: lik, likelihood_not: likNot });
            }
            // Compute sequential Bayesian updates
            const lines = [
                '# Bayesian Update',
                '',
                `**Hypothesis:** ${hypothesis}`,
                `**Prior probability:** P(H) = ${prior}`,
                `**Evidence items:** ${evidence.length}`,
                '',
                '---',
                '',
                '## Bayes\' Theorem',
                '',
                '```',
                ' P(E|H) * P(H)',
                'P(H|E) = ─────────────────────────',
                ' P(E|H)*P(H) + P(E|~H)*P(~H)',
                '```',
                '',
                '---',
                '',
            ];
            // Chain the updates: each posterior becomes the prior for the next
            // evidence item.
            let currentPrior = prior;
            for (let i = 0; i < evidence.length; i++) {
                const e = evidence[i];
                const pH = currentPrior;
                const pNotH = 1 - pH;
                const pEgivenH = e.likelihood;
                const pEgivenNotH = e.likelihood_not;
                // Bayes' theorem (denominator is the total probability of E;
                // guard against 0/0 when both likelihoods are zero)
                const numerator = pEgivenH * pH;
                const denominator = pEgivenH * pH + pEgivenNotH * pNotH;
                const posterior = denominator === 0 ? 0 : numerator / denominator;
                // Likelihood ratio (>1 supports H, <1 opposes H, Infinity when P(E|~H)=0)
                const likelihoodRatio = pEgivenNotH === 0 ? Infinity : pEgivenH / pEgivenNotH;
                // Log odds change (odds form of the same update, for readers who
                // think in odds; Infinity when the probability is exactly 1)
                const priorOdds = pNotH === 0 ? Infinity : pH / pNotH;
                const posteriorOdds = (1 - posterior) === 0 ? Infinity : posterior / (1 - posterior);
                lines.push(`## Update ${i + 1}: ${e.description}`, '', '**Given:**', `- P(H) = ${pH.toFixed(6)} (prior)`, `- P(~H) = ${pNotH.toFixed(6)}`, `- P(E|H) = ${pEgivenH.toFixed(4)} (likelihood if true)`, `- P(E|~H) = ${pEgivenNotH.toFixed(4)} (likelihood if false)`, '', '**Calculation:**', '', '```', `Numerator: P(E|H) * P(H) = ${pEgivenH.toFixed(4)} * ${pH.toFixed(6)} = ${numerator.toFixed(8)}`, `Denominator: P(E|H)*P(H) + P(E|~H)*P(~H)`, ` = ${pEgivenH.toFixed(4)}*${pH.toFixed(6)} + ${pEgivenNotH.toFixed(4)}*${pNotH.toFixed(6)}`, ` = ${numerator.toFixed(8)} + ${(pEgivenNotH * pNotH).toFixed(8)}`, ` = ${denominator.toFixed(8)}`, '', `P(H|E) = ${numerator.toFixed(8)} / ${denominator.toFixed(8)} = ${posterior.toFixed(6)}`, '```', '', `**Posterior:** P(H|E) = **${(posterior * 100).toFixed(2)}%**`, `**Likelihood ratio:** ${isFinite(likelihoodRatio) ? likelihoodRatio.toFixed(4) : 'Infinity'} (${likelihoodRatio > 1 ? 'evidence supports H' : likelihoodRatio < 1 ? 'evidence opposes H' : 'neutral'})`, `**Shift:** ${pH.toFixed(4)} -> ${posterior.toFixed(4)} (${posterior > pH ? '+' : ''}${((posterior - pH) * 100).toFixed(2)} pp)`, '');
                // Only show the odds line when both odds are finite.
                if (isFinite(priorOdds) && isFinite(posteriorOdds)) {
                    lines.push(`**Odds:** ${priorOdds.toFixed(4)} : 1 -> ${posteriorOdds.toFixed(4)} : 1`, '');
                }
                lines.push('---', '');
                currentPrior = posterior;
            }
            // Final summary
            const totalShift = currentPrior - prior;
            const direction = totalShift > 0 ? 'strengthened' : totalShift < 0 ? 'weakened' : 'unchanged';
            lines.push('## Final Result', '', `**Hypothesis:** ${hypothesis}`, `**Prior:** ${(prior * 100).toFixed(2)}%`, `**Posterior:** ${(currentPrior * 100).toFixed(2)}%`, `**Net shift:** ${totalShift > 0 ? '+' : ''}${(totalShift * 100).toFixed(2)} percentage points`, `**Verdict:** Evidence ${direction} the hypothesis.`, '');
            // Interpretation guide — bucket the final posterior into a plain-
            // language verdict (thresholds: 0.95 / 0.75 / 0.5 / 0.25 / 0.05).
            if (currentPrior >= 0.95) {
                lines.push('**Interpretation:** Very strong support for the hypothesis.');
            }
            else if (currentPrior >= 0.75) {
                lines.push('**Interpretation:** Moderate support for the hypothesis.');
            }
            else if (currentPrior >= 0.5) {
                lines.push('**Interpretation:** Slight lean toward the hypothesis, but not conclusive.');
            }
            else if (currentPrior >= 0.25) {
                lines.push('**Interpretation:** Slight lean against the hypothesis.');
            }
            else if (currentPrior >= 0.05) {
                lines.push('**Interpretation:** Moderate evidence against the hypothesis.');
            }
            else {
                lines.push('**Interpretation:** Very strong evidence against the hypothesis.');
            }
            return lines.join('\n');
        },
    });
}
561
+ // ══════════════════════════════════════════════════════════════════════════════
562
+ // 5. BIAS CHECK
563
+ // ══════════════════════════════════════════════════════════════════════════════
564
// Catalog of the 20 cognitive biases that bias_check asks the model to look
// for; each entry is rendered as a "- name: description" line in the
// bias_check system prompt.
const COGNITIVE_BIASES = [
    { name: 'Confirmation bias', description: 'Seeking/interpreting info that confirms existing beliefs' },
    { name: 'Anchoring bias', description: 'Over-relying on the first piece of information encountered' },
    { name: 'Survivorship bias', description: 'Focusing on successes while ignoring failures' },
    { name: 'Availability heuristic', description: 'Overweighting easily recalled examples' },
    { name: 'Dunning-Kruger effect', description: 'Overestimating competence in areas of limited knowledge' },
    { name: 'Sunk cost fallacy', description: 'Continuing an endeavor due to prior investment' },
    { name: 'Bandwagon effect', description: 'Adopting beliefs because many others hold them' },
    { name: 'Halo effect', description: 'Letting one positive trait influence overall judgment' },
    { name: 'Hindsight bias', description: 'Believing past events were predictable after they occurred' },
    { name: 'Appeal to authority', description: 'Accepting claims based on authority rather than evidence' },
    { name: 'False dichotomy', description: 'Presenting only two options when more exist' },
    { name: 'Ad hominem', description: 'Attacking the person rather than the argument' },
    { name: 'Slippery slope', description: 'Assuming one event will lead to extreme consequences' },
    { name: 'Cherry-picking', description: 'Selecting data that supports a conclusion while ignoring contradictions' },
    { name: 'Recency bias', description: 'Overweighting recent events or information' },
    { name: 'Status quo bias', description: 'Preferring the current state of affairs' },
    { name: 'Framing effect', description: 'Drawing different conclusions based on how information is presented' },
    { name: 'Optimism bias', description: 'Overestimating the likelihood of positive outcomes' },
    { name: 'Fundamental attribution error', description: 'Attributing others\' behavior to character rather than circumstances' },
    { name: 'Base rate neglect', description: 'Ignoring base rate probabilities in favor of specific information' },
];
586
/**
 * Register the `bias_check` tool: sends the text plus the COGNITIVE_BIASES
 * catalog to local Ollama, then parses BIAS_FOUND / SEVERITY / BIAS_COUNT
 * markers out of the reply to build a summary (count, severity breakdown,
 * overall rating) ahead of the model's full analysis.
 */
function registerBiasCheck() {
    registerTool({
        name: 'bias_check',
        description: 'Analyze text or an argument for cognitive biases. ' +
            'Uses local Ollama to identify confirmation bias, anchoring, survivorship bias, ' +
            'and 17 other common cognitive biases. Returns biases found, severity, and a debiased alternative.',
        parameters: {
            text: { type: 'string', description: 'The text or argument to analyze for biases', required: true },
            focus: { type: 'string', description: 'Optional focus: "argument", "data", "decision", "narrative", or "all" (default: "all")' },
        },
        tier: 'free',
        timeout: 120_000,
        async execute(args) {
            const text = String(args.text);
            const focus = String(args.focus || 'all');
            // One "- name: description" line per catalog entry.
            const biasList = COGNITIVE_BIASES.map((b) => `- ${b.name}: ${b.description}`).join('\n');
            const systemPrompt = 'You are a cognitive bias detection expert. Analyze the given text for cognitive biases.\n\n' +
                'Known biases to check for:\n' + biasList + '\n\n' +
                'For each bias found, return in exactly this format:\n\n' +
                'BIAS_FOUND: <bias name>\n' +
                'SEVERITY: <low|medium|high>\n' +
                'EVIDENCE: <specific quote or pattern from the text>\n' +
                'EXPLANATION: <why this constitutes this bias>\n\n' +
                '(repeat for each bias found)\n\n' +
                'BIAS_COUNT: <total number of biases found>\n\n' +
                'DEBIASED_VERSION:\n<rewritten version of the text with biases removed or mitigated>\n\n' +
                'RECOMMENDATIONS:\n- <list actionable recommendations for more balanced reasoning>';
            const focusHint = focus !== 'all' ? ` Focus on biases typical in ${focus} contexts.` : '';
            const userPrompt = `Analyze this text for cognitive biases.${focusHint}\n\n---\n${text}\n---`;
            const analysis = await callOllama(userPrompt, systemPrompt);
            if (analysis.startsWith('Error:'))
                return analysis;
            // Pull the structured markers back out of the free-form reply.
            const countMatch = analysis.match(/BIAS_COUNT:\s*(\d+)/i);
            const biasCount = countMatch ? parseInt(countMatch[1], 10) : null;
            const biasNames = [...analysis.matchAll(/BIAS_FOUND:\s*(.+)/gi)].map((m) => m[1].trim());
            const severities = [...analysis.matchAll(/SEVERITY:\s*(low|medium|high)/gi)].map((m) => m[1].toLowerCase());
            const out = [
                '# Cognitive Bias Analysis',
                '',
                `**Text analyzed:** ${text.length > 200 ? text.slice(0, 200) + '...' : text}`,
                `**Focus:** ${focus}`,
                '',
            ];
            if (biasNames.length > 0 || biasCount !== null) {
                out.push('## Summary', '');
                out.push(`- **Biases detected:** ${biasCount ?? biasNames.length}`);
                // Severity breakdown (only shown when at least one was graded).
                const tally = { high: 0, medium: 0, low: 0 };
                for (const s of severities) {
                    tally[s] += 1;
                }
                if (tally.high + tally.medium + tally.low > 0) {
                    out.push(`- **Severity breakdown:** ${tally.high} high, ${tally.medium} medium, ${tally.low} low`);
                }
                if (biasNames.length > 0) {
                    out.push(`- **Biases:** ${biasNames.join(', ')}`);
                }
                // Overall rating derived from the severity counts.
                let rating;
                if (tally.high >= 3) {
                    rating = '- **Rating:** Heavily biased - significant revision recommended';
                }
                else if (tally.high >= 1 || tally.medium >= 3) {
                    rating = '- **Rating:** Moderately biased - some revision recommended';
                }
                else if (tally.medium >= 1 || tally.low >= 2) {
                    rating = '- **Rating:** Mildly biased - minor adjustments suggested';
                }
                else {
                    rating = '- **Rating:** Relatively unbiased';
                }
                out.push(rating);
                out.push('');
            }
            out.push('## Detailed Analysis', '', analysis);
            return out.join('\n');
        },
    });
}
667
+ // ══════════════════════════════════════════════════════════════════════════════
668
+ // REGISTRATION
669
+ // ══════════════════════════════════════════════════════════════════════════════
670
/**
 * Register all five AI analysis & interpretability tools with the kbot tool
 * registry. Registration order matches the numbered sections above.
 */
export function registerAIAnalysisTools() {
    const registrars = [
        registerModelCompare,
        registerPromptAnalyze,
        registerReasoningChain,
        registerBayesianUpdate,
        registerBiasCheck,
    ];
    for (const register of registrars) {
        register();
    }
}
677
+ //# sourceMappingURL=ai-analysis.js.map