@kernel.chat/kbot 2.8.0 → 2.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48):
  1. package/dist/agent-protocol.d.ts +97 -0
  2. package/dist/agent-protocol.d.ts.map +1 -0
  3. package/dist/agent-protocol.js +618 -0
  4. package/dist/agent-protocol.js.map +1 -0
  5. package/dist/agent.d.ts +2 -0
  6. package/dist/agent.d.ts.map +1 -1
  7. package/dist/agent.js +1 -1
  8. package/dist/agent.js.map +1 -1
  9. package/dist/auth.d.ts.map +1 -1
  10. package/dist/auth.js +17 -12
  11. package/dist/auth.js.map +1 -1
  12. package/dist/cli.js +166 -2
  13. package/dist/cli.js.map +1 -1
  14. package/dist/cloud-sync.d.ts.map +1 -1
  15. package/dist/cloud-sync.js +15 -4
  16. package/dist/cloud-sync.js.map +1 -1
  17. package/dist/confidence.d.ts +102 -0
  18. package/dist/confidence.d.ts.map +1 -0
  19. package/dist/confidence.js +696 -0
  20. package/dist/confidence.js.map +1 -0
  21. package/dist/ide/acp-server.js +5 -4
  22. package/dist/ide/acp-server.js.map +1 -1
  23. package/dist/ide/lsp-bridge.d.ts.map +1 -1
  24. package/dist/ide/lsp-bridge.js +11 -5
  25. package/dist/ide/lsp-bridge.js.map +1 -1
  26. package/dist/intentionality.d.ts +139 -0
  27. package/dist/intentionality.d.ts.map +1 -0
  28. package/dist/intentionality.js +1092 -0
  29. package/dist/intentionality.js.map +1 -0
  30. package/dist/planner.d.ts.map +1 -1
  31. package/dist/planner.js +3 -2
  32. package/dist/planner.js.map +1 -1
  33. package/dist/reasoning.d.ts +100 -0
  34. package/dist/reasoning.d.ts.map +1 -0
  35. package/dist/reasoning.js +1292 -0
  36. package/dist/reasoning.js.map +1 -0
  37. package/dist/streaming.d.ts.map +1 -1
  38. package/dist/streaming.js +18 -2
  39. package/dist/streaming.js.map +1 -1
  40. package/dist/temporal.d.ts +133 -0
  41. package/dist/temporal.d.ts.map +1 -0
  42. package/dist/temporal.js +778 -0
  43. package/dist/temporal.js.map +1 -0
  44. package/dist/tools/index.d.ts.map +1 -1
  45. package/dist/tools/index.js +30 -10
  46. package/dist/tools/index.js.map +1 -1
  47. package/dist/ui.js +1 -1
  48. package/package.json +2 -2
@@ -0,0 +1,696 @@
1
+ // K:BOT Confidence Engine — Self-Awareness for Agent Actions
2
+ //
3
+ // Three systems:
4
+ // 1. CONFIDENCE CALIBRATION — Express uncertainty about responses/actions
5
+ // 2. SKILL BOUNDARIES — Self-model of strengths, weaknesses, unknowns
6
+ // 3. EFFORT ESTIMATION — Predict tool calls, cost, and complexity
7
+ //
8
+ // Persists calibration and effort history to ~/.kbot/ as JSON files.
9
+ // Integrates with the learning engine for pattern/task data.
10
+ import { homedir } from 'node:os';
11
+ import { join } from 'node:path';
12
+ import { existsSync, readFileSync, writeFileSync, mkdirSync } from 'node:fs';
13
+ import { registerTool } from './tools/index.js';
14
+ import { classifyTask, findPattern } from './learning.js';
15
+ import { isLocalProvider, getByokProvider, estimateCost } from './auth.js';
16
+ // ── File Paths ──
17
+ const KBOT_DIR = join(homedir(), '.kbot');
18
+ const CONFIDENCE_FILE = join(KBOT_DIR, 'confidence.json');
19
+ const EFFORT_FILE = join(KBOT_DIR, 'effort-history.json');
20
+ const SKILL_FILE = join(KBOT_DIR, 'skill-profile.json');
21
+ // ── Helpers ──
22
/** Create the K:BOT data directory (KBOT_DIR) if it is not already present. */
function ensureDir() {
    if (existsSync(KBOT_DIR)) {
        return;
    }
    mkdirSync(KBOT_DIR, { recursive: true });
}
26
/**
 * Read and parse a JSON file, returning `fallback` whenever that is not possible.
 *
 * @param {string} path - Absolute path of the JSON file to read.
 * @param {*} fallback - Value returned when the file is missing, unreadable,
 *   not valid JSON, or contains the JSON literal `null`.
 * @returns {*} The parsed value, or `fallback`.
 */
function loadJSON(path, fallback) {
    try {
        // Kept inside the try: a failure to create the data directory must not
        // turn a best-effort read into an exception for the caller.
        ensureDir();
        if (!existsSync(path)) {
            return fallback;
        }
        // `null` is valid JSON but every caller expects an object.
        return JSON.parse(readFileSync(path, 'utf-8')) ?? fallback;
    }
    catch {
        return fallback;
    }
}
37
/**
 * Serialize `data` as pretty-printed JSON and write it to `path`.
 *
 * Persistence is best-effort: any failure (directory creation, serialization,
 * or the write itself) is swallowed, and only reported on stderr when the
 * KBOT_DEBUG environment variable is set.
 *
 * @param {string} path - Destination file.
 * @param {*} data - JSON-serializable value.
 */
function saveJSON(path, data) {
    try {
        // Inside the try so that a mkdir failure is handled like a write failure.
        ensureDir();
        writeFileSync(path, JSON.stringify(data, null, 2), 'utf-8');
    }
    catch (err) {
        if (process.env.KBOT_DEBUG) {
            // Anything can be thrown, not only Error instances.
            console.error('[confidence] save failed:', err instanceof Error ? err.message : err);
        }
    }
}
47
/**
 * Load the persisted calibration log.
 *
 * @returns {{entries: Array, avgError: number}} The stored log, or a fresh
 *   empty log when the file is missing, unparsable, or does not have an
 *   `entries` array (callers index into `entries` unconditionally).
 */
function loadCalibration() {
    const stored = loadJSON(CONFIDENCE_FILE, null);
    const isUsable = stored !== null
        && typeof stored === 'object'
        && Array.isArray(stored.entries);
    return isUsable ? stored : { entries: [], avgError: 0 };
}
50
/**
 * Persist the calibration log, retaining only the 200 newest entries.
 * Trimming replaces `data.entries` on the object that was passed in.
 */
function saveCalibration(data) {
    const overflow = data.entries.length - 200;
    if (overflow > 0) {
        // Drop the oldest records so the file cannot grow without bound.
        data.entries = data.entries.slice(overflow);
    }
    saveJSON(CONFIDENCE_FILE, data);
}
57
/**
 * Mean signed prediction error (predicted − actual) for one domain.
 * Positive means overconfident, negative means underconfident.
 * Returns 0 when fewer than three calibration entries exist for the domain.
 */
function getCalibrationBias(domain) {
    const { entries } = loadCalibration();
    let signedError = 0;
    let samples = 0;
    for (const entry of entries) {
        if (entry.domain !== domain) {
            continue;
        }
        signedError += entry.predicted - entry.actual;
        samples += 1;
    }
    return samples < 3 ? 0 : signedError / samples;
}
67
/**
 * Average recorded `actual` score for a domain, plus how many entries back it.
 * With no entries for the domain the rate defaults to 0.5 and the count is 0.
 */
function getHistoricalSuccessRate(domain) {
    const { entries } = loadCalibration();
    let actualSum = 0;
    let count = 0;
    for (const entry of entries) {
        if (entry.domain === domain) {
            actualSum += entry.actual;
            count += 1;
        }
    }
    if (count === 0) {
        return { rate: 0.5, count: 0 };
    }
    return { rate: actualSum / count, count };
}
76
/**
 * Detect the primary domain of a task from its text.
 *
 * A recognised file extension wins; otherwise the first domain whose keyword
 * list has a substring hit in the lower-cased text is returned.
 *
 * @param {string} task - Free-form task description.
 * @returns {string} A domain label, or 'general' when nothing matches.
 */
function detectDomain(task) {
    const lower = task.toLowerCase();
    // Check file extensions. The trailing \b forces the whole extension to
    // match: without it the first alternative that is a prefix wins, so
    // ".json" was read as ".js", ".css"/".cs" as ".c", and "example.com" as ".c".
    const extMatch = lower.match(/\.(ts|tsx|js|jsx|py|rs|go|java|rb|cpp|c|cs|swift|kt|lua|zig|sql|css|html|md|yaml|yml|json|toml|sh|bash|zsh)\b/);
    if (extMatch) {
        const extMap = {
            ts: 'typescript', tsx: 'typescript', js: 'javascript', jsx: 'javascript',
            py: 'python', rs: 'rust', go: 'go', java: 'java', rb: 'ruby',
            cpp: 'cpp', c: 'c', cs: 'csharp', swift: 'swift', kt: 'kotlin',
            lua: 'lua', zig: 'zig', sql: 'sql', css: 'css', html: 'html',
            md: 'writing', yaml: 'devops', yml: 'devops', json: 'config',
            toml: 'config', sh: 'devops', bash: 'devops', zsh: 'devops',
        };
        return extMap[extMatch[1]] || 'general';
    }
    // Check language/domain keywords. Declaration order is the priority order.
    const domainKeywords = {
        typescript: ['typescript', 'ts ', 'tsx', 'type ', 'interface ', 'enum '],
        javascript: ['javascript', 'js ', 'jsx', 'node', 'npm', 'yarn', 'pnpm', 'bun'],
        python: ['python', 'pip', 'conda', 'django', 'flask', 'fastapi', 'pytorch'],
        rust: ['rust', 'cargo', 'crate', 'impl ', 'fn ', 'struct '],
        go: ['golang', 'go ', 'goroutine', 'go mod'],
        devops: ['docker', 'kubernetes', 'k8s', 'terraform', 'ansible', 'ci/cd', 'pipeline', 'deploy', 'nginx', 'aws', 'gcp', 'azure'],
        database: ['sql', 'postgres', 'mysql', 'mongo', 'redis', 'supabase', 'prisma', 'database', 'migration'],
        writing: ['write', 'blog', 'article', 'documentation', 'readme', 'explain', 'describe'],
        testing: ['test', 'spec', 'coverage', 'vitest', 'jest', 'pytest', 'assert'],
        security: ['security', 'vulnerability', 'auth', 'permission', 'encrypt', 'cve'],
        design: ['css', 'style', 'layout', 'responsive', 'animation', 'color', 'font'],
        react: ['react', 'component', 'hook', 'jsx', 'tsx', 'next.js', 'nextjs', 'remix'],
        git: ['git', 'commit', 'branch', 'merge', 'rebase', 'pr ', 'pull request'],
    };
    for (const [domain, keywords] of Object.entries(domainKeywords)) {
        if (keywords.some(kw => lower.includes(kw))) {
            return domain;
        }
    }
    return 'general';
}
114
/**
 * Words that signal a task has several phases. Matched on word boundaries so
 * that "install" (all), "authenticate" (then) or "reach" (each) do not count;
 * "step" also accepts its plural and "multi" any suffix ("multiple", "multi-file").
 */
const MULTI_STEP_PATTERN = /\b(?:then|after that|also|next|finally|first(?:ly)?|second(?:ly)?|steps?|multi\w*|several|all|each)\b/;
/**
 * Count complexity signals in a task description.
 *
 * @param {string} task - Free-form task description.
 * @returns {{fileCount: number, ambiguity: number, multiStep: boolean}}
 *   `fileCount` is the number of file-like (`name.ext`) plus directory-like
 *   (`name/`) tokens, at least 1; `ambiguity` is the fraction of the vague-term
 *   list present in the text (0..1); `multiStep` is true when a multi-phase
 *   word appears.
 */
function assessComplexity(task) {
    const lower = task.toLowerCase();
    // Estimate number of files involved
    const fileRefs = (task.match(/\b\w+\.\w{1,5}\b/g) || []).length;
    const dirRefs = (task.match(/\b\w+\//g) || []).length;
    const fileCount = Math.max(1, fileRefs + dirRefs);
    // Ambiguity — vague words that signal unclear intent
    const vagueTerms = ['maybe', 'somehow', 'might', 'possibly', 'not sure', 'something like',
        'kind of', 'sort of', 'whatever', 'figure out', 'if possible'];
    const ambiguity = vagueTerms.filter(t => lower.includes(t)).length / vagueTerms.length;
    // Multi-step — signals that the task has multiple phases
    const multiStep = MULTI_STEP_PATTERN.test(lower);
    return { fileCount, ambiguity, multiStep };
}
131
/**
 * Estimate confidence for a task before execution.
 *
 * Combines three sub-scores — factual, approach and completeness — built from
 * the amount of context, a cached pattern for the task, the domain's
 * calibration history, complexity signals and whether a local provider is in
 * use. Each sub-score is clamped to [0.05, 0.99]; `overall` is their
 * 0.3 / 0.4 / 0.3 weighted average. All scores are rounded to two decimals.
 *
 * @param {string} task - Task description.
 * @param {string} [context=''] - Context available for the task. Optional:
 *   omitting it is treated as having no context.
 * @returns {{overall: number, factual: number, approach: number,
 *   completeness: number, reasoning: string}}
 */
export function estimateConfidence(task, context = '') {
    // Tolerate null / non-string context instead of throwing on `.length`.
    const ctx = typeof context === 'string' ? context : String(context ?? '');
    const domain = detectDomain(task);
    const taskType = classifyTask(task);
    const complexity = assessComplexity(task);
    const cachedPattern = findPattern(task);
    const historical = getHistoricalSuccessRate(domain);
    const calibrationBias = getCalibrationBias(domain);
    // Resolve the provider once; it feeds both the penalty and the reasoning.
    let usingLocalModel = false;
    try {
        usingLocalModel = Boolean(isLocalProvider(getByokProvider()));
    }
    catch { /* no provider configured — skip adjustment */ }
    // ── Base scores ──
    // Factual confidence — higher if we have context, patterns, or history
    let factual = 0.5;
    if (ctx.length > 200) {
        factual += 0.15; // good context available
    }
    if (ctx.length > 1000) {
        factual += 0.1; // rich context
    }
    if (cachedPattern) {
        factual += 0.15; // we've done this before
    }
    if (historical.count > 5) {
        factual += (historical.rate - 0.5) * 0.2; // historical performance adjustment
    }
    // Approach confidence — higher for known task types, lower for ambiguity
    let approach = 0.6;
    if (taskType !== 'general') {
        approach += 0.1; // recognized task type
    }
    if (cachedPattern && cachedPattern.successRate > 0.8) {
        approach += 0.2; // proven pattern
    }
    approach -= complexity.ambiguity * 0.3; // ambiguity reduces confidence
    if (complexity.multiStep) {
        approach -= 0.1; // multi-step = more room for error
    }
    // Completeness — how sure we cover everything
    let completeness = 0.6;
    if (complexity.fileCount > 5) {
        completeness -= 0.15; // many files = easy to miss things
    }
    if (complexity.multiStep) {
        completeness -= 0.1; // multi-step = might skip a step
    }
    if (ctx.includes('repo-map') || ctx.includes('project structure')) {
        completeness += 0.1;
    }
    if (cachedPattern) {
        completeness += 0.1;
    }
    // Provider adjustment — local models get a penalty on complex tasks
    if (usingLocalModel) {
        const localPenalty = complexity.multiStep ? 0.2 : 0.1;
        factual -= localPenalty;
        approach -= localPenalty;
        completeness -= localPenalty * 0.5;
    }
    // Apply calibration correction — if we're historically overconfident, reduce scores
    factual -= calibrationBias * 0.5;
    approach -= calibrationBias * 0.5;
    completeness -= calibrationBias * 0.3;
    // Clamp all scores to [0.05, 0.99]
    const clamp = (value) => Math.max(0.05, Math.min(0.99, value));
    factual = clamp(factual);
    approach = clamp(approach);
    completeness = clamp(completeness);
    // Overall — weighted average
    const overall = Math.round((factual * 0.3 + approach * 0.4 + completeness * 0.3) * 100) / 100;
    // Build reasoning
    const reasons = [];
    if (cachedPattern) {
        reasons.push('have a proven pattern for this');
    }
    if (historical.count > 5 && historical.rate > 0.7) {
        reasons.push(`strong track record in ${domain}`);
    }
    if (historical.count > 5 && historical.rate < 0.4) {
        reasons.push(`historically weak in ${domain}`);
    }
    if (complexity.ambiguity > 0.2) {
        reasons.push('request is somewhat ambiguous');
    }
    if (complexity.multiStep) {
        reasons.push('multi-step task');
    }
    if (ctx.length < 100) {
        reasons.push('limited context available');
    }
    if (usingLocalModel) {
        reasons.push('using local model (lower capability)');
    }
    const pct = Math.round(overall * 100);
    const reasoning = reasons.length > 0
        ? `~${pct}% confident — ${reasons.join(', ')}`
        : `~${pct}% confident — standard assessment for ${domain} ${taskType} task`;
    return {
        overall,
        factual: Math.round(factual * 100) / 100,
        approach: Math.round(approach * 100) / 100,
        completeness: Math.round(completeness * 100) / 100,
        reasoning,
    };
}
227
/**
 * Render a confidence score as a multi-line, human-readable summary.
 *
 * The headline level is 'high' from 80%, 'moderate' from 50%, otherwise 'low'.
 *
 * @param {{overall: number, factual: number, approach: number,
 *   completeness: number, reasoning: string}} score - Scores in 0..1.
 * @returns {string} Five lines joined with '\n'.
 */
export function reportConfidence(score) {
    const toPercent = (fraction) => Math.round(fraction * 100);
    const pct = toPercent(score.overall);
    let level = 'low';
    if (pct >= 80) {
        level = 'high';
    }
    else if (pct >= 50) {
        level = 'moderate';
    }
    return [
        `Confidence: ${pct}% (${level})`,
        ` Factual: ${toPercent(score.factual)}%`,
        ` Approach: ${toPercent(score.approach)}%`,
        ` Completeness: ${toPercent(score.completeness)}%`,
        ` ${score.reasoning}`,
    ].join('\n');
}
242
/**
 * Record a calibration entry — predicted vs actual (from self-eval or user feedback).
 * Called after a task completes to improve future predictions.
 *
 * @param {string} task - Task description; only the first 200 characters are stored.
 * @param {number} predicted - Confidence that was predicted for the task.
 * @param {number} actual - Score actually achieved.
 */
export function recordCalibration(task, predicted, actual) {
    const domain = detectDomain(task);
    const data = loadCalibration();
    data.entries.push({
        task: task.slice(0, 200), // truncate for storage
        predicted,
        actual,
        domain,
        timestamp: new Date().toISOString(),
    });
    // Trim to the persisted window (the 200 newest entries) BEFORE averaging,
    // so the stored avgError describes exactly the entries stored with it.
    data.entries = data.entries.slice(-200);
    // The list is never empty here: an entry was just appended.
    const totalAbsError = data.entries.reduce((sum, e) => sum + Math.abs(e.predicted - e.actual), 0);
    data.avgError = totalAbsError / data.entries.length;
    saveCalibration(data);
}
262
/**
 * Load the persisted per-domain skill counters.
 *
 * @returns {{skills: Object}} The stored data, or `{ skills: {} }` when the
 *   file is missing, unparsable, or its `skills` member is not a plain
 *   object (callers index into `skills` unconditionally).
 */
function loadSkillData() {
    const stored = loadJSON(SKILL_FILE, null);
    const skills = stored !== null && typeof stored === 'object' ? stored.skills : null;
    const isUsable = skills !== null
        && typeof skills === 'object'
        && !Array.isArray(skills);
    return isUsable ? stored : { skills: {} };
}
265
/** Write the per-domain skill counters to the skill profile file. */
function saveSkillData(data) {
    const target = SKILL_FILE;
    saveJSON(target, data);
}
268
/**
 * Build a skill profile from stored skill data and calibration history.
 *
 * For each domain, stored skill counters take precedence; domains that only
 * appear in the calibration log are summarised from their calibration entries.
 *
 * @returns {{strengths: Array, weaknesses: Array, unknown: string[]}}
 *   `strengths`: domains with success rate ≥ 0.7 and at least 3 samples, best first.
 *   `weaknesses`: domains with success rate < 0.5 and at least 3 samples, worst first.
 *   `unknown`: names from the built-in domain list with no entry or fewer than 3 samples.
 */
export function getSkillProfile() {
    const skillData = loadSkillData();
    const calibration = loadCalibration();
    // Group calibration entries by domain in one pass instead of re-filtering
    // the whole log for every domain.
    const calibrationByDomain = new Map();
    for (const entry of calibration.entries) {
        const bucket = calibrationByDomain.get(entry.domain);
        if (bucket) {
            bucket.push(entry);
        }
        else {
            calibrationByDomain.set(entry.domain, [entry]);
        }
    }
    // Stored domains first, then calibration-only domains in order of first appearance.
    const allDomains = new Set([...Object.keys(skillData.skills), ...calibrationByDomain.keys()]);
    const entries = [];
    for (const domain of allDomains) {
        const stored = skillData.skills[domain];
        const calEntries = calibrationByDomain.get(domain) || [];
        let successRate;
        let avgConfidence;
        let sampleSize;
        let lastAttempt;
        if (stored) {
            sampleSize = stored.sampleSize;
            successRate = sampleSize > 0 ? stored.successCount / sampleSize : 0.5;
            avgConfidence = sampleSize > 0 ? stored.totalConfidence / sampleSize : 0.5;
            lastAttempt = stored.lastAttempt;
        }
        else if (calEntries.length > 0) {
            sampleSize = calEntries.length;
            successRate = calEntries.reduce((s, e) => s + e.actual, 0) / sampleSize;
            avgConfidence = calEntries.reduce((s, e) => s + e.predicted, 0) / sampleSize;
            lastAttempt = calEntries[calEntries.length - 1].timestamp;
        }
        else {
            continue;
        }
        entries.push({
            domain,
            successRate: Math.round(successRate * 100) / 100,
            avgConfidence: Math.round(avgConfidence * 100) / 100,
            sampleSize,
            lastAttempt,
        });
    }
    // All known domains that could exist but have no data
    const allKnownDomains = [
        'typescript', 'javascript', 'python', 'rust', 'go', 'java', 'ruby',
        'cpp', 'c', 'csharp', 'swift', 'kotlin', 'devops', 'database', 'sql',
        'testing', 'security', 'design', 'writing', 'react', 'git', 'config',
        'html', 'css', 'lua', 'zig',
    ];
    const strengths = entries
        .filter(e => e.successRate >= 0.7 && e.sampleSize >= 3)
        .sort((a, b) => b.successRate - a.successRate);
    const weaknesses = entries
        .filter(e => e.successRate < 0.5 && e.sampleSize >= 3)
        .sort((a, b) => a.successRate - b.successRate);
    // Sample size per assessed domain, for constant-time lookup below.
    const samplesByDomain = new Map(entries.map(e => [e.domain, e.sampleSize]));
    const unknown = allKnownDomains.filter(d => !samplesByDomain.has(d) || samplesByDomain.get(d) < 3);
    return { strengths, weaknesses, unknown };
}
331
/**
 * Assess whether the agent is suitable for a given task.
 *
 * The task's detected domain is looked up in the skill profile: first among
 * strengths, then weaknesses, then the untested list. `canDo` is always true;
 * weak and untested domains additionally carry a cautionary `suggestion`.
 *
 * @param {string} task - Task description.
 * @returns {{canDo: boolean, confidence: number, suggestion?: string}}
 */
export function assessSkillForTask(task) {
    const domain = detectDomain(task);
    const { strengths, weaknesses, unknown } = getSkillProfile();
    const isThisDomain = (entry) => entry.domain === domain;
    // Known strength
    const strong = strengths.find(isThisDomain);
    if (strong) {
        return { canDo: true, confidence: strong.avgConfidence };
    }
    // Known weakness
    const weak = weaknesses.find(isThisDomain);
    if (weak) {
        const ratePct = Math.round(weak.successRate * 100);
        return {
            canDo: true,
            confidence: weak.avgConfidence,
            suggestion: `Historical success rate in ${domain} is ${ratePct}% — consider breaking this into smaller steps or using a specialized tool.`,
        };
    }
    // Untested domain
    if (unknown.includes(domain)) {
        return {
            canDo: true,
            confidence: 0.5,
            suggestion: `Limited experience with ${domain} tasks — proceeding with caution.`,
        };
    }
    // No strong signal either way
    return { canDo: true, confidence: 0.6 };
}
368
/**
 * Update the skill profile after completing a task.
 *
 * @param domain - The task domain (auto-detected or overridden)
 * @param success - Whether the task completed successfully
 * @param confidence - The confidence score used for this task
 */
export function updateSkillProfile(domain, success, confidence) {
    const data = loadSkillData();
    const now = new Date().toISOString();
    // Own-property check: a domain named like an inherited member
    // ("constructor", "toString", …) must create a new record rather than
    // mutate whatever the prototype chain returns.
    const hasRecord = Object.prototype.hasOwnProperty.call(data.skills, domain);
    if (hasRecord) {
        const existing = data.skills[domain];
        if (success) {
            existing.successCount++;
        }
        else {
            existing.failureCount++;
        }
        existing.totalConfidence += confidence;
        existing.sampleSize++;
        existing.lastAttempt = now;
    }
    else {
        data.skills[domain] = {
            successCount: success ? 1 : 0,
            failureCount: success ? 0 : 1,
            totalConfidence: confidence,
            sampleSize: 1,
            lastAttempt: now,
        };
    }
    saveSkillData(data);
}
398
/**
 * Load the persisted effort prediction history.
 *
 * @returns {{entries: Array}} The stored history, or `{ entries: [] }` when
 *   the file is missing, unparsable, or does not have an `entries` array
 *   (callers index into `entries` unconditionally).
 */
function loadEffortHistory() {
    const stored = loadJSON(EFFORT_FILE, null);
    const isUsable = stored !== null
        && typeof stored === 'object'
        && Array.isArray(stored.entries);
    return isUsable ? stored : { entries: [] };
}
401
/**
 * Persist the effort history, retaining only the 100 newest entries.
 * Trimming replaces `data.entries` on the object that was passed in.
 */
function saveEffortHistory(data) {
    const overflow = data.entries.length - 100;
    if (overflow > 0) {
        data.entries = data.entries.slice(overflow);
    }
    saveJSON(EFFORT_FILE, data);
}
408
/**
 * Average the recorded actual tool calls and cost over past entries that have
 * an `actual` result and match either the task type or the domain.
 * Returns null when fewer than two such entries exist.
 */
function getHistoricalEffort(taskType, domain) {
    const { entries } = loadEffortHistory();
    let toolCallSum = 0;
    let costSum = 0;
    let count = 0;
    for (const entry of entries) {
        const isRelevant = entry.actual && (entry.taskType === taskType || entry.domain === domain);
        if (!isRelevant) {
            continue;
        }
        toolCallSum += entry.actual.toolCalls || 0;
        costSum += entry.actual.costUsd || 0;
        count += 1;
    }
    if (count < 2) {
        return null;
    }
    return { avgToolCalls: toolCallSum / count, avgCost: costSum / count, count };
}
418
/**
 * Classify a task as 'trivial', 'simple', 'moderate', 'complex' or
 * 'ambitious' from its word count, the number of file references and whether
 * it reads as multi-step. Rules are checked in order; the first match wins.
 *
 * @param {string} task - Task description.
 * @returns {string} The complexity label.
 */
function classifyComplexity(task) {
    const { multiStep, fileCount } = assessComplexity(task);
    // Count real words only: splitting the raw text counts an empty token for
    // leading/trailing whitespace and reports 1 word for an empty string.
    const wordCount = task.trim().split(/\s+/).filter(Boolean).length;
    // Simple heuristics
    if (wordCount < 8 && !multiStep && fileCount <= 1) {
        return 'trivial';
    }
    if (wordCount < 20 && !multiStep && fileCount <= 2) {
        return 'simple';
    }
    if (fileCount > 10 || (multiStep && wordCount > 50)) {
        return 'ambitious';
    }
    if (multiStep || fileCount > 5 || wordCount > 40) {
        return 'complex';
    }
    return 'moderate';
}
434
/**
 * Estimate min / expected / max tool calls for a task.
 *
 * A per-task-type baseline is scaled by a complexity multiplier, then
 * `expected` and `max` are raised by half a call (resp. one call) for every
 * file beyond the second. Unknown task types use the 'general' baseline and
 * unknown complexity levels a multiplier of 1. Floors: min ≥ 1, expected ≥ 1, max ≥ 2.
 */
function estimateToolCounts(taskType, complexityLevel, fileCount) {
    const BASELINES = {
        debug: { min: 3, expected: 6, max: 15 },
        build: { min: 5, expected: 10, max: 25 },
        refactor: { min: 3, expected: 8, max: 20 },
        test: { min: 2, expected: 5, max: 12 },
        deploy: { min: 2, expected: 5, max: 10 },
        explain: { min: 1, expected: 3, max: 6 },
        review: { min: 2, expected: 5, max: 12 },
        search: { min: 1, expected: 3, max: 8 },
        general: { min: 2, expected: 5, max: 12 },
    };
    const MULTIPLIERS = {
        trivial: 0.3,
        simple: 0.6,
        moderate: 1.0,
        complex: 1.8,
        ambitious: 3.0,
    };
    const baseline = BASELINES[taskType] || BASELINES.general;
    const factor = MULTIPLIERS[complexityLevel] || 1.0;
    const extraPerFile = Math.max(0, (fileCount - 2) * 0.5);
    const low = Math.round(baseline.min * factor);
    const mid = Math.round(baseline.expected * factor + extraPerFile);
    const high = Math.round(baseline.max * factor + extraPerFile * 2);
    return {
        min: Math.max(1, low),
        expected: Math.max(1, mid),
        max: Math.max(2, high),
    };
}
466
/**
 * Build a comma-separated, human-readable list of the operations a task is
 * expected to involve: file reads always, then edits, searches, a test/build
 * run and a git operation depending on the task type, plus an iteration
 * warning for 'ambitious' tasks.
 */
function buildBreakdown(taskType, fileCount, complexityLevel) {
    const plural = (count) => (count > 1 ? 's' : '');
    const isOneOf = (...types) => types.includes(taskType);
    const reads = Math.max(1, Math.ceil(fileCount * 0.8));
    const parts = [`~${reads} file read${plural(reads)}`];
    if (isOneOf('build', 'debug', 'refactor')) {
        const edits = Math.max(1, Math.ceil(fileCount * 0.5));
        parts.push(`~${edits} edit${plural(edits)}`);
    }
    // Fixed-text steps, in output order, with the task types that trigger them.
    const fixedSteps = [
        ['~1-2 searches', ['debug', 'search', 'review', 'refactor']],
        ['~1 test/build run', ['test', 'build', 'debug', 'deploy']],
        ['~1 git operation', ['deploy', 'build']],
    ];
    for (const [label, types] of fixedSteps) {
        if (types.includes(taskType)) {
            parts.push(label);
        }
    }
    if (complexityLevel === 'ambitious') {
        parts.push('may require multiple iterations');
    }
    return parts.join(', ');
}
494
/**
 * Estimate the effort required for a task — tool calls, cost, and complexity.
 *
 * Tool calls come from historical averages when at least three comparable
 * completed tasks exist, otherwise from per-task-type heuristics. Cost is the
 * tool-call count times a per-call cost derived from a fixed token budget.
 * Every call appends the prediction to the effort history file.
 *
 * @param task - The task description
 * @param context - Optional context (repo state, file list, etc.).
 *   Accepted for interface compatibility; not currently used by the estimate.
 * @returns {{toolCalls: {min: number, expected: number, max: number},
 *   estimatedCostUsd: {min: number, expected: number, max: number},
 *   complexity: string, breakdown: string}}
 */
export function estimateEffort(task, context) {
    const taskType = classifyTask(task);
    const domain = detectDomain(task);
    const complexityLevel = classifyComplexity(task);
    const complexity = assessComplexity(task);
    // Try historical data first
    const historical = getHistoricalEffort(taskType, domain);
    let toolCalls;
    if (historical && historical.count >= 3) {
        // Use historical averages with some spread. Each bound is clamped
        // against the previous one so min <= expected <= max always holds,
        // even when the recorded average rounds to 0.
        const avg = Math.round(historical.avgToolCalls);
        const min = Math.max(1, Math.round(avg * 0.5));
        const expected = Math.max(min, avg);
        const max = Math.max(expected, Math.round(avg * 2));
        toolCalls = { min, expected, max };
    }
    else {
        toolCalls = estimateToolCounts(taskType, complexityLevel, complexity.fileCount);
    }
    // Cost estimation — rough approximation based on tool calls
    // Each tool call ~ 500 input tokens + 200 output tokens on average
    const tokensPerCall = { input: 500, output: 200 };
    let costPerCall;
    try {
        const provider = getByokProvider();
        // Check for a local provider first: local inference is free, and a
        // pricing lookup that failed for it would otherwise fall through to
        // the paid default below.
        costPerCall = isLocalProvider(provider)
            ? 0
            : estimateCost(provider, tokensPerCall.input, tokensPerCall.output);
    }
    catch {
        // Default to Anthropic pricing (~$3/$15 per MTok)
        costPerCall = (tokensPerCall.input * 3 / 1_000_000) + (tokensPerCall.output * 15 / 1_000_000);
    }
    // Round to four decimal places (hundredths of a cent).
    const toUsd = (calls) => Math.round(calls * costPerCall * 10000) / 10000;
    const estimatedCostUsd = {
        min: toUsd(toolCalls.min),
        expected: toUsd(toolCalls.expected),
        max: toUsd(toolCalls.max),
    };
    const breakdown = buildBreakdown(taskType, complexity.fileCount, complexityLevel);
    // Store prediction for later calibration
    const history = loadEffortHistory();
    history.entries.push({
        task: task.slice(0, 200),
        taskType,
        domain,
        predicted: {
            toolCalls: toolCalls.expected,
            costUsd: estimatedCostUsd.expected,
            complexity: complexityLevel,
        },
        timestamp: new Date().toISOString(),
    });
    saveEffortHistory(history);
    return {
        toolCalls,
        estimatedCostUsd,
        complexity: complexityLevel,
        breakdown,
    };
}
561
/**
 * Attach the actual tool-call count and cost to the newest stored prediction
 * for this task that has no actuals yet, then persist the history.
 * Predictions are matched on the first 200 characters of the task text.
 */
export function recordActualEffort(task, actualToolCalls, actualCostUsd) {
    const history = loadEffortHistory();
    const key = task.slice(0, 200);
    // Walk a reversed copy so the most recent open prediction is found first;
    // the elements are the same objects as in history.entries.
    const pending = [...history.entries]
        .reverse()
        .find((entry) => entry.task === key && !entry.actual);
    if (pending) {
        pending.actual = { toolCalls: actualToolCalls, costUsd: actualCostUsd };
    }
    saveEffortHistory(history);
}
576
+ // ══════════════════════════════════════════════════════════════════
577
+ // TOOL REGISTRATION
578
+ // ══════════════════════════════════════════════════════════════════
579
/**
 * Register the three confidence-engine tools with the K:BOT tool registry:
 * `confidence_check`, `skill_profile` and `effort_estimate`.
 * Each tool returns its result as plain text.
 */
export function registerConfidenceTools() {
    // One line per skill entry; strengths and weaknesses share the format.
    const describeSkill = (entry) => ` ${entry.domain}: ${Math.round(entry.successRate * 100)}% success (${entry.sampleSize} tasks, avg confidence ${Math.round(entry.avgConfidence * 100)}%)`;
    // Append a titled section followed by a blank line; empty sections are skipped.
    const appendSkillSection = (lines, title, skills) => {
        if (skills.length === 0) {
            return;
        }
        lines.push(title, ...skills.map(describeSkill), '');
    };
    const runConfidenceCheck = async (args) => {
        const task = String(args.task || '');
        const context = String(args.context || '');
        if (!task) {
            return 'Error: task parameter is required';
        }
        return reportConfidence(estimateConfidence(task, context));
    };
    const runSkillProfile = async (args) => {
        const task = args.task ? String(args.task) : null;
        const profile = getSkillProfile();
        const lines = ['=== Skill Profile ===', ''];
        appendSkillSection(lines, 'Strengths:', profile.strengths);
        appendSkillSection(lines, 'Weaknesses:', profile.weaknesses);
        if (profile.unknown.length > 0) {
            lines.push(`Untested domains: ${profile.unknown.join(', ')}`, '');
        }
        if (task) {
            lines.push('--- Task Assessment ---');
            const assessment = assessSkillForTask(task);
            lines.push(`Can do: ${assessment.canDo ? 'yes' : 'no'}`);
            lines.push(`Confidence: ${Math.round(assessment.confidence * 100)}%`);
            if (assessment.suggestion) {
                lines.push(`Note: ${assessment.suggestion}`);
            }
        }
        return lines.join('\n');
    };
    const runEffortEstimate = async (args) => {
        const task = String(args.task || '');
        const context = args.context ? String(args.context) : undefined;
        if (!task) {
            return 'Error: task parameter is required';
        }
        const { complexity, toolCalls, estimatedCostUsd, breakdown } = estimateEffort(task, context);
        return [
            '=== Effort Estimate ===',
            '',
            `Complexity: ${complexity}`,
            '',
            `Tool calls:`,
            ` Min: ${toolCalls.min}`,
            ` Expected: ${toolCalls.expected}`,
            ` Max: ${toolCalls.max}`,
            '',
            `Estimated cost (USD):`,
            ` Min: $${estimatedCostUsd.min.toFixed(4)}`,
            ` Expected: $${estimatedCostUsd.expected.toFixed(4)}`,
            ` Max: $${estimatedCostUsd.max.toFixed(4)}`,
            '',
            `Breakdown: ${breakdown}`,
        ].join('\n');
    };
    registerTool({
        name: 'confidence_check',
        description: 'Get a confidence score for a proposed action or task. Returns factual, approach, and completeness scores with reasoning.',
        parameters: {
            task: {
                type: 'string',
                description: 'Description of the task or action to assess confidence for',
                required: true,
            },
            context: {
                type: 'string',
                description: 'Available context (repo state, file contents, memory, etc.)',
                required: false,
                default: '',
            },
        },
        tier: 'free',
        execute: runConfidenceCheck,
    });
    registerTool({
        name: 'skill_profile',
        description: 'Show the agent skill profile — strengths, weaknesses, and untested domains. Optionally assess suitability for a specific task.',
        parameters: {
            task: {
                type: 'string',
                description: 'Optional task to assess suitability for',
                required: false,
            },
        },
        tier: 'free',
        execute: runSkillProfile,
    });
    registerTool({
        name: 'effort_estimate',
        description: 'Predict how many tool calls, cost, and complexity a task will involve. Uses historical data when available.',
        parameters: {
            task: {
                type: 'string',
                description: 'Description of the task to estimate effort for',
                required: true,
            },
            context: {
                type: 'string',
                description: 'Optional context about the current state',
                required: false,
            },
        },
        tier: 'free',
        execute: runEffortEstimate,
    });
}
696
+ //# sourceMappingURL=confidence.js.map