@kernel.chat/kbot 1.3.1 → 2.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (176) hide show
  1. package/README.md +94 -0
  2. package/dist/agent.d.ts +9 -0
  3. package/dist/agent.d.ts.map +1 -1
  4. package/dist/agent.js +576 -119
  5. package/dist/agent.js.map +1 -1
  6. package/dist/auth.d.ts +20 -35
  7. package/dist/auth.d.ts.map +1 -1
  8. package/dist/auth.js +224 -66
  9. package/dist/auth.js.map +1 -1
  10. package/dist/auth.test.d.ts +2 -0
  11. package/dist/auth.test.d.ts.map +1 -0
  12. package/dist/auth.test.js +89 -0
  13. package/dist/auth.test.js.map +1 -0
  14. package/dist/build-targets.d.ts +37 -0
  15. package/dist/build-targets.d.ts.map +1 -0
  16. package/dist/build-targets.js +507 -0
  17. package/dist/build-targets.js.map +1 -0
  18. package/dist/cli.js +1210 -130
  19. package/dist/cli.js.map +1 -1
  20. package/dist/context.d.ts +2 -0
  21. package/dist/context.d.ts.map +1 -1
  22. package/dist/context.js +72 -22
  23. package/dist/context.js.map +1 -1
  24. package/dist/hooks.d.ts +27 -0
  25. package/dist/hooks.d.ts.map +1 -0
  26. package/dist/hooks.js +145 -0
  27. package/dist/hooks.js.map +1 -0
  28. package/dist/ide/acp-server.d.ts +6 -0
  29. package/dist/ide/acp-server.d.ts.map +1 -0
  30. package/dist/ide/acp-server.js +319 -0
  31. package/dist/ide/acp-server.js.map +1 -0
  32. package/dist/ide/bridge.d.ts +128 -0
  33. package/dist/ide/bridge.d.ts.map +1 -0
  34. package/dist/ide/bridge.js +185 -0
  35. package/dist/ide/bridge.js.map +1 -0
  36. package/dist/ide/index.d.ts +5 -0
  37. package/dist/ide/index.d.ts.map +1 -0
  38. package/dist/ide/index.js +11 -0
  39. package/dist/ide/index.js.map +1 -0
  40. package/dist/ide/lsp-bridge.d.ts +27 -0
  41. package/dist/ide/lsp-bridge.d.ts.map +1 -0
  42. package/dist/ide/lsp-bridge.js +267 -0
  43. package/dist/ide/lsp-bridge.js.map +1 -0
  44. package/dist/ide/mcp-server.d.ts +7 -0
  45. package/dist/ide/mcp-server.d.ts.map +1 -0
  46. package/dist/ide/mcp-server.js +451 -0
  47. package/dist/ide/mcp-server.js.map +1 -0
  48. package/dist/learning.d.ts +179 -0
  49. package/dist/learning.d.ts.map +1 -0
  50. package/dist/learning.js +829 -0
  51. package/dist/learning.js.map +1 -0
  52. package/dist/learning.test.d.ts +2 -0
  53. package/dist/learning.test.d.ts.map +1 -0
  54. package/dist/learning.test.js +115 -0
  55. package/dist/learning.test.js.map +1 -0
  56. package/dist/matrix.d.ts +49 -0
  57. package/dist/matrix.d.ts.map +1 -0
  58. package/dist/matrix.js +302 -0
  59. package/dist/matrix.js.map +1 -0
  60. package/dist/memory.d.ts +11 -0
  61. package/dist/memory.d.ts.map +1 -1
  62. package/dist/memory.js +54 -2
  63. package/dist/memory.js.map +1 -1
  64. package/dist/multimodal.d.ts +57 -0
  65. package/dist/multimodal.d.ts.map +1 -0
  66. package/dist/multimodal.js +206 -0
  67. package/dist/multimodal.js.map +1 -0
  68. package/dist/permissions.d.ts +21 -0
  69. package/dist/permissions.d.ts.map +1 -0
  70. package/dist/permissions.js +122 -0
  71. package/dist/permissions.js.map +1 -0
  72. package/dist/planner.d.ts +54 -0
  73. package/dist/planner.d.ts.map +1 -0
  74. package/dist/planner.js +298 -0
  75. package/dist/planner.js.map +1 -0
  76. package/dist/plugins.d.ts +30 -0
  77. package/dist/plugins.d.ts.map +1 -0
  78. package/dist/plugins.js +135 -0
  79. package/dist/plugins.js.map +1 -0
  80. package/dist/sessions.d.ts +38 -0
  81. package/dist/sessions.d.ts.map +1 -0
  82. package/dist/sessions.js +177 -0
  83. package/dist/sessions.js.map +1 -0
  84. package/dist/streaming.d.ts +88 -0
  85. package/dist/streaming.d.ts.map +1 -0
  86. package/dist/streaming.js +317 -0
  87. package/dist/streaming.js.map +1 -0
  88. package/dist/tools/background.d.ts +2 -0
  89. package/dist/tools/background.d.ts.map +1 -0
  90. package/dist/tools/background.js +163 -0
  91. package/dist/tools/background.js.map +1 -0
  92. package/dist/tools/bash.d.ts.map +1 -1
  93. package/dist/tools/bash.js +26 -1
  94. package/dist/tools/bash.js.map +1 -1
  95. package/dist/tools/browser.js +7 -7
  96. package/dist/tools/browser.js.map +1 -1
  97. package/dist/tools/build-matrix.d.ts +2 -0
  98. package/dist/tools/build-matrix.d.ts.map +1 -0
  99. package/dist/tools/build-matrix.js +463 -0
  100. package/dist/tools/build-matrix.js.map +1 -0
  101. package/dist/tools/computer.js +5 -5
  102. package/dist/tools/computer.js.map +1 -1
  103. package/dist/tools/fetch.d.ts +2 -0
  104. package/dist/tools/fetch.d.ts.map +1 -0
  105. package/dist/tools/fetch.js +106 -0
  106. package/dist/tools/fetch.js.map +1 -0
  107. package/dist/tools/files.d.ts.map +1 -1
  108. package/dist/tools/files.js +112 -6
  109. package/dist/tools/files.js.map +1 -1
  110. package/dist/tools/git.js +3 -3
  111. package/dist/tools/git.js.map +1 -1
  112. package/dist/tools/github.d.ts +2 -0
  113. package/dist/tools/github.d.ts.map +1 -0
  114. package/dist/tools/github.js +196 -0
  115. package/dist/tools/github.js.map +1 -0
  116. package/dist/tools/index.d.ts +29 -5
  117. package/dist/tools/index.d.ts.map +1 -1
  118. package/dist/tools/index.js +136 -20
  119. package/dist/tools/index.js.map +1 -1
  120. package/dist/tools/index.test.d.ts +2 -0
  121. package/dist/tools/index.test.d.ts.map +1 -0
  122. package/dist/tools/index.test.js +162 -0
  123. package/dist/tools/index.test.js.map +1 -0
  124. package/dist/tools/matrix.d.ts +2 -0
  125. package/dist/tools/matrix.d.ts.map +1 -0
  126. package/dist/tools/matrix.js +79 -0
  127. package/dist/tools/matrix.js.map +1 -0
  128. package/dist/tools/mcp-client.d.ts +2 -0
  129. package/dist/tools/mcp-client.d.ts.map +1 -0
  130. package/dist/tools/mcp-client.js +295 -0
  131. package/dist/tools/mcp-client.js.map +1 -0
  132. package/dist/tools/notebook.d.ts +2 -0
  133. package/dist/tools/notebook.d.ts.map +1 -0
  134. package/dist/tools/notebook.js +207 -0
  135. package/dist/tools/notebook.js.map +1 -0
  136. package/dist/tools/openclaw.d.ts +2 -0
  137. package/dist/tools/openclaw.d.ts.map +1 -0
  138. package/dist/tools/openclaw.js +187 -0
  139. package/dist/tools/openclaw.js.map +1 -0
  140. package/dist/tools/parallel.d.ts +2 -0
  141. package/dist/tools/parallel.d.ts.map +1 -0
  142. package/dist/tools/parallel.js +60 -0
  143. package/dist/tools/parallel.js.map +1 -0
  144. package/dist/tools/sandbox.d.ts +2 -0
  145. package/dist/tools/sandbox.d.ts.map +1 -0
  146. package/dist/tools/sandbox.js +352 -0
  147. package/dist/tools/sandbox.js.map +1 -0
  148. package/dist/tools/search.d.ts.map +1 -1
  149. package/dist/tools/search.js +135 -28
  150. package/dist/tools/search.js.map +1 -1
  151. package/dist/tools/subagent.d.ts +4 -0
  152. package/dist/tools/subagent.d.ts.map +1 -0
  153. package/dist/tools/subagent.js +260 -0
  154. package/dist/tools/subagent.js.map +1 -0
  155. package/dist/tools/tasks.d.ts +14 -0
  156. package/dist/tools/tasks.d.ts.map +1 -0
  157. package/dist/tools/tasks.js +210 -0
  158. package/dist/tools/tasks.js.map +1 -0
  159. package/dist/tools/worktree.d.ts +2 -0
  160. package/dist/tools/worktree.d.ts.map +1 -0
  161. package/dist/tools/worktree.js +223 -0
  162. package/dist/tools/worktree.js.map +1 -0
  163. package/dist/tui.d.ts +73 -0
  164. package/dist/tui.d.ts.map +1 -0
  165. package/dist/tui.js +257 -0
  166. package/dist/tui.js.map +1 -0
  167. package/dist/ui.d.ts +11 -19
  168. package/dist/ui.d.ts.map +1 -1
  169. package/dist/ui.js +143 -171
  170. package/dist/ui.js.map +1 -1
  171. package/dist/updater.d.ts +3 -0
  172. package/dist/updater.d.ts.map +1 -0
  173. package/dist/updater.js +70 -0
  174. package/dist/updater.js.map +1 -0
  175. package/install.sh +5 -7
  176. package/package.json +9 -5
package/dist/agent.js CHANGED
@@ -1,23 +1,37 @@
1
- // K:BOT Agent Loop — ReAct reasoning cycle
2
- // Message → API → Tool Calls → Execute Locally → Continue or Return
1
+ // K:BOT Agent Loop v2 — Autonomous Reasoning Engine
2
+ // Message → Think → Plan → Execute → Verify → Learn → Return
3
3
  //
4
- // EFFICIENCY-FIRST DESIGN:
4
+ // INTELLIGENCE ARCHITECTURE:
5
5
  // 1. Local-first: handle simple tasks (file reads, git, ls) without any API call
6
- // 2. Context batching: gather all relevant context before the first API call
7
- // 3. One-shot prompting: send rich context so the agent gets it right in one try
8
- // 4. Tool execution is always local and free — only reasoning burns tokens
9
- // 5. Smart model selection: use fast model for simple tasks, default for complex
10
- import { getApiKey, getApiBase, isByokEnabled, getByokKey, getByokProvider, getProviderModel, getProvider, estimateCost, } from './auth.js';
6
+ // 2. Deep context: project memory + learned patterns + user knowledge + corrections
7
+ // 3. Plan-then-execute: think step by step, then use tools aggressively
8
+ // 4. Self-correction: if a tool fails, analyze the error and try a different approach
9
+ // 5. Auto-research: if you don't know how, search GitHub/web FIRST, then execute
10
+ // 6. Verify: always confirm the output exists and is correct
11
+ // 7. Learn: extract knowledge from every interaction for future use
12
+ import { getByokKey, getByokProvider, getProviderModel, getProvider, estimateCost, isLocalProvider, warmOllamaModelCache, } from './auth.js';
11
13
  import { executeTool, getTool, getToolDefinitionsForApi, } from './tools/index.js';
12
14
  import { formatContextForPrompt } from './context.js';
15
+ import { getMatrixSystemPrompt } from './matrix.js';
16
+ import { buildFullLearningContext, findPattern, recordPattern, cacheSolution, updateProfile, classifyTask, extractKeywords, learnFromExchange, updateProjectMemory, shouldAutoTrain, selfTrain, } from './learning.js';
13
17
  import { getMemoryPrompt, addTurn, getPreviousMessages } from './memory.js';
14
- import { createSpinner, printToolCall, printToolResult, printResponse, printError, printInfo } from './ui.js';
15
- const MAX_TOOL_LOOPS = 15;
16
- const KBOT_VERSION = '1.2.0';
18
+ import { createSpinner, printToolCall, printToolResult, printResponse, printError, printInfo, printWarn } from './ui.js';
19
+ import { parseMultimodalMessage, toAnthropicContent } from './multimodal.js';
20
+ import { streamAnthropicResponse, streamOpenAIResponse } from './streaming.js';
21
+ import { checkPermission } from './permissions.js';
22
+ import { runPreToolHook, runPostToolHook } from './hooks.js';
23
+ const MAX_TOOL_LOOPS = 75;
24
+ /** Maximum cumulative cost (USD) before auto-stopping tool loops */
25
+ const MAX_COST_CEILING = 1.00;
26
+ /** Cached system prompt — rebuilt only when context changes, not every iteration */
27
+ let _cachedSystemPrompt = null;
28
+ let _cachedSystemPromptKey = null;
17
29
  // ── Local-first execution ──
18
30
  async function tryLocalFirst(message) {
19
31
  const lower = message.toLowerCase().trim();
20
- const readMatch = lower.match(/^(?:read|show|cat|view|open)\s+(.+)$/i);
32
+ // Only match file-like paths — avoid intercepting "open chrome" or "show me how to..."
33
+ const readMatch = lower.match(/^(?:read|cat|view)\s+(.+)$/i)
34
+ || lower.match(/^(?:show|open)\s+((?:\.{0,2}\/|~\/|\w+\.\w+).+)$/i);
21
35
  if (readMatch) {
22
36
  const tool = getTool('read_file');
23
37
  if (tool)
@@ -44,7 +58,10 @@ async function tryLocalFirst(message) {
44
58
  if (tool)
45
59
  return tool.execute({});
46
60
  }
47
- const grepMatch = lower.match(/^(?:grep|search|find)\s+['"""]?(.+?)['"""]?\s+(?:in\s+)?(.+)$/i);
61
+ // Only match explicit grep-like patterns: "grep PATTERN in PATH" or "search for PATTERN in PATH"
62
+ // Avoid intercepting natural language like "search the web for..."
63
+ const grepMatch = lower.match(/^(?:grep)\s+['"""]?(.+?)['"""]?\s+(?:in\s+)?(.+)$/i)
64
+ || lower.match(/^(?:search|find)\s+(?:for\s+)?['"""]?(.+?)['"""]?\s+in\s+(.+)$/i);
48
65
  if (grepMatch) {
49
66
  const tool = getTool('grep');
50
67
  if (tool)
@@ -56,7 +73,26 @@ async function tryLocalFirst(message) {
56
73
  return null;
57
74
  }
58
75
  /** Anthropic Messages API (Claude) */
59
- async function callAnthropic(apiKey, apiUrl, model, systemContext, messages) {
76
+ async function callAnthropic(apiKey, apiUrl, model, systemContext, messages, tools, options) {
77
+ // Build messages — use multimodal content blocks if images are present
78
+ const apiMessages = messages.map((m, i) => {
79
+ // Only the first user message might have images
80
+ if (i === messages.length - 1 && m.role === 'user' && options?.multimodal?.isMultimodal) {
81
+ return { role: m.role, content: toAnthropicContent(options.multimodal) };
82
+ }
83
+ return { role: m.role, content: m.content };
84
+ });
85
+ const body = {
86
+ model,
87
+ max_tokens: options?.thinking ? 16384 : 8192,
88
+ system: systemContext || undefined,
89
+ messages: apiMessages,
90
+ };
91
+ if (tools && tools.length > 0)
92
+ body.tools = tools;
93
+ if (options?.thinking) {
94
+ body.thinking = { type: 'enabled', budget_tokens: options.thinkingBudget || 10000 };
95
+ }
60
96
  const res = await fetch(apiUrl, {
61
97
  method: 'POST',
62
98
  headers: {
@@ -64,46 +100,142 @@ async function callAnthropic(apiKey, apiUrl, model, systemContext, messages) {
64
100
  'x-api-key': apiKey,
65
101
  'anthropic-version': '2023-06-01',
66
102
  },
67
- body: JSON.stringify({
68
- model,
69
- max_tokens: 8192,
70
- system: systemContext || undefined,
71
- messages: messages.map(m => ({ role: m.role, content: m.content })),
72
- }),
103
+ body: JSON.stringify(body),
73
104
  });
74
105
  if (!res.ok) {
75
106
  const err = await res.json().catch(() => ({ error: { message: `HTTP ${res.status}` } }));
76
107
  throw new Error(err.error?.message || `Anthropic error: ${res.status}`);
77
108
  }
78
109
  const data = await res.json();
79
- const text = (data.content || []).filter((b) => b.type === 'text').map((b) => b.text).join('');
110
+ const contentBlocks = data.content || [];
111
+ const text = contentBlocks.filter((b) => b.type === 'text').map((b) => b.text).join('');
112
+ const thinkingText = contentBlocks.filter((b) => b.type === 'thinking').map((b) => b.thinking).join('');
113
+ const toolUseBlocks = contentBlocks.filter((b) => b.type === 'tool_use');
80
114
  const u = data.usage || {};
81
- return { content: text, model: data.model, usage: { input_tokens: u.input_tokens || 0, output_tokens: u.output_tokens || 0 } };
115
+ const result = {
116
+ content: text,
117
+ thinking: thinkingText || undefined,
118
+ model: data.model,
119
+ usage: { input_tokens: u.input_tokens || 0, output_tokens: u.output_tokens || 0 },
120
+ stop_reason: data.stop_reason,
121
+ };
122
+ if (toolUseBlocks.length > 0) {
123
+ result.tool_calls = toolUseBlocks.map((b) => ({
124
+ id: b.id,
125
+ name: b.name,
126
+ arguments: b.input || {},
127
+ }));
128
+ }
129
+ return result;
82
130
  }
83
131
  /** OpenAI-compatible Chat Completions API
84
- * Works with: OpenAI, Mistral, xAI, DeepSeek, Groq, Together, Fireworks, Perplexity
132
+ * Works with: OpenAI, Mistral, xAI, DeepSeek, Groq, Together, Fireworks, Perplexity, Ollama, OpenClaw
85
133
  */
86
- async function callOpenAICompat(apiKey, apiUrl, model, systemContext, messages) {
134
+ async function callOpenAICompat(apiKey, apiUrl, model, systemContext, messages, tools) {
87
135
  const apiMessages = [];
88
136
  if (systemContext)
89
137
  apiMessages.push({ role: 'system', content: systemContext });
90
138
  apiMessages.push(...messages.map(m => ({ role: m.role, content: m.content })));
139
+ const body = { model, max_tokens: 8192, messages: apiMessages };
140
+ if (tools && tools.length > 0) {
141
+ body.tools = tools.map(t => ({
142
+ type: 'function',
143
+ function: { name: t.name, description: t.description, parameters: t.input_schema },
144
+ }));
145
+ }
146
+ // Local providers (Ollama, OpenClaw) may not need auth headers
147
+ const headers = { 'Content-Type': 'application/json' };
148
+ if (apiKey && apiKey !== 'local') {
149
+ headers['Authorization'] = `Bearer ${apiKey}`;
150
+ }
91
151
  const res = await fetch(apiUrl, {
92
152
  method: 'POST',
93
- headers: {
94
- 'Content-Type': 'application/json',
95
- 'Authorization': `Bearer ${apiKey}`,
96
- },
97
- body: JSON.stringify({ model, max_tokens: 8192, messages: apiMessages }),
153
+ headers,
154
+ body: JSON.stringify(body),
155
+ signal: AbortSignal.timeout(300_000), // 5 min timeout for local models
98
156
  });
99
157
  if (!res.ok) {
100
158
  const err = await res.json().catch(() => ({ error: { message: `HTTP ${res.status}` } }));
101
159
  throw new Error(err.error?.message || `API error: ${res.status}`);
102
160
  }
103
161
  const data = await res.json();
104
- const content = data.choices?.[0]?.message?.content || '';
162
+ const choice = data.choices?.[0] || {};
163
+ let content = choice.message?.content || '';
105
164
  const u = data.usage || {};
106
- return { content, model: data.model || model, usage: { input_tokens: u.prompt_tokens || 0, output_tokens: u.completion_tokens || 0 } };
165
+ const result = {
166
+ content,
167
+ model: data.model || model,
168
+ usage: { input_tokens: u.prompt_tokens || 0, output_tokens: u.completion_tokens || 0 },
169
+ stop_reason: choice.finish_reason,
170
+ };
171
+ // Standard tool_calls from the API
172
+ if (choice.message?.tool_calls && choice.message.tool_calls.length > 0) {
173
+ result.tool_calls = choice.message.tool_calls.map((tc) => ({
174
+ id: tc.id,
175
+ name: tc.function.name,
176
+ arguments: typeof tc.function.arguments === 'string'
177
+ ? JSON.parse(tc.function.arguments)
178
+ : tc.function.arguments || {},
179
+ }));
180
+ }
181
+ // Fallback: Small local models (7B) sometimes emit tool calls as raw JSON in content
182
+ // instead of structured tool_calls. Parse these so tools still work with Ollama.
183
+ if (!result.tool_calls && content && tools && tools.length > 0) {
184
+ const toolNames = tools.map(t => t.function?.name || t.name).filter(Boolean);
185
+ const parsed = tryParseInlineToolCalls(content, toolNames);
186
+ if (parsed.length > 0) {
187
+ result.tool_calls = parsed;
188
+ // Remove the raw JSON from the displayed content
189
+ result.content = content.replace(/```(?:json)?\s*\{[\s\S]*?\}\s*```/g, '').replace(/\{[\s\S]*?"name"\s*:\s*"[a-z_]+[\s\S]*?\}/g, '').trim();
190
+ }
191
+ }
192
+ return result;
193
+ }
194
+ /** Try to parse tool calls that local models emit as raw JSON in their text output */
195
+ function tryParseInlineToolCalls(content, knownTools) {
196
+ const calls = [];
197
+ // Pattern 1: JSON in code blocks ```json { "name": "tool_name", ... } ```
198
+ const codeBlockPattern = /```(?:json)?\s*(\{[\s\S]*?\})\s*```/g;
199
+ let match;
200
+ while ((match = codeBlockPattern.exec(content)) !== null) {
201
+ const parsed = tryParseToolJson(match[1], knownTools);
202
+ if (parsed)
203
+ calls.push(parsed);
204
+ }
205
+ if (calls.length > 0)
206
+ return calls;
207
+ // Pattern 2: Raw JSON objects with "name" field matching known tools
208
+ const jsonPattern = /\{[^{}]*"name"\s*:\s*"([a-z_]+)"[^{}]*\}/g;
209
+ while ((match = jsonPattern.exec(content)) !== null) {
210
+ if (knownTools.includes(match[1])) {
211
+ const parsed = tryParseToolJson(match[0], knownTools);
212
+ if (parsed)
213
+ calls.push(parsed);
214
+ }
215
+ }
216
+ return calls;
217
+ }
218
+ function tryParseToolJson(json, knownTools) {
219
+ try {
220
+ const obj = JSON.parse(json);
221
+ const name = obj.name || obj.function?.name;
222
+ if (!name || !knownTools.includes(name))
223
+ return null;
224
+ const args = obj.arguments || obj.parameters || obj.input || obj;
225
+ // Remove meta fields to get clean arguments
226
+ const cleanArgs = { ...args };
227
+ delete cleanArgs.name;
228
+ delete cleanArgs.function;
229
+ delete cleanArgs.type;
230
+ return {
231
+ id: `inline_${Date.now()}_${Math.random().toString(36).slice(2, 6)}`,
232
+ name,
233
+ arguments: cleanArgs,
234
+ };
235
+ }
236
+ catch {
237
+ return null;
238
+ }
107
239
  }
108
240
  /** Google Gemini API */
109
241
  async function callGemini(apiKey, apiUrl, model, systemContext, messages) {
@@ -155,123 +287,381 @@ async function callCohere(apiKey, apiUrl, model, systemContext, messages) {
155
287
  const u = data.usage?.tokens || {};
156
288
  return { content, model, usage: { input_tokens: u.input_tokens || 0, output_tokens: u.output_tokens || 0 } };
157
289
  }
290
+ /** Streaming provider call — tokens appear progressively in terminal */
291
+ async function callProviderStreaming(provider, apiKey, model, systemContext, messages, tools, options) {
292
+ const p = getProvider(provider);
293
+ let state;
294
+ if (p.apiStyle === 'anthropic') {
295
+ state = await streamAnthropicResponse(apiKey, p.apiUrl, model, systemContext, messages.map(m => ({ role: m.role, content: m.content })), tools, { thinking: options?.thinking, thinkingBudget: options?.thinkingBudget });
296
+ }
297
+ else {
298
+ state = await streamOpenAIResponse(apiKey, p.apiUrl, model, systemContext, messages.map(m => ({ role: m.role, content: m.content })), tools);
299
+ }
300
+ const result = {
301
+ content: state.content,
302
+ thinking: state.thinking || undefined,
303
+ model: state.model || model,
304
+ usage: state.usage,
305
+ stop_reason: state.stopReason,
306
+ };
307
+ if (state.toolCalls.length > 0) {
308
+ result.tool_calls = state.toolCalls
309
+ .filter(tc => tc.name)
310
+ .map(tc => {
311
+ let args = {};
312
+ if (tc.partialJson) {
313
+ try {
314
+ args = JSON.parse(tc.partialJson);
315
+ }
316
+ catch { /* malformed JSON from stream — use empty args */ }
317
+ }
318
+ return {
319
+ id: tc.id || `stream_${Date.now()}_${Math.random().toString(36).slice(2, 6)}`,
320
+ name: tc.name,
321
+ arguments: args,
322
+ };
323
+ });
324
+ }
325
+ // Fallback: if local model streamed tool calls as raw JSON in content,
326
+ // try to parse them (won't fix the displayed output, but tools will work)
327
+ if (!result.tool_calls && result.content && tools && tools.length > 0) {
328
+ const toolNames = tools.map(t => t.name);
329
+ const parsed = tryParseInlineToolCalls(result.content, toolNames);
330
+ if (parsed.length > 0) {
331
+ result.tool_calls = parsed;
332
+ result.content = result.content
333
+ .replace(/```(?:json)?\s*\{[\s\S]*?\}\s*```/g, '')
334
+ .replace(/\{[\s\S]*?"name"\s*:\s*"[a-z_]+[\s\S]*?\}/g, '')
335
+ .trim();
336
+ }
337
+ }
338
+ return result;
339
+ }
340
+ /** Detect if a message is casual conversation that doesn't need tools */
341
+ function isCasualMessage(message) {
342
+ const lower = message.toLowerCase().trim();
343
+ // Very short messages are usually conversational
344
+ if (lower.length < 20 && !/\b(fix|create|build|run|deploy|install|delete|remove|write|edit|read|find|search|open|show|list|git|npm|pip|cargo)\b/.test(lower)) {
345
+ return true;
346
+ }
347
+ // Greetings and chitchat
348
+ const casualPatterns = [
349
+ /^(hey|hi|hello|yo|sup|what's up|whats up|howdy|hola)\b/,
350
+ /^(how are you|how's it going|what's good|how do you do)\b/,
351
+ /^(thanks|thank you|thx|ty|cool|nice|great|awesome|perfect|ok|okay|sure|got it|understood)\b/,
352
+ /^(do you|can you|are you|what are you|who are you|what is|what's your)\b/,
353
+ /^(tell me about|explain|what do you think|how does|why does|why is|what if)\b/,
354
+ /^(good morning|good night|good evening|good afternoon|gm|gn)\b/,
355
+ /^(bye|goodbye|see you|later|peace|quit|exit)\b/,
356
+ /^(yes|no|maybe|probably|nah|nope|yep|yeah)\b/,
357
+ /^(lol|lmao|haha|bruh|wow|damn|dang|omg|wtf)\b/,
358
+ /\?$/, // Questions are usually conversational unless they contain action words
359
+ ];
360
+ // If it matches a casual pattern AND doesn't contain action words, it's casual
361
+ const isCasualPattern = casualPatterns.some(p => p.test(lower));
362
+ const hasActionWords = /\b(fix|create|build|run|deploy|install|delete|remove|write|edit|make|generate|scaffold|refactor|update|add|implement|set up|configure|debug|test)\b/.test(lower);
363
+ if (isCasualPattern && !hasActionWords)
364
+ return true;
365
+ // Questions that end with ? and don't have action words
366
+ if (lower.endsWith('?') && !hasActionWords && lower.length < 100)
367
+ return true;
368
+ return false;
369
+ }
370
+ /** Core tools that small local models can handle without getting confused */
371
+ const CORE_TOOLS = new Set([
372
+ 'read_file', 'write_file', 'list_directory', 'bash',
373
+ 'git_status', 'git_diff', 'git_commit', 'git_log',
374
+ 'grep', 'web_search',
375
+ ]);
376
+ /** Detect if a message describes a complex multi-step task */
377
+ function isComplexTask(message) {
378
+ const lower = message.toLowerCase();
379
+ const complexSignals = [
380
+ /\b(refactor|migrate|convert|rewrite|restructure|reorganize)\b/,
381
+ /\b(all files|every file|across the|codebase|entire project)\b/,
382
+ /\b(first|then|after that|finally|step \d|phase \d)\b/,
383
+ /\b(add (?:a |an )?new (?:feature|system|module|layer))\b/,
384
+ /\b(build and deploy|ci\s*\/?\s*cd|set up|configure)\b.*\b(pipeline|workflow|infrastructure)\b/,
385
+ ];
386
+ const signalCount = complexSignals.filter(r => r.test(lower)).length;
387
+ const isLong = message.length > 500;
388
+ return signalCount >= 2 || (signalCount >= 1 && isLong);
389
+ }
158
390
  /** Universal provider call — routes to the right API format */
159
- async function callProvider(provider, apiKey, model, systemContext, messages) {
391
+ async function callProvider(provider, apiKey, model, systemContext, messages, tools, options) {
160
392
  const p = getProvider(provider);
161
- switch (p.apiStyle) {
162
- case 'anthropic': return callAnthropic(apiKey, p.apiUrl, model, systemContext, messages);
163
- case 'google': return callGemini(apiKey, p.apiUrl, model, systemContext, messages);
164
- case 'cohere': return callCohere(apiKey, p.apiUrl, model, systemContext, messages);
165
- case 'openai': return callOpenAICompat(apiKey, p.apiUrl, model, systemContext, messages);
166
- default: return callOpenAICompat(apiKey, p.apiUrl, model, systemContext, messages);
393
+ try {
394
+ switch (p.apiStyle) {
395
+ case 'anthropic': return await callAnthropic(apiKey, p.apiUrl, model, systemContext, messages, tools, options);
396
+ case 'google': return await callGemini(apiKey, p.apiUrl, model, systemContext, messages);
397
+ case 'cohere': return await callCohere(apiKey, p.apiUrl, model, systemContext, messages);
398
+ case 'openai': return await callOpenAICompat(apiKey, p.apiUrl, model, systemContext, messages, tools);
399
+ default: return await callOpenAICompat(apiKey, p.apiUrl, model, systemContext, messages, tools);
400
+ }
401
+ }
402
+ catch (err) {
403
+ // Auto-retry with fallback model for local providers
404
+ if (isLocalProvider(provider) && model !== p.fastModel) {
405
+ const errMsg = err instanceof Error ? err.message : String(err);
406
+ // Only retry on model-specific errors, not connection errors
407
+ if (errMsg.includes('not found') || errMsg.includes('does not exist') || errMsg.includes('model')) {
408
+ printWarn(`Model ${model} unavailable, falling back to ${p.fastModel}...`);
409
+ switch (p.apiStyle) {
410
+ case 'openai': return callOpenAICompat(apiKey, p.apiUrl, p.fastModel, systemContext, messages, tools);
411
+ default: return callOpenAICompat(apiKey, p.apiUrl, p.fastModel, systemContext, messages, tools);
412
+ }
413
+ }
414
+ }
415
+ throw err;
167
416
  }
168
417
  }
169
418
  // ── Main agent loop ──
170
419
  export async function runAgent(message, options = {}) {
171
- const byokMode = isByokEnabled();
172
- const apiKey = byokMode ? getByokKey() : getApiKey();
173
- if (!apiKey) {
174
- throw new Error(byokMode
175
- ? 'No LLM API key configured. Run `kbot byok` to set up.'
176
- : 'No API key configured. Run `kbot auth` to set up.');
177
- }
178
- // Step 1: Local-first
179
- const localResult = await tryLocalFirst(message);
180
- if (localResult !== null) {
181
- addTurn({ role: 'user', content: message });
182
- addTurn({ role: 'assistant', content: localResult });
183
- printInfo('(handled locally — 0 tokens used)');
184
- return { content: localResult, agent: 'local', model: 'none', toolCalls: 0 };
420
+ const apiKey = getByokKey();
421
+ const byokProvider = getByokProvider();
422
+ const isLocal = byokProvider ? isLocalProvider(byokProvider) : false;
423
+ if (!apiKey && !isLocal) {
424
+ throw new Error('No LLM API key configured. Run `kbot byok` to set up, or `kbot ollama` for local models.');
425
+ }
426
+ // Step 0a: Warm Ollama model cache if using local provider
427
+ if (isLocal && byokProvider === 'ollama') {
428
+ warmOllamaModelCache().catch(() => { }); // non-blocking
429
+ }
430
+ // Step 0: Parse multimodal content (images in message)
431
+ const parsed = options.multimodal || parseMultimodalMessage(message);
432
+ if (parsed.isMultimodal) {
433
+ printInfo(`(${parsed.imageCount} image${parsed.imageCount > 1 ? 's' : ''} attached)`);
434
+ }
435
+ // Step 1: Local-first (skip if multimodal — needs AI to interpret)
436
+ if (!parsed.isMultimodal) {
437
+ const localResult = await tryLocalFirst(message);
438
+ if (localResult !== null) {
439
+ addTurn({ role: 'user', content: message });
440
+ addTurn({ role: 'assistant', content: localResult });
441
+ printInfo('(handled locally — 0 tokens used)');
442
+ return { content: localResult, agent: 'local', model: 'none', toolCalls: 0 };
443
+ }
444
+ }
445
+ // Step 1.5: Complexity detection — auto-plan complex tasks
446
+ if (isComplexTask(message) && !message.startsWith('/plan')) {
447
+ printInfo('Complex task detected. Using autonomous planner...');
448
+ try {
449
+ const { autonomousExecute, formatPlanSummary } = await import('./planner.js');
450
+ const plan = await autonomousExecute(message, {
451
+ ...options,
452
+ agent: options.agent || 'coder',
453
+ }, { autoApprove: false, onApproval: async () => true });
454
+ const summary = formatPlanSummary(plan);
455
+ addTurn({ role: 'user', content: message });
456
+ addTurn({ role: 'assistant', content: summary });
457
+ return {
458
+ content: summary,
459
+ agent: options.agent || 'coder',
460
+ model: 'planner',
461
+ toolCalls: plan.steps.filter(s => s.status === 'done').length,
462
+ };
463
+ }
464
+ catch {
465
+ // Planner failed — fall through to regular agent loop
466
+ printWarn('Planner failed, falling back to direct execution...');
467
+ }
185
468
  }
186
469
  const tier = options.tier || 'free';
187
- const tools = getToolDefinitionsForApi(tier);
188
- // Step 2: Build context
470
+ const allTools = getToolDefinitionsForApi(tier);
471
+ const casual = isCasualMessage(message);
472
+ // Smart tool filtering:
473
+ // 1. Casual messages → no tools (just chat)
474
+ // 2. Local small models → core tools only (10 instead of 60+, prevents confusion)
475
+ // 3. Everything else → full tool set
476
+ let tools;
477
+ if (casual) {
478
+ tools = []; // No tools for casual conversation
479
+ }
480
+ else if (isLocal) {
481
+ tools = allTools.filter(t => CORE_TOOLS.has(t.name));
482
+ }
483
+ else {
484
+ tools = allTools;
485
+ }
486
+ // Step 2: Build context (cached — only rebuilt when inputs change)
487
+ const matrixPrompt = options.agent ? getMatrixSystemPrompt(options.agent) : null;
189
488
  const contextSnippet = options.context ? formatContextForPrompt(options.context) : '';
190
489
  const memorySnippet = getMemoryPrompt();
191
- const efficiencyNote = `[K:BOT Efficiency Note]
192
- You have local tools (file read/write/edit, grep, git, bash) that execute for FREE on the user's machine.
193
- Prefer local tools over asking the user to do things manually.
194
- Be thorough in your FIRST response — aim to solve the task in one shot.
195
- Only use web_search when the user explicitly asks for current information.`;
196
- const systemContext = [contextSnippet, memorySnippet, efficiencyNote].filter(Boolean).join('\n');
490
+ const learningContext = buildFullLearningContext(message, process.cwd());
491
+ // Cache key to avoid rebuilding identical system prompts
492
+ const cacheKey = `${options.agent || ''}:${contextSnippet.length}:${memorySnippet.length}`;
493
+ let systemContext;
494
+ if (_cachedSystemPromptKey === cacheKey && _cachedSystemPrompt) {
495
+ // Reuse cached base prompt, only update learning context (changes per message)
496
+ systemContext = learningContext ? `${learningContext}\n\n${_cachedSystemPrompt}` : _cachedSystemPrompt;
497
+ }
498
+ else {
499
+ const preContext = `You are K:BOT, an AI that lives in the user's terminal. Talk naturally — be direct, concise, and conversational. You're like a skilled colleague, not a corporate chatbot.
500
+
501
+ Conversation style:
502
+ - Be casual and natural. Use short sentences. Don't over-explain.
503
+ - When the user asks a question, answer it directly. Lead with the answer, not the reasoning.
504
+ - When chatting casually, just chat. Not everything needs tools or code.
505
+ - If the user says "hey" or "what's up", respond naturally — don't launch into capabilities.
506
+ - Match the user's energy. Short question → short answer. Detailed question → detailed response.
507
+ - Never say "I'd be happy to help with that" or "Certainly!" or "Great question!" — just do it.
508
+ - Don't repeat back what the user said. Don't restate the problem before solving it.
509
+
510
+ How you work with tools:
511
+ - When asked to create, fix, or build something — do it directly with your tools. Don't describe what you would do, just do it.
512
+ - If something fails, read the error and try a different approach. Don't give up.
513
+ - Make reasonable decisions autonomously — pick good defaults.
514
+ - After completing a task, briefly say what you did and where the output is.
515
+ - You run commands yourself. You never tell the user to run something.
516
+
517
+ Always quote file paths that contain spaces. Never reference internal system names.`;
518
+ const matrixPrefix = matrixPrompt ? `[Agent Persona]\n${matrixPrompt}\n\nIMPORTANT: Stay in character as defined above. Your responses should reflect this agent's expertise and perspective.\n` : '';
519
+ _cachedSystemPrompt = [matrixPrefix, contextSnippet, memorySnippet, preContext].filter(Boolean).join('\n');
520
+ _cachedSystemPromptKey = cacheKey;
521
+ systemContext = learningContext ? `${learningContext}\n\n${_cachedSystemPrompt}` : _cachedSystemPrompt;
522
+ }
197
523
  let toolCallCount = 0;
198
524
  let lastResponse = null;
199
- let pendingToolResults = [];
525
+ const toolSequenceLog = [];
526
+ const originalMessage = message;
527
+ let cumulativeCostUsd = 0;
528
+ // Loop messages track the full conversation within a multi-tool execution.
529
+ // This includes assistant responses (with tool-use reasoning) and tool results,
530
+ // so the AI maintains context across tool iterations.
531
+ const loopMessages = [];
200
532
  for (let i = 0; i < MAX_TOOL_LOOPS; i++) {
201
- const spinner = createSpinner(i === 0 ? 'Thinking...' : `Running tools (${toolCallCount})...`);
202
- spinner.start();
533
+ // Cost ceiling stop burning money on runaway loops
534
+ if (cumulativeCostUsd > MAX_COST_CEILING) {
535
+ printWarn(`Cost ceiling reached ($${cumulativeCostUsd.toFixed(2)} > $${MAX_COST_CEILING}). Stopping tool loop.`);
536
+ break;
537
+ }
538
+ // Don't use spinner when streaming (conflicts with stdout)
539
+ const useSpinner = !options.stream;
540
+ const spinner = useSpinner ? createSpinner(i === 0 ? 'Thinking...' : `Running tools (${toolCallCount})...`) : null;
541
+ spinner?.start();
203
542
  try {
204
- if (byokMode) {
205
- // ── BYOK: Call provider directly ──
206
- const provider = getByokProvider();
207
- const speed = options.model === 'haiku' || options.model === 'fast' ? 'fast' : 'default';
208
- const model = getProviderModel(provider, speed);
209
- const messages = [
210
- ...getPreviousMessages(),
211
- { role: 'user', content: i === 0 ? (systemContext ? `${systemContext}\n\n${message}` : message) : message },
212
- ];
213
- if (pendingToolResults.length > 0) {
214
- for (const tr of pendingToolResults) {
215
- messages.push({ role: 'user', content: `[Tool Result for ${tr.tool_call_id}]: ${tr.result}` });
216
- }
217
- pendingToolResults = [];
218
- }
219
- spinner.stop();
220
- const result = await callProvider(provider, apiKey, model, systemContext, messages);
543
+ // ── BYOK: Call provider directly with tool-use support ──
544
+ const provider = byokProvider || 'anthropic';
545
+ const speed = options.model === 'haiku' || options.model === 'fast' ? 'fast' : 'default';
546
+ const model = getProviderModel(provider, speed, originalMessage);
547
+ const byokTools = tools.map(t => ({
548
+ name: t.name,
549
+ description: t.description,
550
+ input_schema: t.input_schema,
551
+ }));
552
+ // Build messages: session history + original user message + loop context
553
+ const messages = [
554
+ ...getPreviousMessages(),
555
+ { role: 'user', content: message },
556
+ ...loopMessages,
557
+ ];
558
+ spinner?.stop();
559
+ // Use streaming if requested and provider supports it
560
+ // Disable streaming for local models when tools are active — local models
561
+ // often emit tool calls as raw JSON text, which can't be cleaned up after streaming
562
+ const p = getProvider(provider);
563
+ const canStream = options.stream
564
+ && p.apiStyle !== 'google'
565
+ && p.apiStyle !== 'cohere'
566
+ && !(isLocal && byokTools.length > 0); // Don't stream local + tools (inline tool parsing needs full response)
567
+ const result = canStream
568
+ ? await callProviderStreaming(provider, apiKey || 'local', model, systemContext, messages, byokTools, {
569
+ thinking: options.thinking,
570
+ thinkingBudget: options.thinkingBudget,
571
+ })
572
+ : await callProvider(provider, apiKey || 'local', model, systemContext, messages, byokTools, {
573
+ multimodal: i === 0 ? parsed : undefined,
574
+ thinking: options.thinking,
575
+ thinkingBudget: options.thinkingBudget,
576
+ });
577
+ const iterationCost = estimateCost(provider, result.usage.input_tokens, result.usage.output_tokens);
578
+ cumulativeCostUsd += iterationCost;
579
+ if (result.tool_calls && result.tool_calls.length > 0) {
221
580
  lastResponse = {
222
- type: 'text',
581
+ type: 'tool_calls',
223
582
  agent: options.agent || 'kernel',
224
583
  model: result.model,
225
584
  content: result.content,
585
+ tool_calls: result.tool_calls.map(tc => ({
586
+ id: tc.id,
587
+ name: tc.name,
588
+ arguments: tc.arguments,
589
+ })),
226
590
  usage: {
227
591
  input_tokens: result.usage.input_tokens,
228
592
  output_tokens: result.usage.output_tokens,
229
- cost_usd: estimateCost(provider, result.usage.input_tokens, result.usage.output_tokens),
593
+ cost_usd: iterationCost,
230
594
  },
231
595
  };
232
596
  }
233
597
  else {
234
- // ── Kernel API: Route through Matrix ──
235
- const apiBase = getApiBase();
236
- const body = {
237
- message: i === 0 ? (systemContext ? `${systemContext}\n\n${message}` : message) : message,
238
- mode: 'json',
239
- max_tokens: 8192,
240
- previous_messages: getPreviousMessages(),
241
- tools: tools.length > 0 ? tools : undefined,
242
- };
243
- if (options.agent && options.agent !== 'auto')
244
- body.agent = options.agent;
245
- if (pendingToolResults.length > 0) {
246
- body.tool_results = pendingToolResults;
247
- pendingToolResults = [];
248
- }
249
- const res = await fetch(`${apiBase}/chat`, {
250
- method: 'POST',
251
- headers: {
252
- 'Content-Type': 'application/json',
253
- Authorization: `Bearer ${apiKey}`,
254
- 'X-Kbot-Version': KBOT_VERSION,
598
+ lastResponse = {
599
+ type: 'text',
600
+ agent: options.agent || 'kernel',
601
+ model: result.model,
602
+ content: result.content,
603
+ streamed: canStream, // flag to skip re-printing
604
+ usage: {
605
+ input_tokens: result.usage.input_tokens,
606
+ output_tokens: result.usage.output_tokens,
607
+ cost_usd: iterationCost,
255
608
  },
256
- body: JSON.stringify(body),
257
- });
258
- spinner.stop();
259
- if (!res.ok) {
260
- const err = await res.json().catch(() => ({ error: `HTTP ${res.status}` }));
261
- throw new Error(err.error || `API error: ${res.status}`);
262
- }
263
- lastResponse = await res.json();
609
+ };
264
610
  }
265
611
  // Text response → done
266
612
  if (lastResponse.type === 'text' || !lastResponse.tool_calls || lastResponse.tool_calls.length === 0) {
267
613
  const content = lastResponse.content || '';
268
- addTurn({ role: 'user', content: message });
614
+ addTurn({ role: 'user', content: originalMessage });
269
615
  addTurn({ role: 'assistant', content });
616
+ // ── Recursive Learning: record what worked (async — non-blocking) ──
617
+ const totalTokens = lastResponse.usage
618
+ ? (lastResponse.usage.input_tokens || 0) + (lastResponse.usage.output_tokens || 0)
619
+ : 0;
620
+ // Run all learning asynchronously to avoid blocking the response
621
+ setImmediate(() => {
622
+ try {
623
+ // Record pattern if tools were used successfully
624
+ if (toolCallCount > 0 && toolSequenceLog.length > 0) {
625
+ const patternHint = findPattern(originalMessage);
626
+ const savedTokens = patternHint ? patternHint.avgTokensSaved : 0;
627
+ recordPattern(originalMessage, toolSequenceLog, savedTokens);
628
+ }
629
+ // Cache solution for reuse (only meaningful responses)
630
+ if (content.length > 50 && toolCallCount <= 5) {
631
+ cacheSolution(originalMessage, content.slice(0, 2000));
632
+ }
633
+ // Update user profile
634
+ updateProfile({
635
+ tokens: totalTokens,
636
+ tokensSaved: findPattern(originalMessage)?.avgTokensSaved || 0,
637
+ agent: lastResponse.agent || 'kernel',
638
+ taskType: classifyTask(originalMessage),
639
+ techTerms: extractKeywords(originalMessage),
640
+ });
641
+ // Deep learning — extract knowledge, detect corrections, update project memory
642
+ learnFromExchange(originalMessage, content, toolSequenceLog, process.cwd());
643
+ // Track project context
644
+ if (toolSequenceLog.length > 0) {
645
+ updateProjectMemory(process.cwd(), {
646
+ stack: extractKeywords(originalMessage),
647
+ });
648
+ }
649
+ // Auto self-training trigger
650
+ if (shouldAutoTrain()) {
651
+ try {
652
+ selfTrain();
653
+ }
654
+ catch { /* silent */ }
655
+ }
656
+ }
657
+ catch { /* learning failures are non-critical */ }
658
+ });
270
659
  return {
271
660
  content,
272
661
  agent: lastResponse.agent || 'kernel',
273
662
  model: lastResponse.model || 'unknown',
274
663
  toolCalls: toolCallCount,
664
+ streamed: lastResponse.streamed || false,
275
665
  usage: lastResponse.usage,
276
666
  };
277
667
  }
@@ -280,16 +670,43 @@ Only use web_search when the user explicitly asks for current information.`;
280
670
  const results = [];
281
671
  for (const call of toolCalls) {
282
672
  toolCallCount++;
673
+ toolSequenceLog.push(call.name);
283
674
  printToolCall(call.name, call.arguments || {});
675
+ // Permission check — confirm destructive operations
676
+ const permitted = await checkPermission(call.name, call.arguments || {});
677
+ if (!permitted) {
678
+ results.push({ tool_call_id: call.id, result: 'Denied by user — operation skipped.', error: true });
679
+ printToolResult('Denied by user', true);
680
+ continue;
681
+ }
682
+ // Pre-tool hook
683
+ const preHook = runPreToolHook(call.name, call.arguments || {}, options.agent || 'kernel');
684
+ if (preHook.blocked) {
685
+ results.push({ tool_call_id: call.id, result: `Blocked by hook: ${preHook.blockReason}`, error: true });
686
+ printToolResult(`Blocked by hook: ${preHook.blockReason}`, true);
687
+ continue;
688
+ }
284
689
  const result = await executeTool(call);
285
690
  results.push(result);
286
691
  printToolResult(result.result, result.error);
692
+ // Post-tool hook
693
+ runPostToolHook(call.name, call.arguments || {}, result.result, options.agent || 'kernel');
287
694
  }
288
- pendingToolResults = results;
289
- message = `Here are the tool results:\n${results.map(r => `[${r.tool_call_id}]: ${r.result}`).join('\n\n')}\n\nContinue based on these results.`;
695
+ // ── Maintain conversation context across tool iterations ──
696
+ // 1. Include the assistant's response (its reasoning + which tools it chose)
697
+ const assistantSummary = lastResponse.content
698
+ ? lastResponse.content
699
+ : `Using tools: ${toolCalls.map(tc => tc.name).join(', ')}`;
700
+ loopMessages.push({ role: 'assistant', content: assistantSummary });
701
+ // 2. Include tool results so the AI sees what happened
702
+ const toolResultSummary = results.map(r => {
703
+ const status = r.error ? '[ERROR] ' : '';
704
+ return `${r.tool_call_id} (${toolCalls.find(tc => tc.id === r.tool_call_id)?.name || 'unknown'}): ${status}${r.result}`;
705
+ }).join('\n\n');
706
+ loopMessages.push({ role: 'user', content: `Tool results:\n${toolResultSummary}` });
290
707
  }
291
708
  catch (err) {
292
- spinner.stop();
709
+ spinner?.stop();
293
710
  throw err;
294
711
  }
295
712
  }
@@ -306,14 +723,54 @@ Only use web_search when the user explicitly asks for current information.`;
306
723
  export async function runAndPrint(message, options = {}) {
307
724
  try {
308
725
  const response = await runAgent(message, options);
309
- printResponse(response.agent, response.content);
726
+ // Skip re-printing if content was already streamed to stdout
727
+ if (!response.streamed) {
728
+ printResponse(response.agent, response.content);
729
+ }
730
+ // Usage footer — subtle, one line (stderr — status, not content)
310
731
  if (response.usage) {
311
- const { input_tokens, output_tokens, cost_usd } = response.usage;
312
- process.stdout.write(` \x1b[2m${response.agent} · ${response.model} · ${input_tokens + output_tokens} tokens · $${cost_usd.toFixed(4)}\x1b[0m\n`);
732
+ const tokens = response.usage.input_tokens + response.usage.output_tokens;
733
+ const cost = response.usage.cost_usd === 0 ? 'free' : `$${response.usage.cost_usd.toFixed(4)}`;
734
+ process.stderr.write(`\n \x1b[2m${tokens} tokens · ${cost}\x1b[0m\n`);
313
735
  }
314
736
  }
315
737
  catch (err) {
316
- printError(err instanceof Error ? err.message : String(err));
738
+ const errMsg = err instanceof Error ? err.message : String(err);
739
+ // Ollama connection errors — friendly, actionable
740
+ if (errMsg.includes('fetch failed') || errMsg.includes('ECONNREFUSED')) {
741
+ const config = await import('./auth.js').then(m => m.loadConfig());
742
+ if (config?.byok_provider === 'ollama') {
743
+ printError('Ollama isn\'t running.');
744
+ printInfo('Open the Ollama app or run: ollama serve');
745
+ return;
746
+ }
747
+ if (config?.byok_provider === 'openclaw') {
748
+ printError('OpenClaw gateway isn\'t running.');
749
+ printInfo('Start it: openclaw-cmd start');
750
+ return;
751
+ }
752
+ printError('Can\'t reach the AI provider.');
753
+ printInfo('Check your internet connection and try again.');
754
+ return;
755
+ }
756
+ // Model not found
757
+ if (errMsg.includes('model') && (errMsg.includes('not found') || errMsg.includes('does not exist'))) {
758
+ printError('That model isn\'t installed yet.');
759
+ printInfo('Download it: ollama pull <model-name>');
760
+ return;
761
+ }
762
+ // Rate limiting
763
+ if (errMsg.includes('rate') || errMsg.includes('429') || errMsg.includes('too many')) {
764
+ printError('Too many requests. Wait a moment and try again.');
765
+ return;
766
+ }
767
+ // Auth errors
768
+ if (errMsg.includes('401') || errMsg.includes('403') || errMsg.includes('invalid') && errMsg.includes('key')) {
769
+ printError('API key issue. Your key may be expired or invalid.');
770
+ printInfo('Update it: kbot auth');
771
+ return;
772
+ }
773
+ printError(errMsg);
317
774
  process.exit(1);
318
775
  }
319
776
  }