groove-dev 0.27.150 → 0.27.152

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@groove-dev/cli",
3
- "version": "0.27.150",
3
+ "version": "0.27.152",
4
4
  "description": "GROOVE CLI — manage AI coding agents from your terminal",
5
5
  "license": "FSL-1.1-Apache-2.0",
6
6
  "type": "module",
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@groove-dev/daemon",
3
- "version": "0.27.150",
3
+ "version": "0.27.152",
4
4
  "description": "GROOVE daemon — agent orchestration engine",
5
5
  "license": "FSL-1.1-Apache-2.0",
6
6
  "type": "module",
@@ -10,6 +10,11 @@ import { existsSync, readFileSync, writeFileSync, mkdirSync, unlinkSync } from '
10
10
  import { resolve, dirname } from 'path';
11
11
  import { TOOL_DEFINITIONS, ToolExecutor } from './tool-executor.js';
12
12
 
13
+ function stripThinkTags(text) {
14
+ if (!text) return text;
15
+ return text.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
16
+ }
17
+
13
18
  export class AgentLoop extends EventEmitter {
14
19
  constructor({ daemon, agent, loopConfig, logStream }) {
15
20
  super();
@@ -24,6 +29,11 @@ export class AgentLoop extends EventEmitter {
24
29
  this.idle = true;
25
30
  this.abortController = null;
26
31
 
32
+ // Tool calling mode: 'native' uses OpenAI function-calling API fields,
33
+ // 'prompt' injects tool schemas into the system prompt and parses
34
+ // <tool_call> blocks from the model's text output.
35
+ this.toolMode = 'native';
36
+
27
37
  // Metrics
28
38
  this.totalTokensIn = 0;
29
39
  this.totalTokensOut = 0;
@@ -132,7 +142,7 @@ export class AgentLoop extends EventEmitter {
132
142
  const response = await this._callApi();
133
143
  if (!response || !this.running) break;
134
144
 
135
- const { content, toolCalls, usage, finishReason } = response;
145
+ let { content, toolCalls, usage, finishReason } = response;
136
146
  consecutiveErrors = 0; // Reset on successful call
137
147
 
138
148
  // Update token tracking from API response
@@ -140,10 +150,21 @@ export class AgentLoop extends EventEmitter {
140
150
  this._updateTokens(usage);
141
151
  }
142
152
 
143
- // Append assistant message to conversation history
153
+ // Strip thinking tags from display content (keep raw in conversation history)
154
+ const displayContent = stripThinkTags(content);
155
+
156
+ // In prompt-based mode, parse tool calls from the model's text
157
+ if (this.toolMode === 'prompt' && content) {
158
+ const parsed = this._parseToolCallsFromText(content);
159
+ if (parsed.length > 0) {
160
+ toolCalls = parsed;
161
+ }
162
+ }
163
+
164
+ // Append assistant message to conversation history (raw content with thinking preserved)
144
165
  const assistantMsg = { role: 'assistant' };
145
166
  if (content) assistantMsg.content = content;
146
- if (toolCalls?.length > 0) {
167
+ if (this.toolMode === 'native' && toolCalls?.length > 0) {
147
168
  assistantMsg.tool_calls = toolCalls.map((tc) => ({
148
169
  id: tc.id,
149
170
  type: 'function',
@@ -154,17 +175,21 @@ export class AgentLoop extends EventEmitter {
154
175
 
155
176
  // No tool calls → turn complete, broadcast final text and go idle
156
177
  if (!toolCalls || toolCalls.length === 0) {
157
- if (content) {
158
- this._writeLog({ type: 'assistant', content: content.slice(0, 2000) });
178
+ if (displayContent) {
179
+ this._writeLog({ type: 'assistant', content: displayContent.slice(0, 2000) });
159
180
  }
160
- this.emit('output', { type: 'result', subtype: 'assistant', data: content || 'Turn complete', turns: this.turns });
181
+ this.emit('output', { type: 'result', subtype: 'assistant', data: displayContent || 'Turn complete', turns: this.turns });
161
182
  break;
162
183
  }
163
184
 
164
185
  // Has tool calls — broadcast text before executing tools (if model sent text + tools)
165
- if (content) {
166
- this._writeLog({ type: 'assistant', content: content.slice(0, 2000) });
167
- this.emit('output', { type: 'activity', subtype: 'assistant', data: content });
186
+ let preToolText = displayContent;
187
+ if (this.toolMode === 'prompt') {
188
+ preToolText = stripThinkTags((content || '').replace(/<tool_call>[\s\S]*?<\/tool_call>/g, ''));
189
+ }
190
+ if (preToolText) {
191
+ this._writeLog({ type: 'assistant', content: preToolText.slice(0, 2000) });
192
+ this.emit('output', { type: 'activity', subtype: 'assistant', data: preToolText });
168
193
  }
169
194
 
170
195
  // Execute each tool call
@@ -172,13 +197,29 @@ export class AgentLoop extends EventEmitter {
172
197
  if (!this.running) break;
173
198
 
174
199
  let args;
200
+ let parseError = null;
175
201
  try {
176
202
  args = JSON.parse(call.function.arguments);
177
- } catch {
203
+ } catch (e) {
204
+ parseError = e.message;
178
205
  args = {};
179
206
  }
180
207
 
181
208
  const toolName = call.function.name;
209
+
210
+ // Report malformed JSON back to the model instead of silently failing
211
+ if (parseError) {
212
+ const errMsg = `Invalid JSON in tool arguments: ${parseError}. Raw: ${call.function.arguments.slice(0, 200)}`;
213
+ this._writeLog({ type: 'tool_result', tool: toolName, success: false, output: errMsg });
214
+ this.emit('output', { type: 'activity', subtype: 'tool_result', data: [{ type: 'tool_result', name: toolName, success: false, output: errMsg }] });
215
+ if (this.toolMode === 'native') {
216
+ this.messages.push({ role: 'tool', tool_call_id: call.id, content: `Error: ${errMsg}` });
217
+ } else {
218
+ this.messages.push({ role: 'user', content: `<tool_result name="${toolName}">\nError: ${errMsg}\n</tool_result>` });
219
+ }
220
+ continue;
221
+ }
222
+
182
223
  const inputSummary = this._summarizeToolInput(toolName, args);
183
224
 
184
225
  // Log + broadcast tool invocation
@@ -210,12 +251,24 @@ export class AgentLoop extends EventEmitter {
210
251
  this.daemon.classifier.addEvent(this.agent.id, { type: 'error', text: result.error });
211
252
  }
212
253
 
213
- // Append tool result to conversation for the model
214
- this.messages.push({
215
- role: 'tool',
216
- tool_call_id: call.id,
217
- content: result.success ? (result.result || 'Done.') : `Error: ${result.error}`,
218
- });
254
+ // Append tool result to conversation — cap size to protect context window
255
+ const MAX_RESULT_CHARS = 30000;
256
+ let resultContent = result.success ? (result.result || 'Done.') : `Error: ${result.error}`;
257
+ if (resultContent.length > MAX_RESULT_CHARS) {
258
+ resultContent = resultContent.slice(0, MAX_RESULT_CHARS) + '\n... (result truncated use offset/limit for large files, or pipe commands through head/tail)';
259
+ }
260
+ if (this.toolMode === 'native') {
261
+ this.messages.push({
262
+ role: 'tool',
263
+ tool_call_id: call.id,
264
+ content: resultContent,
265
+ });
266
+ } else {
267
+ this.messages.push({
268
+ role: 'user',
269
+ content: `<tool_result name="${toolName}">\n${resultContent}\n</tool_result>`,
270
+ });
271
+ }
219
272
  }
220
273
 
221
274
  // Context rotation is handled by the Rotator's 15s polling loop
@@ -236,12 +289,15 @@ export class AgentLoop extends EventEmitter {
236
289
  const body = {
237
290
  model: this.config.model,
238
291
  messages: this.messages,
239
- tools: TOOL_DEFINITIONS,
240
- tool_choice: 'auto',
241
292
  temperature: this.config.temperature ?? 0.1,
242
293
  max_tokens: this.config.maxResponseTokens || 4096,
243
294
  };
244
295
 
296
+ if (this.toolMode === 'native') {
297
+ body.tools = TOOL_DEFINITIONS;
298
+ body.tool_choice = 'auto';
299
+ }
300
+
245
301
  if (this.config.stream !== false) {
246
302
  body.stream = true;
247
303
  body.stream_options = { include_usage: true };
@@ -283,6 +339,18 @@ export class AgentLoop extends EventEmitter {
283
339
  const text = await response.text().catch(() => '');
284
340
  const errMsg = `API error ${response.status}: ${text.slice(0, 500)}`;
285
341
 
342
+ // Detect tool_choice rejection (vLLM, TGI, etc. without tool-calling flags)
343
+ // Fall back to prompt-based tool calling and retry immediately
344
+ if (response.status === 400 && this.toolMode === 'native' &&
345
+ (text.includes('tool_choice') || text.includes('tool-call-parser') || text.includes('enable-auto-tool-choice'))) {
346
+ this._writeLog({ type: 'system', event: 'tool-fallback', reason: 'Runtime rejected native tool calling — switching to prompt-based tools' });
347
+ this.toolMode = 'prompt';
348
+ this._injectToolPrompt();
349
+ delete body.tools;
350
+ delete body.tool_choice;
351
+ continue;
352
+ }
353
+
286
354
  if (response.status === 401 || response.status === 403) {
287
355
  this._writeLog({ type: 'error', text: errMsg });
288
356
  this.emit('error', { message: errMsg });
@@ -320,6 +388,10 @@ export class AgentLoop extends EventEmitter {
320
388
  let finishReason = null;
321
389
  let buffer = '';
322
390
 
391
+ // State machine for suppressing <think> blocks during streaming
392
+ let insideThink = false;
393
+ let streamBuf = '';
394
+
323
395
  const reader = response.body.getReader();
324
396
  const decoder = new TextDecoder();
325
397
 
@@ -349,10 +421,56 @@ export class AgentLoop extends EventEmitter {
349
421
  if (choice.finish_reason) finishReason = choice.finish_reason;
350
422
  const delta = choice.delta || {};
351
423
 
352
- // Stream text tokens to GUI in real-time
424
+ // reasoning_content: separate thinking field (vLLM, some OpenAI-compat servers)
425
+ // Capture for logging but don't stream to GUI
426
+ if (delta.reasoning_content || delta.reasoning) {
427
+ // Accumulate in content so it's in the conversation history
428
+ // but don't stream it to the GUI
429
+ content += delta.reasoning_content || delta.reasoning;
430
+ }
431
+
432
+ // Stream text tokens to GUI in real-time, suppressing <think> blocks
353
433
  if (delta.content) {
354
434
  content += delta.content;
355
- this.emit('output', { type: 'activity', subtype: 'stream', data: delta.content });
435
+ streamBuf += delta.content;
436
+
437
+ // Process buffer — emit non-think content, suppress think content
438
+ let safety = 0;
439
+ while (streamBuf.length > 0 && safety++ < 100) {
440
+ if (insideThink) {
441
+ const closeIdx = streamBuf.indexOf('</think>');
442
+ if (closeIdx >= 0) {
443
+ insideThink = false;
444
+ streamBuf = streamBuf.slice(closeIdx + 8);
445
+ } else {
446
+ break; // wait for more data
447
+ }
448
+ } else {
449
+ const openIdx = streamBuf.indexOf('<think>');
450
+ if (openIdx >= 0) {
451
+ const before = streamBuf.slice(0, openIdx);
452
+ if (before) {
453
+ this.emit('output', { type: 'activity', subtype: 'stream', data: before });
454
+ }
455
+ insideThink = true;
456
+ streamBuf = streamBuf.slice(openIdx + 7);
457
+ } else {
458
+ // Hold back bytes that could be the start of a <think> tag
459
+ let safeEnd = streamBuf.length;
460
+ for (let i = Math.min(6, streamBuf.length); i >= 1; i--) {
461
+ if ('<think>'.startsWith(streamBuf.slice(-i))) {
462
+ safeEnd = streamBuf.length - i;
463
+ break;
464
+ }
465
+ }
466
+ if (safeEnd > 0) {
467
+ this.emit('output', { type: 'activity', subtype: 'stream', data: streamBuf.slice(0, safeEnd) });
468
+ }
469
+ streamBuf = streamBuf.slice(safeEnd);
470
+ break;
471
+ }
472
+ }
473
+ }
356
474
  }
357
475
 
358
476
  // Accumulate tool call deltas
@@ -380,6 +498,11 @@ export class AgentLoop extends EventEmitter {
380
498
  return null;
381
499
  }
382
500
 
501
+ // Flush remaining stream buffer (e.g. unclosed <think> — treat as display content)
502
+ if (streamBuf) {
503
+ this.emit('output', { type: 'activity', subtype: 'stream', data: streamBuf });
504
+ }
505
+
383
506
  return {
384
507
  content: content || null,
385
508
  toolCalls: toolCalls.size > 0 ? Array.from(toolCalls.values()) : null,
@@ -405,6 +528,65 @@ export class AgentLoop extends EventEmitter {
405
528
  };
406
529
  }
407
530
 
531
+ // --- Prompt-Based Tool Calling Fallback ---
532
+
533
+ _injectToolPrompt() {
534
+ const toolPrompt = this._buildToolPrompt();
535
+ const systemIdx = this.messages.findIndex(m => m.role === 'system');
536
+ if (systemIdx >= 0) {
537
+ this.messages[systemIdx].content += '\n\n' + toolPrompt;
538
+ } else {
539
+ this.messages.unshift({ role: 'system', content: toolPrompt });
540
+ }
541
+ }
542
+
543
+ _buildToolPrompt() {
544
+ const toolDefs = TOOL_DEFINITIONS.map(t => {
545
+ const f = t.function;
546
+ const params = Object.entries(f.parameters.properties).map(([name, schema]) => {
547
+ const req = f.parameters.required?.includes(name) ? ' (required)' : ' (optional)';
548
+ return ` - ${name}: ${schema.type}${req} — ${schema.description}`;
549
+ }).join('\n');
550
+ return `### ${f.name}\n${f.description}\nParameters:\n${params}`;
551
+ }).join('\n\n');
552
+
553
+ return `## Available Tools
554
+
555
+ To use a tool, include a tool_call block in your response:
556
+
557
+ <tool_call>
558
+ {"name": "tool_name", "arguments": {"param1": "value1"}}
559
+ </tool_call>
560
+
561
+ You can make multiple tool calls in one response. After each tool call you will receive a <tool_result> with the output.
562
+
563
+ ${toolDefs}
564
+
565
+ Always use tools to read, write, or search files and to run commands. Do not guess file contents.`;
566
+ }
567
+
568
+ _parseToolCallsFromText(content) {
569
+ if (!content) return [];
570
+ const calls = [];
571
+ const regex = /<tool_call>\s*([\s\S]*?)\s*<\/tool_call>/g;
572
+ let match;
573
+ while ((match = regex.exec(content)) !== null) {
574
+ try {
575
+ const parsed = JSON.parse(match[1].trim());
576
+ if (parsed.name) {
577
+ calls.push({
578
+ id: `call_${Date.now()}_${calls.length}`,
579
+ function: {
580
+ name: parsed.name,
581
+ arguments: JSON.stringify(parsed.arguments || {}),
582
+ },
583
+ });
584
+ }
585
+ } catch { /* skip malformed tool call */ }
586
+ }
587
+ return calls;
588
+ }
589
+
408
590
  // --- Token Tracking ---
409
591
 
410
592
  _updateTokens(usage) {
@@ -137,6 +137,7 @@ export class LocalProvider extends Provider {
137
137
  let model = agent.model || 'qwen2.5-coder:7b';
138
138
  let apiBase = 'http://localhost:11434/v1';
139
139
  let apiKey = agent.apiKey || null;
140
+ let runtimeType = 'ollama';
140
141
 
141
142
  if (agent.apiBase) {
142
143
  apiBase = agent.apiBase;
@@ -153,6 +154,7 @@ export class LocalProvider extends Provider {
153
154
  if (rt) {
154
155
  apiBase = rt.endpoint.includes('/v1') ? rt.endpoint : `${rt.endpoint}/v1`;
155
156
  if (rt.apiKey) apiKey = rt.apiKey;
157
+ if (rt.type) runtimeType = rt.type;
156
158
  const rtModel = rt.models?.[0];
157
159
  model = rtModel?.id || rtModel?.name || ggufId;
158
160
  }
@@ -167,6 +169,7 @@ export class LocalProvider extends Provider {
167
169
  if (rt) {
168
170
  apiBase = rt.endpoint.includes('/v1') ? rt.endpoint : `${rt.endpoint}/v1`;
169
171
  if (rt.apiKey) apiKey = rt.apiKey;
172
+ if (rt.type) runtimeType = rt.type;
170
173
  model = modelId;
171
174
  }
172
175
  }
@@ -176,6 +179,7 @@ export class LocalProvider extends Provider {
176
179
  return {
177
180
  apiBase,
178
181
  model,
182
+ runtimeType,
179
183
  contextWindow,
180
184
  temperature: typeof agent.temperature === 'number' ? agent.temperature : 0.1,
181
185
  maxResponseTokens: 4096,
@@ -1,7 +1,7 @@
1
1
  // GROOVE — Tool Executor for Local Agent Loop
2
2
  // FSL-1.1-Apache-2.0 — see LICENSE
3
3
 
4
- import { readFileSync, writeFileSync, readdirSync, statSync, mkdirSync, existsSync } from 'fs';
4
+ import { readFileSync, writeFileSync, readdirSync, statSync, mkdirSync, existsSync, openSync, readSync, closeSync } from 'fs';
5
5
  import { execSync } from 'child_process';
6
6
  import { resolve, relative, dirname, sep } from 'path';
7
7
  import { minimatch } from 'minimatch';
@@ -190,11 +190,24 @@ export class ToolExecutor {
190
190
  if (stat.isDirectory()) {
191
191
  return { success: false, error: `Path is a directory, not a file: ${filePath}` };
192
192
  }
193
- // Guard against huge files
194
193
  if (stat.size > 5 * 1024 * 1024) {
195
194
  return { success: false, error: `File too large (${formatBytes(stat.size)}). Use offset/limit to read a section.` };
196
195
  }
197
196
 
197
+ // Detect binary files — check first 8KB for null bytes
198
+ if (stat.size > 0) {
199
+ const probe = Buffer.alloc(Math.min(8192, stat.size));
200
+ const fd = openSync(resolved, 'r');
201
+ try {
202
+ readSync(fd, probe, 0, probe.length, 0);
203
+ } finally {
204
+ closeSync(fd);
205
+ }
206
+ if (probe.includes(0)) {
207
+ return { success: false, error: `Binary file (${formatBytes(stat.size)}). Cannot read non-text files.` };
208
+ }
209
+ }
210
+
198
211
  const content = readFileSync(resolved, 'utf8');
199
212
  let lines = content.split('\n');
200
213
  const totalLines = lines.length;
@@ -207,8 +220,19 @@ export class ToolExecutor {
207
220
  lines = lines.slice(0, limit);
208
221
  }
209
222
 
223
+ // Auto-limit large files when no explicit limit was provided
224
+ const MAX_LINES = 2000;
225
+ let autoTruncated = false;
226
+ if (!limit && lines.length > MAX_LINES) {
227
+ lines = lines.slice(0, MAX_LINES);
228
+ autoTruncated = true;
229
+ }
230
+
210
231
  const numbered = lines.map((line, i) => `${startLine + i}\t${line}`).join('\n');
211
- return { success: true, result: numbered, meta: { totalLines } };
232
+ const result = autoTruncated
233
+ ? numbered + `\n\n... (showing ${MAX_LINES} of ${totalLines} lines — use offset/limit to read more)`
234
+ : numbered;
235
+ return { success: true, result, meta: { totalLines } };
212
236
  }
213
237
 
214
238
  writeFile({ path: filePath, content }) {
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@groove-dev/gui",
3
- "version": "0.27.150",
3
+ "version": "0.27.152",
4
4
  "description": "GROOVE GUI — visual agent control plane",
5
5
  "license": "FSL-1.1-Apache-2.0",
6
6
  "type": "module",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "groove-dev",
3
- "version": "0.27.150",
3
+ "version": "0.27.152",
4
4
  "description": "Open-source agent orchestration layer — the AI company OS. Local model agent engine (GGUF/Ollama/llama-server), HuggingFace model browser, MCP integrations (Slack, Gmail, Stripe, 15+), agent scheduling (cron), business roles (CMO, CFO, EA). GUI dashboard, multi-agent coordination, zero cold-start, infinite sessions. Works with Claude Code, Codex, Gemini CLI, Ollama, any local model.",
5
5
  "license": "FSL-1.1-Apache-2.0",
6
6
  "author": "Groove Dev <hello@groovedev.ai> (https://groovedev.ai)",
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@groove-dev/cli",
3
- "version": "0.27.150",
3
+ "version": "0.27.152",
4
4
  "description": "GROOVE CLI — manage AI coding agents from your terminal",
5
5
  "license": "FSL-1.1-Apache-2.0",
6
6
  "type": "module",
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@groove-dev/daemon",
3
- "version": "0.27.150",
3
+ "version": "0.27.152",
4
4
  "description": "GROOVE daemon — agent orchestration engine",
5
5
  "license": "FSL-1.1-Apache-2.0",
6
6
  "type": "module",
@@ -10,6 +10,11 @@ import { existsSync, readFileSync, writeFileSync, mkdirSync, unlinkSync } from '
10
10
  import { resolve, dirname } from 'path';
11
11
  import { TOOL_DEFINITIONS, ToolExecutor } from './tool-executor.js';
12
12
 
13
+ function stripThinkTags(text) {
14
+ if (!text) return text;
15
+ return text.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
16
+ }
17
+
13
18
  export class AgentLoop extends EventEmitter {
14
19
  constructor({ daemon, agent, loopConfig, logStream }) {
15
20
  super();
@@ -24,6 +29,11 @@ export class AgentLoop extends EventEmitter {
24
29
  this.idle = true;
25
30
  this.abortController = null;
26
31
 
32
+ // Tool calling mode: 'native' uses OpenAI function-calling API fields,
33
+ // 'prompt' injects tool schemas into the system prompt and parses
34
+ // <tool_call> blocks from the model's text output.
35
+ this.toolMode = 'native';
36
+
27
37
  // Metrics
28
38
  this.totalTokensIn = 0;
29
39
  this.totalTokensOut = 0;
@@ -132,7 +142,7 @@ export class AgentLoop extends EventEmitter {
132
142
  const response = await this._callApi();
133
143
  if (!response || !this.running) break;
134
144
 
135
- const { content, toolCalls, usage, finishReason } = response;
145
+ let { content, toolCalls, usage, finishReason } = response;
136
146
  consecutiveErrors = 0; // Reset on successful call
137
147
 
138
148
  // Update token tracking from API response
@@ -140,10 +150,21 @@ export class AgentLoop extends EventEmitter {
140
150
  this._updateTokens(usage);
141
151
  }
142
152
 
143
- // Append assistant message to conversation history
153
+ // Strip thinking tags from display content (keep raw in conversation history)
154
+ const displayContent = stripThinkTags(content);
155
+
156
+ // In prompt-based mode, parse tool calls from the model's text
157
+ if (this.toolMode === 'prompt' && content) {
158
+ const parsed = this._parseToolCallsFromText(content);
159
+ if (parsed.length > 0) {
160
+ toolCalls = parsed;
161
+ }
162
+ }
163
+
164
+ // Append assistant message to conversation history (raw content with thinking preserved)
144
165
  const assistantMsg = { role: 'assistant' };
145
166
  if (content) assistantMsg.content = content;
146
- if (toolCalls?.length > 0) {
167
+ if (this.toolMode === 'native' && toolCalls?.length > 0) {
147
168
  assistantMsg.tool_calls = toolCalls.map((tc) => ({
148
169
  id: tc.id,
149
170
  type: 'function',
@@ -154,17 +175,21 @@ export class AgentLoop extends EventEmitter {
154
175
 
155
176
  // No tool calls → turn complete, broadcast final text and go idle
156
177
  if (!toolCalls || toolCalls.length === 0) {
157
- if (content) {
158
- this._writeLog({ type: 'assistant', content: content.slice(0, 2000) });
178
+ if (displayContent) {
179
+ this._writeLog({ type: 'assistant', content: displayContent.slice(0, 2000) });
159
180
  }
160
- this.emit('output', { type: 'result', subtype: 'assistant', data: content || 'Turn complete', turns: this.turns });
181
+ this.emit('output', { type: 'result', subtype: 'assistant', data: displayContent || 'Turn complete', turns: this.turns });
161
182
  break;
162
183
  }
163
184
 
164
185
  // Has tool calls — broadcast text before executing tools (if model sent text + tools)
165
- if (content) {
166
- this._writeLog({ type: 'assistant', content: content.slice(0, 2000) });
167
- this.emit('output', { type: 'activity', subtype: 'assistant', data: content });
186
+ let preToolText = displayContent;
187
+ if (this.toolMode === 'prompt') {
188
+ preToolText = stripThinkTags((content || '').replace(/<tool_call>[\s\S]*?<\/tool_call>/g, ''));
189
+ }
190
+ if (preToolText) {
191
+ this._writeLog({ type: 'assistant', content: preToolText.slice(0, 2000) });
192
+ this.emit('output', { type: 'activity', subtype: 'assistant', data: preToolText });
168
193
  }
169
194
 
170
195
  // Execute each tool call
@@ -172,13 +197,29 @@ export class AgentLoop extends EventEmitter {
172
197
  if (!this.running) break;
173
198
 
174
199
  let args;
200
+ let parseError = null;
175
201
  try {
176
202
  args = JSON.parse(call.function.arguments);
177
- } catch {
203
+ } catch (e) {
204
+ parseError = e.message;
178
205
  args = {};
179
206
  }
180
207
 
181
208
  const toolName = call.function.name;
209
+
210
+ // Report malformed JSON back to the model instead of silently failing
211
+ if (parseError) {
212
+ const errMsg = `Invalid JSON in tool arguments: ${parseError}. Raw: ${call.function.arguments.slice(0, 200)}`;
213
+ this._writeLog({ type: 'tool_result', tool: toolName, success: false, output: errMsg });
214
+ this.emit('output', { type: 'activity', subtype: 'tool_result', data: [{ type: 'tool_result', name: toolName, success: false, output: errMsg }] });
215
+ if (this.toolMode === 'native') {
216
+ this.messages.push({ role: 'tool', tool_call_id: call.id, content: `Error: ${errMsg}` });
217
+ } else {
218
+ this.messages.push({ role: 'user', content: `<tool_result name="${toolName}">\nError: ${errMsg}\n</tool_result>` });
219
+ }
220
+ continue;
221
+ }
222
+
182
223
  const inputSummary = this._summarizeToolInput(toolName, args);
183
224
 
184
225
  // Log + broadcast tool invocation
@@ -210,12 +251,24 @@ export class AgentLoop extends EventEmitter {
210
251
  this.daemon.classifier.addEvent(this.agent.id, { type: 'error', text: result.error });
211
252
  }
212
253
 
213
- // Append tool result to conversation for the model
214
- this.messages.push({
215
- role: 'tool',
216
- tool_call_id: call.id,
217
- content: result.success ? (result.result || 'Done.') : `Error: ${result.error}`,
218
- });
254
+ // Append tool result to conversation — cap size to protect context window
255
+ const MAX_RESULT_CHARS = 30000;
256
+ let resultContent = result.success ? (result.result || 'Done.') : `Error: ${result.error}`;
257
+ if (resultContent.length > MAX_RESULT_CHARS) {
258
+ resultContent = resultContent.slice(0, MAX_RESULT_CHARS) + '\n... (result truncated use offset/limit for large files, or pipe commands through head/tail)';
259
+ }
260
+ if (this.toolMode === 'native') {
261
+ this.messages.push({
262
+ role: 'tool',
263
+ tool_call_id: call.id,
264
+ content: resultContent,
265
+ });
266
+ } else {
267
+ this.messages.push({
268
+ role: 'user',
269
+ content: `<tool_result name="${toolName}">\n${resultContent}\n</tool_result>`,
270
+ });
271
+ }
219
272
  }
220
273
 
221
274
  // Context rotation is handled by the Rotator's 15s polling loop
@@ -236,12 +289,15 @@ export class AgentLoop extends EventEmitter {
236
289
  const body = {
237
290
  model: this.config.model,
238
291
  messages: this.messages,
239
- tools: TOOL_DEFINITIONS,
240
- tool_choice: 'auto',
241
292
  temperature: this.config.temperature ?? 0.1,
242
293
  max_tokens: this.config.maxResponseTokens || 4096,
243
294
  };
244
295
 
296
+ if (this.toolMode === 'native') {
297
+ body.tools = TOOL_DEFINITIONS;
298
+ body.tool_choice = 'auto';
299
+ }
300
+
245
301
  if (this.config.stream !== false) {
246
302
  body.stream = true;
247
303
  body.stream_options = { include_usage: true };
@@ -283,6 +339,18 @@ export class AgentLoop extends EventEmitter {
283
339
  const text = await response.text().catch(() => '');
284
340
  const errMsg = `API error ${response.status}: ${text.slice(0, 500)}`;
285
341
 
342
+ // Detect tool_choice rejection (vLLM, TGI, etc. without tool-calling flags)
343
+ // Fall back to prompt-based tool calling and retry immediately
344
+ if (response.status === 400 && this.toolMode === 'native' &&
345
+ (text.includes('tool_choice') || text.includes('tool-call-parser') || text.includes('enable-auto-tool-choice'))) {
346
+ this._writeLog({ type: 'system', event: 'tool-fallback', reason: 'Runtime rejected native tool calling — switching to prompt-based tools' });
347
+ this.toolMode = 'prompt';
348
+ this._injectToolPrompt();
349
+ delete body.tools;
350
+ delete body.tool_choice;
351
+ continue;
352
+ }
353
+
286
354
  if (response.status === 401 || response.status === 403) {
287
355
  this._writeLog({ type: 'error', text: errMsg });
288
356
  this.emit('error', { message: errMsg });
@@ -320,6 +388,10 @@ export class AgentLoop extends EventEmitter {
320
388
  let finishReason = null;
321
389
  let buffer = '';
322
390
 
391
+ // State machine for suppressing <think> blocks during streaming
392
+ let insideThink = false;
393
+ let streamBuf = '';
394
+
323
395
  const reader = response.body.getReader();
324
396
  const decoder = new TextDecoder();
325
397
 
@@ -349,10 +421,56 @@ export class AgentLoop extends EventEmitter {
349
421
  if (choice.finish_reason) finishReason = choice.finish_reason;
350
422
  const delta = choice.delta || {};
351
423
 
352
- // Stream text tokens to GUI in real-time
424
+ // reasoning_content: separate thinking field (vLLM, some OpenAI-compat servers)
425
+ // Capture for logging but don't stream to GUI
426
+ if (delta.reasoning_content || delta.reasoning) {
427
+ // Accumulate in content so it's in the conversation history
428
+ // but don't stream it to the GUI
429
+ content += delta.reasoning_content || delta.reasoning;
430
+ }
431
+
432
+ // Stream text tokens to GUI in real-time, suppressing <think> blocks
353
433
  if (delta.content) {
354
434
  content += delta.content;
355
- this.emit('output', { type: 'activity', subtype: 'stream', data: delta.content });
435
+ streamBuf += delta.content;
436
+
437
+ // Process buffer — emit non-think content, suppress think content
438
+ let safety = 0;
439
+ while (streamBuf.length > 0 && safety++ < 100) {
440
+ if (insideThink) {
441
+ const closeIdx = streamBuf.indexOf('</think>');
442
+ if (closeIdx >= 0) {
443
+ insideThink = false;
444
+ streamBuf = streamBuf.slice(closeIdx + 8);
445
+ } else {
446
+ break; // wait for more data
447
+ }
448
+ } else {
449
+ const openIdx = streamBuf.indexOf('<think>');
450
+ if (openIdx >= 0) {
451
+ const before = streamBuf.slice(0, openIdx);
452
+ if (before) {
453
+ this.emit('output', { type: 'activity', subtype: 'stream', data: before });
454
+ }
455
+ insideThink = true;
456
+ streamBuf = streamBuf.slice(openIdx + 7);
457
+ } else {
458
+ // Hold back bytes that could be the start of a <think> tag
459
+ let safeEnd = streamBuf.length;
460
+ for (let i = Math.min(6, streamBuf.length); i >= 1; i--) {
461
+ if ('<think>'.startsWith(streamBuf.slice(-i))) {
462
+ safeEnd = streamBuf.length - i;
463
+ break;
464
+ }
465
+ }
466
+ if (safeEnd > 0) {
467
+ this.emit('output', { type: 'activity', subtype: 'stream', data: streamBuf.slice(0, safeEnd) });
468
+ }
469
+ streamBuf = streamBuf.slice(safeEnd);
470
+ break;
471
+ }
472
+ }
473
+ }
356
474
  }
357
475
 
358
476
  // Accumulate tool call deltas
@@ -380,6 +498,11 @@ export class AgentLoop extends EventEmitter {
380
498
  return null;
381
499
  }
382
500
 
501
+ // Flush remaining stream buffer (e.g. unclosed <think> — treat as display content)
502
+ if (streamBuf) {
503
+ this.emit('output', { type: 'activity', subtype: 'stream', data: streamBuf });
504
+ }
505
+
383
506
  return {
384
507
  content: content || null,
385
508
  toolCalls: toolCalls.size > 0 ? Array.from(toolCalls.values()) : null,
@@ -405,6 +528,65 @@ export class AgentLoop extends EventEmitter {
405
528
  };
406
529
  }
407
530
 
531
+ // --- Prompt-Based Tool Calling Fallback ---
532
+
533
+ _injectToolPrompt() {
534
+ const toolPrompt = this._buildToolPrompt();
535
+ const systemIdx = this.messages.findIndex(m => m.role === 'system');
536
+ if (systemIdx >= 0) {
537
+ this.messages[systemIdx].content += '\n\n' + toolPrompt;
538
+ } else {
539
+ this.messages.unshift({ role: 'system', content: toolPrompt });
540
+ }
541
+ }
542
+
543
+ _buildToolPrompt() {
544
+ const toolDefs = TOOL_DEFINITIONS.map(t => {
545
+ const f = t.function;
546
+ const params = Object.entries(f.parameters.properties).map(([name, schema]) => {
547
+ const req = f.parameters.required?.includes(name) ? ' (required)' : ' (optional)';
548
+ return ` - ${name}: ${schema.type}${req} — ${schema.description}`;
549
+ }).join('\n');
550
+ return `### ${f.name}\n${f.description}\nParameters:\n${params}`;
551
+ }).join('\n\n');
552
+
553
+ return `## Available Tools
554
+
555
+ To use a tool, include a tool_call block in your response:
556
+
557
+ <tool_call>
558
+ {"name": "tool_name", "arguments": {"param1": "value1"}}
559
+ </tool_call>
560
+
561
+ You can make multiple tool calls in one response. After each tool call you will receive a <tool_result> with the output.
562
+
563
+ ${toolDefs}
564
+
565
+ Always use tools to read, write, or search files and to run commands. Do not guess file contents.`;
566
+ }
567
+
568
+ _parseToolCallsFromText(content) {
569
+ if (!content) return [];
570
+ const calls = [];
571
+ const regex = /<tool_call>\s*([\s\S]*?)\s*<\/tool_call>/g;
572
+ let match;
573
+ while ((match = regex.exec(content)) !== null) {
574
+ try {
575
+ const parsed = JSON.parse(match[1].trim());
576
+ if (parsed.name) {
577
+ calls.push({
578
+ id: `call_${Date.now()}_${calls.length}`,
579
+ function: {
580
+ name: parsed.name,
581
+ arguments: JSON.stringify(parsed.arguments || {}),
582
+ },
583
+ });
584
+ }
585
+ } catch { /* skip malformed tool call */ }
586
+ }
587
+ return calls;
588
+ }
589
+
408
590
  // --- Token Tracking ---
409
591
 
410
592
  _updateTokens(usage) {
@@ -137,6 +137,7 @@ export class LocalProvider extends Provider {
137
137
  let model = agent.model || 'qwen2.5-coder:7b';
138
138
  let apiBase = 'http://localhost:11434/v1';
139
139
  let apiKey = agent.apiKey || null;
140
+ let runtimeType = 'ollama';
140
141
 
141
142
  if (agent.apiBase) {
142
143
  apiBase = agent.apiBase;
@@ -153,6 +154,7 @@ export class LocalProvider extends Provider {
153
154
  if (rt) {
154
155
  apiBase = rt.endpoint.includes('/v1') ? rt.endpoint : `${rt.endpoint}/v1`;
155
156
  if (rt.apiKey) apiKey = rt.apiKey;
157
+ if (rt.type) runtimeType = rt.type;
156
158
  const rtModel = rt.models?.[0];
157
159
  model = rtModel?.id || rtModel?.name || ggufId;
158
160
  }
@@ -167,6 +169,7 @@ export class LocalProvider extends Provider {
167
169
  if (rt) {
168
170
  apiBase = rt.endpoint.includes('/v1') ? rt.endpoint : `${rt.endpoint}/v1`;
169
171
  if (rt.apiKey) apiKey = rt.apiKey;
172
+ if (rt.type) runtimeType = rt.type;
170
173
  model = modelId;
171
174
  }
172
175
  }
@@ -176,6 +179,7 @@ export class LocalProvider extends Provider {
176
179
  return {
177
180
  apiBase,
178
181
  model,
182
+ runtimeType,
179
183
  contextWindow,
180
184
  temperature: typeof agent.temperature === 'number' ? agent.temperature : 0.1,
181
185
  maxResponseTokens: 4096,
@@ -1,7 +1,7 @@
1
1
  // GROOVE — Tool Executor for Local Agent Loop
2
2
  // FSL-1.1-Apache-2.0 — see LICENSE
3
3
 
4
- import { readFileSync, writeFileSync, readdirSync, statSync, mkdirSync, existsSync } from 'fs';
4
+ import { readFileSync, writeFileSync, readdirSync, statSync, mkdirSync, existsSync, openSync, readSync, closeSync } from 'fs';
5
5
  import { execSync } from 'child_process';
6
6
  import { resolve, relative, dirname, sep } from 'path';
7
7
  import { minimatch } from 'minimatch';
@@ -190,11 +190,24 @@ export class ToolExecutor {
190
190
  if (stat.isDirectory()) {
191
191
  return { success: false, error: `Path is a directory, not a file: ${filePath}` };
192
192
  }
193
- // Guard against huge files
194
193
  if (stat.size > 5 * 1024 * 1024) {
195
194
  return { success: false, error: `File too large (${formatBytes(stat.size)}). Use offset/limit to read a section.` };
196
195
  }
197
196
 
197
+ // Detect binary files — check first 8KB for null bytes
198
+ if (stat.size > 0) {
199
+ const probe = Buffer.alloc(Math.min(8192, stat.size));
200
+ const fd = openSync(resolved, 'r');
201
+ try {
202
+ readSync(fd, probe, 0, probe.length, 0);
203
+ } finally {
204
+ closeSync(fd);
205
+ }
206
+ if (probe.includes(0)) {
207
+ return { success: false, error: `Binary file (${formatBytes(stat.size)}). Cannot read non-text files.` };
208
+ }
209
+ }
210
+
198
211
  const content = readFileSync(resolved, 'utf8');
199
212
  let lines = content.split('\n');
200
213
  const totalLines = lines.length;
@@ -207,8 +220,19 @@ export class ToolExecutor {
207
220
  lines = lines.slice(0, limit);
208
221
  }
209
222
 
223
+ // Auto-limit large files when no explicit limit was provided
224
+ const MAX_LINES = 2000;
225
+ let autoTruncated = false;
226
+ if (!limit && lines.length > MAX_LINES) {
227
+ lines = lines.slice(0, MAX_LINES);
228
+ autoTruncated = true;
229
+ }
230
+
210
231
  const numbered = lines.map((line, i) => `${startLine + i}\t${line}`).join('\n');
211
- return { success: true, result: numbered, meta: { totalLines } };
232
+ const result = autoTruncated
233
+ ? numbered + `\n\n... (showing ${MAX_LINES} of ${totalLines} lines — use offset/limit to read more)`
234
+ : numbered;
235
+ return { success: true, result, meta: { totalLines } };
212
236
  }
213
237
 
214
238
  writeFile({ path: filePath, content }) {
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@groove-dev/gui",
3
- "version": "0.27.150",
3
+ "version": "0.27.152",
4
4
  "description": "GROOVE GUI — visual agent control plane",
5
5
  "license": "FSL-1.1-Apache-2.0",
6
6
  "type": "module",