groove-dev 0.27.151 → 0.27.153
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/node_modules/@groove-dev/cli/package.json +1 -1
- package/node_modules/@groove-dev/daemon/package.json +1 -1
- package/node_modules/@groove-dev/daemon/src/agent-loop.js +99 -15
- package/node_modules/@groove-dev/daemon/src/llama-server.js +96 -3
- package/node_modules/@groove-dev/daemon/src/model-manager.js +52 -10
- package/node_modules/@groove-dev/daemon/src/routes/providers.js +11 -0
- package/node_modules/@groove-dev/daemon/src/tool-executor.js +27 -3
- package/node_modules/@groove-dev/gui/dist/assets/{index-CReKPWhY.js → index-BU_YTEZo.js} +220 -220
- package/node_modules/@groove-dev/gui/dist/assets/index-ChfYTsyc.css +1 -0
- package/node_modules/@groove-dev/gui/dist/index.html +2 -2
- package/node_modules/@groove-dev/gui/package.json +1 -1
- package/node_modules/@groove-dev/gui/src/components/agents/spawn-wizard.jsx +9 -2
- package/node_modules/@groove-dev/gui/src/components/lab/runtime-config.jsx +23 -8
- package/node_modules/@groove-dev/gui/src/components/settings/quick-connect.jsx +8 -1
- package/node_modules/@groove-dev/gui/src/stores/slices/providers-slice.js +13 -0
- package/node_modules/@groove-dev/gui/src/views/models.jsx +15 -2
- package/package.json +1 -1
- package/packages/cli/package.json +1 -1
- package/packages/daemon/package.json +1 -1
- package/packages/daemon/src/agent-loop.js +99 -15
- package/packages/daemon/src/llama-server.js +96 -3
- package/packages/daemon/src/model-manager.js +52 -10
- package/packages/daemon/src/routes/providers.js +11 -0
- package/packages/daemon/src/tool-executor.js +27 -3
- package/packages/gui/dist/assets/{index-CReKPWhY.js → index-BU_YTEZo.js} +220 -220
- package/packages/gui/dist/assets/index-ChfYTsyc.css +1 -0
- package/packages/gui/dist/index.html +2 -2
- package/packages/gui/package.json +1 -1
- package/packages/gui/src/components/agents/spawn-wizard.jsx +9 -2
- package/packages/gui/src/components/lab/runtime-config.jsx +23 -8
- package/packages/gui/src/components/settings/quick-connect.jsx +8 -1
- package/packages/gui/src/stores/slices/providers-slice.js +13 -0
- package/packages/gui/src/views/models.jsx +15 -2
- package/node_modules/@groove-dev/gui/dist/assets/index-CEkPsSAm.css +0 -1
- package/packages/gui/dist/assets/index-CEkPsSAm.css +0 -1
|
@@ -10,6 +10,11 @@ import { existsSync, readFileSync, writeFileSync, mkdirSync, unlinkSync } from '
|
|
|
10
10
|
import { resolve, dirname } from 'path';
|
|
11
11
|
import { TOOL_DEFINITIONS, ToolExecutor } from './tool-executor.js';
|
|
12
12
|
|
|
13
|
+
/**
 * Remove every closed `<think>…</think>` block from model output and trim
 * the surrounding whitespace.
 *
 * Falsy input (null, undefined, empty string) is handed back untouched so
 * callers can keep their own "no content" checks. A `<think>` tag that is
 * never closed is left in place, because the pattern only matches
 * complete pairs.
 *
 * @param {string|null|undefined} text - Raw assistant text.
 * @returns {string|null|undefined} Text without closed think blocks.
 */
function stripThinkTags(text) {
  if (text) {
    // Non-greedy match so several separate think blocks are removed
    // individually instead of swallowing the text between them.
    const withoutThinking = text.replace(/<think>[\s\S]*?<\/think>/g, '');
    return withoutThinking.trim();
  }
  return text;
}
|
|
17
|
+
|
|
13
18
|
export class AgentLoop extends EventEmitter {
|
|
14
19
|
constructor({ daemon, agent, loopConfig, logStream }) {
|
|
15
20
|
super();
|
|
@@ -145,6 +150,9 @@ export class AgentLoop extends EventEmitter {
|
|
|
145
150
|
this._updateTokens(usage);
|
|
146
151
|
}
|
|
147
152
|
|
|
153
|
+
// Strip thinking tags from display content (keep raw in conversation history)
|
|
154
|
+
const displayContent = stripThinkTags(content);
|
|
155
|
+
|
|
148
156
|
// In prompt-based mode, parse tool calls from the model's text
|
|
149
157
|
if (this.toolMode === 'prompt' && content) {
|
|
150
158
|
const parsed = this._parseToolCallsFromText(content);
|
|
@@ -153,7 +161,7 @@ export class AgentLoop extends EventEmitter {
|
|
|
153
161
|
}
|
|
154
162
|
}
|
|
155
163
|
|
|
156
|
-
// Append assistant message to conversation history
|
|
164
|
+
// Append assistant message to conversation history (raw content with thinking preserved)
|
|
157
165
|
const assistantMsg = { role: 'assistant' };
|
|
158
166
|
if (content) assistantMsg.content = content;
|
|
159
167
|
if (this.toolMode === 'native' && toolCalls?.length > 0) {
|
|
@@ -167,20 +175,21 @@ export class AgentLoop extends EventEmitter {
|
|
|
167
175
|
|
|
168
176
|
// No tool calls → turn complete, broadcast final text and go idle
|
|
169
177
|
if (!toolCalls || toolCalls.length === 0) {
|
|
170
|
-
if (
|
|
171
|
-
this._writeLog({ type: 'assistant', content:
|
|
178
|
+
if (displayContent) {
|
|
179
|
+
this._writeLog({ type: 'assistant', content: displayContent.slice(0, 2000) });
|
|
172
180
|
}
|
|
173
|
-
this.emit('output', { type: 'result', subtype: 'assistant', data:
|
|
181
|
+
this.emit('output', { type: 'result', subtype: 'assistant', data: displayContent || 'Turn complete', turns: this.turns });
|
|
174
182
|
break;
|
|
175
183
|
}
|
|
176
184
|
|
|
177
185
|
// Has tool calls — broadcast text before executing tools (if model sent text + tools)
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
this.
|
|
186
|
+
let preToolText = displayContent;
|
|
187
|
+
if (this.toolMode === 'prompt') {
|
|
188
|
+
preToolText = stripThinkTags((content || '').replace(/<tool_call>[\s\S]*?<\/tool_call>/g, ''));
|
|
189
|
+
}
|
|
190
|
+
if (preToolText) {
|
|
191
|
+
this._writeLog({ type: 'assistant', content: preToolText.slice(0, 2000) });
|
|
192
|
+
this.emit('output', { type: 'activity', subtype: 'assistant', data: preToolText });
|
|
184
193
|
}
|
|
185
194
|
|
|
186
195
|
// Execute each tool call
|
|
@@ -188,13 +197,29 @@ export class AgentLoop extends EventEmitter {
|
|
|
188
197
|
if (!this.running) break;
|
|
189
198
|
|
|
190
199
|
let args;
|
|
200
|
+
let parseError = null;
|
|
191
201
|
try {
|
|
192
202
|
args = JSON.parse(call.function.arguments);
|
|
193
|
-
} catch {
|
|
203
|
+
} catch (e) {
|
|
204
|
+
parseError = e.message;
|
|
194
205
|
args = {};
|
|
195
206
|
}
|
|
196
207
|
|
|
197
208
|
const toolName = call.function.name;
|
|
209
|
+
|
|
210
|
+
// Report malformed JSON back to the model instead of silently failing
|
|
211
|
+
if (parseError) {
|
|
212
|
+
const errMsg = `Invalid JSON in tool arguments: ${parseError}. Raw: ${call.function.arguments.slice(0, 200)}`;
|
|
213
|
+
this._writeLog({ type: 'tool_result', tool: toolName, success: false, output: errMsg });
|
|
214
|
+
this.emit('output', { type: 'activity', subtype: 'tool_result', data: [{ type: 'tool_result', name: toolName, success: false, output: errMsg }] });
|
|
215
|
+
if (this.toolMode === 'native') {
|
|
216
|
+
this.messages.push({ role: 'tool', tool_call_id: call.id, content: `Error: ${errMsg}` });
|
|
217
|
+
} else {
|
|
218
|
+
this.messages.push({ role: 'user', content: `<tool_result name="${toolName}">\nError: ${errMsg}\n</tool_result>` });
|
|
219
|
+
}
|
|
220
|
+
continue;
|
|
221
|
+
}
|
|
222
|
+
|
|
198
223
|
const inputSummary = this._summarizeToolInput(toolName, args);
|
|
199
224
|
|
|
200
225
|
// Log + broadcast tool invocation
|
|
@@ -226,8 +251,12 @@ export class AgentLoop extends EventEmitter {
|
|
|
226
251
|
this.daemon.classifier.addEvent(this.agent.id, { type: 'error', text: result.error });
|
|
227
252
|
}
|
|
228
253
|
|
|
229
|
-
// Append tool result to conversation
|
|
230
|
-
const
|
|
254
|
+
// Append tool result to conversation — cap size to protect context window
|
|
255
|
+
const MAX_RESULT_CHARS = 30000;
|
|
256
|
+
let resultContent = result.success ? (result.result || 'Done.') : `Error: ${result.error}`;
|
|
257
|
+
if (resultContent.length > MAX_RESULT_CHARS) {
|
|
258
|
+
resultContent = resultContent.slice(0, MAX_RESULT_CHARS) + '\n... (result truncated — use offset/limit for large files, or pipe commands through head/tail)';
|
|
259
|
+
}
|
|
231
260
|
if (this.toolMode === 'native') {
|
|
232
261
|
this.messages.push({
|
|
233
262
|
role: 'tool',
|
|
@@ -359,6 +388,10 @@ export class AgentLoop extends EventEmitter {
|
|
|
359
388
|
let finishReason = null;
|
|
360
389
|
let buffer = '';
|
|
361
390
|
|
|
391
|
+
// State machine for suppressing <think> blocks during streaming
|
|
392
|
+
let insideThink = false;
|
|
393
|
+
let streamBuf = '';
|
|
394
|
+
|
|
362
395
|
const reader = response.body.getReader();
|
|
363
396
|
const decoder = new TextDecoder();
|
|
364
397
|
|
|
@@ -388,10 +421,56 @@ export class AgentLoop extends EventEmitter {
|
|
|
388
421
|
if (choice.finish_reason) finishReason = choice.finish_reason;
|
|
389
422
|
const delta = choice.delta || {};
|
|
390
423
|
|
|
391
|
-
//
|
|
424
|
+
// reasoning_content: separate thinking field (vLLM, some OpenAI-compat servers)
|
|
425
|
+
// Capture for logging but don't stream to GUI
|
|
426
|
+
if (delta.reasoning_content || delta.reasoning) {
|
|
427
|
+
// Accumulate in content so it's in the conversation history
|
|
428
|
+
// but don't stream it to the GUI
|
|
429
|
+
content += delta.reasoning_content || delta.reasoning;
|
|
430
|
+
}
|
|
431
|
+
|
|
432
|
+
// Stream text tokens to GUI in real-time, suppressing <think> blocks
|
|
392
433
|
if (delta.content) {
|
|
393
434
|
content += delta.content;
|
|
394
|
-
|
|
435
|
+
streamBuf += delta.content;
|
|
436
|
+
|
|
437
|
+
// Process buffer — emit non-think content, suppress think content
|
|
438
|
+
let safety = 0;
|
|
439
|
+
while (streamBuf.length > 0 && safety++ < 100) {
|
|
440
|
+
if (insideThink) {
|
|
441
|
+
const closeIdx = streamBuf.indexOf('</think>');
|
|
442
|
+
if (closeIdx >= 0) {
|
|
443
|
+
insideThink = false;
|
|
444
|
+
streamBuf = streamBuf.slice(closeIdx + 8);
|
|
445
|
+
} else {
|
|
446
|
+
break; // wait for more data
|
|
447
|
+
}
|
|
448
|
+
} else {
|
|
449
|
+
const openIdx = streamBuf.indexOf('<think>');
|
|
450
|
+
if (openIdx >= 0) {
|
|
451
|
+
const before = streamBuf.slice(0, openIdx);
|
|
452
|
+
if (before) {
|
|
453
|
+
this.emit('output', { type: 'activity', subtype: 'stream', data: before });
|
|
454
|
+
}
|
|
455
|
+
insideThink = true;
|
|
456
|
+
streamBuf = streamBuf.slice(openIdx + 7);
|
|
457
|
+
} else {
|
|
458
|
+
// Hold back bytes that could be the start of a <think> tag
|
|
459
|
+
let safeEnd = streamBuf.length;
|
|
460
|
+
for (let i = Math.min(6, streamBuf.length); i >= 1; i--) {
|
|
461
|
+
if ('<think>'.startsWith(streamBuf.slice(-i))) {
|
|
462
|
+
safeEnd = streamBuf.length - i;
|
|
463
|
+
break;
|
|
464
|
+
}
|
|
465
|
+
}
|
|
466
|
+
if (safeEnd > 0) {
|
|
467
|
+
this.emit('output', { type: 'activity', subtype: 'stream', data: streamBuf.slice(0, safeEnd) });
|
|
468
|
+
}
|
|
469
|
+
streamBuf = streamBuf.slice(safeEnd);
|
|
470
|
+
break;
|
|
471
|
+
}
|
|
472
|
+
}
|
|
473
|
+
}
|
|
395
474
|
}
|
|
396
475
|
|
|
397
476
|
// Accumulate tool call deltas
|
|
@@ -419,6 +498,11 @@ export class AgentLoop extends EventEmitter {
|
|
|
419
498
|
return null;
|
|
420
499
|
}
|
|
421
500
|
|
|
501
|
+
// Flush remaining stream buffer (e.g. unclosed <think> — treat as display content)
|
|
502
|
+
if (streamBuf) {
|
|
503
|
+
this.emit('output', { type: 'activity', subtype: 'stream', data: streamBuf });
|
|
504
|
+
}
|
|
505
|
+
|
|
422
506
|
return {
|
|
423
507
|
content: content || null,
|
|
424
508
|
toolCalls: toolCalls.size > 0 ? Array.from(toolCalls.values()) : null,
|
|
@@ -5,7 +5,10 @@
|
|
|
5
5
|
// Each model gets its own server on a unique port.
|
|
6
6
|
// Auto-starts when an agent needs a GGUF model, auto-stops when idle.
|
|
7
7
|
|
|
8
|
-
import { spawn, execSync } from 'child_process';
|
|
8
|
+
import { spawn, execSync, execFileSync } from 'child_process';
|
|
9
|
+
import { existsSync, mkdirSync, chmodSync } from 'fs';
|
|
10
|
+
import { resolve } from 'path';
|
|
11
|
+
import { homedir } from 'os';
|
|
9
12
|
|
|
10
13
|
const BASE_PORT = 8081;
|
|
11
14
|
const MAX_SERVERS = 5;
|
|
@@ -25,10 +28,98 @@ export class LlamaServerManager {
|
|
|
25
28
|
execSync('which llama-server', { stdio: 'ignore' });
|
|
26
29
|
return true;
|
|
27
30
|
} catch {
|
|
28
|
-
|
|
31
|
+
// Check common manual install locations
|
|
32
|
+
const paths = [
|
|
33
|
+
resolve(homedir(), '.local', 'bin', 'llama-server'),
|
|
34
|
+
resolve(homedir(), '.groove', 'bin', 'llama-server'),
|
|
35
|
+
'/usr/local/bin/llama-server',
|
|
36
|
+
];
|
|
37
|
+
return paths.some(p => existsSync(p));
|
|
29
38
|
}
|
|
30
39
|
}
|
|
31
40
|
|
|
41
|
+
static getLlamaServerPath() {
|
|
42
|
+
try {
|
|
43
|
+
return execSync('which llama-server', { stdio: 'pipe', encoding: 'utf8' }).trim();
|
|
44
|
+
} catch {
|
|
45
|
+
const paths = [
|
|
46
|
+
resolve(homedir(), '.local', 'bin', 'llama-server'),
|
|
47
|
+
resolve(homedir(), '.groove', 'bin', 'llama-server'),
|
|
48
|
+
'/usr/local/bin/llama-server',
|
|
49
|
+
];
|
|
50
|
+
return paths.find(p => existsSync(p)) || 'llama-server';
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
static async install() {
|
|
55
|
+
const platform = process.platform;
|
|
56
|
+
|
|
57
|
+
if (platform === 'darwin') {
|
|
58
|
+
try {
|
|
59
|
+
execSync('which brew', { stdio: 'ignore' });
|
|
60
|
+
} catch {
|
|
61
|
+
throw new Error('Homebrew not found. Install it from https://brew.sh then retry.');
|
|
62
|
+
}
|
|
63
|
+
execSync('brew install llama.cpp', { stdio: 'pipe', timeout: 600000 });
|
|
64
|
+
return { method: 'brew', path: execSync('which llama-server', { encoding: 'utf8', stdio: 'pipe' }).trim() };
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
if (platform === 'linux') {
|
|
68
|
+
const installDir = resolve(homedir(), '.local', 'bin');
|
|
69
|
+
mkdirSync(installDir, { recursive: true });
|
|
70
|
+
|
|
71
|
+
const arch = process.arch === 'arm64' ? 'arm64' : 'x64';
|
|
72
|
+
const hasCuda = (() => { try { execSync('which nvidia-smi', { stdio: 'ignore' }); return true; } catch { return false; } })();
|
|
73
|
+
|
|
74
|
+
const resp = await fetch('https://api.github.com/repos/ggml-org/llama.cpp/releases/latest', {
|
|
75
|
+
headers: { 'User-Agent': 'groove-dev' },
|
|
76
|
+
});
|
|
77
|
+
if (!resp.ok) throw new Error(`GitHub API error: ${resp.status}`);
|
|
78
|
+
const release = await resp.json();
|
|
79
|
+
|
|
80
|
+
const suffix = hasCuda ? `ubuntu-${arch}-cuda` : `ubuntu-${arch}`;
|
|
81
|
+
let asset = release.assets.find(a => a.name.includes(suffix) && a.name.endsWith('.zip'));
|
|
82
|
+
if (!asset && hasCuda) {
|
|
83
|
+
asset = release.assets.find(a => a.name.includes(`ubuntu-${arch}`) && a.name.endsWith('.zip'));
|
|
84
|
+
}
|
|
85
|
+
if (!asset) {
|
|
86
|
+
asset = release.assets.find(a => a.name.includes('ubuntu') && a.name.includes(arch) && a.name.endsWith('.zip'));
|
|
87
|
+
}
|
|
88
|
+
if (!asset) throw new Error(`No pre-built binary found for linux-${arch}. Build from source: https://github.com/ggml-org/llama.cpp#build`);
|
|
89
|
+
|
|
90
|
+
const tmpZip = `/tmp/groove-llama-${Date.now()}.zip`;
|
|
91
|
+
const tmpDir = `/tmp/groove-llama-extract-${Date.now()}`;
|
|
92
|
+
|
|
93
|
+
execSync(`curl -fSL "${asset.browser_download_url}" -o "${tmpZip}"`, { stdio: 'pipe', timeout: 600000 });
|
|
94
|
+
execSync(`unzip -o "${tmpZip}" -d "${tmpDir}"`, { stdio: 'pipe', timeout: 60000 });
|
|
95
|
+
|
|
96
|
+
const findResult = execSync(`find "${tmpDir}" -name llama-server -type f`, { encoding: 'utf8', stdio: 'pipe' }).trim();
|
|
97
|
+
const binPath = findResult.split('\n')[0];
|
|
98
|
+
if (!binPath) throw new Error('llama-server binary not found in release archive');
|
|
99
|
+
|
|
100
|
+
const destPath = resolve(installDir, 'llama-server');
|
|
101
|
+
execSync(`cp "${binPath}" "${destPath}"`, { stdio: 'pipe' });
|
|
102
|
+
chmodSync(destPath, 0o755);
|
|
103
|
+
|
|
104
|
+
// Copy shared libraries if present
|
|
105
|
+
try {
|
|
106
|
+
const libDir = resolve(binPath, '..', '..', 'lib');
|
|
107
|
+
if (existsSync(libDir)) {
|
|
108
|
+
const userLibDir = resolve(homedir(), '.local', 'lib');
|
|
109
|
+
mkdirSync(userLibDir, { recursive: true });
|
|
110
|
+
execSync(`cp -r "${libDir}/"* "${userLibDir}/"`, { stdio: 'pipe' });
|
|
111
|
+
}
|
|
112
|
+
} catch { /* libs are optional */ }
|
|
113
|
+
|
|
114
|
+
// Cleanup
|
|
115
|
+
try { execSync(`rm -rf "${tmpZip}" "${tmpDir}"`, { stdio: 'ignore' }); } catch { /* best-effort */ }
|
|
116
|
+
|
|
117
|
+
return { method: 'github-release', path: destPath, cuda: hasCuda, release: release.tag_name };
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
throw new Error(`Automatic install not supported on ${platform}. Install llama-server manually: https://github.com/ggml-org/llama.cpp#build`);
|
|
121
|
+
}
|
|
122
|
+
|
|
32
123
|
// --- Server Lifecycle ---
|
|
33
124
|
|
|
34
125
|
/**
|
|
@@ -74,9 +165,11 @@ export class LlamaServerManager {
|
|
|
74
165
|
args.push('--flash-attn', 'auto');
|
|
75
166
|
}
|
|
76
167
|
|
|
77
|
-
const
|
|
168
|
+
const serverBin = LlamaServerManager.getLlamaServerPath();
|
|
169
|
+
const proc = spawn(serverBin, args, {
|
|
78
170
|
stdio: ['ignore', 'pipe', 'pipe'],
|
|
79
171
|
detached: false,
|
|
172
|
+
env: { ...process.env, LD_LIBRARY_PATH: [resolve(homedir(), '.local', 'lib'), process.env.LD_LIBRARY_PATH].filter(Boolean).join(':') },
|
|
80
173
|
});
|
|
81
174
|
|
|
82
175
|
if (!proc.pid) {
|
|
@@ -69,7 +69,6 @@ export class ModelManager {
|
|
|
69
69
|
async search(query, { limit = 20, sort = 'downloads' } = {}) {
|
|
70
70
|
const params = new URLSearchParams({
|
|
71
71
|
search: query,
|
|
72
|
-
filter: 'gguf',
|
|
73
72
|
sort,
|
|
74
73
|
direction: '-1',
|
|
75
74
|
limit: String(limit),
|
|
@@ -83,15 +82,20 @@ export class ModelManager {
|
|
|
83
82
|
if (!res.ok) throw new Error(`HuggingFace API error: ${res.status}`);
|
|
84
83
|
const models = await res.json();
|
|
85
84
|
|
|
86
|
-
return models.map((m) =>
|
|
87
|
-
id
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
85
|
+
return models.map((m) => {
|
|
86
|
+
const id = m.modelId || m.id;
|
|
87
|
+
const tags = m.tags || [];
|
|
88
|
+
return {
|
|
89
|
+
id,
|
|
90
|
+
name: id.split('/').pop() || id,
|
|
91
|
+
author: id.split('/')[0] || '',
|
|
92
|
+
downloads: m.downloads || 0,
|
|
93
|
+
likes: m.likes || 0,
|
|
94
|
+
tags,
|
|
95
|
+
lastModified: m.lastModified,
|
|
96
|
+
recommendedRuntimes: inferRuntimes(id, tags),
|
|
97
|
+
};
|
|
98
|
+
});
|
|
95
99
|
}
|
|
96
100
|
|
|
97
101
|
async getModelFiles(repoId) {
|
|
@@ -409,3 +413,41 @@ function classifyTier(params, quant) {
|
|
|
409
413
|
if (billions >= 10) return 'medium';
|
|
410
414
|
return 'light';
|
|
411
415
|
}
|
|
416
|
+
|
|
417
|
+
/**
 * Guess which inference runtimes suit a model repository, based on its
 * repo id and tag list.
 *
 * A format marker (gguf, mlx, gptq, awq) counts when it appears as a tag
 * or in the repo id preceded by `-` or `_`. Matching is case-insensitive.
 *
 * @param {string} repoId - Repository id, e.g. "author/model-GGUF".
 * @param {string[]} [tags] - Repository tags; a missing list or non-string
 *   entries are ignored.
 * @returns {string[]} Runtime names in the order they were inferred
 *   ('llama.cpp', 'MLX', 'vLLM', 'TGI'); empty when nothing matched.
 */
function inferRuntimes(repoId, tags) {
  const lower = String(repoId || '').toLowerCase();
  const tagSet = new Set(
    (Array.isArray(tags) ? tags : [])
      .filter((t) => typeof t === 'string')
      .map((t) => t.toLowerCase()),
  );
  const runtimes = new Set();

  // One rule for every format marker so `-gptq` and `_gptq` are treated
  // the same way `-gguf` and `_gguf` are.
  const hasMarker = (marker) =>
    tagSet.has(marker) || lower.includes(`-${marker}`) || lower.includes(`_${marker}`);

  // GGUF → llama.cpp and (implicitly) Ollama
  if (hasMarker('gguf')) {
    runtimes.add('llama.cpp');
  }

  // MLX-optimized models
  if (hasMarker('mlx')) {
    runtimes.add('MLX');
  }

  // GPTQ / AWQ quantized → vLLM handles these well
  if (hasMarker('gptq') || hasMarker('awq')) {
    runtimes.add('vLLM');
  }

  // SafeTensors / standard transformer weights → vLLM, TGI, MLX
  if (tagSet.has('safetensors') || tagSet.has('transformers')) {
    runtimes.add('vLLM');
    runtimes.add('TGI');
    runtimes.add('MLX'); // Set ignores the add if MLX was already inferred
  }

  // If nothing matched, infer from general model traits
  if (runtimes.size === 0 && (tagSet.has('pytorch') || tagSet.has('tf') || tagSet.has('jax'))) {
    runtimes.add('vLLM');
    runtimes.add('TGI');
  }

  return [...runtimes];
}
|
|
@@ -713,6 +713,17 @@ export function registerProviderRoutes(app, daemon) {
|
|
|
713
713
|
res.json(daemon.llamaServer.getStatus());
|
|
714
714
|
});
|
|
715
715
|
|
|
716
|
+
app.post('/api/llama/install', async (req, res) => {
|
|
717
|
+
try {
|
|
718
|
+
const { LlamaServerManager } = await import('../llama-server.js');
|
|
719
|
+
const result = await LlamaServerManager.install();
|
|
720
|
+
daemon.modelLab.refreshInstalledTools();
|
|
721
|
+
res.json({ success: true, ...result });
|
|
722
|
+
} catch (err) {
|
|
723
|
+
res.status(500).json({ error: err.message });
|
|
724
|
+
}
|
|
725
|
+
});
|
|
726
|
+
|
|
716
727
|
app.get('/api/mlx/status', (req, res) => {
|
|
717
728
|
res.json(daemon.mlxServer.getStatus());
|
|
718
729
|
});
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
// GROOVE — Tool Executor for Local Agent Loop
|
|
2
2
|
// FSL-1.1-Apache-2.0 — see LICENSE
|
|
3
3
|
|
|
4
|
-
import { readFileSync, writeFileSync, readdirSync, statSync, mkdirSync, existsSync } from 'fs';
|
|
4
|
+
import { readFileSync, writeFileSync, readdirSync, statSync, mkdirSync, existsSync, openSync, readSync, closeSync } from 'fs';
|
|
5
5
|
import { execSync } from 'child_process';
|
|
6
6
|
import { resolve, relative, dirname, sep } from 'path';
|
|
7
7
|
import { minimatch } from 'minimatch';
|
|
@@ -190,11 +190,24 @@ export class ToolExecutor {
|
|
|
190
190
|
if (stat.isDirectory()) {
|
|
191
191
|
return { success: false, error: `Path is a directory, not a file: ${filePath}` };
|
|
192
192
|
}
|
|
193
|
-
// Guard against huge files
|
|
194
193
|
if (stat.size > 5 * 1024 * 1024) {
|
|
195
194
|
return { success: false, error: `File too large (${formatBytes(stat.size)}). Use offset/limit to read a section.` };
|
|
196
195
|
}
|
|
197
196
|
|
|
197
|
+
// Detect binary files — check first 8KB for null bytes
|
|
198
|
+
if (stat.size > 0) {
|
|
199
|
+
const probe = Buffer.alloc(Math.min(8192, stat.size));
|
|
200
|
+
const fd = openSync(resolved, 'r');
|
|
201
|
+
try {
|
|
202
|
+
readSync(fd, probe, 0, probe.length, 0);
|
|
203
|
+
} finally {
|
|
204
|
+
closeSync(fd);
|
|
205
|
+
}
|
|
206
|
+
if (probe.includes(0)) {
|
|
207
|
+
return { success: false, error: `Binary file (${formatBytes(stat.size)}). Cannot read non-text files.` };
|
|
208
|
+
}
|
|
209
|
+
}
|
|
210
|
+
|
|
198
211
|
const content = readFileSync(resolved, 'utf8');
|
|
199
212
|
let lines = content.split('\n');
|
|
200
213
|
const totalLines = lines.length;
|
|
@@ -207,8 +220,19 @@ export class ToolExecutor {
|
|
|
207
220
|
lines = lines.slice(0, limit);
|
|
208
221
|
}
|
|
209
222
|
|
|
223
|
+
// Auto-limit large files when no explicit limit was provided
|
|
224
|
+
const MAX_LINES = 2000;
|
|
225
|
+
let autoTruncated = false;
|
|
226
|
+
if (!limit && lines.length > MAX_LINES) {
|
|
227
|
+
lines = lines.slice(0, MAX_LINES);
|
|
228
|
+
autoTruncated = true;
|
|
229
|
+
}
|
|
230
|
+
|
|
210
231
|
const numbered = lines.map((line, i) => `${startLine + i}\t${line}`).join('\n');
|
|
211
|
-
|
|
232
|
+
const result = autoTruncated
|
|
233
|
+
? numbered + `\n\n... (showing ${MAX_LINES} of ${totalLines} lines — use offset/limit to read more)`
|
|
234
|
+
: numbered;
|
|
235
|
+
return { success: true, result, meta: { totalLines } };
|
|
212
236
|
}
|
|
213
237
|
|
|
214
238
|
writeFile({ path: filePath, content }) {
|