@sheepbun/yips 0.1.1 → 0.1.46
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +52 -0
- package/bin/yips.js +15 -0
- package/package.json +21 -128
- package/postinstall.js +50 -0
- package/dist/agent/commands/command-catalog.js +0 -243
- package/dist/agent/commands/commands.js +0 -418
- package/dist/agent/conductor.js +0 -118
- package/dist/agent/context/code-context.js +0 -68
- package/dist/agent/context/memory-store.js +0 -159
- package/dist/agent/context/session-store.js +0 -211
- package/dist/agent/protocol/tool-protocol.js +0 -160
- package/dist/agent/skills/skills.js +0 -327
- package/dist/agent/tools/tool-executor.js +0 -415
- package/dist/agent/tools/tool-safety.js +0 -52
- package/dist/app/index.js +0 -35
- package/dist/app/repl.js +0 -105
- package/dist/app/update-check.js +0 -132
- package/dist/app/version.js +0 -51
- package/dist/code-context.js +0 -68
- package/dist/colors.js +0 -204
- package/dist/command-catalog.js +0 -242
- package/dist/commands.js +0 -350
- package/dist/conductor.js +0 -94
- package/dist/config/config.js +0 -335
- package/dist/config/hooks.js +0 -187
- package/dist/config.js +0 -335
- package/dist/downloader-state.js +0 -302
- package/dist/downloader-ui.js +0 -289
- package/dist/gateway/adapters/discord.js +0 -108
- package/dist/gateway/adapters/formatting.js +0 -96
- package/dist/gateway/adapters/telegram.js +0 -106
- package/dist/gateway/adapters/types.js +0 -2
- package/dist/gateway/adapters/whatsapp.js +0 -124
- package/dist/gateway/auth-policy.js +0 -66
- package/dist/gateway/core.js +0 -87
- package/dist/gateway/headless-conductor.js +0 -328
- package/dist/gateway/message-router.js +0 -23
- package/dist/gateway/rate-limiter.js +0 -48
- package/dist/gateway/runtime/backend-policy.js +0 -18
- package/dist/gateway/runtime/discord-bot.js +0 -104
- package/dist/gateway/runtime/discord-main.js +0 -69
- package/dist/gateway/session-manager.js +0 -77
- package/dist/gateway/types.js +0 -2
- package/dist/hardware.js +0 -92
- package/dist/hooks.js +0 -187
- package/dist/index.js +0 -34
- package/dist/input-engine.js +0 -250
- package/dist/llama-client.js +0 -227
- package/dist/llama-server.js +0 -620
- package/dist/llm/llama-client.js +0 -227
- package/dist/llm/llama-server.js +0 -620
- package/dist/llm/token-counter.js +0 -47
- package/dist/memory-store.js +0 -159
- package/dist/messages.js +0 -59
- package/dist/model-downloader.js +0 -382
- package/dist/model-manager-state.js +0 -118
- package/dist/model-manager-ui.js +0 -194
- package/dist/model-manager.js +0 -190
- package/dist/models/hardware.js +0 -92
- package/dist/models/model-downloader.js +0 -382
- package/dist/models/model-manager.js +0 -190
- package/dist/prompt-box.js +0 -78
- package/dist/prompt-composer.js +0 -498
- package/dist/repl.js +0 -105
- package/dist/session-store.js +0 -211
- package/dist/spinner.js +0 -76
- package/dist/title-box.js +0 -388
- package/dist/token-counter.js +0 -47
- package/dist/tool-executor.js +0 -415
- package/dist/tool-protocol.js +0 -121
- package/dist/tool-safety.js +0 -52
- package/dist/tui/app.js +0 -2553
- package/dist/tui/startup.js +0 -56
- package/dist/tui-input-routing.js +0 -53
- package/dist/tui.js +0 -51
- package/dist/types/app-types.js +0 -2
- package/dist/types.js +0 -2
- package/dist/ui/colors.js +0 -204
- package/dist/ui/downloader/downloader-state.js +0 -302
- package/dist/ui/downloader/downloader-ui.js +0 -289
- package/dist/ui/input/input-engine.js +0 -250
- package/dist/ui/input/tui-input-routing.js +0 -53
- package/dist/ui/input/vt-session.js +0 -168
- package/dist/ui/messages.js +0 -59
- package/dist/ui/model-manager/model-manager-state.js +0 -118
- package/dist/ui/model-manager/model-manager-ui.js +0 -194
- package/dist/ui/prompt/prompt-box.js +0 -78
- package/dist/ui/prompt/prompt-composer.js +0 -498
- package/dist/ui/spinner.js +0 -76
- package/dist/ui/title-box.js +0 -388
- package/dist/ui/tui/app.js +0 -6
- package/dist/ui/tui/autocomplete.js +0 -85
- package/dist/ui/tui/constants.js +0 -18
- package/dist/ui/tui/history.js +0 -29
- package/dist/ui/tui/layout.js +0 -341
- package/dist/ui/tui/runtime-core.js +0 -2584
- package/dist/ui/tui/runtime-utils.js +0 -53
- package/dist/ui/tui/start-tui.js +0 -54
- package/dist/ui/tui/startup.js +0 -56
- package/dist/version.js +0 -51
- package/dist/vt-session.js +0 -168
- package/install.sh +0 -457
package/dist/llm/llama-server.js
DELETED
|
@@ -1,620 +0,0 @@
|
|
|
1
|
-
// Compiled CommonJS module that manages a local llama.cpp `llama-server`
// process: binary/model discovery, port-conflict handling, startup and
// health monitoring. Exports are assigned up front (function declarations
// below are hoisted).
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.isLocalLlamaEndpoint = isLocalLlamaEndpoint;
exports.checkLlamaHealth = checkLlamaHealth;
exports.stopLlamaServer = stopLlamaServer;
exports.resetLlamaForFreshSession = resetLlamaForFreshSession;
exports.startLlamaServer = startLlamaServer;
exports.ensureLlamaReady = ensureLlamaReady;
exports.formatLlamaStartupFailure = formatLlamaStartupFailure;
const node_child_process_1 = require("node:child_process");
const node_fs_1 = require("node:fs");
const promises_1 = require("node:fs/promises");
const node_path_1 = require("node:path");
// Project-internal subpath import (package "imports" map) — provides
// getSystemSpecs(); see getOptimalContextSize below.
const hardware_1 = require("#models/hardware");
// Health endpoint exposed by llama-server.
const HEALTH_PATH = "/health";
// How long to wait for the spawned server to report healthy.
const STARTUP_TIMEOUT_MS = 60_000;
// Per-request timeout for a single health probe.
const HEALTH_TIMEOUT_MS = 2_000;
// Delay between consecutive health probes during startup.
const HEALTH_RETRY_INTERVAL_MS = 500;
// Maximum number of stderr lines retained for diagnostics.
const STDERR_TAIL_LIMIT = 120;
// Tracks the llama-server child this module spawned (null when none).
let runningState = null;
// Hostnames treated as "local" endpoints (plus any 127.x.x.x address).
const LOCAL_ENDPOINT_HOSTS = new Set([
    "localhost",
    "127.0.0.1",
    "0.0.0.0",
    "::1"
]);
|
|
27
|
-
/** Resolves after `ms` milliseconds. */
function sleep(ms) {
    return new Promise((done) => setTimeout(done, ms));
}
|
|
32
|
-
/** True when `path` exists and is readable by this process. */
async function exists(path) {
    try {
        await (0, promises_1.access)(path, node_fs_1.constants.R_OK);
    }
    catch {
        return false;
    }
    return true;
}
|
|
41
|
-
/** Extracts a human-readable message from any thrown value. */
function toErrorMessage(error) {
    if (error instanceof Error) {
        return error.message;
    }
    return String(error);
}
|
|
44
|
-
/** Configured llama host, falling back to loopback when blank. */
function resolveHost(config) {
    const trimmed = config.llamaHost.trim();
    if (trimmed.length === 0) {
        return "127.0.0.1";
    }
    return trimmed;
}
|
|
48
|
-
/** Configured llama port, falling back to 8080 for non-positive/non-integer values. */
function resolvePort(config) {
    const port = config.llamaPort;
    if (Number.isInteger(port) && port > 0) {
        return port;
    }
    return 8080;
}
|
|
51
|
-
/**
 * Base URL for the llama.cpp endpoint.
 *
 * Prefers a non-empty `config.llamaBaseUrl` (trailing slashes stripped);
 * otherwise builds `http://host:port` from resolveHost/resolvePort.
 *
 * Bug fix: the original called `trimmed.length` on the result of
 * `config.llamaBaseUrl?.trim()`, which is `undefined` when llamaBaseUrl is
 * absent — a TypeError. Default to "" so the fallback path is taken.
 */
function buildBaseUrl(config) {
    const host = resolveHost(config);
    const port = resolvePort(config);
    const fallback = `http://${host}:${port}`;
    const trimmed = config.llamaBaseUrl?.trim() ?? "";
    return trimmed.length > 0 ? trimmed.replace(/\/+$/, "") : fallback;
}
|
|
58
|
-
/** True when `hostname` (case-insensitive, with optional IPv6 brackets) is a loopback/local address. */
function isLocalHostname(hostname) {
    const stripped = hostname.trim().toLowerCase().replace(/^\[(.*)\]$/u, "$1");
    if (stripped.startsWith("127.")) {
        return true;
    }
    return LOCAL_ENDPOINT_HOSTS.has(stripped);
}
|
|
62
|
-
/** True when the configured base URL parses and points at a local host; false on unparseable URLs. */
function isLocalLlamaEndpoint(config) {
    const endpoint = buildBaseUrl(config);
    let parsed;
    try {
        parsed = new URL(endpoint);
    }
    catch {
        return false;
    }
    return isLocalHostname(parsed.hostname);
}
|
|
72
|
-
// Builds the dependency bag consumed by start/stop/terminate helpers.
// Every member can be replaced via `overrides`, which makes the process
// management code testable without spawning or signalling real processes.
function buildDefaultDeps(overrides) {
    return {
        checkHealth: checkLlamaHealth,
        inspectPortOwner: inspectPortOwner,
        sleep,
        spawnProcess: node_child_process_1.spawn,
        // Delivers a POSIX signal; throws (e.g. ESRCH) if the pid is gone.
        sendSignal: (pid, signal) => {
            process.kill(pid, signal);
        },
        // Signal 0 is a liveness probe: no signal is delivered, but the
        // call throws when the process does not exist.
        isPidRunning: (pid) => {
            try {
                process.kill(pid, 0);
                return true;
            }
            catch {
                return false;
            }
        },
        // process.getuid is absent on Windows; report null there.
        currentUid: () => {
            return typeof process.getuid === "function" ? process.getuid() : null;
        },
        ...overrides
    };
}
|
|
96
|
-
// Locates the llama-server executable, trying in order:
//   1. LLAMA_SERVER_PATH environment variable,
//   2. config.llamaServerPath,
//   3. `which llama-server` on PATH,
//   4. ~/llama.cpp/build/bin/llama-server.
// Returns the first readable path, or null when none is found.
async function resolveServerBinaryPath(config) {
    const envPath = process.env["LLAMA_SERVER_PATH"]?.trim();
    if (envPath && (await exists(envPath))) {
        return envPath;
    }
    const configPath = config.llamaServerPath.trim();
    if (configPath && (await exists(configPath))) {
        return configPath;
    }
    // `which` is POSIX-only; on systems without it status is non-zero and
    // the lookup simply falls through to the home-directory fallback.
    const whichResult = (0, node_child_process_1.spawnSync)("which", ["llama-server"], {
        encoding: "utf8",
        stdio: ["ignore", "pipe", "ignore"]
    });
    if (whichResult.status === 0) {
        const whichPath = String(whichResult.stdout ?? "").trim();
        if (whichPath.length > 0 && (await exists(whichPath))) {
            return whichPath;
        }
    }
    const fallback = (0, node_path_1.resolve)(process.env["HOME"] ?? "~", "llama.cpp", "build", "bin", "llama-server");
    if (await exists(fallback)) {
        return fallback;
    }
    return null;
}
|
|
121
|
-
// Recursively collects paths of *.gguf model files under `root`.
// Any filesystem error during the walk (missing directory, permission
// denied) aborts the scan and yields an empty list.
async function collectGgufPaths(root) {
    const discovered = [];
    async function walk(path) {
        const entries = await (0, promises_1.readdir)(path, { withFileTypes: true });
        for (const entry of entries) {
            const fullPath = (0, node_path_1.join)(path, entry.name);
            if (entry.isDirectory()) {
                await walk(fullPath);
                continue;
            }
            // Case-insensitive extension match (e.g. ".GGUF" also counts).
            if (entry.isFile() && entry.name.toLowerCase().endsWith(".gguf")) {
                discovered.push(fullPath);
            }
        }
    }
    try {
        await walk(root);
    }
    catch {
        return [];
    }
    return discovered;
}
|
|
144
|
-
// Resolves config.model to an absolute model file path, trying in order:
// a direct/relative path, a path under config.llamaModelsDir, then a
// substring match against all *.gguf files in the models dir.
// Returns null for an empty/"default" model or when nothing matches.
async function resolveModelPath(config) {
    const model = config.model.trim();
    if (model.length === 0 || model.toLowerCase() === "default") {
        return null;
    }
    if (await exists(model)) {
        return (0, node_path_1.resolve)(model);
    }
    const fromModelsDir = (0, node_path_1.resolve)(config.llamaModelsDir, model);
    if (await exists(fromModelsDir)) {
        return fromModelsDir;
    }
    // Fuzzy fallback: first discovered .gguf whose path contains the name.
    const candidates = await collectGgufPaths((0, node_path_1.resolve)(config.llamaModelsDir));
    const found = candidates.find((candidate) => candidate.includes(model));
    return found ?? null;
}
|
|
160
|
-
/**
 * Derives a context size from total system RAM (512 tokens per GB,
 * rounded down to a multiple of 1024, never below 2048).
 */
function getOptimalContextSize() {
    const specs = (0, hardware_1.getSystemSpecs)();
    const raw = Math.floor(specs.totalMemoryGb * 512);
    const aligned = Math.floor(raw / 1024) * 1024;
    return aligned < 2048 ? 2048 : aligned;
}
|
|
166
|
-
/** Builds a `{ started: false, failure }` result; optional metadata keys are merged into the failure. */
function startFailure(kind, message, details, metadata) {
    const failure = {
        kind,
        message,
        details,
        ...metadata
    };
    return { started: false, failure };
}
|
|
177
|
-
/**
 * Parses /proc/net/tcp[6] text and returns the socket inodes of entries
 * listening (state 0A) on `port`. The header row is skipped; the local
 * address field is hex-encoded as ADDR:PORT.
 */
function parseProcNetListeners(procNetText, port) {
    const inodes = [];
    const rows = procNetText.split(/\r?\n/);
    for (let i = 1; i < rows.length; i += 1) {
        const fields = rows[i].trim().split(/\s+/);
        if (fields.length < 10) {
            continue;
        }
        const local = fields[1] ?? "";
        const state = fields[3] ?? "";
        const inode = fields[9] ?? "";
        if (state !== "0A" || inode.length === 0) {
            continue;
        }
        const pieces = local.split(":");
        if (pieces.length !== 2) {
            continue;
        }
        if (Number.parseInt(pieces[1] ?? "", 16) === port) {
            inodes.push(inode);
        }
    }
    return inodes;
}
|
|
202
|
-
// Reads the real UID of `pid` from /proc/<pid>/status (first field of the
// "Uid:" line). Returns null when the file is unreadable or malformed.
async function readProcessUid(pid) {
    try {
        const statusText = await (0, promises_1.readFile)(`/proc/${pid}/status`, "utf8");
        const uidLine = statusText
            .split(/\r?\n/)
            .find((line) => line.startsWith("Uid:") || line.startsWith("Uid\t"));
        if (!uidLine) {
            return null;
        }
        // "Uid:" line format: Uid: <real> <effective> <saved> <fs>.
        const fields = uidLine.trim().split(/\s+/);
        const uid = Number.parseInt(fields[1] ?? "", 10);
        return Number.isInteger(uid) ? uid : null;
    }
    catch {
        return null;
    }
}
|
|
219
|
-
// Best-effort command line for `pid`: prefers /proc/<pid>/cmdline (NUL
// separators converted to spaces), falls back to /proc/<pid>/comm, and
// finally the literal "unknown".
async function readProcessCommand(pid) {
    try {
        const cmdline = await (0, promises_1.readFile)(`/proc/${pid}/cmdline`, "utf8");
        const text = cmdline.replace(/\0+/g, " ").trim();
        if (text.length > 0) {
            return text;
        }
    }
    catch {
        // noop
    }
    try {
        return (await (0, promises_1.readFile)(`/proc/${pid}/comm`, "utf8")).trim();
    }
    catch {
        return "unknown";
    }
}
|
|
237
|
-
// Scans /proc/<pid>/fd symlinks of every process looking for the one that
// holds the socket with the given inode. Returns { pid, uid, command } for
// the owner, or null when not found / procfs is unreadable. Unreadable
// per-process entries (permissions, races with exiting processes) are
// silently skipped.
async function findPortOwnerByInode(inode) {
    let processEntries;
    try {
        processEntries = await (0, promises_1.readdir)("/proc", { withFileTypes: true });
    }
    catch {
        return null;
    }
    for (const entry of processEntries) {
        // Only numeric directories in /proc are processes.
        if (!entry.isDirectory() || !/^\d+$/.test(entry.name)) {
            continue;
        }
        const pid = Number.parseInt(entry.name, 10);
        if (!Number.isInteger(pid) || pid <= 0) {
            continue;
        }
        let fdEntries;
        try {
            fdEntries = await (0, promises_1.readdir)(`/proc/${pid}/fd`, { withFileTypes: true });
        }
        catch {
            continue;
        }
        for (const fdEntry of fdEntries) {
            const fdPath = `/proc/${pid}/fd/${fdEntry.name}`;
            let target;
            try {
                target = await (0, promises_1.readlink)(fdPath);
            }
            catch {
                continue;
            }
            // Socket fds resolve to the pseudo-path "socket:[<inode>]".
            if (target === `socket:[${inode}]`) {
                return {
                    pid,
                    uid: await readProcessUid(pid),
                    command: await readProcessCommand(pid)
                };
            }
        }
    }
    return null;
}
|
|
280
|
-
// Identifies the process listening on `port` by combining the socket
// inodes from /proc/net/tcp and /proc/net/tcp6 with a /proc fd scan.
// Linux-only; returns null on other platforms, on read errors, or when
// no owner can be attributed. `_host` is accepted for interface parity
// but unused — NOTE(review): lookup is by port only, all interfaces.
async function inspectPortOwner(_host, port) {
    if (process.platform !== "linux") {
        return null;
    }
    const inodeSet = new Set();
    for (const path of ["/proc/net/tcp", "/proc/net/tcp6"]) {
        try {
            const content = await (0, promises_1.readFile)(path, "utf8");
            for (const inode of parseProcNetListeners(content, port)) {
                inodeSet.add(inode);
            }
        }
        catch {
            // noop
        }
    }
    for (const inode of inodeSet) {
        const owner = await findPortOwnerByInode(inode);
        if (owner) {
            return owner;
        }
    }
    return null;
}
|
|
304
|
-
/** Heuristic: does this command line look like a llama.cpp server process? */
function isLlamaServerCommand(command) {
    const lowered = command.toLowerCase();
    if (lowered.includes("llama-server")) {
        return true;
    }
    return lowered.includes("llama.cpp/build/bin/server");
}
|
|
308
|
-
/**
 * Attempts to stop `pid` gracefully: SIGTERM, up to 6 checks at 500ms,
 * then SIGKILL, up to 4 checks at 250ms. Returns true when the process
 * is gone. A signalling error (e.g. process already dead) short-circuits
 * to a final liveness check.
 */
async function tryTerminatePid(pid, deps) {
    const isGone = () => !deps.isPidRunning(pid);
    try {
        deps.sendSignal(pid, "SIGTERM");
    }
    catch {
        return isGone();
    }
    for (let attempt = 0; attempt < 6; attempt += 1) {
        if (isGone()) {
            return true;
        }
        await deps.sleep(500);
    }
    try {
        deps.sendSignal(pid, "SIGKILL");
    }
    catch {
        return isGone();
    }
    for (let attempt = 0; attempt < 4; attempt += 1) {
        if (isGone()) {
            return true;
        }
        await deps.sleep(250);
    }
    return isGone();
}
|
|
335
|
-
// Applies config.llamaPortConflictPolicy when another process already owns
// the configured port. Returns null when the port is free (or was freed),
// or a startFailure result explaining why it could not be used.
// Policies: "fail" never kills; "kill-llama" only kills processes that look
// like llama-server; "kill-user" only kills processes owned by the current
// user; any other policy value proceeds straight to termination.
async function resolvePortConflict(config, context, deps) {
    const owner = await deps.inspectPortOwner(context.host, context.port);
    if (!owner) {
        return null;
    }
    const ownerText = `PID ${owner.pid} (${owner.command})`;
    const metadata = {
        host: context.host,
        port: context.port,
        conflictPid: owner.pid,
        conflictCommand: owner.command
    };
    const policy = config.llamaPortConflictPolicy;
    if (policy === "fail") {
        return startFailure("port-unavailable", `Configured llama.cpp port ${context.host}:${context.port} is already in use by ${ownerText}.`, ["Change llamaPort or stop the conflicting process."], metadata);
    }
    if (policy === "kill-llama" && !isLlamaServerCommand(owner.command)) {
        return startFailure("port-unavailable", `Configured llama.cpp port ${context.host}:${context.port} is already in use by ${ownerText}.`, ["Port conflict policy is kill-llama, but the owner is not llama-server."], metadata);
    }
    if (policy === "kill-user") {
        const currentUid = deps.currentUid();
        // Refuse when either uid is unknown — never kill blindly.
        if (currentUid === null || owner.uid === null || owner.uid !== currentUid) {
            return startFailure("port-unavailable", `Configured llama.cpp port ${context.host}:${context.port} is already in use by ${ownerText}.`, ["Port conflict policy is kill-user, but the process is not owned by current user."], metadata);
        }
    }
    const terminated = await tryTerminatePid(owner.pid, deps);
    if (!terminated) {
        return startFailure("port-unavailable", `Failed to free llama.cpp port ${context.host}:${context.port} from ${ownerText}.`, ["Tried SIGTERM then SIGKILL."], metadata);
    }
    // Re-inspect: another process may have grabbed the port meanwhile.
    const remainingOwner = await deps.inspectPortOwner(context.host, context.port);
    if (remainingOwner) {
        return startFailure("port-unavailable", `Configured llama.cpp port ${context.host}:${context.port} is still in use after terminating ${ownerText}.`, [
            `Current owner: PID ${remainingOwner.pid} (${remainingOwner.command})`,
            "Change llamaPort or stop the conflicting process manually."
        ], {
            host: context.host,
            port: context.port,
            conflictPid: remainingOwner.pid,
            conflictCommand: remainingOwner.command
        });
    }
    return null;
}
|
|
378
|
-
/**
 * Appends complete lines from a stderr chunk to `buffer`, keeping at most
 * STDERR_TAIL_LIMIT entries. The trailing fragment without a newline is
 * stashed in `partial.value` for the next chunk; empty lines are dropped.
 */
function pushStderrLines(buffer, chunk, partial) {
    const combined = partial.value + chunk;
    const pieces = combined.split(/\r?\n/);
    partial.value = pieces.pop() ?? "";
    for (const piece of pieces) {
        if (piece.length === 0) {
            continue;
        }
        buffer.push(piece);
        if (buffer.length > STDERR_TAIL_LIMIT) {
            buffer.shift();
        }
    }
}
|
|
392
|
-
/** Pushes the trimmed leftover fragment (if any) into `buffer` and clears it, honouring the tail limit. */
function flushStderrPartial(buffer, partial) {
    const remainder = partial.value.trim();
    partial.value = "";
    if (remainder.length === 0) {
        return;
    }
    buffer.push(remainder);
    if (buffer.length > STDERR_TAIL_LIMIT) {
        buffer.shift();
    }
}
|
|
402
|
-
/** True when captured stderr contains a known "could not bind the HTTP port" signature. */
function isBindError(stderrLines) {
    const haystack = stderrLines.join("\n").toLowerCase();
    const needles = [
        "couldn't bind",
        "address already in use",
        "http server socket",
        "exiting due to http server error"
    ];
    return needles.some((needle) => haystack.includes(needle));
}
|
|
409
|
-
/** Formats the last six stderr lines as indented detail strings; empty input yields no details. */
function stderrTailDetails(stderrLines) {
    if (stderrLines.length === 0) {
        return [];
    }
    const formatted = stderrLines.slice(-6).map((line) => ` ${line}`);
    return ["llama-server stderr (tail):", ...formatted];
}
|
|
416
|
-
/**
 * Probes `${baseUrl}/health` with a GET bounded by HEALTH_TIMEOUT_MS.
 * Trailing slashes on baseUrl are stripped. Returns true only for an
 * HTTP 2xx response; network errors, timeouts and aborts yield false.
 * `fetchImpl` is injectable for testing.
 */
async function checkLlamaHealth(baseUrl, fetchImpl = fetch) {
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), HEALTH_TIMEOUT_MS);
    const endpoint = `${baseUrl.replace(/\/+$/, "")}${HEALTH_PATH}`;
    try {
        const response = await fetchImpl(endpoint, {
            method: "GET",
            signal: controller.signal
        });
        return response.ok;
    }
    catch {
        return false;
    }
    finally {
        clearTimeout(timer);
    }
}
|
|
434
|
-
/**
 * Stops a spawned child: SIGTERM, poll exitCode for up to 5s, then
 * SIGKILL if it is still alive. No-op when the child already exited.
 * (Local renamed from `process`, which shadowed the Node global.)
 */
async function killProcess(child) {
    if (child.exitCode !== null) {
        return;
    }
    child.kill("SIGTERM");
    const deadline = Date.now() + 5_000;
    while (child.exitCode === null && Date.now() < deadline) {
        await sleep(100);
    }
    if (child.exitCode === null) {
        child.kill("SIGKILL");
    }
}
|
|
447
|
-
/**
 * Stops the llama-server child this module spawned, if any. The module
 * state is cleared before the kill so concurrent callers see no server.
 */
async function stopLlamaServer() {
    const state = runningState;
    if (!state) {
        return;
    }
    runningState = null;
    await killProcess(state.process);
}
|
|
455
|
-
/**
 * Restarts the local llama-server for a fresh session. Remote endpoints
 * are left untouched and reported as `{ started: false }`.
 */
async function resetLlamaForFreshSession(config, overrides) {
    const isLocal = isLocalLlamaEndpoint(config);
    if (isLocal) {
        await stopLlamaServer();
        return startLlamaServer(config, overrides);
    }
    return { started: false };
}
|
|
462
|
-
// Gathers everything needed to launch llama-server (binary path, model
// path, endpoint). Returns either a start context object or a startFailure
// result ({ started: false, ... }) when the binary or model is missing —
// callers distinguish the two via the "started" key.
async function createStartContext(config) {
    const binaryPath = await resolveServerBinaryPath(config);
    const baseUrl = buildBaseUrl(config);
    const modelPath = await resolveModelPath(config);
    const host = resolveHost(config);
    const port = resolvePort(config);
    if (!binaryPath) {
        return startFailure("binary-not-found", "Could not locate llama-server binary.", [
            "Set LLAMA_SERVER_PATH or config.llamaServerPath to your llama-server executable.",
            "Try: which llama-server"
        ]);
    }
    if (!modelPath) {
        return startFailure("model-not-found", `Could not resolve configured model '${config.model}'.`, [
            `Checked models dir: ${config.llamaModelsDir}`,
            "Pick a model with /model or update config.model."
        ]);
    }
    return {
        binaryPath,
        modelPath,
        baseUrl,
        host,
        port
    };
}
|
|
488
|
-
// Spawns llama-server and waits until it answers the health endpoint.
// Resolves to { started: true } on success, { started: false } when a
// healthy instance already runs, or a startFailure result describing the
// problem (missing binary/model, port conflict, early exit, timeout).
// `overrides` allows tests to inject health/spawn/signal dependencies.
async function startLlamaServer(config, overrides) {
    const deps = buildDefaultDeps(overrides);
    const contextOrFailure = await createStartContext(config);
    // createStartContext signals failure by returning a { started: ... } result.
    if ("started" in contextOrFailure) {
        return contextOrFailure;
    }
    const context = contextOrFailure;
    // Reuse a healthy instance we already own; otherwise tear it down first.
    if (runningState && runningState.baseUrl === context.baseUrl) {
        if (await deps.checkHealth(context.baseUrl)) {
            return { started: false };
        }
        await stopLlamaServer();
    }
    const preStartConflict = await resolvePortConflict(config, context, deps);
    if (preStartConflict) {
        return preStartConflict;
    }
    // Non-positive config values fall back to derived/defaults
    // (999 GPU layers effectively means "offload everything").
    const contextSize = config.llamaContextSize > 0 ? config.llamaContextSize : getOptimalContextSize();
    const gpuLayers = config.llamaGpuLayers > 0 ? config.llamaGpuLayers : 999;
    const args = [
        "-m",
        context.modelPath,
        "-c",
        String(contextSize),
        "--host",
        context.host,
        "--port",
        String(context.port),
        "--embedding",
        "-ngl",
        String(gpuLayers)
    ];
    let process;
    // Rolling tail of stderr for diagnostics; `partial` buffers an
    // incomplete last line between data chunks.
    const stderrLines = [];
    const partial = { value: "" };
    try {
        process = deps.spawnProcess(context.binaryPath, args, {
            stdio: ["ignore", "ignore", "pipe"]
        });
        process.stderr?.setEncoding("utf8");
        process.stderr?.on("data", (chunk) => {
            pushStderrLines(stderrLines, String(chunk), partial);
        });
    }
    catch (error) {
        return startFailure("start-failed", `Failed to start llama-server: ${toErrorMessage(error)}`, [
            `Binary: ${context.binaryPath}`,
            `Model: ${context.modelPath}`
        ]);
    }
    runningState = {
        process,
        baseUrl: context.baseUrl,
        modelPath: context.modelPath
    };
    // Poll until healthy, the child exits, or the startup timeout elapses.
    const started = Date.now();
    while (Date.now() - started < STARTUP_TIMEOUT_MS) {
        if (process.exitCode !== null) {
            runningState = null;
            flushStderrPartial(stderrLines, partial);
            const bindFailure = isBindError(stderrLines);
            const details = [
                `Binary: ${context.binaryPath}`,
                `Model: ${context.modelPath}`,
                `Endpoint: ${context.host}:${context.port}`,
                ...stderrTailDetails(stderrLines)
            ];
            // Exit code 98 mirrors EADDRINUSE-style bind failures.
            if (bindFailure || process.exitCode === 98) {
                return startFailure("port-unavailable", `llama-server could not bind ${context.host}:${context.port}.`, details, { host: context.host, port: context.port });
            }
            return startFailure("process-exited", `llama-server exited before becoming healthy (exit code ${String(process.exitCode)}).`, details, { host: context.host, port: context.port });
        }
        if (await deps.checkHealth(context.baseUrl)) {
            return { started: true };
        }
        await deps.sleep(HEALTH_RETRY_INTERVAL_MS);
    }
    // Timed out: kill the child so it does not linger half-started.
    await stopLlamaServer();
    return startFailure("health-timeout", "llama-server did not report healthy status before timeout.", [
        `Endpoint: ${context.baseUrl}${HEALTH_PATH}`,
        `Model: ${context.modelPath}`
    ]);
}
|
|
571
|
-
/** Failure payload of a start result, with a generic fallback when none was attached. */
function failureFromResult(result) {
    const fallback = {
        kind: "start-failed",
        message: "Failed to start llama-server.",
        details: []
    };
    return result.failure ?? fallback;
}
|
|
578
|
-
// Ensures a reachable llama.cpp endpoint: returns immediately when the
// health check passes, refuses when auto-start is disabled, and otherwise
// delegates to startLlamaServer. Resolves to { ready, started, failure? }.
async function ensureLlamaReady(config, overrides) {
    const deps = buildDefaultDeps(overrides);
    const baseUrl = buildBaseUrl(config);
    if (await deps.checkHealth(baseUrl)) {
        return { ready: true, started: false };
    }
    if (!config.llamaAutoStart) {
        return {
            ready: false,
            started: false,
            failure: {
                kind: "start-failed",
                message: "llama.cpp is not reachable and auto-start is disabled.",
                details: [`Expected endpoint: ${baseUrl}${HEALTH_PATH}`]
            }
        };
    }
    const started = await startLlamaServer(config, overrides);
    if (started.failure) {
        return {
            ready: false,
            started: false,
            failure: failureFromResult(started)
        };
    }
    // started.started is false when a healthy server was already running.
    return { ready: true, started: started.started };
}
|
|
605
|
-
// Renders a startup failure as a multi-line, user-facing message:
// the failure message followed by "- " bullet lines with endpoint,
// conflict info, details, and generic troubleshooting hints.
function formatLlamaStartupFailure(failure, config) {
    const lines = [`${failure.message}`];
    if (failure.host && failure.port) {
        lines.push(`- Endpoint: ${failure.host}:${failure.port}`);
    }
    if (failure.conflictPid && failure.conflictCommand) {
        lines.push(`- Conflict: PID ${failure.conflictPid} (${failure.conflictCommand})`);
    }
    for (const detail of failure.details) {
        lines.push(`- ${detail}`);
    }
    lines.push(`- Base URL: ${buildBaseUrl(config)}`);
    lines.push("- Verify llama-server is installed: which llama-server");
    lines.push(`- Verify model exists: ls ${(0, node_path_1.resolve)(config.llamaModelsDir, config.model)}`);
    return lines.join("\n");
}
|
|
@@ -1,47 +0,0 @@
|
|
|
1
|
-
// Compiled CommonJS module: token budgeting and display helpers
// (RAM-derived auto max-tokens, title-bar usage formatting, rough
// character-based token estimation).
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.computeAutoMaxTokens = computeAutoMaxTokens;
exports.resolveEffectiveMaxTokens = resolveEffectiveMaxTokens;
exports.formatTitleTokenUsage = formatTitleTokenUsage;
exports.estimateConversationTokens = estimateConversationTokens;
|
|
7
|
-
/** Constrains `value` to the inclusive range [min, max]. */
function clamp(value, min, max) {
    if (value < min) {
        return min;
    }
    if (value > max) {
        return max;
    }
    return value;
}
|
|
10
|
-
/**
 * Derives a max-token budget from available RAM: subtract the model size
 * and a 2 GB headroom, allow 1500 tokens per remaining GB, and clamp to
 * [4096, 128000]. Non-finite/non-positive inputs are treated as zero.
 */
function computeAutoMaxTokens(input) {
    const ramGb = Number.isFinite(input.ramGb) && input.ramGb > 0 ? input.ramGb : 0;
    const modelBytes = Number.isFinite(input.modelSizeBytes) && input.modelSizeBytes > 0
        ? input.modelSizeBytes
        : 0;
    const modelGb = modelBytes / 1024 ** 3;
    const headroomGb = Math.max(0, ramGb - modelGb - 2);
    return clamp(Math.floor(headroomGb * 1500), 4096, 128000);
}
|
|
18
|
-
/** Manual mode uses the floored manual limit when it is a positive finite number; anything else falls back to `autoMax`. */
function resolveEffectiveMaxTokens(tokensMode, manualMax, autoMax) {
    const manualIsValid = Number.isFinite(manualMax) && manualMax > 0;
    if (tokensMode === "manual" && manualIsValid) {
        return Math.floor(manualMax);
    }
    return autoMax;
}
|
|
24
|
-
/**
 * Compact token count: below 1000 the floored integer, otherwise
 * thousands with at most one decimal ("1k", "1.5k").
 */
function formatTokenCount(value) {
    if (value < 1000) {
        return String(Math.floor(value));
    }
    const thousands = Number((value / 1000).toFixed(1));
    if (Number.isInteger(thousands)) {
        return `${String(thousands)}k`;
    }
    return `${thousands.toFixed(1)}k`;
}
|
|
32
|
-
/** "used/max tks" label for the title bar; non-finite or non-positive counts display as 0. */
function formatTitleTokenUsage(usedTokens, maxTokens) {
    const used = Number.isFinite(usedTokens) && usedTokens > 0 ? usedTokens : 0;
    const max = Number.isFinite(maxTokens) && maxTokens > 0 ? maxTokens : 0;
    return `${formatTokenCount(used)}/${formatTokenCount(max)} tks`;
}
|
|
37
|
-
/**
 * Rough token estimate: one token per 4 Unicode code points of message
 * content, at least one token per non-empty message.
 */
function estimateConversationTokens(messages) {
    let total = 0;
    for (const { content } of messages) {
        const codePoints = [...content].length;
        if (codePoints > 0) {
            total += Math.max(1, Math.ceil(codePoints / 4));
        }
    }
    return total;
}
|