grepmax 0.13.1 → 0.13.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/dist/commands/llm.js +143 -0
- package/dist/config.js +3 -0
- package/dist/index.js +2 -0
- package/dist/lib/daemon/daemon.js +59 -11
- package/dist/lib/daemon/ipc-handler.js +7 -0
- package/dist/lib/llm/config.js +25 -0
- package/dist/lib/llm/server.js +261 -0
- package/package.json +1 -1
- package/plugins/grepmax/.claude-plugin/plugin.json +1 -1
- package/plugins/grepmax/skills/grepmax/SKILL.md +1 -1
package/README.md
CHANGED
|
@@ -164,15 +164,15 @@ gmax "query" [options]
|
|
|
164
164
|
|
|
165
165
|
## Background Daemon
|
|
166
166
|
|
|
167
|
-
A single daemon watches all registered projects via native OS file events (FSEvents/inotify). Changes are detected in sub-second and incrementally reindexed.
|
|
167
|
+
A single daemon watches all registered projects via native OS file events (FSEvents/inotify). Changes are detected in under a second and reindexed incrementally. All writes to LanceDB are routed through the daemon via IPC, eliminating lock contention.
|
|
168
168
|
|
|
169
169
|
```bash
|
|
170
|
-
gmax watch --daemon -b # Start daemon
|
|
170
|
+
gmax watch --daemon -b # Start daemon manually
|
|
171
171
|
gmax watch stop # Stop daemon
|
|
172
172
|
gmax status # See all projects + watcher status
|
|
173
173
|
```
|
|
174
174
|
|
|
175
|
-
The daemon auto-starts
|
|
175
|
+
The daemon auto-starts when you run `gmax add`, `gmax index`, `gmax remove`, or `gmax summarize`. It shuts down after 30 minutes of inactivity.
|
|
176
176
|
|
|
177
177
|
## Architecture
|
|
178
178
|
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// --- TypeScript-emitted interop helpers -------------------------------------
// Each helper reuses an implementation already present on `this` when one
// exists, and otherwise defines its own.

// __createBinding(o, m, k, k2): expose m[k] on o under the name k2 (defaults to k).
// When Object.create is available, the property is defined through a descriptor:
// the existing descriptor of m[k] is reused unless it is missing, is an accessor
// on a non-ES module, or is a writable/configurable data property — in those
// cases an enumerable getter that reads m[k] on access is installed instead.
// Without Object.create, the value is simply copied.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
var desc = Object.getOwnPropertyDescriptor(m, k);
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
// __setModuleDefault(o, v): set o.default to v (enumerable when defined via
// Object.defineProperty).
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
o["default"] = v;
});
// __importStar(mod): return mod unchanged when it is flagged __esModule;
// otherwise build a namespace-like object that binds every own key of mod
// except "default" and sets `default` to mod itself.
var __importStar = (this && this.__importStar) || (function () {
// ownKeys replaces itself on first call with Object.getOwnPropertyNames, or
// with a for-in/hasOwnProperty fallback when that is unavailable.
var ownKeys = function(o) {
ownKeys = Object.getOwnPropertyNames || function (o) {
var ar = [];
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
return ar;
};
return ownKeys(o);
};
return function (mod) {
if (mod && mod.__esModule) return mod;
var result = {};
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
__setModuleDefault(result, mod);
return result;
};
})();
// __awaiter(thisArg, _arguments, P, generator): run a generator function as an
// asynchronous routine. Every yielded value is adopted into a P (Promise by
// default); its fulfilment is fed back through generator.next and its rejection
// through generator.throw. The returned promise resolves with the generator's
// return value and rejects with any exception that escapes it.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
return new (P || (P = Promise))(function (resolve, reject) {
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
step((generator = generator.apply(thisArg, _arguments || [])).next());
});
};
|
|
44
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
45
|
+
exports.llm = void 0;
|
|
46
|
+
const path = __importStar(require("node:path"));
|
|
47
|
+
const commander_1 = require("commander");
|
|
48
|
+
const exit_1 = require("../lib/utils/exit");
|
|
49
|
+
/**
 * Print the LLM server status as reported by the daemon.
 *
 * Prints a "not running" line when the daemon itself is down or when the
 * daemon reports that the server is not running. When the status request
 * fails, the error is written to stderr and process.exitCode is set to 1.
 *
 * @returns {Promise<void>}
 */
function showStatus() {
    return __awaiter(this, void 0, void 0, function* () {
        // Loaded lazily so the daemon client is only required when the command runs.
        const { isDaemonRunning, sendDaemonCommand } = yield Promise.resolve().then(() => __importStar(require("../lib/utils/daemon-client")));
        const daemonUp = yield isDaemonRunning();
        if (!daemonUp) {
            console.log("LLM server: not running (daemon not started)");
            return;
        }
        const status = yield sendDaemonCommand({ cmd: "llm-status" });
        if (!status.ok) {
            console.error("Failed to get LLM status:", status.error);
            process.exitCode = 1;
            return;
        }
        if (!status.running) {
            console.log("LLM server: not running");
            return;
        }
        // Show only the model's file name, and split the uptime (seconds) into m/s.
        const modelName = path.basename(String(status.model));
        const uptimeSec = Number(status.uptime) || 0;
        const minutes = Math.floor(uptimeSec / 60);
        const seconds = uptimeSec % 60;
        console.log(`LLM server: running (PID: ${status.pid}, port: ${status.port})`);
        console.log(`  Model:  ${modelName}`);
        console.log(`  Uptime: ${minutes}m ${seconds}s`);
    });
}
|
|
76
|
+
exports.llm = new commander_1.Command("llm")
|
|
77
|
+
.description("Manage the local LLM server (llama-server)")
|
|
78
|
+
.action(() => __awaiter(void 0, void 0, void 0, function* () {
|
|
79
|
+
try {
|
|
80
|
+
yield showStatus();
|
|
81
|
+
}
|
|
82
|
+
finally {
|
|
83
|
+
yield (0, exit_1.gracefulExit)();
|
|
84
|
+
}
|
|
85
|
+
}));
|
|
86
|
+
exports.llm
|
|
87
|
+
.command("start")
|
|
88
|
+
.description("Start the LLM server")
|
|
89
|
+
.action(() => __awaiter(void 0, void 0, void 0, function* () {
|
|
90
|
+
try {
|
|
91
|
+
const { ensureDaemonRunning, sendDaemonCommand } = yield Promise.resolve().then(() => __importStar(require("../lib/utils/daemon-client")));
|
|
92
|
+
if (!(yield ensureDaemonRunning())) {
|
|
93
|
+
console.error("Failed to start daemon");
|
|
94
|
+
process.exitCode = 1;
|
|
95
|
+
return;
|
|
96
|
+
}
|
|
97
|
+
console.log("Starting LLM server...");
|
|
98
|
+
const resp = yield sendDaemonCommand({ cmd: "llm-start" }, { timeoutMs: 90000 });
|
|
99
|
+
if (!resp.ok) {
|
|
100
|
+
console.error(`Failed: ${resp.error}`);
|
|
101
|
+
process.exitCode = 1;
|
|
102
|
+
return;
|
|
103
|
+
}
|
|
104
|
+
const model = path.basename(String(resp.model));
|
|
105
|
+
console.log(`LLM server ready (PID: ${resp.pid}, port: ${resp.port}, model: ${model})`);
|
|
106
|
+
}
|
|
107
|
+
finally {
|
|
108
|
+
yield (0, exit_1.gracefulExit)();
|
|
109
|
+
}
|
|
110
|
+
}));
|
|
111
|
+
exports.llm
|
|
112
|
+
.command("stop")
|
|
113
|
+
.description("Stop the LLM server")
|
|
114
|
+
.action(() => __awaiter(void 0, void 0, void 0, function* () {
|
|
115
|
+
try {
|
|
116
|
+
const { isDaemonRunning, sendDaemonCommand } = yield Promise.resolve().then(() => __importStar(require("../lib/utils/daemon-client")));
|
|
117
|
+
if (!(yield isDaemonRunning())) {
|
|
118
|
+
console.log("Daemon not running");
|
|
119
|
+
return;
|
|
120
|
+
}
|
|
121
|
+
const resp = yield sendDaemonCommand({ cmd: "llm-stop" });
|
|
122
|
+
if (!resp.ok) {
|
|
123
|
+
console.error(`Failed: ${resp.error}`);
|
|
124
|
+
process.exitCode = 1;
|
|
125
|
+
return;
|
|
126
|
+
}
|
|
127
|
+
console.log("LLM server stopped");
|
|
128
|
+
}
|
|
129
|
+
finally {
|
|
130
|
+
yield (0, exit_1.gracefulExit)();
|
|
131
|
+
}
|
|
132
|
+
}));
|
|
133
|
+
exports.llm
|
|
134
|
+
.command("status")
|
|
135
|
+
.description("Show LLM server status")
|
|
136
|
+
.action(() => __awaiter(void 0, void 0, void 0, function* () {
|
|
137
|
+
try {
|
|
138
|
+
yield showStatus();
|
|
139
|
+
}
|
|
140
|
+
finally {
|
|
141
|
+
yield (0, exit_1.gracefulExit)();
|
|
142
|
+
}
|
|
143
|
+
}));
|
package/dist/config.js
CHANGED
|
@@ -103,6 +103,9 @@ exports.PATHS = {
|
|
|
103
103
|
lmdbPath: path.join(GLOBAL_ROOT, "cache", "meta.lmdb"),
|
|
104
104
|
configPath: path.join(GLOBAL_ROOT, "config.json"),
|
|
105
105
|
lockDir: GLOBAL_ROOT,
|
|
106
|
+
// LLM server (llama-server)
|
|
107
|
+
llmPidFile: path.join(GLOBAL_ROOT, "llm-server.pid"),
|
|
108
|
+
llmLogFile: path.join(GLOBAL_ROOT, "logs", "llm-server.log"),
|
|
106
109
|
};
|
|
107
110
|
exports.MAX_FILE_SIZE_BYTES = 1024 * 1024 * 2; // 2MB limit for indexing
|
|
108
111
|
// Extensions we consider for indexing to avoid binary noise and improve relevance.
|
package/dist/index.js
CHANGED
|
@@ -50,6 +50,7 @@ const impact_1 = require("./commands/impact");
|
|
|
50
50
|
const droid_1 = require("./commands/droid");
|
|
51
51
|
const index_1 = require("./commands/index");
|
|
52
52
|
const list_1 = require("./commands/list");
|
|
53
|
+
const llm_1 = require("./commands/llm");
|
|
53
54
|
const mcp_1 = require("./commands/mcp");
|
|
54
55
|
const peek_1 = require("./commands/peek");
|
|
55
56
|
const project_1 = require("./commands/project");
|
|
@@ -110,6 +111,7 @@ commander_1.program.addCommand(serve_1.serve);
|
|
|
110
111
|
commander_1.program.addCommand(watch_1.watch);
|
|
111
112
|
commander_1.program.addCommand(mcp_1.mcp);
|
|
112
113
|
commander_1.program.addCommand(summarize_1.summarize);
|
|
114
|
+
commander_1.program.addCommand(llm_1.llm);
|
|
113
115
|
// Setup & diagnostics
|
|
114
116
|
commander_1.program.addCommand(setup_1.setup);
|
|
115
117
|
commander_1.program.addCommand(config_1.config);
|
|
@@ -60,6 +60,7 @@ const vector_db_1 = require("../store/vector-db");
|
|
|
60
60
|
const process_1 = require("../utils/process");
|
|
61
61
|
const project_registry_1 = require("../utils/project-registry");
|
|
62
62
|
const watcher_store_1 = require("../utils/watcher-store");
|
|
63
|
+
const server_1 = require("../llm/server");
|
|
63
64
|
const ipc_handler_1 = require("./ipc-handler");
|
|
64
65
|
const IDLE_TIMEOUT_MS = 30 * 60 * 1000; // 30 minutes
|
|
65
66
|
const HEARTBEAT_INTERVAL_MS = 60 * 1000;
|
|
@@ -78,6 +79,7 @@ class Daemon {
|
|
|
78
79
|
this.shuttingDown = false;
|
|
79
80
|
this.pendingOps = new Set();
|
|
80
81
|
this.projectLocks = new Map();
|
|
82
|
+
this.llmServer = null;
|
|
81
83
|
}
|
|
82
84
|
start() {
|
|
83
85
|
return __awaiter(this, void 0, void 0, function* () {
|
|
@@ -124,7 +126,9 @@ class Daemon {
|
|
|
124
126
|
console.error("[daemon] Failed to open shared resources:", err);
|
|
125
127
|
throw err;
|
|
126
128
|
}
|
|
127
|
-
// 6.
|
|
129
|
+
// 6. LLM server manager (constructed, not started — starts on first request)
|
|
130
|
+
this.llmServer = new server_1.LlmServer();
|
|
131
|
+
// 7. Register daemon (only after resources are open)
|
|
128
132
|
(0, watcher_store_1.registerDaemon)(process.pid);
|
|
129
133
|
// 7. Subscribe to all registered projects (skip missing directories)
|
|
130
134
|
const projects = (0, project_registry_1.listProjects)().filter((p) => p.status === "indexed");
|
|
@@ -496,9 +500,48 @@ class Daemon {
|
|
|
496
500
|
}));
|
|
497
501
|
});
|
|
498
502
|
}
|
|
503
|
+
// --- LLM server management ---
|
|
504
|
+
llmStart() {
|
|
505
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
506
|
+
if (!this.llmServer)
|
|
507
|
+
return { ok: false, error: "daemon not initialized" };
|
|
508
|
+
try {
|
|
509
|
+
yield this.llmServer.start();
|
|
510
|
+
this.resetActivity();
|
|
511
|
+
return Object.assign({ ok: true }, this.llmServer.getStatus());
|
|
512
|
+
}
|
|
513
|
+
catch (err) {
|
|
514
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
515
|
+
return { ok: false, error: msg };
|
|
516
|
+
}
|
|
517
|
+
});
|
|
518
|
+
}
|
|
519
|
+
llmStop() {
|
|
520
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
521
|
+
if (!this.llmServer)
|
|
522
|
+
return { ok: false, error: "daemon not initialized" };
|
|
523
|
+
try {
|
|
524
|
+
yield this.llmServer.stop();
|
|
525
|
+
return { ok: true };
|
|
526
|
+
}
|
|
527
|
+
catch (err) {
|
|
528
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
529
|
+
return { ok: false, error: msg };
|
|
530
|
+
}
|
|
531
|
+
});
|
|
532
|
+
}
|
|
533
|
+
llmStatus() {
|
|
534
|
+
if (!this.llmServer)
|
|
535
|
+
return { ok: false, error: "daemon not initialized" };
|
|
536
|
+
return Object.assign({ ok: true }, this.llmServer.getStatus());
|
|
537
|
+
}
|
|
538
|
+
llmTouch() {
|
|
539
|
+
var _a;
|
|
540
|
+
(_a = this.llmServer) === null || _a === void 0 ? void 0 : _a.touchIdle();
|
|
541
|
+
}
|
|
499
542
|
shutdown() {
|
|
500
543
|
return __awaiter(this, void 0, void 0, function* () {
|
|
501
|
-
var _a, _b, _c;
|
|
544
|
+
var _a, _b, _c, _d;
|
|
502
545
|
if (this.shuttingDown)
|
|
503
546
|
return;
|
|
504
547
|
this.shuttingDown = true;
|
|
@@ -511,29 +554,34 @@ class Daemon {
|
|
|
511
554
|
for (const processor of this.processors.values()) {
|
|
512
555
|
yield processor.close();
|
|
513
556
|
}
|
|
557
|
+
// Stop LLM server if running
|
|
558
|
+
try {
|
|
559
|
+
yield ((_a = this.llmServer) === null || _a === void 0 ? void 0 : _a.stop());
|
|
560
|
+
}
|
|
561
|
+
catch (_e) { }
|
|
514
562
|
// Unsubscribe all watchers
|
|
515
563
|
for (const sub of this.subscriptions.values()) {
|
|
516
564
|
try {
|
|
517
565
|
yield sub.unsubscribe();
|
|
518
566
|
}
|
|
519
|
-
catch (
|
|
567
|
+
catch (_f) { }
|
|
520
568
|
}
|
|
521
569
|
this.subscriptions.clear();
|
|
522
570
|
// Close server + socket + PID file + lock
|
|
523
|
-
(
|
|
571
|
+
(_b = this.server) === null || _b === void 0 ? void 0 : _b.close();
|
|
524
572
|
try {
|
|
525
573
|
fs.unlinkSync(config_1.PATHS.daemonSocket);
|
|
526
574
|
}
|
|
527
|
-
catch (
|
|
575
|
+
catch (_g) { }
|
|
528
576
|
try {
|
|
529
577
|
fs.unlinkSync(config_1.PATHS.daemonPidFile);
|
|
530
578
|
}
|
|
531
|
-
catch (
|
|
579
|
+
catch (_h) { }
|
|
532
580
|
if (this.releaseLock) {
|
|
533
581
|
try {
|
|
534
582
|
yield this.releaseLock();
|
|
535
583
|
}
|
|
536
|
-
catch (
|
|
584
|
+
catch (_j) { }
|
|
537
585
|
this.releaseLock = null;
|
|
538
586
|
}
|
|
539
587
|
// Unregister all
|
|
@@ -544,13 +592,13 @@ class Daemon {
|
|
|
544
592
|
this.processors.clear();
|
|
545
593
|
// Close shared resources
|
|
546
594
|
try {
|
|
547
|
-
yield ((
|
|
595
|
+
yield ((_c = this.metaCache) === null || _c === void 0 ? void 0 : _c.close());
|
|
548
596
|
}
|
|
549
|
-
catch (
|
|
597
|
+
catch (_k) { }
|
|
550
598
|
try {
|
|
551
|
-
yield ((
|
|
599
|
+
yield ((_d = this.vectorDb) === null || _d === void 0 ? void 0 : _d.close());
|
|
552
600
|
}
|
|
553
|
-
catch (
|
|
601
|
+
catch (_l) { }
|
|
554
602
|
console.log("[daemon] Shutdown complete");
|
|
555
603
|
});
|
|
556
604
|
}
|
|
@@ -101,6 +101,13 @@ function handleCommand(daemon, cmd, conn) {
|
|
|
101
101
|
});
|
|
102
102
|
return null;
|
|
103
103
|
}
|
|
104
|
+
// --- LLM server management ---
|
|
105
|
+
case "llm-start":
|
|
106
|
+
return yield daemon.llmStart();
|
|
107
|
+
case "llm-stop":
|
|
108
|
+
return yield daemon.llmStop();
|
|
109
|
+
case "llm-status":
|
|
110
|
+
return daemon.llmStatus();
|
|
104
111
|
default:
|
|
105
112
|
return { ok: false, error: `unknown command: ${cmd.cmd}` };
|
|
106
113
|
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.getLlmConfig = getLlmConfig;
|
|
4
|
+
// Model path used by getLlmConfig() when GMAX_LLM_MODEL is not set.
// NOTE(review): this is an absolute path under /Volumes/External and looks
// specific to one machine — confirm whether a portable default is intended.
const DEFAULT_MODEL = "/Volumes/External/models/huggingface/hub/models--unsloth--Qwen3.5-35B-A3B-GGUF/Qwen3.5-35B-A3B-Q4_K_M.gguf";
|
|
5
|
+
/**
 * Read a string setting from the environment.
 *
 * An unset, empty, or whitespace-only variable counts as "not configured",
 * which matches how envInt treats empty values.
 *
 * @param {string} key - Environment variable name.
 * @param {string} fallback - Value returned when the variable is not configured.
 * @returns {string} The raw variable value, or `fallback`.
 */
function envStr(key, fallback) {
    const raw = process.env[key];
    return raw === undefined || raw.trim() === "" ? fallback : raw;
}
/**
 * Read an integer setting from the environment.
 *
 * The value must consist only of decimal digits (an optional leading "+" is
 * accepted). Anything else — "80abc", "1.5", "-3" — yields the fallback rather
 * than a silently truncated number.
 *
 * @param {string} key - Environment variable name.
 * @param {number} fallback - Value returned when the variable is unset or invalid.
 * @param {number} [min=1] - Smallest accepted value; lower values yield `fallback`.
 * @returns {number} The parsed integer, or `fallback`.
 */
function envInt(key, fallback, min = 1) {
    const raw = process.env[key];
    if (!raw)
        return fallback;
    const text = raw.trim();
    if (!/^\+?\d+$/.test(text))
        return fallback;
    const n = Number.parseInt(text, 10);
    return Number.isSafeInteger(n) && n >= min ? n : fallback;
}
/**
 * Build the LLM server configuration from GMAX_LLM_* environment variables,
 * falling back to built-in defaults for anything unset or invalid.
 *
 * @returns {{model: string, binary: string, host: string, port: number,
 *   ctxSize: number, ngl: number, maxTokens: number, idleTimeoutMin: number,
 *   startupWaitSec: number}}
 */
function getLlmConfig() {
    return {
        model: envStr("GMAX_LLM_MODEL", DEFAULT_MODEL),
        binary: envStr("GMAX_LLM_BINARY", "llama-server"),
        host: envStr("GMAX_LLM_HOST", "127.0.0.1"),
        port: envInt("GMAX_LLM_PORT", 8079),
        ctxSize: envInt("GMAX_LLM_CTX_SIZE", 16384),
        // 0 is a meaningful value here (no layers offloaded to the GPU), so it
        // must not be replaced by the default.
        ngl: envInt("GMAX_LLM_NGL", 99, 0),
        maxTokens: envInt("GMAX_LLM_MAX_TOKENS", 8192),
        idleTimeoutMin: envInt("GMAX_LLM_IDLE_TIMEOUT", 30),
        startupWaitSec: envInt("GMAX_LLM_STARTUP_WAIT", 60),
    };
}
|
|
@@ -0,0 +1,261 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// --- TypeScript-emitted interop helpers -------------------------------------
// Each helper reuses an implementation already present on `this` when one
// exists, and otherwise defines its own.

// __createBinding(o, m, k, k2): expose m[k] on o under the name k2 (defaults to k).
// When Object.create is available, the property is defined through a descriptor:
// the existing descriptor of m[k] is reused unless it is missing, is an accessor
// on a non-ES module, or is a writable/configurable data property — in those
// cases an enumerable getter that reads m[k] on access is installed instead.
// Without Object.create, the value is simply copied.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
var desc = Object.getOwnPropertyDescriptor(m, k);
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
// __setModuleDefault(o, v): set o.default to v (enumerable when defined via
// Object.defineProperty).
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
o["default"] = v;
});
// __importStar(mod): return mod unchanged when it is flagged __esModule;
// otherwise build a namespace-like object that binds every own key of mod
// except "default" and sets `default` to mod itself.
var __importStar = (this && this.__importStar) || (function () {
// ownKeys replaces itself on first call with Object.getOwnPropertyNames, or
// with a for-in/hasOwnProperty fallback when that is unavailable.
var ownKeys = function(o) {
ownKeys = Object.getOwnPropertyNames || function (o) {
var ar = [];
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
return ar;
};
return ownKeys(o);
};
return function (mod) {
if (mod && mod.__esModule) return mod;
var result = {};
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
__setModuleDefault(result, mod);
return result;
};
})();
// __awaiter(thisArg, _arguments, P, generator): run a generator function as an
// asynchronous routine. Every yielded value is adopted into a P (Promise by
// default); its fulfilment is fed back through generator.next and its rejection
// through generator.throw. The returned promise resolves with the generator's
// return value and rejects with any exception that escapes it.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
return new (P || (P = Promise))(function (resolve, reject) {
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
step((generator = generator.apply(thisArg, _arguments || [])).next());
});
};
|
|
44
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
45
|
+
exports.LlmServer = void 0;
|
|
46
|
+
const node_child_process_1 = require("node:child_process");
|
|
47
|
+
const fs = __importStar(require("node:fs"));
|
|
48
|
+
const http = __importStar(require("node:http"));
|
|
49
|
+
const config_1 = require("../../config");
|
|
50
|
+
const log_rotate_1 = require("../utils/log-rotate");
|
|
51
|
+
const config_2 = require("./config");
|
|
52
|
+
// Socket timeout (ms) for the GET /v1/models health probe.
const HEALTH_TIMEOUT_MS = 2000;
// Delay (ms) between polls while waiting for the server to come up or to exit.
const POLL_INTERVAL_MS = 500;
// How long (ms) stop() waits after SIGTERM before escalating to SIGKILL.
const STOP_GRACE_MS = 5000;
// How often (ms) the idle watchdog compares the last request time to the idle timeout.
const IDLE_CHECK_INTERVAL_MS = 5 * 60 * 1000;
|
|
56
|
+
/**
 * Manages a local llama-server process: spawning it detached, polling it for
 * readiness, stopping it (SIGTERM, then SIGKILL), and shutting it down after a
 * period without recorded activity.
 *
 * The server's PID is persisted in PATHS.llmPidFile and its stdout/stderr are
 * appended to PATHS.llmLogFile.
 */
class LlmServer {
    constructor() {
        // Timestamp (ms) of the last recorded activity; 0 means "none yet".
        this.lastRequestTime = 0;
        // Timestamp (ms) at which the server became ready; 0 when unknown.
        this.startTime = 0;
        // Handle of the idle-watchdog interval, or null when it is not running.
        this.idleTimer = null;
        this.config = (0, config_2.getLlmConfig)();
    }
    /**
     * HTTP GET /v1/models — resolves true if llama-server answers with 200.
     * Resolves false on any request error or when the request times out.
     *
     * @returns {Promise<boolean>}
     */
    healthy() {
        return new Promise((resolve) => {
            const req = http.get({
                hostname: this.config.host,
                port: this.config.port,
                path: "/v1/models",
                timeout: HEALTH_TIMEOUT_MS,
            }, (res) => {
                // Drain the body so the socket is released.
                res.resume();
                resolve(res.statusCode === 200);
            });
            req.on("error", () => resolve(false));
            req.on("timeout", () => {
                req.destroy();
                resolve(false);
            });
        });
    }
    /**
     * Start llama-server, poll until it is ready, then start the idle watchdog.
     * Returns immediately when a server is already answering the health probe.
     *
     * @returns {Promise<void>}
     * @throws {Error} When the binary or model file cannot be found, the
     *   process cannot be spawned, it dies during startup, or it does not
     *   become healthy within `startupWaitSec` seconds.
     */
    start() {
        return __awaiter(this, void 0, void 0, function* () {
            if (yield this.healthy())
                return;
            // Validate binary. The name comes from the environment, so it is
            // passed to `which` as a single argv entry instead of being
            // interpolated into a shell command line (no injection, and paths
            // containing spaces work).
            const binary = this.config.binary;
            try {
                (0, node_child_process_1.execFileSync)("which", [binary], { stdio: "ignore" });
            }
            catch (_a) {
                throw new Error(`llama-server binary not found: "${binary}". Install llama.cpp or set GMAX_LLM_BINARY`);
            }
            // Validate model file
            if (!fs.existsSync(this.config.model)) {
                throw new Error(`Model file not found: "${this.config.model}". Set GMAX_LLM_MODEL to a valid .gguf path`);
            }
            const logFd = (0, log_rotate_1.openRotatedLog)(config_1.PATHS.llmLogFile);
            let child;
            try {
                child = (0, node_child_process_1.spawn)(binary, [
                    "-m", this.config.model,
                    "--host", this.config.host,
                    "--port", String(this.config.port),
                    "-ngl", String(this.config.ngl),
                    "--ctx-size", String(this.config.ctxSize),
                ], { detached: true, stdio: ["ignore", logFd, logFd] });
            }
            finally {
                // The child holds its own copy of the descriptor; always release
                // ours, even when spawn() throws.
                fs.closeSync(logFd);
            }
            // Spawn failures are reported asynchronously through an 'error'
            // event. Without a listener that event is thrown as an uncaught
            // exception and would take the whole daemon down.
            let spawnError = null;
            child.on("error", (err) => {
                spawnError = err;
            });
            child.unref();
            const pid = child.pid;
            if (!pid) {
                // Give the pending 'error' event a chance to arrive so its
                // reason can be included in the message.
                yield new Promise((r) => setImmediate(r));
                const reason = spawnError ? `: ${spawnError.message}` : "";
                throw new Error(`Failed to spawn llama-server — no PID returned${reason}`);
            }
            fs.writeFileSync(config_1.PATHS.llmPidFile, String(pid));
            console.log(`[llm] Starting llama-server (PID: ${pid}, port: ${this.config.port})`);
            // Poll until ready
            const deadline = Date.now() + this.config.startupWaitSec * 1000;
            while (Date.now() < deadline) {
                yield new Promise((r) => setTimeout(r, POLL_INTERVAL_MS));
                // Check if process died
                if (!this.isAlive(pid)) {
                    // Do not leave a PID file behind that points at a dead
                    // (and possibly later reused) PID.
                    this.cleanupPidFile();
                    throw new Error(`llama-server process died during startup — check ${config_1.PATHS.llmLogFile}`);
                }
                if (yield this.healthy()) {
                    this.startTime = Date.now();
                    this.lastRequestTime = Date.now();
                    this.startIdleWatchdog();
                    console.log("[llm] Server ready");
                    return;
                }
            }
            // Timeout — kill the process
            try {
                process.kill(pid, "SIGKILL");
            }
            catch (_b) {
                // Best effort: the process may already have exited.
            }
            this.cleanupPidFile();
            throw new Error(`llama-server startup timed out after ${this.config.startupWaitSec}s — check ${config_1.PATHS.llmLogFile}`);
        });
    }
    /**
     * Stop llama-server gracefully (SIGTERM → wait → SIGKILL) and remove the
     * PID file. Does nothing beyond stopping the watchdog when no PID is
     * recorded.
     *
     * @returns {Promise<void>}
     */
    stop() {
        return __awaiter(this, void 0, void 0, function* () {
            this.stopIdleWatchdog();
            const pid = this.readPid();
            if (!pid)
                return;
            // Already gone: just clear the stale PID file.
            if (!this.isAlive(pid)) {
                this.cleanupPidFile();
                return;
            }
            // SIGTERM
            try {
                process.kill(pid, "SIGTERM");
            }
            catch (_a) {
                // Best effort: the process may have exited in the meantime.
            }
            // Wait up to STOP_GRACE_MS for the process to exit
            const deadline = Date.now() + STOP_GRACE_MS;
            while (Date.now() < deadline) {
                yield new Promise((r) => setTimeout(r, POLL_INTERVAL_MS));
                if (!this.isAlive(pid)) {
                    // Process exited
                    this.cleanupPidFile();
                    console.log(`[llm] Server stopped (PID: ${pid})`);
                    return;
                }
            }
            // Force kill
            try {
                process.kill(pid, "SIGKILL");
            }
            catch (_b) {
                // Best effort: the process may have exited in the meantime.
            }
            this.cleanupPidFile();
            console.log(`[llm] Server force-killed (PID: ${pid})`);
        });
    }
    /**
     * Start the server if it is not answering the health probe; otherwise
     * just record activity.
     *
     * @returns {Promise<void>}
     */
    ensure() {
        return __awaiter(this, void 0, void 0, function* () {
            if (yield this.healthy()) {
                this.touchIdle();
                return;
            }
            yield this.start();
        });
    }
    /** Mark activity — resets the idle clock read by the watchdog. */
    touchIdle() {
        this.lastRequestTime = Date.now();
    }
    /**
     * Get current status for IPC/CLI display.
     *
     * @returns {{running: boolean, pid: (number|null), port: number,
     *   model: string, uptime: number}} `uptime` is in whole seconds and is 0
     *   when the process is not alive or its start time is unknown.
     */
    getStatus() {
        const pid = this.readPid();
        const alive = pid ? this.isAlive(pid) : false;
        return {
            running: alive,
            pid: alive ? pid : null,
            port: this.config.port,
            model: this.config.model,
            uptime: alive && this.startTime ? Math.floor((Date.now() - this.startTime) / 1000) : 0,
        };
    }
    /**
     * (Re)start the interval that stops the server once no activity has been
     * recorded for `idleTimeoutMin` minutes. The timer is unref'd so it does
     * not keep the process alive on its own.
     */
    startIdleWatchdog() {
        this.stopIdleWatchdog();
        const timeoutMs = this.config.idleTimeoutMin * 60 * 1000;
        this.idleTimer = setInterval(() => __awaiter(this, void 0, void 0, function* () {
            if (this.lastRequestTime === 0)
                return;
            if (Date.now() - this.lastRequestTime > timeoutMs) {
                console.log(`[llm] Server idle for ${this.config.idleTimeoutMin}min, shutting down`);
                yield this.stop();
            }
        }), IDLE_CHECK_INTERVAL_MS);
        this.idleTimer.unref();
    }
    /** Cancel the idle watchdog, if it is running. */
    stopIdleWatchdog() {
        if (this.idleTimer) {
            clearInterval(this.idleTimer);
            this.idleTimer = null;
        }
    }
    /**
     * Read the PID recorded in the PID file.
     *
     * @returns {number|null} A positive integer PID, or null when the file is
     *   missing, unreadable, or does not contain one.
     */
    readPid() {
        try {
            const raw = fs.readFileSync(config_1.PATHS.llmPidFile, "utf-8").trim();
            const pid = Number.parseInt(raw, 10);
            return Number.isFinite(pid) && pid > 0 ? pid : null;
        }
        catch (_a) {
            return null;
        }
    }
    /**
     * Probe a PID with signal 0 (no signal is delivered).
     *
     * @param {number} pid
     * @returns {boolean} False when the probe throws, true otherwise.
     */
    isAlive(pid) {
        try {
            process.kill(pid, 0);
            return true;
        }
        catch (_a) {
            return false;
        }
    }
    /** Remove the PID file (ignoring a missing file) and reset the start time. */
    cleanupPidFile() {
        try {
            fs.unlinkSync(config_1.PATHS.llmPidFile);
        }
        catch (_a) {
            // Nothing to remove.
        }
        this.startTime = 0;
    }
}
|
|
261
|
+
exports.LlmServer = LlmServer;
|
package/package.json
CHANGED
|
@@ -24,7 +24,7 @@ Bash(gmax "auth handler" --role ORCHESTRATION --lang ts --agent -m 3)
|
|
|
24
24
|
|
|
25
25
|
## Project management
|
|
26
26
|
|
|
27
|
-
Projects must be added before search works:
|
|
27
|
+
Projects must be added before search works. These commands auto-start the daemon if not running:
|
|
28
28
|
|
|
29
29
|
```
|
|
30
30
|
gmax add # add + index current directory
|