grepmax 0.14.2 → 0.14.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/claude-code.js +9 -1
- package/dist/commands/doctor.js +58 -6
- package/dist/commands/mcp.js +16 -3
- package/dist/commands/status.js +3 -1
- package/dist/index.js +2 -2
- package/dist/lib/daemon/daemon.js +21 -5
- package/dist/lib/index/ignore-patterns.js +0 -1
- package/dist/lib/index/syncer.js +14 -1
- package/dist/lib/llm/server.js +25 -2
- package/dist/lib/store/vector-db.js +7 -0
- package/dist/lib/utils/daemon-client.js +1 -1
- package/dist/lib/utils/project-registry.js +3 -1
- package/dist/lib/workers/embeddings/mlx-client.js +30 -1
- package/dist/lib/workers/orchestrator.js +8 -2
- package/dist/lib/workers/pool.js +5 -3
- package/mlx-embed-server/server.py +12 -7
- package/package.json +1 -1
- package/plugins/grepmax/.claude-plugin/plugin.json +1 -1
- package/plugins/grepmax/hooks/start.js +52 -2
|
@@ -53,8 +53,16 @@ function runClaudeCommand(args) {
|
|
|
53
53
|
env: process.env,
|
|
54
54
|
stdio: "inherit",
|
|
55
55
|
});
|
|
56
|
-
|
|
56
|
+
const timeout = setTimeout(() => {
|
|
57
|
+
child.kill("SIGTERM");
|
|
58
|
+
reject(new Error("claude command timed out after 60s"));
|
|
59
|
+
}, 60000);
|
|
60
|
+
child.on("error", (error) => {
|
|
61
|
+
clearTimeout(timeout);
|
|
62
|
+
reject(error);
|
|
63
|
+
});
|
|
57
64
|
child.on("exit", (code) => {
|
|
65
|
+
clearTimeout(timeout);
|
|
58
66
|
if (code === 0) {
|
|
59
67
|
resolve();
|
|
60
68
|
}
|
package/dist/commands/doctor.js
CHANGED
|
@@ -85,7 +85,7 @@ exports.doctor = new commander_1.Command("doctor")
|
|
|
85
85
|
.option("--fix", "Auto-fix detected issues (compact, prune, remove stale locks)", false)
|
|
86
86
|
.option("--agent", "Compact output for AI agents", false)
|
|
87
87
|
.action((opts) => __awaiter(void 0, void 0, void 0, function* () {
|
|
88
|
-
var _a;
|
|
88
|
+
var _a, _b, _c, _d;
|
|
89
89
|
if (!opts.agent)
|
|
90
90
|
console.log("gmax Doctor\n");
|
|
91
91
|
const root = config_1.PATHS.globalRoot;
|
|
@@ -103,6 +103,9 @@ exports.doctor = new commander_1.Command("doctor")
|
|
|
103
103
|
}
|
|
104
104
|
const globalConfig = (0, index_config_1.readGlobalConfig)();
|
|
105
105
|
const tier = (_a = config_1.MODEL_TIERS[globalConfig.modelTier]) !== null && _a !== void 0 ? _a : config_1.MODEL_TIERS.small;
|
|
106
|
+
if (!config_1.MODEL_TIERS[globalConfig.modelTier]) {
|
|
107
|
+
console.log(`WARN Unknown model tier '${globalConfig.modelTier}', falling back to 'small'`);
|
|
108
|
+
}
|
|
106
109
|
const embedModel = globalConfig.embedMode === "gpu" ? tier.mlxModel : tier.onnxModel;
|
|
107
110
|
if (!opts.agent) {
|
|
108
111
|
console.log(`\nEmbed mode: ${globalConfig.embedMode} | Model tier: ${globalConfig.modelTier} (${tier.vectorDim}d)`);
|
|
@@ -125,10 +128,39 @@ exports.doctor = new commander_1.Command("doctor")
|
|
|
125
128
|
console.log(`INFO No index found in current directory (run 'gmax index' to create one)`);
|
|
126
129
|
}
|
|
127
130
|
// Check MLX embed server
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
131
|
+
let embedUp = false;
|
|
132
|
+
let embedError = "";
|
|
133
|
+
try {
|
|
134
|
+
const res = yield fetch("http://127.0.0.1:8100/health");
|
|
135
|
+
embedUp = res.ok;
|
|
136
|
+
}
|
|
137
|
+
catch (err) {
|
|
138
|
+
embedError = err.code === "ECONNREFUSED" ? "connection refused" : (err.message || String(err));
|
|
139
|
+
}
|
|
140
|
+
console.log(`${embedUp ? "ok" : "WARN"} MLX Embed: ${embedUp ? "running (port 8100)" : `not running${embedError ? ` (${embedError})` : ""}`}`);
|
|
141
|
+
if (embedUp) {
|
|
142
|
+
try {
|
|
143
|
+
const start = Date.now();
|
|
144
|
+
const embedRes = yield fetch("http://127.0.0.1:8100/embed", {
|
|
145
|
+
method: "POST",
|
|
146
|
+
headers: { "Content-Type": "application/json" },
|
|
147
|
+
body: JSON.stringify({ texts: ["gmax health check"] }),
|
|
148
|
+
});
|
|
149
|
+
const embedData = yield embedRes.json();
|
|
150
|
+
const dim = (_d = (_c = (_b = embedData === null || embedData === void 0 ? void 0 : embedData.vectors) === null || _b === void 0 ? void 0 : _b[0]) === null || _c === void 0 ? void 0 : _c.length) !== null && _d !== void 0 ? _d : 0;
|
|
151
|
+
const ms = Date.now() - start;
|
|
152
|
+
const expectedDim = tier.vectorDim || 384;
|
|
153
|
+
if (dim === expectedDim) {
|
|
154
|
+
console.log(`ok Embedding: working (${dim}d, ${ms}ms)`);
|
|
155
|
+
}
|
|
156
|
+
else {
|
|
157
|
+
console.log(`FAIL Embedding: wrong dimensions (got ${dim}, expected ${expectedDim})`);
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
catch (err) {
|
|
161
|
+
console.log(`FAIL Embedding: test failed (${err.message || err})`);
|
|
162
|
+
}
|
|
163
|
+
}
|
|
132
164
|
// Check summarizer server
|
|
133
165
|
const summarizerUp = yield fetch("http://127.0.0.1:8101/health")
|
|
134
166
|
.then((r) => r.ok)
|
|
@@ -256,6 +288,26 @@ exports.doctor = new commander_1.Command("doctor")
|
|
|
256
288
|
else if (projects.length > 0) {
|
|
257
289
|
console.log(`ok Projects: ${projects.length} registered, all directories exist`);
|
|
258
290
|
}
|
|
291
|
+
// Cache Coherence
|
|
292
|
+
if (projects.length > 0) {
|
|
293
|
+
console.log("\nCache Coherence\n");
|
|
294
|
+
try {
|
|
295
|
+
const { MetaCache } = yield Promise.resolve().then(() => __importStar(require("../lib/store/meta-cache")));
|
|
296
|
+
const mc = new MetaCache(config_1.PATHS.lmdbPath);
|
|
297
|
+
for (const project of projects.filter(p => p.status === "indexed")) {
|
|
298
|
+
const prefix = project.root.endsWith("/") ? project.root : `${project.root}/`;
|
|
299
|
+
const cachedCount = (yield mc.getKeysWithPrefix(prefix)).size;
|
|
300
|
+
const vectorCount = yield db.countDistinctFilesForPath(prefix);
|
|
301
|
+
if (cachedCount > 0) {
|
|
302
|
+
const pct = Math.round((vectorCount / cachedCount) * 100);
|
|
303
|
+
const status = pct >= 80 ? "ok" : "WARN";
|
|
304
|
+
console.log(`${status} ${project.name || path.basename(project.root)}: ${vectorCount} indexed / ${cachedCount} cached (${pct}%)`);
|
|
305
|
+
}
|
|
306
|
+
}
|
|
307
|
+
yield mc.close();
|
|
308
|
+
}
|
|
309
|
+
catch (_e) { }
|
|
310
|
+
}
|
|
259
311
|
}
|
|
260
312
|
// --fix auto-remediation
|
|
261
313
|
if (opts.fix) {
|
|
@@ -291,7 +343,7 @@ exports.doctor = new commander_1.Command("doctor")
|
|
|
291
343
|
}
|
|
292
344
|
yield db.close();
|
|
293
345
|
}
|
|
294
|
-
catch (
|
|
346
|
+
catch (_f) {
|
|
295
347
|
if (opts.agent) {
|
|
296
348
|
console.log("index_health\terror=could_not_check");
|
|
297
349
|
}
|
package/dist/commands/mcp.js
CHANGED
|
@@ -462,7 +462,18 @@ exports.mcp = new commander_1.Command("mcp")
|
|
|
462
462
|
_indexChildPid = (_a = child.pid) !== null && _a !== void 0 ? _a : null;
|
|
463
463
|
child.unref();
|
|
464
464
|
_indexProgress = `PID ${_indexChildPid}`;
|
|
465
|
+
const indexTimeout = setTimeout(() => {
|
|
466
|
+
try {
|
|
467
|
+
child.kill("SIGKILL");
|
|
468
|
+
}
|
|
469
|
+
catch (_a) { }
|
|
470
|
+
_indexing = false;
|
|
471
|
+
_indexProgress = "";
|
|
472
|
+
_indexChildPid = null;
|
|
473
|
+
console.error("[MCP] Background indexing timed out after 30 minutes");
|
|
474
|
+
}, 30 * 60 * 1000);
|
|
465
475
|
child.on("exit", (code) => {
|
|
476
|
+
clearTimeout(indexTimeout);
|
|
466
477
|
_indexing = false;
|
|
467
478
|
_indexProgress = "";
|
|
468
479
|
_indexChildPid = null;
|
|
@@ -503,6 +514,11 @@ exports.mcp = new commander_1.Command("mcp")
|
|
|
503
514
|
if (_indexing) {
|
|
504
515
|
return ok(`Indexing in progress (${_indexProgress}). Results may be incomplete or empty — try again shortly.`);
|
|
505
516
|
}
|
|
517
|
+
// Check if project is pending or has no chunks
|
|
518
|
+
const proj = (0, project_registry_1.getProject)(projectRoot);
|
|
519
|
+
if ((proj === null || proj === void 0 ? void 0 : proj.status) === "pending" || (proj && proj.chunkCount === 0)) {
|
|
520
|
+
return err("Project not indexed yet. Run `gmax add` to index it first.");
|
|
521
|
+
}
|
|
506
522
|
try {
|
|
507
523
|
const searcher = getSearcher();
|
|
508
524
|
// Determine path prefix and display root for relative paths
|
|
@@ -1864,9 +1880,6 @@ exports.mcp = new commander_1.Command("mcp")
|
|
|
1864
1880
|
case "semantic_search":
|
|
1865
1881
|
result = yield handleSemanticSearch(toolArgs, false);
|
|
1866
1882
|
break;
|
|
1867
|
-
case "search_all":
|
|
1868
|
-
result = yield handleSemanticSearch(toolArgs, true);
|
|
1869
|
-
break;
|
|
1870
1883
|
case "code_skeleton":
|
|
1871
1884
|
result = yield handleCodeSkeleton(toolArgs);
|
|
1872
1885
|
break;
|
package/dist/commands/status.js
CHANGED
|
@@ -125,7 +125,9 @@ Examples:
|
|
|
125
125
|
}
|
|
126
126
|
yield db.close();
|
|
127
127
|
}
|
|
128
|
-
catch (
|
|
128
|
+
catch (err) {
|
|
129
|
+
console.warn(`[status] Failed to query LanceDB for live chunk counts, using cached counts`);
|
|
130
|
+
}
|
|
129
131
|
if (projects.length === 0) {
|
|
130
132
|
if (opts.agent) {
|
|
131
133
|
console.log("(none)");
|
package/dist/index.js
CHANGED
|
@@ -79,8 +79,8 @@ commander_1.program
|
|
|
79
79
|
encoding: "utf-8",
|
|
80
80
|
})).version)
|
|
81
81
|
.option("--store <string>", "The store to use (auto-detected if not specified)", process.env.GMAX_STORE || undefined);
|
|
82
|
-
// Detect legacy per-project .gmax/
|
|
83
|
-
const legacyProjectData = [".gmax"
|
|
82
|
+
// Detect legacy per-project .gmax/ directory
|
|
83
|
+
const legacyProjectData = [".gmax"]
|
|
84
84
|
.map((d) => path.join(process.cwd(), d))
|
|
85
85
|
.find((d) => fs.existsSync(path.join(d, "lancedb")));
|
|
86
86
|
if (legacyProjectData) {
|
|
@@ -125,8 +125,10 @@ class Daemon {
|
|
|
125
125
|
try {
|
|
126
126
|
fs.mkdirSync(config_1.PATHS.cacheDir, { recursive: true });
|
|
127
127
|
fs.mkdirSync(config_1.PATHS.lancedbDir, { recursive: true });
|
|
128
|
+
console.log("[daemon] Opening LanceDB:", config_1.PATHS.lancedbDir);
|
|
128
129
|
this.vectorDb = new vector_db_1.VectorDB(config_1.PATHS.lancedbDir);
|
|
129
130
|
this.vectorDb.startMaintenanceLoop();
|
|
131
|
+
console.log("[daemon] Opening MetaCache:", config_1.PATHS.lmdbPath);
|
|
130
132
|
this.metaCache = new meta_cache_1.MetaCache(config_1.PATHS.lmdbPath);
|
|
131
133
|
}
|
|
132
134
|
catch (err) {
|
|
@@ -175,6 +177,10 @@ class Daemon {
|
|
|
175
177
|
let buf = "";
|
|
176
178
|
conn.on("data", (chunk) => {
|
|
177
179
|
buf += chunk.toString();
|
|
180
|
+
if (buf.length > 1000000) {
|
|
181
|
+
conn.destroy();
|
|
182
|
+
return;
|
|
183
|
+
}
|
|
178
184
|
const nl = buf.indexOf("\n");
|
|
179
185
|
if (nl === -1)
|
|
180
186
|
return;
|
|
@@ -231,12 +237,19 @@ class Daemon {
|
|
|
231
237
|
projectRoot: root,
|
|
232
238
|
vectorDb: this.vectorDb,
|
|
233
239
|
metaCache: this.metaCache,
|
|
234
|
-
onReindex: (files, ms) => {
|
|
240
|
+
onReindex: (files, ms) => __awaiter(this, void 0, void 0, function* () {
|
|
235
241
|
console.log(`[daemon:${path.basename(root)}] Reindexed ${files} file${files !== 1 ? "s" : ""} (${(ms / 1000).toFixed(1)}s)`);
|
|
236
242
|
// Update project registry so gmax status shows fresh data
|
|
237
243
|
const proj = (0, project_registry_1.getProject)(root);
|
|
238
244
|
if (proj) {
|
|
239
|
-
|
|
245
|
+
let chunkCount = proj.chunkCount;
|
|
246
|
+
try {
|
|
247
|
+
chunkCount = yield this.vectorDb.countRowsForPath(root);
|
|
248
|
+
}
|
|
249
|
+
catch (err) {
|
|
250
|
+
console.warn(`[daemon:${path.basename(root)}] Failed to query chunk count: ${err}`);
|
|
251
|
+
}
|
|
252
|
+
(0, project_registry_1.registerProject)(Object.assign(Object.assign({}, proj), { lastIndexed: new Date().toISOString(), chunkCount }));
|
|
240
253
|
}
|
|
241
254
|
// Back to watching after batch completes
|
|
242
255
|
(0, watcher_store_1.registerWatcher)({
|
|
@@ -247,7 +260,7 @@ class Daemon {
|
|
|
247
260
|
lastHeartbeat: Date.now(),
|
|
248
261
|
lastReindex: Date.now(),
|
|
249
262
|
});
|
|
250
|
-
},
|
|
263
|
+
}),
|
|
251
264
|
onActivity: () => {
|
|
252
265
|
this.lastActivity = Date.now();
|
|
253
266
|
// Mark as syncing while processing
|
|
@@ -292,7 +305,7 @@ class Daemon {
|
|
|
292
305
|
return __awaiter(this, void 0, void 0, function* () {
|
|
293
306
|
var _a, e_1, _b, _c;
|
|
294
307
|
const { walk } = yield Promise.resolve().then(() => __importStar(require("../index/walker")));
|
|
295
|
-
const { INDEXABLE_EXTENSIONS } = yield Promise.resolve().then(() => __importStar(require("../../config")));
|
|
308
|
+
const { INDEXABLE_EXTENSIONS, MAX_FILE_SIZE_BYTES } = yield Promise.resolve().then(() => __importStar(require("../../config")));
|
|
296
309
|
const { isFileCached } = yield Promise.resolve().then(() => __importStar(require("../utils/cache-check")));
|
|
297
310
|
const rootPrefix = root.endsWith("/") ? root : `${root}/`;
|
|
298
311
|
const cachedPaths = yield this.metaCache.getKeysWithPrefix(rootPrefix);
|
|
@@ -300,7 +313,7 @@ class Daemon {
|
|
|
300
313
|
let queued = 0;
|
|
301
314
|
try {
|
|
302
315
|
for (var _d = true, _e = __asyncValues(walk(root, {
|
|
303
|
-
additionalPatterns: ["**/.git/**", "**/.gmax/**"
|
|
316
|
+
additionalPatterns: ["**/.git/**", "**/.gmax/**"],
|
|
304
317
|
})), _f; _f = yield _e.next(), _a = _f.done, !_a; _d = true) {
|
|
305
318
|
_c = _f.value;
|
|
306
319
|
_d = false;
|
|
@@ -313,6 +326,9 @@ class Daemon {
|
|
|
313
326
|
seenPaths.add(absPath);
|
|
314
327
|
try {
|
|
315
328
|
const stats = yield fs.promises.stat(absPath);
|
|
329
|
+
// Skip files that are too large or empty — they'll never be indexed
|
|
330
|
+
if (stats.size === 0 || stats.size > MAX_FILE_SIZE_BYTES)
|
|
331
|
+
continue;
|
|
316
332
|
const cached = this.metaCache.get(absPath);
|
|
317
333
|
if (!isFileCached(cached, stats)) {
|
|
318
334
|
processor.handleFileEvent("change", absPath);
|
package/dist/lib/index/syncer.js
CHANGED
|
@@ -239,6 +239,10 @@ function initialSync(options) {
|
|
|
239
239
|
// timeouts wrote MetaCache but not vectors, compaction failure, etc.).
|
|
240
240
|
// Clear the stale cache entries so those files get re-embedded.
|
|
241
241
|
const vectorFileCount = yield vectorDb.countDistinctFilesForPath(rootPrefix);
|
|
242
|
+
if (projectKeys.size > 0) {
|
|
243
|
+
const pct = Math.round((vectorFileCount / projectKeys.size) * 100);
|
|
244
|
+
(0, logger_1.log)("index", `Coherence: ${vectorFileCount} vectors / ${projectKeys.size} cached (${pct}%)`);
|
|
245
|
+
}
|
|
242
246
|
if (projectKeys.size > 0 && vectorFileCount === 0) {
|
|
243
247
|
(0, logger_1.log)("index", `Stale cache detected: ${projectKeys.size} cached files but no vectors — clearing cache`);
|
|
244
248
|
for (const key of projectKeys) {
|
|
@@ -272,6 +276,15 @@ function initialSync(options) {
|
|
|
272
276
|
let total = 0;
|
|
273
277
|
onProgress === null || onProgress === void 0 ? void 0 : onProgress({ processed: 0, indexed: 0, total, filePath: "Scanning..." });
|
|
274
278
|
const pool = (0, pool_1.getWorkerPool)();
|
|
279
|
+
// Pre-flight: verify embedding pipeline is functional
|
|
280
|
+
const embedMode = process.env.GMAX_EMBED_MODE || "auto";
|
|
281
|
+
if (embedMode !== "cpu") {
|
|
282
|
+
const { isMlxUp } = yield Promise.resolve().then(() => __importStar(require("../workers/embeddings/mlx-client")));
|
|
283
|
+
const mlxReady = yield isMlxUp();
|
|
284
|
+
if (!mlxReady) {
|
|
285
|
+
(0, logger_1.log)("index", "WARNING: MLX embed server not running — using CPU embeddings (slower)");
|
|
286
|
+
}
|
|
287
|
+
}
|
|
275
288
|
// Get only this project's cached paths (scoped by prefix)
|
|
276
289
|
const cachedPaths = dryRun || treatAsEmptyCache
|
|
277
290
|
? new Set()
|
|
@@ -362,7 +375,7 @@ function initialSync(options) {
|
|
|
362
375
|
});
|
|
363
376
|
try {
|
|
364
377
|
for (var _e = true, _f = __asyncValues((0, walker_1.walk)(paths.root, {
|
|
365
|
-
additionalPatterns: ["**/.git/**", "**/.gmax/**"
|
|
378
|
+
additionalPatterns: ["**/.git/**", "**/.gmax/**"],
|
|
366
379
|
})), _g; _g = yield _f.next(), _a = _g.done, !_a; _e = true) {
|
|
367
380
|
_c = _g.value;
|
|
368
381
|
_e = false;
|
package/dist/lib/llm/server.js
CHANGED
|
@@ -46,6 +46,7 @@ exports.LlmServer = void 0;
|
|
|
46
46
|
const node_child_process_1 = require("node:child_process");
|
|
47
47
|
const fs = __importStar(require("node:fs"));
|
|
48
48
|
const http = __importStar(require("node:http"));
|
|
49
|
+
const path = __importStar(require("node:path"));
|
|
49
50
|
const config_1 = require("../../config");
|
|
50
51
|
const index_config_1 = require("../index/index-config");
|
|
51
52
|
const log_rotate_1 = require("../utils/log-rotate");
|
|
@@ -70,8 +71,30 @@ class LlmServer {
|
|
|
70
71
|
path: "/v1/models",
|
|
71
72
|
timeout: HEALTH_TIMEOUT_MS,
|
|
72
73
|
}, (res) => {
|
|
73
|
-
res.
|
|
74
|
-
|
|
74
|
+
if (res.statusCode !== 200) {
|
|
75
|
+
res.resume();
|
|
76
|
+
resolve(false);
|
|
77
|
+
return;
|
|
78
|
+
}
|
|
79
|
+
const chunks = [];
|
|
80
|
+
res.on("data", (chunk) => chunks.push(chunk));
|
|
81
|
+
res.on("end", () => {
|
|
82
|
+
var _a, _b;
|
|
83
|
+
try {
|
|
84
|
+
const body = JSON.parse(Buffer.concat(chunks).toString());
|
|
85
|
+
const runningModel = (_b = (_a = body === null || body === void 0 ? void 0 : body.data) === null || _a === void 0 ? void 0 : _a[0]) === null || _b === void 0 ? void 0 : _b.id;
|
|
86
|
+
if (runningModel) {
|
|
87
|
+
const configBasename = path.basename(this.config.model);
|
|
88
|
+
if (runningModel !== configBasename && !configBasename.includes(runningModel) && !runningModel.includes(configBasename)) {
|
|
89
|
+
console.log(`[llm] Model mismatch: running "${runningModel}" but config expects "${configBasename}"`);
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
catch (_c) {
|
|
94
|
+
// ignore parse errors — server is still healthy
|
|
95
|
+
}
|
|
96
|
+
resolve(true);
|
|
97
|
+
});
|
|
75
98
|
});
|
|
76
99
|
req.on("error", () => resolve(false));
|
|
77
100
|
req.on("timeout", () => {
|
|
@@ -442,6 +442,13 @@ class VectorDB {
|
|
|
442
442
|
return rows.length > 0;
|
|
443
443
|
});
|
|
444
444
|
}
|
|
445
|
+
countRowsForPath(pathPrefix) {
|
|
446
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
447
|
+
const table = yield this.ensureTable();
|
|
448
|
+
const prefix = pathPrefix.endsWith("/") ? pathPrefix : `${pathPrefix}/`;
|
|
449
|
+
return table.countRows(`path LIKE '${(0, filter_builder_1.escapeSqlString)(prefix)}%'`);
|
|
450
|
+
});
|
|
451
|
+
}
|
|
445
452
|
countDistinctFilesForPath(pathPrefix) {
|
|
446
453
|
return __awaiter(this, void 0, void 0, function* () {
|
|
447
454
|
const table = yield this.ensureTable();
|
|
@@ -60,7 +60,9 @@ function loadRegistry() {
|
|
|
60
60
|
}
|
|
61
61
|
/**
 * Persist the registry entries to REGISTRY_PATH as pretty-printed JSON.
 *
 * The JSON is first written to a temporary file next to the target and then
 * renamed over it, so a reader does not observe a partially written registry.
 * The temporary name includes the process id so that separate processes
 * saving at the same time do not write into the same temporary file.
 *
 * @param {*} entries - Value to serialize (the registry entries).
 * @throws Re-throws any error raised while creating the directory, writing
 *   the temporary file, or renaming it into place.
 */
function saveRegistry(entries) {
    fs.mkdirSync(path.dirname(REGISTRY_PATH), { recursive: true });
    const tmp = `${REGISTRY_PATH}.${process.pid}.tmp`;
    try {
        fs.writeFileSync(tmp, `${JSON.stringify(entries, null, 2)}\n`);
        fs.renameSync(tmp, REGISTRY_PATH);
    }
    catch (err) {
        // Do not leave a stray temp file behind; the original error is the
        // one the caller needs to see, so cleanup failures are ignored.
        try {
            fs.unlinkSync(tmp);
        }
        catch (_a) { }
        throw err;
    }
}
|
|
65
67
|
function registerProject(entry) {
|
|
66
68
|
const entries = loadRegistry();
|
|
@@ -47,6 +47,7 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
|
|
|
47
47
|
});
|
|
48
48
|
};
|
|
49
49
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
50
|
+
exports.isMlxUp = isMlxUp;
|
|
50
51
|
exports.mlxEmbed = mlxEmbed;
|
|
51
52
|
exports.resetMlxCache = resetMlxCache;
|
|
52
53
|
const http = __importStar(require("node:http"));
|
|
@@ -57,6 +58,8 @@ const EMBED_MODE = process.env.GMAX_EMBED_MODE || "auto";
|
|
|
57
58
|
let mlxAvailable = null;
|
|
58
59
|
let lastCheck = 0;
|
|
59
60
|
const CHECK_INTERVAL_MS = 30000;
|
|
61
|
+
let lastMlxWarning = 0;
|
|
62
|
+
const MLX_WARNING_INTERVAL_MS = 60000;
|
|
60
63
|
function postJSON(path, body) {
|
|
61
64
|
return new Promise((resolve) => {
|
|
62
65
|
const payload = JSON.stringify(body);
|
|
@@ -119,8 +122,15 @@ function isMlxUp() {
|
|
|
119
122
|
let result = yield checkHealth();
|
|
120
123
|
// On first check (cold start), retry once after 3s — server may still be loading
|
|
121
124
|
if (!result && mlxAvailable === null) {
|
|
125
|
+
console.log("[mlx] Embed server not ready, retrying in 3s...");
|
|
122
126
|
yield new Promise((r) => setTimeout(r, 3000));
|
|
123
127
|
result = yield checkHealth();
|
|
128
|
+
if (result) {
|
|
129
|
+
console.log("[mlx] Embed server ready");
|
|
130
|
+
}
|
|
131
|
+
else {
|
|
132
|
+
console.warn("[mlx] Embed server not available after retry");
|
|
133
|
+
}
|
|
124
134
|
}
|
|
125
135
|
mlxAvailable = result;
|
|
126
136
|
lastCheck = now;
|
|
@@ -137,9 +147,28 @@ function mlxEmbed(texts) {
|
|
|
137
147
|
return null;
|
|
138
148
|
if (!(yield isMlxUp()))
|
|
139
149
|
return null;
|
|
140
|
-
|
|
150
|
+
let postResult;
|
|
151
|
+
try {
|
|
152
|
+
postResult = yield postJSON("/embed", { texts });
|
|
153
|
+
}
|
|
154
|
+
catch (error) {
|
|
155
|
+
mlxAvailable = false;
|
|
156
|
+
const now = Date.now();
|
|
157
|
+
if (now - lastMlxWarning >= MLX_WARNING_INTERVAL_MS) {
|
|
158
|
+
console.error("[mlx] Embed server failed:", error.message || error);
|
|
159
|
+
lastMlxWarning = now;
|
|
160
|
+
}
|
|
161
|
+
return null;
|
|
162
|
+
}
|
|
163
|
+
const { ok, data } = postResult;
|
|
141
164
|
if (!ok || !(data === null || data === void 0 ? void 0 : data.vectors)) {
|
|
165
|
+
const wasPreviouslyAvailable = mlxAvailable !== false;
|
|
142
166
|
mlxAvailable = false;
|
|
167
|
+
const now = Date.now();
|
|
168
|
+
if (wasPreviouslyAvailable || now - lastMlxWarning >= MLX_WARNING_INTERVAL_MS) {
|
|
169
|
+
console.error("[mlx] Embed server failed: bad response (ok=" + ok + ", hasVectors=" + !!(data === null || data === void 0 ? void 0 : data.vectors) + ")");
|
|
170
|
+
lastMlxWarning = now;
|
|
171
|
+
}
|
|
143
172
|
return null;
|
|
144
173
|
}
|
|
145
174
|
return data.vectors.map((v) => new Float32Array(v));
|
|
@@ -56,6 +56,7 @@ const colbert_math_1 = require("./colbert-math");
|
|
|
56
56
|
const colbert_1 = require("./embeddings/colbert");
|
|
57
57
|
const granite_1 = require("./embeddings/granite");
|
|
58
58
|
const mlx_client_1 = require("./embeddings/mlx-client");
|
|
59
|
+
let mlxFallbackWarned = false;
|
|
59
60
|
const CACHE_DIR = config_1.PATHS.models;
|
|
60
61
|
const LOG_MODELS = process.env.GMAX_DEBUG_MODELS === "1" ||
|
|
61
62
|
process.env.GMAX_DEBUG_MODELS === "true";
|
|
@@ -105,7 +106,7 @@ class WorkerOrchestrator {
|
|
|
105
106
|
}
|
|
106
107
|
computeHybrid(texts, onProgress) {
|
|
107
108
|
return __awaiter(this, void 0, void 0, function* () {
|
|
108
|
-
var _a
|
|
109
|
+
var _a;
|
|
109
110
|
if (!texts.length)
|
|
110
111
|
return [];
|
|
111
112
|
yield this.ensureReady();
|
|
@@ -119,7 +120,12 @@ class WorkerOrchestrator {
|
|
|
119
120
|
onProgress === null || onProgress === void 0 ? void 0 : onProgress();
|
|
120
121
|
const batchTexts = texts.slice(i, i + BATCH_SIZE);
|
|
121
122
|
// Try MLX GPU server first, fall back to ONNX CPU
|
|
122
|
-
const
|
|
123
|
+
const mlxResult = yield (0, mlx_client_1.mlxEmbed)(batchTexts);
|
|
124
|
+
if (!mlxResult && !mlxFallbackWarned) {
|
|
125
|
+
console.warn("[embed] MLX unavailable, falling back to CPU (ONNX)");
|
|
126
|
+
mlxFallbackWarned = true;
|
|
127
|
+
}
|
|
128
|
+
const denseBatch = mlxResult !== null && mlxResult !== void 0 ? mlxResult : (yield this.granite.runBatch(batchTexts));
|
|
123
129
|
const colbertBatch = yield this.colbert.runBatch(batchTexts, denseBatch, this.vectorDimensions);
|
|
124
130
|
results.push(...colbertBatch);
|
|
125
131
|
}
|
package/dist/lib/workers/pool.js
CHANGED
|
@@ -275,20 +275,22 @@ class WorkerPool {
|
|
|
275
275
|
});
|
|
276
276
|
}
|
|
277
277
|
handleTaskTimeout(task, worker) {
|
|
278
|
+
var _a, _b, _c, _d;
|
|
278
279
|
if (this.destroyed || !this.tasks.has(task.id))
|
|
279
280
|
return;
|
|
280
281
|
this.clearTaskTimeout(task);
|
|
282
|
+
const filePath = (_d = (_b = (_a = task.payload) === null || _a === void 0 ? void 0 : _a.path) !== null && _b !== void 0 ? _b : (_c = task.payload) === null || _c === void 0 ? void 0 : _c.absolutePath) !== null && _d !== void 0 ? _d : "unknown";
|
|
281
283
|
if (task.method !== "processFile") {
|
|
282
|
-
console.warn(`[worker-pool] ${task.method} timed out after ${TASK_TIMEOUT_MS}ms; restarting worker.`);
|
|
284
|
+
console.warn(`[worker-pool] ${task.method} timed out after ${TASK_TIMEOUT_MS}ms on ${filePath}; restarting worker.`);
|
|
283
285
|
}
|
|
284
286
|
this.completeTask(task, null);
|
|
285
|
-
task.reject(new Error(`Worker task ${task.method} timed out after ${TASK_TIMEOUT_MS}ms`));
|
|
287
|
+
task.reject(new Error(`Worker task ${task.method} timed out after ${TASK_TIMEOUT_MS}ms on ${filePath}`));
|
|
286
288
|
worker.child.removeAllListeners("message");
|
|
287
289
|
worker.child.removeAllListeners("exit");
|
|
288
290
|
try {
|
|
289
291
|
worker.child.kill("SIGKILL");
|
|
290
292
|
}
|
|
291
|
-
catch (
|
|
293
|
+
catch (_e) { }
|
|
292
294
|
this.workers = this.workers.filter((w) => w !== worker);
|
|
293
295
|
if (!this.destroyed) {
|
|
294
296
|
this.spawnWorker();
|
|
@@ -29,7 +29,12 @@ warnings.filterwarnings("ignore", message=".*PyTorch.*")
|
|
|
29
29
|
warnings.filterwarnings("ignore", message=".*resource_tracker.*")
|
|
30
30
|
logging.getLogger("huggingface_hub").setLevel(logging.ERROR)
|
|
31
31
|
|
|
32
|
-
|
|
32
|
+
logging.basicConfig(
|
|
33
|
+
format="%(asctime)s %(message)s",
|
|
34
|
+
datefmt="%Y-%m-%dT%H:%M:%S",
|
|
35
|
+
level=logging.INFO,
|
|
36
|
+
)
|
|
37
|
+
logger = logging.getLogger("mlx-embed")
|
|
33
38
|
|
|
34
39
|
|
|
35
40
|
import mlx.core as mx
|
|
@@ -89,18 +94,18 @@ def embed_texts(texts: list[str]) -> mx.array:
|
|
|
89
94
|
|
|
90
95
|
def load_model():
    """Load the model and tokenizer for MODEL_ID into the module globals.

    Logs before loading and after a single warm-up call to ``embed_texts``,
    whose result is discarded.
    """
    global model, tokenizer
    # Lazy %-style arguments produce the same message text as an f-string.
    logger.info("[mlx-embed] Loading %s...", MODEL_ID)
    model, _unused = load(MODEL_ID)
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    embed_texts(["warm up"])
    logger.info("[mlx-embed] Model ready on Metal GPU.")
|
|
97
102
|
|
|
98
103
|
|
|
99
104
|
async def idle_watchdog():
    """Exit the process once the server has been idle for too long.

    Wakes every 60 seconds; when more than IDLE_TIMEOUT_S seconds have passed
    since ``last_activity``, logs a message and terminates with ``os._exit(0)``.
    """
    while True:
        await asyncio.sleep(60)
        idle_seconds = time.time() - last_activity
        if not idle_seconds > IDLE_TIMEOUT_S:
            continue
        logger.info("[mlx-embed] Idle timeout, shutting down")
        os._exit(0)
|
|
105
110
|
|
|
106
111
|
|
|
@@ -158,14 +163,14 @@ def main():
|
|
|
158
163
|
|
|
159
164
|
# Bail early if port is already taken
|
|
160
165
|
if is_port_in_use(PORT):
|
|
161
|
-
|
|
166
|
+
logger.info(f"[mlx-embed] Port {PORT} already in use — server is already running.")
|
|
162
167
|
return
|
|
163
168
|
|
|
164
|
-
|
|
169
|
+
logger.info(f"[mlx-embed] Starting on port {PORT}")
|
|
165
170
|
|
|
166
171
|
# Clean shutdown — exit immediately, skip uvicorn's noisy teardown
|
|
167
172
|
def handle_signal(sig, frame):
|
|
168
|
-
|
|
173
|
+
logger.info("[mlx-embed] Stopped.")
|
|
169
174
|
# Kill the resource_tracker child process before exit to prevent
|
|
170
175
|
# its spurious "leaked semaphore" warning (Python 3.13 bug)
|
|
171
176
|
try:
|
package/package.json
CHANGED
|
@@ -68,11 +68,39 @@ function startPythonServer(serverDir, scriptName, logName, processName) {
|
|
|
68
68
|
VIRTUAL_ENV: "",
|
|
69
69
|
CONDA_DEFAULT_ENV: "",
|
|
70
70
|
GMAX_PROCESS_NAME: processName || logName,
|
|
71
|
+
HF_TOKEN_PATH: process.env.HF_TOKEN_PATH || _path.join(require("node:os").homedir(), ".cache", "huggingface", "token"),
|
|
71
72
|
},
|
|
72
73
|
});
|
|
73
74
|
child.unref();
|
|
74
75
|
}
|
|
75
76
|
|
|
77
|
+
// --- Crash counter (Item 14) ---
|
|
78
|
+
const CRASH_FILE = _path.join(require("node:os").homedir(), ".gmax", "mlx-embed-crashes.json");
|
|
79
|
+
const MAX_CRASHES = 3;
|
|
80
|
+
const CRASH_WINDOW_MS = 10 * 60 * 1000; // 10 minutes
|
|
81
|
+
|
|
82
|
+
/**
 * Read the persisted crash counter from CRASH_FILE.
 *
 * The counter is treated as reset when the file is missing or unreadable,
 * when the last crash is older than CRASH_WINDOW_MS, or when the stored
 * timestamp is missing or unparseable. The last case matters because such a
 * record could otherwise never age out of the window.
 *
 * @returns {{count: number, lastCrash: (string|null)}} The current crash
 *   count and the stored timestamp of the last crash (null when reset).
 */
function readCrashCount() {
  try {
    const data = JSON.parse(fs.readFileSync(CRASH_FILE, "utf-8"));
    const lastCrashMs = new Date(data.lastCrash).getTime();
    // NaN means the timestamp is missing or invalid; a null timestamp
    // parses to the epoch and so falls through the window check below.
    if (Number.isNaN(lastCrashMs) || Date.now() - lastCrashMs > CRASH_WINDOW_MS) {
      return { count: 0, lastCrash: null }; // Window expired or unusable record, reset
    }
    const count = Number.isFinite(data.count) && data.count > 0 ? data.count : 0;
    return { count, lastCrash: data.lastCrash };
  } catch {
    // Missing file, invalid JSON, or a non-object payload: start from zero.
    return { count: 0, lastCrash: null };
  }
}
|
|
93
|
+
|
|
94
|
+
/**
 * Persist the crash counter to CRASH_FILE as JSON.
 *
 * The parent directory is created first so the write does not fail merely
 * because the directory does not exist yet. The write is best effort:
 * failures are ignored so that bookkeeping never raises into the caller.
 *
 * @param {number} count - Crash count to store.
 * @param {string|null} lastCrash - Timestamp of the most recent crash.
 */
function writeCrashCount(count, lastCrash) {
  try {
    fs.mkdirSync(_path.dirname(CRASH_FILE), { recursive: true });
    fs.writeFileSync(CRASH_FILE, JSON.stringify({ count, lastCrash }));
  } catch {
    // Deliberately ignored: failing to record a crash must not raise.
  }
}
|
|
99
|
+
|
|
100
|
+
/**
 * Clear the crash counter by deleting CRASH_FILE.
 *
 * Any error from the delete (for example, the file not existing) is ignored.
 */
function resetCrashCount() {
  try {
    fs.unlinkSync(CRASH_FILE);
  } catch {
    // Nothing to reset, or the file could not be removed — ignore.
  }
}
|
|
103
|
+
|
|
76
104
|
function isProjectRegistered() {
|
|
77
105
|
try {
|
|
78
106
|
const projectsPath = _path.join(
|
|
@@ -109,8 +137,30 @@ async function main() {
|
|
|
109
137
|
const serverDir = findMlxServerDir();
|
|
110
138
|
|
|
111
139
|
// Start MLX embed server (port 8100)
|
|
112
|
-
|
|
113
|
-
|
|
140
|
+
const embedRunning = await isServerRunning(8100);
|
|
141
|
+
if (serverDir && !embedRunning) {
|
|
142
|
+
const crashes = readCrashCount();
|
|
143
|
+
if (crashes.count < MAX_CRASHES) {
|
|
144
|
+
startPythonServer(serverDir, "server.py", "mlx-embed-server", "gmax-embed");
|
|
145
|
+
|
|
146
|
+
// Fire-and-forget health verification (Item 13)
|
|
147
|
+
(async () => {
|
|
148
|
+
const maxAttempts = 5;
|
|
149
|
+
const delayMs = 2000;
|
|
150
|
+
for (let i = 0; i < maxAttempts; i++) {
|
|
151
|
+
await new Promise(r => setTimeout(r, delayMs));
|
|
152
|
+
if (await isServerRunning(8100)) {
|
|
153
|
+
resetCrashCount();
|
|
154
|
+
return;
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
// Server didn't start after 10s — record crash
|
|
158
|
+
const c = readCrashCount();
|
|
159
|
+
writeCrashCount(c.count + 1, new Date().toISOString());
|
|
160
|
+
})();
|
|
161
|
+
}
|
|
162
|
+
} else if (embedRunning) {
|
|
163
|
+
resetCrashCount();
|
|
114
164
|
}
|
|
115
165
|
|
|
116
166
|
// Start LLM summarizer server (port 8101) — opt-in only
|