grepmax 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/mcp.js +67 -34
- package/dist/lib/search/searcher.js +1 -2
- package/dist/lib/store/vector-db.js +4 -1
- package/dist/lib/workers/orchestrator.js +18 -1
- package/dist/lib/workers/summarize/llm-client.js +165 -0
- package/mlx-embed-server/pyproject.toml +3 -1
- package/mlx-embed-server/summarizer.py +169 -0
- package/mlx-embed-server/uv.lock +2 -0
- package/package.json +1 -1
- package/plugins/grepmax/.claude-plugin/plugin.json +1 -1
- package/plugins/grepmax/hooks/start.js +15 -11
- package/plugins/grepmax/skills/gmax/SKILL.md +28 -42
package/dist/commands/mcp.js
CHANGED
|
@@ -81,7 +81,7 @@ const TOOLS = [
|
|
|
81
81
|
},
|
|
82
82
|
limit: {
|
|
83
83
|
type: "number",
|
|
84
|
-
description: "Max results to return (default
|
|
84
|
+
description: "Max results to return (default 3, max 50)",
|
|
85
85
|
},
|
|
86
86
|
root: {
|
|
87
87
|
type: "string",
|
|
@@ -91,6 +91,10 @@ const TOOLS = [
|
|
|
91
91
|
type: "string",
|
|
92
92
|
description: "Restrict search to files under this path prefix (e.g. 'src/auth/'). Relative to the search root.",
|
|
93
93
|
},
|
|
94
|
+
detail: {
|
|
95
|
+
type: "string",
|
|
96
|
+
description: "Output detail: 'pointer' (default, metadata only — symbol, location, role, calls) or 'code' (include 4-line code snippets)",
|
|
97
|
+
},
|
|
94
98
|
min_score: {
|
|
95
99
|
type: "number",
|
|
96
100
|
description: "Minimum relevance score (0-1). Results below this threshold are filtered out. Default: 0 (no filtering)",
|
|
@@ -115,7 +119,11 @@ const TOOLS = [
|
|
|
115
119
|
},
|
|
116
120
|
limit: {
|
|
117
121
|
type: "number",
|
|
118
|
-
description: "Max results to return (default
|
|
122
|
+
description: "Max results to return (default 3, max 50)",
|
|
123
|
+
},
|
|
124
|
+
detail: {
|
|
125
|
+
type: "string",
|
|
126
|
+
description: "Output detail: 'pointer' (default) or 'code' (include snippets)",
|
|
119
127
|
},
|
|
120
128
|
min_score: {
|
|
121
129
|
type: "number",
|
|
@@ -335,21 +343,21 @@ exports.mcp = new commander_1.Command("mcp")
|
|
|
335
343
|
const query = String(args.query || "");
|
|
336
344
|
if (!query)
|
|
337
345
|
return err("Missing required parameter: query");
|
|
338
|
-
const limit = Math.min(Math.max(Number(args.limit) ||
|
|
346
|
+
const limit = Math.min(Math.max(Number(args.limit) || 3, 1), 50);
|
|
339
347
|
yield ensureIndexReady();
|
|
340
348
|
try {
|
|
341
349
|
const searcher = getSearcher();
|
|
342
|
-
// Determine path prefix for
|
|
350
|
+
// Determine path prefix and display root for relative paths
|
|
343
351
|
let pathPrefix;
|
|
352
|
+
let displayRoot = projectRoot;
|
|
344
353
|
if (!searchAll) {
|
|
345
|
-
// Resolve search root — default to project root
|
|
346
354
|
const searchRoot = typeof args.root === "string"
|
|
347
355
|
? path.resolve(args.root)
|
|
348
356
|
: path.resolve(projectRoot);
|
|
357
|
+
displayRoot = searchRoot;
|
|
349
358
|
pathPrefix = searchRoot.endsWith("/")
|
|
350
359
|
? searchRoot
|
|
351
360
|
: `${searchRoot}/`;
|
|
352
|
-
// If a sub-path is specified, append it
|
|
353
361
|
if (typeof args.path === "string") {
|
|
354
362
|
pathPrefix = path.join(searchRoot, args.path);
|
|
355
363
|
if (!pathPrefix.endsWith("/"))
|
|
@@ -362,47 +370,72 @@ exports.mcp = new commander_1.Command("mcp")
|
|
|
362
370
|
}
|
|
363
371
|
const minScore = typeof args.min_score === "number" ? args.min_score : 0;
|
|
364
372
|
const maxPerFile = typeof args.max_per_file === "number" ? args.max_per_file : 0;
|
|
365
|
-
const
|
|
366
|
-
let
|
|
373
|
+
const detail = typeof args.detail === "string" ? args.detail : "pointer";
|
|
374
|
+
let results = result.data.map((r) => {
|
|
367
375
|
var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m;
|
|
368
|
-
const
|
|
369
|
-
const
|
|
370
|
-
?
|
|
371
|
-
:
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
const
|
|
376
|
-
const
|
|
377
|
-
const
|
|
378
|
-
const
|
|
379
|
-
|
|
380
|
-
|
|
376
|
+
const absPath = (_c = (_a = r.path) !== null && _a !== void 0 ? _a : (_b = r.metadata) === null || _b === void 0 ? void 0 : _b.path) !== null && _c !== void 0 ? _c : "";
|
|
377
|
+
// Strip the display root only on a whole path-segment match. A bare
// startsWith(displayRoot) also matches sibling directories (root "/repo"
// vs. file "/repo-old/x.js") and would then slice the path into "old/x.js".
const rootPrefix = displayRoot.endsWith("/")
    ? displayRoot
    : `${displayRoot}/`;
const relPath = absPath.startsWith(rootPrefix)
    ? absPath.slice(rootPrefix.length)
    : absPath;
|
|
380
|
+
const startLine = (_f = (_d = r.startLine) !== null && _d !== void 0 ? _d : (_e = r.generated_metadata) === null || _e === void 0 ? void 0 : _e.start_line) !== null && _f !== void 0 ? _f : 0;
|
|
381
|
+
const endLine = (_j = (_g = r.endLine) !== null && _g !== void 0 ? _g : (_h = r.generated_metadata) === null || _h === void 0 ? void 0 : _h.end_line) !== null && _j !== void 0 ? _j : 0;
|
|
382
|
+
const defs = toStringArray((_k = r.definedSymbols) !== null && _k !== void 0 ? _k : r.defined_symbols);
|
|
383
|
+
const refs = toStringArray((_l = r.referenced_symbols) !== null && _l !== void 0 ? _l : r.referencedSymbols);
|
|
384
|
+
const symbol = defs[0] || "(anonymous)";
|
|
385
|
+
const role = ((_m = r.role) !== null && _m !== void 0 ? _m : "IMPL").slice(0, 4).toUpperCase();
|
|
386
|
+
const exported = r.is_exported ? "exported " : "";
|
|
387
|
+
const complexity = typeof r.complexity === "number" && r.complexity > 0
|
|
388
|
+
? ` C:${Math.round(r.complexity)}`
|
|
389
|
+
: "";
|
|
390
|
+
const parentStr = r.parent_symbol
|
|
391
|
+
? `parent:${r.parent_symbol} `
|
|
392
|
+
: "";
|
|
393
|
+
const callsStr = refs.length > 0
|
|
394
|
+
? `calls:${refs.slice(0, 8).join(",")}`
|
|
395
|
+
: "";
|
|
396
|
+
const line1 = `${symbol} [${exported}${role}${complexity}] ${relPath}:${startLine + 1}-${endLine + 1}`;
|
|
397
|
+
const summaryStr = r.summary ? ` ${r.summary}` : "";
|
|
398
|
+
const line2 = parentStr || callsStr
|
|
399
|
+
? ` ${parentStr}${callsStr}`
|
|
400
|
+
: "";
|
|
401
|
+
let snippet = "";
|
|
402
|
+
if (detail === "code") {
|
|
403
|
+
const raw = typeof r.content === "string"
|
|
404
|
+
? r.content
|
|
405
|
+
: typeof r.text === "string"
|
|
406
|
+
? r.text
|
|
407
|
+
: "";
|
|
408
|
+
const lines = raw.split("\n").slice(0, 4);
|
|
409
|
+
snippet =
|
|
410
|
+
"\n" +
|
|
411
|
+
lines
|
|
412
|
+
.map((l, i) => `${startLine + i + 1}│${l}`)
|
|
413
|
+
.join("\n");
|
|
414
|
+
}
|
|
415
|
+
const text = line1 +
|
|
416
|
+
(summaryStr ? `\n${summaryStr}` : "") +
|
|
417
|
+
(line2 ? `\n${line2}` : "") +
|
|
418
|
+
snippet;
|
|
381
419
|
return {
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
score: typeof r.score === "number" ? +r.score.toFixed(3) : 0,
|
|
386
|
-
role: (_k = r.role) !== null && _k !== void 0 ? _k : "IMPLEMENTATION",
|
|
387
|
-
confidence: (_l = r.confidence) !== null && _l !== void 0 ? _l : "Unknown",
|
|
388
|
-
definedSymbols: toStringArray((_m = r.definedSymbols) !== null && _m !== void 0 ? _m : r.defined_symbols).slice(0, 5),
|
|
389
|
-
snippet,
|
|
420
|
+
absPath,
|
|
421
|
+
text,
|
|
422
|
+
score: typeof r.score === "number" ? r.score : 0,
|
|
390
423
|
};
|
|
391
424
|
});
|
|
392
425
|
if (minScore > 0) {
|
|
393
|
-
|
|
426
|
+
results = results.filter((r) => r.score >= minScore);
|
|
394
427
|
}
|
|
395
428
|
if (maxPerFile > 0) {
|
|
396
429
|
const counts = new Map();
|
|
397
|
-
|
|
398
|
-
const count = counts.get(r.
|
|
430
|
+
results = results.filter((r) => {
|
|
431
|
+
const count = counts.get(r.absPath) || 0;
|
|
399
432
|
if (count >= maxPerFile)
|
|
400
433
|
return false;
|
|
401
|
-
counts.set(r.
|
|
434
|
+
counts.set(r.absPath, count + 1);
|
|
402
435
|
return true;
|
|
403
436
|
});
|
|
404
437
|
}
|
|
405
|
-
return ok(
|
|
438
|
+
return ok(results.map((r) => r.text).join("\n\n"));
|
|
406
439
|
}
|
|
407
440
|
catch (e) {
|
|
408
441
|
const msg = e instanceof Error ? e.message : String(e);
|
|
@@ -127,8 +127,7 @@ class Searcher {
|
|
|
127
127
|
referenced_symbols: referencedSymbols,
|
|
128
128
|
imports,
|
|
129
129
|
exports,
|
|
130
|
-
|
|
131
|
-
// context: record.context_prev ? [record.context_prev] : [],
|
|
130
|
+
summary: record.summary,
|
|
132
131
|
};
|
|
133
132
|
}
|
|
134
133
|
applyStructureBoost(record, score, intent) {
|
|
@@ -97,6 +97,7 @@ class VectorDB {
|
|
|
97
97
|
role: "",
|
|
98
98
|
parent_symbol: "",
|
|
99
99
|
file_skeleton: "",
|
|
100
|
+
summary: "",
|
|
100
101
|
};
|
|
101
102
|
}
|
|
102
103
|
validateSchema(table) {
|
|
@@ -138,6 +139,7 @@ class VectorDB {
|
|
|
138
139
|
new apache_arrow_1.Field("role", new apache_arrow_1.Utf8(), true),
|
|
139
140
|
new apache_arrow_1.Field("parent_symbol", new apache_arrow_1.Utf8(), true),
|
|
140
141
|
new apache_arrow_1.Field("file_skeleton", new apache_arrow_1.Utf8(), true),
|
|
142
|
+
new apache_arrow_1.Field("summary", new apache_arrow_1.Utf8(), true),
|
|
141
143
|
]);
|
|
142
144
|
}
|
|
143
145
|
ensureTable() {
|
|
@@ -201,7 +203,7 @@ class VectorDB {
|
|
|
201
203
|
return [];
|
|
202
204
|
};
|
|
203
205
|
const rows = records.map((rec) => {
|
|
204
|
-
var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m, _o;
|
|
206
|
+
var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m, _o, _p;
|
|
205
207
|
const vec = (() => {
|
|
206
208
|
const arr = toNumberArray(rec.vector);
|
|
207
209
|
if (arr.length < this.vectorDim) {
|
|
@@ -241,6 +243,7 @@ class VectorDB {
|
|
|
241
243
|
role: (_l = rec.role) !== null && _l !== void 0 ? _l : "",
|
|
242
244
|
parent_symbol: (_m = rec.parent_symbol) !== null && _m !== void 0 ? _m : "",
|
|
243
245
|
file_skeleton: (_o = rec.file_skeleton) !== null && _o !== void 0 ? _o : "",
|
|
246
|
+
summary: (_p = rec.summary) !== null && _p !== void 0 ? _p : null,
|
|
244
247
|
};
|
|
245
248
|
});
|
|
246
249
|
try {
|
|
@@ -49,6 +49,7 @@ const transformers_1 = require("@huggingface/transformers");
|
|
|
49
49
|
const ort = __importStar(require("onnxruntime-node"));
|
|
50
50
|
const uuid_1 = require("uuid");
|
|
51
51
|
const config_1 = require("../../config");
|
|
52
|
+
const llm_client_1 = require("./summarize/llm-client");
|
|
52
53
|
const chunker_1 = require("../index/chunker");
|
|
53
54
|
const skeleton_1 = require("../skeleton");
|
|
54
55
|
const file_utils_1 = require("../utils/file-utils");
|
|
@@ -213,7 +214,23 @@ class WorkerOrchestrator {
|
|
|
213
214
|
if (!chunks.length)
|
|
214
215
|
return { vectors: [], hash, mtimeMs, size };
|
|
215
216
|
const preparedChunks = this.toPreparedChunks(input.path, hash, chunks, skeletonResult.success ? skeletonResult.skeleton : undefined);
|
|
216
|
-
|
|
217
|
+
// Run embedding and summarization in parallel
|
|
218
|
+
const lang = path.extname(input.path).replace(/^\./, "") || "unknown";
|
|
219
|
+
const [hybrids, summaries] = yield Promise.all([
|
|
220
|
+
this.computeHybrid(preparedChunks.map((chunk) => chunk.content), onProgress),
|
|
221
|
+
(0, llm_client_1.summarizeChunks)(preparedChunks.map((c) => ({
|
|
222
|
+
code: c.content,
|
|
223
|
+
language: lang,
|
|
224
|
+
file: c.path,
|
|
225
|
+
}))),
|
|
226
|
+
]);
|
|
227
|
+
// Attach summaries if available
|
|
228
|
+
if (summaries) {
|
|
229
|
+
for (let i = 0; i < preparedChunks.length; i++) {
|
|
230
|
+
if (summaries[i])
|
|
231
|
+
preparedChunks[i].summary = summaries[i];
|
|
232
|
+
}
|
|
233
|
+
}
|
|
217
234
|
const vectors = preparedChunks.map((chunk, idx) => {
|
|
218
235
|
var _a;
|
|
219
236
|
const hybrid = (_a = hybrids[idx]) !== null && _a !== void 0 ? _a : {
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* LLM summarizer HTTP client.
|
|
4
|
+
* Talks to the MLX summarizer server to generate code summaries.
|
|
5
|
+
* Returns null if server isn't running — caller skips summaries gracefully.
|
|
6
|
+
*/
|
|
7
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
8
|
+
if (k2 === undefined) k2 = k;
|
|
9
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
10
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
11
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
12
|
+
}
|
|
13
|
+
Object.defineProperty(o, k2, desc);
|
|
14
|
+
}) : (function(o, m, k, k2) {
|
|
15
|
+
if (k2 === undefined) k2 = k;
|
|
16
|
+
o[k2] = m[k];
|
|
17
|
+
}));
|
|
18
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
19
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
20
|
+
}) : function(o, v) {
|
|
21
|
+
o["default"] = v;
|
|
22
|
+
});
|
|
23
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
24
|
+
var ownKeys = function(o) {
|
|
25
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
26
|
+
var ar = [];
|
|
27
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
28
|
+
return ar;
|
|
29
|
+
};
|
|
30
|
+
return ownKeys(o);
|
|
31
|
+
};
|
|
32
|
+
return function (mod) {
|
|
33
|
+
if (mod && mod.__esModule) return mod;
|
|
34
|
+
var result = {};
|
|
35
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
36
|
+
__setModuleDefault(result, mod);
|
|
37
|
+
return result;
|
|
38
|
+
};
|
|
39
|
+
})();
|
|
40
|
+
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
41
|
+
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
42
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
43
|
+
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
44
|
+
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
45
|
+
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
46
|
+
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
47
|
+
});
|
|
48
|
+
};
|
|
49
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
50
|
+
exports.summarizeChunks = summarizeChunks;
|
|
51
|
+
exports.resetSummarizerCache = resetSummarizerCache;
|
|
52
|
+
const http = __importStar(require("node:http"));
|
|
53
|
+
const SUMMARY_PORT = parseInt(process.env.GMAX_SUMMARY_PORT || "8101", 10);
|
|
54
|
+
const SUMMARY_HOST = "127.0.0.1";
|
|
55
|
+
const SUMMARY_TIMEOUT_MS = 120000; // 2 min — batches of chunks take time
|
|
56
|
+
let summarizerAvailable = null;
|
|
57
|
+
let lastCheck = 0;
|
|
58
|
+
const CHECK_INTERVAL_MS = 5000; // short cache — retry quickly if server just started
|
|
59
|
+
/**
 * POST a JSON body to the summarizer server and parse the JSON reply.
 *
 * Network-level problems (connection error, socket timeout, response aborted
 * mid-body) and unparsable response bodies all resolve to `{ ok: false }`
 * instead of rejecting, so callers can simply skip summaries.
 *
 * @param {string} path - Request path, e.g. "/summarize".
 * @param {unknown} body - JSON-serializable request payload.
 * @returns {Promise<{ok: boolean, data?: any}>} `ok` is true only for an
 *   HTTP 200 reply; `data` is the parsed body whenever parsing succeeded.
 */
function postJSON(path, body) {
    return new Promise((resolve) => {
        let settled = false;
        // One failure can raise several events (e.g. "timeout" followed by the
        // "error" triggered by destroy()); only the first outcome is reported.
        const settle = (result) => {
            if (settled)
                return;
            settled = true;
            resolve(result);
        };
        const payload = JSON.stringify(body);
        const req = http.request({
            hostname: SUMMARY_HOST,
            port: SUMMARY_PORT,
            path,
            method: "POST",
            headers: {
                "Content-Type": "application/json",
                "Content-Length": Buffer.byteLength(payload),
            },
            timeout: SUMMARY_TIMEOUT_MS,
        }, (res) => {
            const chunks = [];
            res.on("data", (chunk) => chunks.push(chunk));
            res.on("end", () => {
                try {
                    const data = JSON.parse(Buffer.concat(chunks).toString("utf-8"));
                    settle({ ok: res.statusCode === 200, data });
                }
                catch (_a) {
                    settle({ ok: false });
                }
            });
            // A connection dropped after the headers arrived surfaces on the
            // response, not the request. Without these listeners the promise
            // would never settle and indexing would hang on this chunk.
            res.on("error", () => settle({ ok: false }));
            res.on("close", () => settle({ ok: false }));
        });
        req.on("error", () => settle({ ok: false }));
        req.on("timeout", () => {
            req.destroy();
            settle({ ok: false });
        });
        req.write(payload);
        req.end();
    });
}
|
|
94
|
+
function isSummarizerUp() {
|
|
95
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
96
|
+
const now = Date.now();
|
|
97
|
+
if (summarizerAvailable !== null && now - lastCheck < CHECK_INTERVAL_MS) {
|
|
98
|
+
return summarizerAvailable;
|
|
99
|
+
}
|
|
100
|
+
const result = yield new Promise((resolve) => {
|
|
101
|
+
const req = http.get({
|
|
102
|
+
hostname: SUMMARY_HOST,
|
|
103
|
+
port: SUMMARY_PORT,
|
|
104
|
+
path: "/health",
|
|
105
|
+
timeout: 5000,
|
|
106
|
+
}, (res) => {
|
|
107
|
+
res.resume();
|
|
108
|
+
resolve(res.statusCode === 200);
|
|
109
|
+
});
|
|
110
|
+
req.on("error", () => resolve(false));
|
|
111
|
+
req.on("timeout", () => {
|
|
112
|
+
req.destroy();
|
|
113
|
+
resolve(false);
|
|
114
|
+
});
|
|
115
|
+
});
|
|
116
|
+
summarizerAvailable = result;
|
|
117
|
+
lastCheck = now;
|
|
118
|
+
return result;
|
|
119
|
+
});
|
|
120
|
+
}
|
|
121
|
+
/**
 * Generate summaries for code chunks via the local LLM server.
 *
 * The /health endpoint is probed only while the server is not known to be up
 * (first call, or after a failure once CHECK_INTERVAL_MS has elapsed). Once
 * it is up, chunks are posted one per request with no further health checks.
 *
 * @param {Array<{code: string, language: string, file: string}>} chunks
 * @returns {Promise<string[] | null>} One entry per input chunk, in input
 *   order ("" where no summary could be produced), or null when the server
 *   is unavailable.
 */
function summarizeChunks(chunks) {
    return __awaiter(this, void 0, void 0, function* () {
        if (chunks.length === 0)
            return [];
        if (summarizerAvailable !== true) {
            // A failed probe is still fresh: don't hit the port again yet.
            if (summarizerAvailable === false &&
                Date.now() - lastCheck < CHECK_INTERVAL_MS) {
                return null;
            }
            // isSummarizerUp() records its result in summarizerAvailable/lastCheck.
            if (!(yield isSummarizerUp()))
                return null;
        }
        const summaries = [];
        for (const chunk of chunks) {
            const { ok, data } = yield postJSON("/summarize", {
                chunks: [chunk],
            });
            if (!ok && data === undefined) {
                // No parsable reply at all (connection refused, timeout, or a
                // non-JSON body). Retrying each remaining chunk could cost a
                // full request timeout apiece, so mark the server as down,
                // pad the result to the expected length and stop.
                summarizerAvailable = false;
                lastCheck = Date.now();
                while (summaries.length < chunks.length)
                    summaries.push("");
                break;
            }
            const first = ok && data && Array.isArray(data.summaries)
                ? data.summaries[0]
                : undefined;
            // Callers store the value as text, so anything but a string is dropped.
            summaries.push(typeof first === "string" ? first : "");
        }
        return summaries;
    });
}
|
|
162
|
+
function resetSummarizerCache() {
|
|
163
|
+
summarizerAvailable = null;
|
|
164
|
+
lastCheck = 0;
|
|
165
|
+
}
|
|
@@ -1,13 +1,15 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "mlx-embed-server"
|
|
3
3
|
version = "0.1.0"
|
|
4
|
-
description = "MLX-accelerated embedding server for grepmax"
|
|
4
|
+
description = "MLX-accelerated embedding and summarization server for grepmax"
|
|
5
5
|
requires-python = ">=3.13"
|
|
6
6
|
dependencies = [
|
|
7
7
|
"fastapi>=0.115.0",
|
|
8
8
|
"uvicorn>=0.34.0",
|
|
9
9
|
"mlx-embeddings @ git+https://github.com/Blaizzy/mlx-embeddings.git",
|
|
10
|
+
"mlx-lm>=0.22.0",
|
|
10
11
|
]
|
|
11
12
|
|
|
12
13
|
[project.scripts]
|
|
13
14
|
mlx-embed-server = "server:main"
|
|
15
|
+
mlx-summarizer = "summarizer:main"
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
"""MLX-accelerated code summarizer for grepmax.
|
|
2
|
+
|
|
3
|
+
Runs Qwen3-Coder-30B-A3B on Apple Silicon GPU to generate one-line
|
|
4
|
+
summaries of code chunks during indexing. Summaries are stored in
|
|
5
|
+
LanceDB and returned in search results.
|
|
6
|
+
|
|
7
|
+
IMPORTANT: All MLX operations must run on a single thread. FastAPI async
|
|
8
|
+
endpoints run on the event loop thread, avoiding Metal thread-safety crashes.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import asyncio
|
|
12
|
+
import logging
|
|
13
|
+
import os
|
|
14
|
+
import signal
|
|
15
|
+
import socket
|
|
16
|
+
import time
|
|
17
|
+
import warnings
|
|
18
|
+
from contextlib import asynccontextmanager
|
|
19
|
+
|
|
20
|
+
os.environ["TRANSFORMERS_NO_ADVISORY_WARNINGS"] = "1"
|
|
21
|
+
os.environ["HF_HUB_DISABLE_IMPLICIT_TOKEN"] = "1"
|
|
22
|
+
os.environ["HF_HUB_VERBOSITY"] = "error"
|
|
23
|
+
os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "1"
|
|
24
|
+
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
|
25
|
+
warnings.filterwarnings("ignore", message=".*PyTorch.*")
|
|
26
|
+
warnings.filterwarnings("ignore", message=".*resource_tracker.*")
|
|
27
|
+
logging.getLogger("huggingface_hub").setLevel(logging.ERROR)
|
|
28
|
+
|
|
29
|
+
import mlx.core as mx
|
|
30
|
+
import uvicorn
|
|
31
|
+
from fastapi import FastAPI
|
|
32
|
+
from mlx_lm import load, generate
|
|
33
|
+
from pydantic import BaseModel
|
|
34
|
+
|
|
35
|
+
MODEL_ID = os.environ.get(
|
|
36
|
+
"MLX_SUMMARY_MODEL",
|
|
37
|
+
"lmstudio-community/Qwen3-Coder-30B-A3B-Instruct-MLX-4bit",
|
|
38
|
+
)
|
|
39
|
+
# Listen port. MLX_SUMMARY_PORT takes precedence; GMAX_SUMMARY_PORT is accepted
# as a fallback because the Node client reads that variable, so setting only
# one of the two would otherwise leave client and server on different ports.
PORT = int(
    os.environ.get("MLX_SUMMARY_PORT")
    or os.environ.get("GMAX_SUMMARY_PORT")
    or "8101"
)
|
|
40
|
+
IDLE_TIMEOUT_S = int(os.environ.get("MLX_SUMMARY_IDLE_TIMEOUT", "1800")) # 30 min
|
|
41
|
+
MAX_TOKENS = 100 # summaries should be one line
|
|
42
|
+
|
|
43
|
+
model = None
|
|
44
|
+
tokenizer = None
|
|
45
|
+
last_activity = time.time()
|
|
46
|
+
|
|
47
|
+
_mlx_lock = asyncio.Lock()
|
|
48
|
+
|
|
49
|
+
SYSTEM_PROMPT = """You are a code summarizer. Given a code chunk, produce exactly one line describing what it does.
|
|
50
|
+
Be specific about business logic, services, and side effects. Do not describe syntax.
|
|
51
|
+
Do not use phrases like "This function" or "This code". Start with a verb."""
|
|
52
|
+
|
|
53
|
+
def build_prompt(code: str, language: str, file: str) -> str:
    """Build the user message for one code chunk.

    The chunk is wrapped in a backtick fence that is always longer than the
    longest run of backticks inside ``code`` (minimum three), so chunks that
    themselves contain fenced blocks (e.g. Markdown files) cannot terminate
    the fence early.

    Args:
        code: Source text of the chunk.
        language: Language label shown to the model.
        file: File path shown to the model.

    Returns:
        The prompt text: a ``Language:``/``File:`` header followed by the
        fenced chunk.
    """
    longest_run = 0
    current_run = 0
    for ch in code:
        current_run = current_run + 1 if ch == "`" else 0
        if current_run > longest_run:
            longest_run = current_run
    fence = "`" * max(3, longest_run + 1)
    return f"Language: {language}\nFile: {file}\n\n{fence}\n{code}\n{fence}"
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def is_port_in_use(port: int) -> bool:
|
|
58
|
+
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
|
|
59
|
+
return s.connect_ex(("127.0.0.1", port)) == 0
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def summarize_chunk(code: str, language: str, file: str) -> str:
|
|
63
|
+
"""Generate a one-line summary for a code chunk."""
|
|
64
|
+
messages = [
|
|
65
|
+
{"role": "system", "content": SYSTEM_PROMPT},
|
|
66
|
+
{"role": "user", "content": build_prompt(code, language, file)},
|
|
67
|
+
]
|
|
68
|
+
prompt = tokenizer.apply_chat_template(
|
|
69
|
+
messages, tokenize=False, add_generation_prompt=True
|
|
70
|
+
)
|
|
71
|
+
response = generate(
|
|
72
|
+
model,
|
|
73
|
+
tokenizer,
|
|
74
|
+
prompt=prompt,
|
|
75
|
+
max_tokens=MAX_TOKENS,
|
|
76
|
+
verbose=False,
|
|
77
|
+
)
|
|
78
|
+
# Take first line only, strip whitespace
|
|
79
|
+
summary = response.strip().split("\n")[0].strip()
|
|
80
|
+
# Remove common prefixes the model might add
|
|
81
|
+
for prefix in ["Summary: ", "summary: ", "- "]:
|
|
82
|
+
if summary.startswith(prefix):
|
|
83
|
+
summary = summary[len(prefix):]
|
|
84
|
+
return summary
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def load_model():
|
|
88
|
+
global model, tokenizer
|
|
89
|
+
print(f"[summarizer] Loading {MODEL_ID}...")
|
|
90
|
+
model, tokenizer = load(MODEL_ID)
|
|
91
|
+
# Warm up
|
|
92
|
+
_ = summarize_chunk("function hello() { return 'world'; }", "javascript", "test.js")
|
|
93
|
+
print("[summarizer] Model ready on Metal GPU.")
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
@asynccontextmanager
|
|
97
|
+
async def lifespan(app: FastAPI):
|
|
98
|
+
load_model()
|
|
99
|
+
yield
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
app = FastAPI(lifespan=lifespan)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
class ChunkInput(BaseModel):
|
|
106
|
+
code: str
|
|
107
|
+
language: str = "unknown"
|
|
108
|
+
file: str = ""
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
class SummarizeRequest(BaseModel):
|
|
112
|
+
chunks: list[ChunkInput]
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
class SummarizeResponse(BaseModel):
|
|
116
|
+
summaries: list[str]
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
@app.post("/summarize")
async def summarize(request: SummarizeRequest) -> SummarizeResponse:
    """Summarize every chunk in the request.

    Returns one summary per input chunk, in input order. A chunk whose
    summarization raises yields an empty string, so callers that test the
    summary for truthiness skip it instead of storing an error message as if
    it were a real summary. The failure is printed to the server log.
    """
    global last_activity
    last_activity = time.time()

    summaries = []
    async with _mlx_lock:
        for chunk in request.chunks:
            try:
                summary = summarize_chunk(chunk.code, chunk.language, chunk.file)
            except Exception as e:
                print(f"[summarizer] Summary failed for {chunk.file or '<unknown>'}: {e}")
                summary = ""
            summaries.append(summary)
            # summarize_chunk() is synchronous and occupies the event loop
            # while it runs. Yield between chunks so other requests (such as
            # /health) can be served, while keeping all MLX work on this thread.
            await asyncio.sleep(0)

    return SummarizeResponse(summaries=summaries)
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
@app.get("/health")
|
|
137
|
+
async def health():
|
|
138
|
+
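# Health check must NOT acquire _mlx_lock, so it never queues behind pending
|
|
139
|
+
# /summarize requests. NOTE: summarize_chunk() runs synchronously on the event loop, so a reply can still be delayed until the chunk in progress finishes.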
|
|
140
|
+
global last_activity
|
|
141
|
+
last_activity = time.time()
|
|
142
|
+
return {"status": "ok", "model": MODEL_ID}
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def main():
|
|
146
|
+
if is_port_in_use(PORT):
|
|
147
|
+
print(f"[summarizer] Port {PORT} already in use — server is already running.")
|
|
148
|
+
return
|
|
149
|
+
|
|
150
|
+
print(f"[summarizer] Starting on port {PORT}")
|
|
151
|
+
|
|
152
|
+
def handle_signal(sig, frame):
|
|
153
|
+
print("[summarizer] Stopped.")
|
|
154
|
+
try:
|
|
155
|
+
from multiprocessing.resource_tracker import _resource_tracker
|
|
156
|
+
if _resource_tracker._pid is not None:
|
|
157
|
+
os.kill(_resource_tracker._pid, signal.SIGKILL)
|
|
158
|
+
except Exception:
|
|
159
|
+
pass
|
|
160
|
+
os._exit(0)
|
|
161
|
+
|
|
162
|
+
signal.signal(signal.SIGINT, handle_signal)
|
|
163
|
+
signal.signal(signal.SIGTERM, handle_signal)
|
|
164
|
+
|
|
165
|
+
uvicorn.run(app, host="127.0.0.1", port=PORT, log_level="warning")
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
if __name__ == "__main__":
|
|
169
|
+
main()
|
package/mlx-embed-server/uv.lock
CHANGED
|
@@ -610,6 +610,7 @@ source = { virtual = "." }
|
|
|
610
610
|
dependencies = [
|
|
611
611
|
{ name = "fastapi" },
|
|
612
612
|
{ name = "mlx-embeddings" },
|
|
613
|
+
{ name = "mlx-lm" },
|
|
613
614
|
{ name = "uvicorn" },
|
|
614
615
|
]
|
|
615
616
|
|
|
@@ -617,6 +618,7 @@ dependencies = [
|
|
|
617
618
|
requires-dist = [
|
|
618
619
|
{ name = "fastapi", specifier = ">=0.115.0" },
|
|
619
620
|
{ name = "mlx-embeddings", git = "https://github.com/Blaizzy/mlx-embeddings.git" },
|
|
621
|
+
{ name = "mlx-lm", specifier = ">=0.22.0" },
|
|
620
622
|
{ name = "uvicorn", specifier = ">=0.34.0" },
|
|
621
623
|
]
|
|
622
624
|
|
package/package.json
CHANGED
|
@@ -3,10 +3,10 @@ const _path = require("node:path");
|
|
|
3
3
|
const http = require("node:http");
|
|
4
4
|
const { spawn } = require("node:child_process");
|
|
5
5
|
|
|
6
|
-
function
|
|
6
|
+
function isServerRunning(port) {
|
|
7
7
|
return new Promise((resolve) => {
|
|
8
8
|
const req = http.get(
|
|
9
|
-
{ hostname: "127.0.0.1", port
|
|
9
|
+
{ hostname: "127.0.0.1", port, path: "/health", timeout: 1000 },
|
|
10
10
|
(res) => {
|
|
11
11
|
res.resume();
|
|
12
12
|
resolve(res.statusCode === 200);
|
|
@@ -20,17 +20,17 @@ function isMlxRunning() {
|
|
|
20
20
|
});
|
|
21
21
|
}
|
|
22
22
|
|
|
23
|
-
function
|
|
23
|
+
function startPythonServer(scriptName, logName) {
|
|
24
24
|
const pluginRoot = __dirname.replace(/\/hooks$/, "");
|
|
25
25
|
const gmaxRoot = _path.resolve(pluginRoot, "../..");
|
|
26
26
|
const serverDir = _path.join(gmaxRoot, "mlx-embed-server");
|
|
27
27
|
|
|
28
|
-
if (!fs.existsSync(_path.join(serverDir,
|
|
28
|
+
if (!fs.existsSync(_path.join(serverDir, scriptName))) return;
|
|
29
29
|
|
|
30
|
-
const logPath =
|
|
30
|
+
const logPath = `/tmp/${logName}.log`;
|
|
31
31
|
const out = fs.openSync(logPath, "a");
|
|
32
32
|
|
|
33
|
-
const child = spawn("uv", ["run", "python",
|
|
33
|
+
const child = spawn("uv", ["run", "python", scriptName], {
|
|
34
34
|
cwd: serverDir,
|
|
35
35
|
detached: true,
|
|
36
36
|
stdio: ["ignore", out, out],
|
|
@@ -40,17 +40,21 @@ function startMlxServer() {
|
|
|
40
40
|
}
|
|
41
41
|
|
|
42
42
|
async function main() {
|
|
43
|
-
// Start MLX embed server if not running (set GMAX_EMBED_MODE=cpu to skip)
|
|
44
43
|
const embedMode =
|
|
45
44
|
process.env.GMAX_EMBED_MODE || process.env.OSGREP_EMBED_MODE || "auto";
|
|
45
|
+
|
|
46
46
|
if (embedMode !== "cpu") {
|
|
47
|
-
|
|
48
|
-
if (!
|
|
49
|
-
|
|
47
|
+
// Start MLX embed server (port 8100)
|
|
48
|
+
if (!(await isServerRunning(8100))) {
|
|
49
|
+
startPythonServer("server.py", "mlx-embed-server");
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
// Start LLM summarizer server (port 8101)
|
|
53
|
+
if (!(await isServerRunning(8101))) {
|
|
54
|
+
startPythonServer("summarizer.py", "mlx-summarizer");
|
|
50
55
|
}
|
|
51
56
|
}
|
|
52
57
|
|
|
53
|
-
// MCP server handles indexing and search directly — no daemon needed
|
|
54
58
|
const response = {
|
|
55
59
|
hookSpecificOutput: {
|
|
56
60
|
hookEventName: "SessionStart",
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: gmax
|
|
3
3
|
description: Semantic code search. Use alongside grep - grep for exact strings, gmax for concepts.
|
|
4
|
-
allowed-tools: "mcp__grepmax__semantic_search, mcp__grepmax__code_skeleton, mcp__grepmax__trace_calls, mcp__grepmax__list_symbols, mcp__grepmax__index_status, Bash(gmax:*), Read"
|
|
4
|
+
allowed-tools: "mcp__grepmax__semantic_search, mcp__grepmax__search_all, mcp__grepmax__code_skeleton, mcp__grepmax__trace_calls, mcp__grepmax__list_symbols, mcp__grepmax__index_status, Bash(gmax:*), Read"
|
|
5
5
|
---
|
|
6
6
|
|
|
7
7
|
## What gmax does
|
|
@@ -11,24 +11,35 @@ Finds code by meaning. When you'd ask a colleague "where do we handle auth?", us
|
|
|
11
11
|
- grep/ripgrep: exact string match, fast
|
|
12
12
|
- gmax: concept match, finds code you couldn't grep for
|
|
13
13
|
|
|
14
|
-
## MCP tools
|
|
15
|
-
|
|
16
|
-
Use these structured tools when available — they return typed JSON and don't need output parsing.
|
|
14
|
+
## MCP tools
|
|
17
15
|
|
|
18
16
|
### semantic_search
|
|
19
|
-
Search code by meaning. Returns
|
|
17
|
+
Search code by meaning. Returns **pointers** by default — symbol, file:line, role, calls. No code snippets unless requested.
|
|
20
18
|
- `query` (required): Natural language. Be specific — more words = better results.
|
|
21
|
-
- `limit` (optional): Max results (default
|
|
19
|
+
- `limit` (optional): Max results (default 3, max 50)
|
|
20
|
+
- `root` (optional): Directory to search. Defaults to project root. Use to search a parent directory (e.g. `root: "../"` to search the monorepo).
|
|
22
21
|
- `path` (optional): Restrict to path prefix (e.g. "src/auth/")
|
|
22
|
+
- `detail` (optional): `"pointer"` (default) or `"code"` (adds 4-line numbered snippets)
|
|
23
23
|
- `min_score` (optional): Filter by minimum relevance score (0-1)
|
|
24
24
|
- `max_per_file` (optional): Cap results per file for diversity
|
|
25
25
|
|
|
26
|
+
**Output format (pointer mode):**
|
|
27
|
+
```
|
|
28
|
+
handleAuth [exported ORCH C:8] src/auth/handler.ts:45-90
|
|
29
|
+
parent:AuthController calls:validateToken,checkRole,respond
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
**When to use `detail: "code"`:** Only when you need to see the actual code before deciding to Read — e.g. comparing implementations, checking syntax. For navigation ("where is X?"), pointer mode is sufficient.
|
|
33
|
+
|
|
34
|
+
### search_all
|
|
35
|
+
Search ALL indexed code across every directory. Same output format as semantic_search. Use when code could be anywhere — e.g. tracing a function across projects.
|
|
36
|
+
|
|
26
37
|
### code_skeleton
|
|
27
38
|
Show file structure — signatures with bodies collapsed (~4x fewer tokens).
|
|
28
39
|
- `target` (required): File path relative to project root
|
|
29
40
|
|
|
30
41
|
### trace_calls
|
|
31
|
-
Trace call graph — who calls a symbol and what it calls.
|
|
42
|
+
Trace call graph — who calls a symbol and what it calls. Unscoped — follows calls across all indexed directories.
|
|
32
43
|
- `symbol` (required): Function/method/class name (e.g. "handleAuth")
|
|
33
44
|
|
|
34
45
|
### list_symbols
|
|
@@ -38,45 +49,20 @@ List indexed symbols with definition locations.
|
|
|
38
49
|
- `path` (optional): Only symbols under this path prefix
|
|
39
50
|
|
|
40
51
|
### index_status
|
|
41
|
-
Check index
|
|
42
|
-
|
|
43
|
-
## CLI fallback
|
|
52
|
+
Check centralized index health — chunk count, files, indexed directories, model info.
|
|
44
53
|
|
|
45
|
-
|
|
54
|
+
## Workflow
|
|
46
55
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
gmax trace handleAuth # Call graph
|
|
52
|
-
gmax symbols booking # Find symbols by name
|
|
53
|
-
```
|
|
54
|
-
|
|
55
|
-
## Output explained (CLI)
|
|
56
|
-
```
|
|
57
|
-
ORCHESTRATION src/auth/handler.ts:45
|
|
58
|
-
Defines: handleAuth | Calls: validate, checkRole, respond | Score: .94
|
|
56
|
+
1. **Locate** — `semantic_search` with pointer mode to find relevant code
|
|
57
|
+
2. **Read** — `Read file:line` for the specific ranges you need
|
|
58
|
+
3. **Trace** — `trace_calls` to understand how functions connect
|
|
59
|
+
4. **Skeleton** — `code_skeleton` before reading large files
|
|
59
60
|
|
|
60
|
-
|
|
61
|
-
const token = req.headers.get("Authorization");
|
|
62
|
-
const claims = await validateToken(token);
|
|
63
|
-
...
|
|
64
|
-
```
|
|
65
|
-
|
|
66
|
-
- **ORCHESTRATION** = contains logic, coordinates other code
|
|
67
|
-
- **DEFINITION** = types, interfaces, classes
|
|
68
|
-
- **Score** = relevance (1 = best match)
|
|
69
|
-
- **Calls** = what this code calls (helps trace flow)
|
|
61
|
+
Don't read entire files. Use the line ranges from search results.
|
|
70
62
|
|
|
71
63
|
## Tips
|
|
72
64
|
|
|
73
65
|
- More words = better results. "auth" is vague. "where does the server validate JWT tokens" is specific.
|
|
74
|
-
- ORCH results contain the logic — prioritize these.
|
|
75
|
-
-
|
|
76
|
-
-
|
|
77
|
-
- Use `code_skeleton` before reading large files — understand structure first.
|
|
78
|
-
- Use `trace_calls` to understand how functions connect across the codebase.
|
|
79
|
-
|
|
80
|
-
## If Index is Building
|
|
81
|
-
|
|
82
|
-
If you see "Indexing" or daemon not ready: tell the user. Ask if they want to wait or proceed with partial results.
|
|
66
|
+
- ORCH results contain the logic — prioritize these over DEF/IMPL.
|
|
67
|
+
- Use `root` to search parent directories (monorepo, workspace).
|
|
68
|
+
- Use `search_all` sparingly — it searches everything indexed.
|