grepmax 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -81,7 +81,7 @@ const TOOLS = [
81
81
  },
82
82
  limit: {
83
83
  type: "number",
84
- description: "Max results to return (default 10, max 50)",
84
+ description: "Max results to return (default 3, max 50)",
85
85
  },
86
86
  root: {
87
87
  type: "string",
@@ -91,6 +91,10 @@ const TOOLS = [
91
91
  type: "string",
92
92
  description: "Restrict search to files under this path prefix (e.g. 'src/auth/'). Relative to the search root.",
93
93
  },
94
+ detail: {
95
+ type: "string",
96
+ description: "Output detail: 'pointer' (default, metadata only — symbol, location, role, calls) or 'code' (include 4-line code snippets)",
97
+ },
94
98
  min_score: {
95
99
  type: "number",
96
100
  description: "Minimum relevance score (0-1). Results below this threshold are filtered out. Default: 0 (no filtering)",
@@ -115,7 +119,11 @@ const TOOLS = [
115
119
  },
116
120
  limit: {
117
121
  type: "number",
118
- description: "Max results to return (default 10, max 50)",
122
+ description: "Max results to return (default 3, max 50)",
123
+ },
124
+ detail: {
125
+ type: "string",
126
+ description: "Output detail: 'pointer' (default) or 'code' (include snippets)",
119
127
  },
120
128
  min_score: {
121
129
  type: "number",
@@ -335,21 +343,21 @@ exports.mcp = new commander_1.Command("mcp")
335
343
  const query = String(args.query || "");
336
344
  if (!query)
337
345
  return err("Missing required parameter: query");
338
- const limit = Math.min(Math.max(Number(args.limit) || 10, 1), 50);
346
+ const limit = Math.min(Math.max(Number(args.limit) || 3, 1), 50);
339
347
  yield ensureIndexReady();
340
348
  try {
341
349
  const searcher = getSearcher();
342
- // Determine path prefix for scoping
350
+ // Determine path prefix and display root for relative paths
343
351
  let pathPrefix;
352
+ let displayRoot = projectRoot;
344
353
  if (!searchAll) {
345
- // Resolve search root — default to project root
346
354
  const searchRoot = typeof args.root === "string"
347
355
  ? path.resolve(args.root)
348
356
  : path.resolve(projectRoot);
357
+ displayRoot = searchRoot;
349
358
  pathPrefix = searchRoot.endsWith("/")
350
359
  ? searchRoot
351
360
  : `${searchRoot}/`;
352
- // If a sub-path is specified, append it
353
361
  if (typeof args.path === "string") {
354
362
  pathPrefix = path.join(searchRoot, args.path);
355
363
  if (!pathPrefix.endsWith("/"))
@@ -362,47 +370,72 @@ exports.mcp = new commander_1.Command("mcp")
362
370
  }
363
371
  const minScore = typeof args.min_score === "number" ? args.min_score : 0;
364
372
  const maxPerFile = typeof args.max_per_file === "number" ? args.max_per_file : 0;
365
- const MAX_SNIPPET_LINES = 8;
366
- let compact = result.data.map((r) => {
373
+ const detail = typeof args.detail === "string" ? args.detail : "pointer";
374
+ let results = result.data.map((r) => {
367
375
  var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m;
368
- const startLine = (_c = (_a = r.startLine) !== null && _a !== void 0 ? _a : (_b = r.generated_metadata) === null || _b === void 0 ? void 0 : _b.start_line) !== null && _c !== void 0 ? _c : 0;
369
- const raw = typeof r.content === "string"
370
- ? r.content
371
- : typeof r.text === "string"
372
- ? r.text
373
- : "";
374
- // Add line numbers and cap at MAX_SNIPPET_LINES
375
- const lines = raw.split("\n");
376
- const capped = lines.slice(0, MAX_SNIPPET_LINES);
377
- const numbered = capped.map((line, i) => `${startLine + i + 1}│${line}`);
378
- const snippet = lines.length > MAX_SNIPPET_LINES
379
- ? `${numbered.join("\n")}\n… (+${lines.length - MAX_SNIPPET_LINES} more lines)`
380
- : numbered.join("\n");
376
+ const absPath = (_c = (_a = r.path) !== null && _a !== void 0 ? _a : (_b = r.metadata) === null || _b === void 0 ? void 0 : _b.path) !== null && _c !== void 0 ? _c : "";
377
+ const relPath = absPath.startsWith(displayRoot)
378
+ ? absPath.slice(displayRoot.length + 1)
379
+ : absPath;
380
+ const startLine = (_f = (_d = r.startLine) !== null && _d !== void 0 ? _d : (_e = r.generated_metadata) === null || _e === void 0 ? void 0 : _e.start_line) !== null && _f !== void 0 ? _f : 0;
381
+ const endLine = (_j = (_g = r.endLine) !== null && _g !== void 0 ? _g : (_h = r.generated_metadata) === null || _h === void 0 ? void 0 : _h.end_line) !== null && _j !== void 0 ? _j : 0;
382
+ const defs = toStringArray((_k = r.definedSymbols) !== null && _k !== void 0 ? _k : r.defined_symbols);
383
+ const refs = toStringArray((_l = r.referenced_symbols) !== null && _l !== void 0 ? _l : r.referencedSymbols);
384
+ const symbol = defs[0] || "(anonymous)";
385
+ const role = ((_m = r.role) !== null && _m !== void 0 ? _m : "IMPL").slice(0, 4).toUpperCase();
386
+ const exported = r.is_exported ? "exported " : "";
387
+ const complexity = typeof r.complexity === "number" && r.complexity > 0
388
+ ? ` C:${Math.round(r.complexity)}`
389
+ : "";
390
+ const parentStr = r.parent_symbol
391
+ ? `parent:${r.parent_symbol} `
392
+ : "";
393
+ const callsStr = refs.length > 0
394
+ ? `calls:${refs.slice(0, 8).join(",")}`
395
+ : "";
396
+ const line1 = `${symbol} [${exported}${role}${complexity}] ${relPath}:${startLine + 1}-${endLine + 1}`;
397
+ const summaryStr = r.summary ? ` ${r.summary}` : "";
398
+ const line2 = parentStr || callsStr
399
+ ? ` ${parentStr}${callsStr}`
400
+ : "";
401
+ let snippet = "";
402
+ if (detail === "code") {
403
+ const raw = typeof r.content === "string"
404
+ ? r.content
405
+ : typeof r.text === "string"
406
+ ? r.text
407
+ : "";
408
+ const lines = raw.split("\n").slice(0, 4);
409
+ snippet =
410
+ "\n" +
411
+ lines
412
+ .map((l, i) => `${startLine + i + 1}│${l}`)
413
+ .join("\n");
414
+ }
415
+ const text = line1 +
416
+ (summaryStr ? `\n${summaryStr}` : "") +
417
+ (line2 ? `\n${line2}` : "") +
418
+ snippet;
381
419
  return {
382
- path: (_f = (_d = r.path) !== null && _d !== void 0 ? _d : (_e = r.metadata) === null || _e === void 0 ? void 0 : _e.path) !== null && _f !== void 0 ? _f : "",
383
- startLine,
384
- endLine: (_j = (_g = r.endLine) !== null && _g !== void 0 ? _g : (_h = r.generated_metadata) === null || _h === void 0 ? void 0 : _h.end_line) !== null && _j !== void 0 ? _j : 0,
385
- score: typeof r.score === "number" ? +r.score.toFixed(3) : 0,
386
- role: (_k = r.role) !== null && _k !== void 0 ? _k : "IMPLEMENTATION",
387
- confidence: (_l = r.confidence) !== null && _l !== void 0 ? _l : "Unknown",
388
- definedSymbols: toStringArray((_m = r.definedSymbols) !== null && _m !== void 0 ? _m : r.defined_symbols).slice(0, 5),
389
- snippet,
420
+ absPath,
421
+ text,
422
+ score: typeof r.score === "number" ? r.score : 0,
390
423
  };
391
424
  });
392
425
  if (minScore > 0) {
393
- compact = compact.filter((r) => r.score >= minScore);
426
+ results = results.filter((r) => r.score >= minScore);
394
427
  }
395
428
  if (maxPerFile > 0) {
396
429
  const counts = new Map();
397
- compact = compact.filter((r) => {
398
- const count = counts.get(r.path) || 0;
430
+ results = results.filter((r) => {
431
+ const count = counts.get(r.absPath) || 0;
399
432
  if (count >= maxPerFile)
400
433
  return false;
401
- counts.set(r.path, count + 1);
434
+ counts.set(r.absPath, count + 1);
402
435
  return true;
403
436
  });
404
437
  }
405
- return ok(JSON.stringify(compact));
438
+ return ok(results.map((r) => r.text).join("\n\n"));
406
439
  }
407
440
  catch (e) {
408
441
  const msg = e instanceof Error ? e.message : String(e);
@@ -127,8 +127,7 @@ class Searcher {
127
127
  referenced_symbols: referencedSymbols,
128
128
  imports,
129
129
  exports,
130
- // Remove 'context' field entirely from JSON output
131
- // context: record.context_prev ? [record.context_prev] : [],
130
+ summary: record.summary,
132
131
  };
133
132
  }
134
133
  applyStructureBoost(record, score, intent) {
@@ -97,6 +97,7 @@ class VectorDB {
97
97
  role: "",
98
98
  parent_symbol: "",
99
99
  file_skeleton: "",
100
+ summary: "",
100
101
  };
101
102
  }
102
103
  validateSchema(table) {
@@ -138,6 +139,7 @@ class VectorDB {
138
139
  new apache_arrow_1.Field("role", new apache_arrow_1.Utf8(), true),
139
140
  new apache_arrow_1.Field("parent_symbol", new apache_arrow_1.Utf8(), true),
140
141
  new apache_arrow_1.Field("file_skeleton", new apache_arrow_1.Utf8(), true),
142
+ new apache_arrow_1.Field("summary", new apache_arrow_1.Utf8(), true),
141
143
  ]);
142
144
  }
143
145
  ensureTable() {
@@ -201,7 +203,7 @@ class VectorDB {
201
203
  return [];
202
204
  };
203
205
  const rows = records.map((rec) => {
204
- var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m, _o;
206
+ var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m, _o, _p;
205
207
  const vec = (() => {
206
208
  const arr = toNumberArray(rec.vector);
207
209
  if (arr.length < this.vectorDim) {
@@ -241,6 +243,7 @@ class VectorDB {
241
243
  role: (_l = rec.role) !== null && _l !== void 0 ? _l : "",
242
244
  parent_symbol: (_m = rec.parent_symbol) !== null && _m !== void 0 ? _m : "",
243
245
  file_skeleton: (_o = rec.file_skeleton) !== null && _o !== void 0 ? _o : "",
246
+ summary: (_p = rec.summary) !== null && _p !== void 0 ? _p : null,
244
247
  };
245
248
  });
246
249
  try {
@@ -49,6 +49,7 @@ const transformers_1 = require("@huggingface/transformers");
49
49
  const ort = __importStar(require("onnxruntime-node"));
50
50
  const uuid_1 = require("uuid");
51
51
  const config_1 = require("../../config");
52
+ const llm_client_1 = require("./summarize/llm-client");
52
53
  const chunker_1 = require("../index/chunker");
53
54
  const skeleton_1 = require("../skeleton");
54
55
  const file_utils_1 = require("../utils/file-utils");
@@ -213,7 +214,23 @@ class WorkerOrchestrator {
213
214
  if (!chunks.length)
214
215
  return { vectors: [], hash, mtimeMs, size };
215
216
  const preparedChunks = this.toPreparedChunks(input.path, hash, chunks, skeletonResult.success ? skeletonResult.skeleton : undefined);
216
- const hybrids = yield this.computeHybrid(preparedChunks.map((chunk) => chunk.content), onProgress);
217
+ // Run embedding and summarization in parallel
218
+ const lang = path.extname(input.path).replace(/^\./, "") || "unknown";
219
+ const [hybrids, summaries] = yield Promise.all([
220
+ this.computeHybrid(preparedChunks.map((chunk) => chunk.content), onProgress),
221
+ (0, llm_client_1.summarizeChunks)(preparedChunks.map((c) => ({
222
+ code: c.content,
223
+ language: lang,
224
+ file: c.path,
225
+ }))),
226
+ ]);
227
+ // Attach summaries if available
228
+ if (summaries) {
229
+ for (let i = 0; i < preparedChunks.length; i++) {
230
+ if (summaries[i])
231
+ preparedChunks[i].summary = summaries[i];
232
+ }
233
+ }
217
234
  const vectors = preparedChunks.map((chunk, idx) => {
218
235
  var _a;
219
236
  const hybrid = (_a = hybrids[idx]) !== null && _a !== void 0 ? _a : {
@@ -0,0 +1,165 @@
1
+ "use strict";
2
+ /**
3
+ * LLM summarizer HTTP client.
4
+ * Talks to the MLX summarizer server to generate code summaries.
5
+ * Returns null if server isn't running — caller skips summaries gracefully.
6
+ */
7
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
8
+ if (k2 === undefined) k2 = k;
9
+ var desc = Object.getOwnPropertyDescriptor(m, k);
10
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
11
+ desc = { enumerable: true, get: function() { return m[k]; } };
12
+ }
13
+ Object.defineProperty(o, k2, desc);
14
+ }) : (function(o, m, k, k2) {
15
+ if (k2 === undefined) k2 = k;
16
+ o[k2] = m[k];
17
+ }));
18
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
19
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
20
+ }) : function(o, v) {
21
+ o["default"] = v;
22
+ });
23
+ var __importStar = (this && this.__importStar) || (function () {
24
+ var ownKeys = function(o) {
25
+ ownKeys = Object.getOwnPropertyNames || function (o) {
26
+ var ar = [];
27
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
28
+ return ar;
29
+ };
30
+ return ownKeys(o);
31
+ };
32
+ return function (mod) {
33
+ if (mod && mod.__esModule) return mod;
34
+ var result = {};
35
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
36
+ __setModuleDefault(result, mod);
37
+ return result;
38
+ };
39
+ })();
40
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
41
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
42
+ return new (P || (P = Promise))(function (resolve, reject) {
43
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
44
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
45
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
46
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
47
+ });
48
+ };
49
+ Object.defineProperty(exports, "__esModule", { value: true });
50
+ exports.summarizeChunks = summarizeChunks;
51
+ exports.resetSummarizerCache = resetSummarizerCache;
52
+ const http = __importStar(require("node:http"));
53
+ const SUMMARY_PORT = parseInt(process.env.GMAX_SUMMARY_PORT || "8101", 10);
54
+ const SUMMARY_HOST = "127.0.0.1";
55
+ const SUMMARY_TIMEOUT_MS = 120000; // 2 min — per-chunk LLM generation can take time
56
+ let summarizerAvailable = null;
57
+ let lastCheck = 0;
58
+ const CHECK_INTERVAL_MS = 5000; // short cache — retry quickly if server just started
59
+ function postJSON(path, body) {
60
+ return new Promise((resolve) => {
61
+ const payload = JSON.stringify(body);
62
+ const req = http.request({
63
+ hostname: SUMMARY_HOST,
64
+ port: SUMMARY_PORT,
65
+ path,
66
+ method: "POST",
67
+ headers: {
68
+ "Content-Type": "application/json",
69
+ "Content-Length": Buffer.byteLength(payload),
70
+ },
71
+ timeout: SUMMARY_TIMEOUT_MS,
72
+ }, (res) => {
73
+ const chunks = [];
74
+ res.on("data", (chunk) => chunks.push(chunk));
75
+ res.on("end", () => {
76
+ try {
77
+ const data = JSON.parse(Buffer.concat(chunks).toString("utf-8"));
78
+ resolve({ ok: res.statusCode === 200, data });
79
+ }
80
+ catch (_a) {
81
+ resolve({ ok: false });
82
+ }
83
+ });
84
+ });
85
+ req.on("error", () => resolve({ ok: false }));
86
+ req.on("timeout", () => {
87
+ req.destroy();
88
+ resolve({ ok: false });
89
+ });
90
+ req.write(payload);
91
+ req.end();
92
+ });
93
+ }
94
+ function isSummarizerUp() {
95
+ return __awaiter(this, void 0, void 0, function* () {
96
+ const now = Date.now();
97
+ if (summarizerAvailable !== null && now - lastCheck < CHECK_INTERVAL_MS) {
98
+ return summarizerAvailable;
99
+ }
100
+ const result = yield new Promise((resolve) => {
101
+ const req = http.get({
102
+ hostname: SUMMARY_HOST,
103
+ port: SUMMARY_PORT,
104
+ path: "/health",
105
+ timeout: 5000,
106
+ }, (res) => {
107
+ res.resume();
108
+ resolve(res.statusCode === 200);
109
+ });
110
+ req.on("error", () => resolve(false));
111
+ req.on("timeout", () => {
112
+ req.destroy();
113
+ resolve(false);
114
+ });
115
+ });
116
+ summarizerAvailable = result;
117
+ lastCheck = now;
118
+ return result;
119
+ });
120
+ }
121
+ /**
122
+ * Generate summaries for code chunks via the local LLM server.
123
+ * Sends one chunk at a time. Health-checks the server only when availability is unknown or stale; otherwise requests go straight through.
124
+ * If the server is busy, the TCP connection queues until it's ready.
125
+ * Returns string[] on success, null if server unavailable.
126
+ */
127
+ function summarizeChunks(chunks) {
128
+ return __awaiter(this, void 0, void 0, function* () {
129
+ var _a;
130
+ if (chunks.length === 0)
131
+ return [];
132
+ // Quick check only if we've never connected
133
+ if (summarizerAvailable === null) {
134
+ summarizerAvailable = yield isSummarizerUp();
135
+ if (!summarizerAvailable)
136
+ return null;
137
+ }
138
+ if (summarizerAvailable === false) {
139
+ // Recheck periodically
140
+ const now = Date.now();
141
+ if (now - lastCheck < CHECK_INTERVAL_MS)
142
+ return null;
143
+ summarizerAvailable = yield isSummarizerUp();
144
+ if (!summarizerAvailable)
145
+ return null;
146
+ }
147
+ const summaries = [];
148
+ for (const chunk of chunks) {
149
+ const { ok, data } = yield postJSON("/summarize", {
150
+ chunks: [chunk],
151
+ });
152
+ if (!ok || !((_a = data === null || data === void 0 ? void 0 : data.summaries) === null || _a === void 0 ? void 0 : _a[0])) {
153
+ summaries.push("");
154
+ }
155
+ else {
156
+ summaries.push(data.summaries[0]);
157
+ }
158
+ }
159
+ return summaries;
160
+ });
161
+ }
162
+ function resetSummarizerCache() {
163
+ summarizerAvailable = null;
164
+ lastCheck = 0;
165
+ }
@@ -1,13 +1,15 @@
1
1
  [project]
2
2
  name = "mlx-embed-server"
3
3
  version = "0.1.0"
4
- description = "MLX-accelerated embedding server for grepmax"
4
+ description = "MLX-accelerated embedding and summarization server for grepmax"
5
5
  requires-python = ">=3.13"
6
6
  dependencies = [
7
7
  "fastapi>=0.115.0",
8
8
  "uvicorn>=0.34.0",
9
9
  "mlx-embeddings @ git+https://github.com/Blaizzy/mlx-embeddings.git",
10
+ "mlx-lm>=0.22.0",
10
11
  ]
11
12
 
12
13
  [project.scripts]
13
14
  mlx-embed-server = "server:main"
15
+ mlx-summarizer = "summarizer:main"
@@ -0,0 +1,169 @@
1
+ """MLX-accelerated code summarizer for grepmax.
2
+
3
+ Runs Qwen3-Coder-30B-A3B on Apple Silicon GPU to generate one-line
4
+ summaries of code chunks during indexing. Summaries are stored in
5
+ LanceDB and returned in search results.
6
+
7
+ IMPORTANT: All MLX operations must run on a single thread. FastAPI async
8
+ endpoints run on the event loop thread, avoiding Metal thread-safety crashes.
9
+ """
10
+
11
+ import asyncio
12
+ import logging
13
+ import os
14
+ import signal
15
+ import socket
16
+ import time
17
+ import warnings
18
+ from contextlib import asynccontextmanager
19
+
20
+ os.environ["TRANSFORMERS_NO_ADVISORY_WARNINGS"] = "1"
21
+ os.environ["HF_HUB_DISABLE_IMPLICIT_TOKEN"] = "1"
22
+ os.environ["HF_HUB_VERBOSITY"] = "error"
23
+ os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "1"
24
+ os.environ["TOKENIZERS_PARALLELISM"] = "false"
25
+ warnings.filterwarnings("ignore", message=".*PyTorch.*")
26
+ warnings.filterwarnings("ignore", message=".*resource_tracker.*")
27
+ logging.getLogger("huggingface_hub").setLevel(logging.ERROR)
28
+
29
+ import mlx.core as mx
30
+ import uvicorn
31
+ from fastapi import FastAPI
32
+ from mlx_lm import load, generate
33
+ from pydantic import BaseModel
34
+
35
+ MODEL_ID = os.environ.get(
36
+ "MLX_SUMMARY_MODEL",
37
+ "lmstudio-community/Qwen3-Coder-30B-A3B-Instruct-MLX-4bit",
38
+ )
39
+ PORT = int(os.environ.get("MLX_SUMMARY_PORT", "8101"))
40
+ IDLE_TIMEOUT_S = int(os.environ.get("MLX_SUMMARY_IDLE_TIMEOUT", "1800")) # 30 min
41
+ MAX_TOKENS = 100 # summaries should be one line
42
+
43
+ model = None
44
+ tokenizer = None
45
+ last_activity = time.time()
46
+
47
+ _mlx_lock = asyncio.Lock()
48
+
49
+ SYSTEM_PROMPT = """You are a code summarizer. Given a code chunk, produce exactly one line describing what it does.
50
+ Be specific about business logic, services, and side effects. Do not describe syntax.
51
+ Do not use phrases like "This function" or "This code". Start with a verb."""
52
+
53
+ def build_prompt(code: str, language: str, file: str) -> str:
54
+ return f"Language: {language}\nFile: {file}\n\n```\n{code}\n```"
55
+
56
+
57
+ def is_port_in_use(port: int) -> bool:
58
+ with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
59
+ return s.connect_ex(("127.0.0.1", port)) == 0
60
+
61
+
62
+ def summarize_chunk(code: str, language: str, file: str) -> str:
63
+ """Generate a one-line summary for a code chunk."""
64
+ messages = [
65
+ {"role": "system", "content": SYSTEM_PROMPT},
66
+ {"role": "user", "content": build_prompt(code, language, file)},
67
+ ]
68
+ prompt = tokenizer.apply_chat_template(
69
+ messages, tokenize=False, add_generation_prompt=True
70
+ )
71
+ response = generate(
72
+ model,
73
+ tokenizer,
74
+ prompt=prompt,
75
+ max_tokens=MAX_TOKENS,
76
+ verbose=False,
77
+ )
78
+ # Take first line only, strip whitespace
79
+ summary = response.strip().split("\n")[0].strip()
80
+ # Remove common prefixes the model might add
81
+ for prefix in ["Summary: ", "summary: ", "- "]:
82
+ if summary.startswith(prefix):
83
+ summary = summary[len(prefix):]
84
+ return summary
85
+
86
+
87
+ def load_model():
88
+ global model, tokenizer
89
+ print(f"[summarizer] Loading {MODEL_ID}...")
90
+ model, tokenizer = load(MODEL_ID)
91
+ # Warm up
92
+ _ = summarize_chunk("function hello() { return 'world'; }", "javascript", "test.js")
93
+ print("[summarizer] Model ready on Metal GPU.")
94
+
95
+
96
+ @asynccontextmanager
97
+ async def lifespan(app: FastAPI):
98
+ load_model()
99
+ yield
100
+
101
+
102
+ app = FastAPI(lifespan=lifespan)
103
+
104
+
105
+ class ChunkInput(BaseModel):
106
+ code: str
107
+ language: str = "unknown"
108
+ file: str = ""
109
+
110
+
111
+ class SummarizeRequest(BaseModel):
112
+ chunks: list[ChunkInput]
113
+
114
+
115
+ class SummarizeResponse(BaseModel):
116
+ summaries: list[str]
117
+
118
+
119
+ @app.post("/summarize")
120
+ async def summarize(request: SummarizeRequest) -> SummarizeResponse:
121
+ global last_activity
122
+ last_activity = time.time()
123
+
124
+ summaries = []
125
+ async with _mlx_lock:
126
+ for chunk in request.chunks:
127
+ try:
128
+ summary = summarize_chunk(chunk.code, chunk.language, chunk.file)
129
+ summaries.append(summary)
130
+ except Exception as e:
131
+ summaries.append(f"(summary failed: {e})")
132
+
133
+ return SummarizeResponse(summaries=summaries)
134
+
135
+
136
+ @app.get("/health")
137
+ async def health():
138
+ # Health check must NOT acquire _mlx_lock — it must respond instantly
139
+ # even when a summarization is in progress
140
+ global last_activity
141
+ last_activity = time.time()
142
+ return {"status": "ok", "model": MODEL_ID}
143
+
144
+
145
+ def main():
146
+ if is_port_in_use(PORT):
147
+ print(f"[summarizer] Port {PORT} already in use — server is already running.")
148
+ return
149
+
150
+ print(f"[summarizer] Starting on port {PORT}")
151
+
152
+ def handle_signal(sig, frame):
153
+ print("[summarizer] Stopped.")
154
+ try:
155
+ from multiprocessing.resource_tracker import _resource_tracker
156
+ if _resource_tracker._pid is not None:
157
+ os.kill(_resource_tracker._pid, signal.SIGKILL)
158
+ except Exception:
159
+ pass
160
+ os._exit(0)
161
+
162
+ signal.signal(signal.SIGINT, handle_signal)
163
+ signal.signal(signal.SIGTERM, handle_signal)
164
+
165
+ uvicorn.run(app, host="127.0.0.1", port=PORT, log_level="warning")
166
+
167
+
168
+ if __name__ == "__main__":
169
+ main()
@@ -610,6 +610,7 @@ source = { virtual = "." }
610
610
  dependencies = [
611
611
  { name = "fastapi" },
612
612
  { name = "mlx-embeddings" },
613
+ { name = "mlx-lm" },
613
614
  { name = "uvicorn" },
614
615
  ]
615
616
 
@@ -617,6 +618,7 @@ dependencies = [
617
618
  requires-dist = [
618
619
  { name = "fastapi", specifier = ">=0.115.0" },
619
620
  { name = "mlx-embeddings", git = "https://github.com/Blaizzy/mlx-embeddings.git" },
621
+ { name = "mlx-lm", specifier = ">=0.22.0" },
620
622
  { name = "uvicorn", specifier = ">=0.34.0" },
621
623
  ]
622
624
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "grepmax",
3
- "version": "0.3.0",
3
+ "version": "0.4.0",
4
4
  "author": "Robert Owens <robowens@me.com>",
5
5
  "homepage": "https://github.com/reowens/grepmax",
6
6
  "bugs": {
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "grepmax",
3
- "version": "0.3.0",
3
+ "version": "0.4.0",
4
4
  "description": "Semantic code search for Claude Code. Automatically indexes your project and provides intelligent search capabilities.",
5
5
  "author": {
6
6
  "name": "Robert Owens",
@@ -3,10 +3,10 @@ const _path = require("node:path");
3
3
  const http = require("node:http");
4
4
  const { spawn } = require("node:child_process");
5
5
 
6
- function isMlxRunning() {
6
+ function isServerRunning(port) {
7
7
  return new Promise((resolve) => {
8
8
  const req = http.get(
9
- { hostname: "127.0.0.1", port: 8100, path: "/health", timeout: 1000 },
9
+ { hostname: "127.0.0.1", port, path: "/health", timeout: 1000 },
10
10
  (res) => {
11
11
  res.resume();
12
12
  resolve(res.statusCode === 200);
@@ -20,17 +20,17 @@ function isMlxRunning() {
20
20
  });
21
21
  }
22
22
 
23
- function startMlxServer() {
23
+ function startPythonServer(scriptName, logName) {
24
24
  const pluginRoot = __dirname.replace(/\/hooks$/, "");
25
25
  const gmaxRoot = _path.resolve(pluginRoot, "../..");
26
26
  const serverDir = _path.join(gmaxRoot, "mlx-embed-server");
27
27
 
28
- if (!fs.existsSync(_path.join(serverDir, "server.py"))) return;
28
+ if (!fs.existsSync(_path.join(serverDir, scriptName))) return;
29
29
 
30
- const logPath = "/tmp/mlx-embed-server.log";
30
+ const logPath = `/tmp/${logName}.log`;
31
31
  const out = fs.openSync(logPath, "a");
32
32
 
33
- const child = spawn("uv", ["run", "python", "server.py"], {
33
+ const child = spawn("uv", ["run", "python", scriptName], {
34
34
  cwd: serverDir,
35
35
  detached: true,
36
36
  stdio: ["ignore", out, out],
@@ -40,17 +40,21 @@ function startMlxServer() {
40
40
  }
41
41
 
42
42
  async function main() {
43
- // Start MLX embed server if not running (set GMAX_EMBED_MODE=cpu to skip)
44
43
  const embedMode =
45
44
  process.env.GMAX_EMBED_MODE || process.env.OSGREP_EMBED_MODE || "auto";
45
+
46
46
  if (embedMode !== "cpu") {
47
- const mlxUp = await isMlxRunning();
48
- if (!mlxUp) {
49
- startMlxServer();
47
+ // Start MLX embed server (port 8100)
48
+ if (!(await isServerRunning(8100))) {
49
+ startPythonServer("server.py", "mlx-embed-server");
50
+ }
51
+
52
+ // Start LLM summarizer server (port 8101)
53
+ if (!(await isServerRunning(8101))) {
54
+ startPythonServer("summarizer.py", "mlx-summarizer");
50
55
  }
51
56
  }
52
57
 
53
- // MCP server handles indexing and search directly — no daemon needed
54
58
  const response = {
55
59
  hookSpecificOutput: {
56
60
  hookEventName: "SessionStart",
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  name: gmax
3
3
  description: Semantic code search. Use alongside grep - grep for exact strings, gmax for concepts.
4
- allowed-tools: "mcp__grepmax__semantic_search, mcp__grepmax__code_skeleton, mcp__grepmax__trace_calls, mcp__grepmax__list_symbols, mcp__grepmax__index_status, Bash(gmax:*), Read"
4
+ allowed-tools: "mcp__grepmax__semantic_search, mcp__grepmax__search_all, mcp__grepmax__code_skeleton, mcp__grepmax__trace_calls, mcp__grepmax__list_symbols, mcp__grepmax__index_status, Bash(gmax:*), Read"
5
5
  ---
6
6
 
7
7
  ## What gmax does
@@ -11,24 +11,35 @@ Finds code by meaning. When you'd ask a colleague "where do we handle auth?", us
11
11
  - grep/ripgrep: exact string match, fast
12
12
  - gmax: concept match, finds code you couldn't grep for
13
13
 
14
- ## MCP tools (preferred)
15
-
16
- Use these structured tools when available — they return typed JSON and don't need output parsing.
14
+ ## MCP tools
17
15
 
18
16
  ### semantic_search
19
- Search code by meaning. Returns ranked snippets with file paths, line numbers, scores.
17
+ Search code by meaning. Returns **pointers** by default symbol, file:line, role, calls. No code snippets unless requested.
20
18
  - `query` (required): Natural language. Be specific — more words = better results.
21
- - `limit` (optional): Max results (default 10, max 50)
19
+ - `limit` (optional): Max results (default 3, max 50)
20
+ - `root` (optional): Directory to search. Defaults to project root. Use to search a parent directory (e.g. `root: "../"` to search the monorepo).
22
21
  - `path` (optional): Restrict to path prefix (e.g. "src/auth/")
22
+ - `detail` (optional): `"pointer"` (default) or `"code"` (adds 4-line numbered snippets)
23
23
  - `min_score` (optional): Filter by minimum relevance score (0-1)
24
24
  - `max_per_file` (optional): Cap results per file for diversity
25
25
 
26
+ **Output format (pointer mode):**
27
+ ```
28
+ handleAuth [exported ORCH C:8] src/auth/handler.ts:45-90
29
+ parent:AuthController calls:validateToken,checkRole,respond
30
+ ```
31
+
32
+ **When to use `detail: "code"`:** Only when you need to see the actual code before deciding to Read — e.g. comparing implementations, checking syntax. For navigation ("where is X?"), pointer mode is sufficient.
33
+
34
+ ### search_all
35
+ Search ALL indexed code across every directory. Same output format as semantic_search. Use when code could be anywhere — e.g. tracing a function across projects.
36
+
26
37
  ### code_skeleton
27
38
  Show file structure — signatures with bodies collapsed (~4x fewer tokens).
28
39
  - `target` (required): File path relative to project root
29
40
 
30
41
  ### trace_calls
31
- Trace call graph — who calls a symbol and what it calls.
42
+ Trace call graph — who calls a symbol and what it calls. Unscoped — follows calls across all indexed directories.
32
43
  - `symbol` (required): Function/method/class name (e.g. "handleAuth")
33
44
 
34
45
  ### list_symbols
@@ -38,45 +49,20 @@ List indexed symbols with definition locations.
38
49
  - `path` (optional): Only symbols under this path prefix
39
50
 
40
51
  ### index_status
41
- Check index and daemon health — file count, chunks, embed mode, age, watching status.
42
-
43
- ## CLI fallback
52
+ Check centralized index health — chunk count, files, indexed directories, model info.
44
53
 
45
- If MCP tools aren't available, use the CLI via Bash:
54
+ ## Workflow
46
55
 
47
- ```bash
48
- gmax "where do we validate user permissions" # Semantic search
49
- gmax "authentication" --compact # Just file paths + line ranges
50
- gmax skeleton src/giant-2000-line-file.ts # File structure
51
- gmax trace handleAuth # Call graph
52
- gmax symbols booking # Find symbols by name
53
- ```
54
-
55
- ## Output explained (CLI)
56
- ```
57
- ORCHESTRATION src/auth/handler.ts:45
58
- Defines: handleAuth | Calls: validate, checkRole, respond | Score: .94
56
+ 1. **Locate** — `semantic_search` with pointer mode to find relevant code
57
+ 2. **Read** `Read file:line` for the specific ranges you need
58
+ 3. **Trace** `trace_calls` to understand how functions connect
59
+ 4. **Skeleton** `code_skeleton` before reading large files
59
60
 
60
- export async function handleAuth(req: Request) {
61
- const token = req.headers.get("Authorization");
62
- const claims = await validateToken(token);
63
- ...
64
- ```
65
-
66
- - **ORCHESTRATION** = contains logic, coordinates other code
67
- - **DEFINITION** = types, interfaces, classes
68
- - **Score** = relevance (1 = best match)
69
- - **Calls** = what this code calls (helps trace flow)
61
+ Don't read entire files. Use the line ranges from search results.
70
62
 
71
63
  ## Tips
72
64
 
73
65
  - More words = better results. "auth" is vague. "where does the server validate JWT tokens" is specific.
74
- - ORCH results contain the logic — prioritize these.
75
- - Don't read entire files. Use the line ranges from results.
76
- - If results seem off, rephrase like you'd ask a teammate.
77
- - Use `code_skeleton` before reading large files — understand structure first.
78
- - Use `trace_calls` to understand how functions connect across the codebase.
79
-
80
- ## If Index is Building
81
-
82
- If you see "Indexing" or daemon not ready: tell the user. Ask if they want to wait or proceed with partial results.
66
+ - ORCH results contain the logic — prioritize these over DEF/IMPL.
67
+ - Use `root` to search parent directories (monorepo, workspace).
68
+ - Use `search_all` sparingly — it searches everything indexed.