npm - grepmax - Versions diffs - 0.5.0 → 0.5.2 - Mend

grepmax 0.5.0 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

package/dist/commands/mcp.js +45 -0
package/dist/commands/summarize.js +4 -3
package/dist/lib/index/syncer.js +33 -32
package/dist/lib/index/watcher.js +2 -0
package/dist/lib/store/vector-db.js +2 -0
package/dist/lib/utils/logger.js +23 -0
package/dist/lib/workers/pool.js +3 -0
package/dist/lib/workers/summarize/llm-client.js +1 -0
package/mlx-embed-server/summarizer.py +8 -3
package/package.json +1 -1
package/plugins/grepmax/.claude-plugin/plugin.json +1 -1
package/plugins/grepmax/skills/gmax/SKILL.md +5 -1

package/dist/commands/mcp.js CHANGED Viewed

@@ -194,6 +194,23 @@ const TOOLS = [
             properties: {},
         },
     },
+    {
+        name: "summarize_directory",
+        description: "Generate LLM summaries for indexed code in a directory. Run after indexing. Summaries are stored and returned in search results. Requires the summarizer server on port 8101.",
+        inputSchema: {
+            type: "object",
+            properties: {
+                path: {
+                    type: "string",
+                    description: "Directory to summarize (absolute or relative). Defaults to current project root.",
+                },
+                limit: {
+                    type: "number",
+                    description: "Max chunks to summarize per call (default 200, max 5000). Run again to continue.",
+                },
+            },
+        },
+    },
 ];
 // ---------------------------------------------------------------------------
 // Helpers
@@ -625,6 +642,32 @@ exports.mcp = new commander_1.Command("mcp")
             }
         });
     }
+    function handleSummarizeDirectory(args) {
+        return __awaiter(this, void 0, void 0, function* () {
+            const dir = typeof args.path === "string"
+                ? path.resolve(args.path)
+                : projectRoot;
+            const prefix = dir.endsWith("/") ? dir : `${dir}/`;
+            const limit = Math.min(Math.max(Number(args.limit) || 200, 1), 5000);
+            try {
+                const db = getVectorDb();
+                const { summarized, remaining } = yield (0, syncer_1.generateSummaries)(db, prefix, (done, total) => {
+                    console.log(`[summarize] ${done}/${total} chunks`);
+                }, limit);
+                if (summarized === 0) {
+                    return ok("No chunks to summarize (all have summaries or summarizer unavailable)");
+                }
+                const remainMsg = remaining > 0
+                    ? ` (${remaining}+ remaining — run again to continue)`
+                    : "";
+                return ok(`Summarized ${summarized} chunks in ${path.basename(dir)}/${remainMsg}`);
+            }
+            catch (e) {
+                const msg = e instanceof Error ? e.message : String(e);
+                return err(`Summarization failed: ${msg}`);
+            }
+        });
+    }
     // --- MCP server setup ---
     const transport = new stdio_js_1.StdioServerTransport();
     const server = new index_js_1.Server({
@@ -656,6 +699,8 @@ exports.mcp = new commander_1.Command("mcp")
                 return handleListSymbols(toolArgs);
             case "index_status":
                 return handleIndexStatus();
+            case "summarize_directory":
+                return handleSummarizeDirectory(toolArgs);
             default:
                 return err(`Unknown tool: ${name}`);
         }

package/dist/commands/summarize.js CHANGED Viewed

@@ -61,11 +61,12 @@ exports.summarize = new commander_1.Command("summarize")
         : "";
     const { spinner } = (0, sync_helpers_1.createIndexingSpinner)("", "Summarizing...");
     try {
-        const count = yield (0, syncer_1.generateSummaries)(vectorDb, rootPrefix, (done, total) => {
+        const { summarized, remaining } = yield (0, syncer_1.generateSummaries)(vectorDb, rootPrefix, (done, total) => {
             spinner.text = `Summarizing... (${done}/${total})`;
         });
-        if (count > 0) {
-            spinner.succeed(`Summarized ${count} chunks`);
+        if (summarized > 0) {
+            const remainMsg = remaining > 0 ? ` (${remaining}+ remaining — run again)` : "";
+            spinner.succeed(`Summarized ${summarized} chunks${remainMsg}`);
         }
         else {
             spinner.succeed("All chunks already have summaries (or summarizer unavailable)");

package/dist/lib/index/syncer.js CHANGED Viewed

@@ -54,6 +54,7 @@ exports.initialSync = initialSync;
 const fs = __importStar(require("node:fs"));
 const path = __importStar(require("node:path"));
 const config_1 = require("../../config");
+const logger_1 = require("../utils/logger");
 const meta_cache_1 = require("../store/meta-cache");
 const vector_db_1 = require("../store/vector-db");
 const file_utils_1 = require("../utils/file-utils");
@@ -63,7 +64,7 @@ const project_root_1 = require("../utils/project-root");
 const pool_1 = require("../workers/pool");
 const index_config_1 = require("./index-config");
 const walker_1 = require("./walker");
-function generateSummaries(db, pathPrefix, onProgress) {
+function generateSummaries(db, pathPrefix, onProgress, maxChunks) {
     return __awaiter(this, void 0, void 0, function* () {
         let summarizeChunks;
         try {
@@ -71,23 +72,24 @@ function generateSummaries(db, pathPrefix, onProgress) {
             summarizeChunks = mod.summarizeChunks;
         }
         catch (_a) {
-            return 0;
+            return { summarized: 0, remaining: 0 };
         }
         // Quick availability check
         const test = yield summarizeChunks([
             { code: "test", language: "ts", file: "test" },
         ]);
         if (!test)
-            return 0;
+            return { summarized: 0, remaining: 0 };
+        const queryLimit = maxChunks !== null && maxChunks !== void 0 ? maxChunks : 50000;
         const table = yield db.ensureTable();
         const rows = yield table
             .query()
             .select(["id", "path", "content", "defined_symbols"])
             .where(`path LIKE '${pathPrefix}%' AND (summary IS NULL OR summary = '')`)
-            .limit(50000)
+            .limit(queryLimit)
             .toArray();
         if (rows.length === 0)
-            return 0;
+            return { summarized: 0, remaining: 0 };
         let summarized = 0;
         const BATCH_SIZE = 5;
         for (let i = 0; i < rows.length; i += BATCH_SIZE) {
@@ -123,7 +125,11 @@ function generateSummaries(db, pathPrefix, onProgress) {
             }
             onProgress === null || onProgress === void 0 ? void 0 : onProgress(summarized, rows.length);
         }
-        return summarized;
+        // Estimate remaining (rows.length was capped by queryLimit)
+        const remaining = rows.length === queryLimit
+            ? queryLimit - summarized // at least this many more
+            : 0;
+        return { summarized, remaining };
     });
 }
 function flushBatch(db, meta, vectors, pendingMeta, pendingDeletes, dryRun) {
@@ -183,6 +189,8 @@ function initialSync(options) {
             : `${resolvedRoot}/`;
         // Propagate project root to worker processes
         process.env.GMAX_PROJECT_ROOT = paths.root;
+        const syncTimer = (0, logger_1.timer)("index", "Total");
+        (0, logger_1.log)("index", `Root: ${resolvedRoot}`);
         let lock = null;
         const vectorDb = new vector_db_1.VectorDB(paths.lancedbDir);
         const treatAsEmptyCache = reset && dryRun;
@@ -199,11 +207,15 @@ function initialSync(options) {
             if (!dryRun) {
                 // Scope checks to this project's paths only
                 const projectKeys = yield metaCache.getKeysWithPrefix(rootPrefix);
+                (0, logger_1.log)("index", `Cached files: ${projectKeys.size}`);
                 const modelChanged = (0, index_config_1.checkModelMismatch)(paths.configPath);
                 if (reset || modelChanged) {
                     if (modelChanged) {
                         const stored = (0, index_config_1.readIndexConfig)(paths.configPath);
-                        console.warn(`[syncer] Embedding model changed: ${stored === null || stored === void 0 ? void 0 : stored.embedModel} → ${config_1.MODEL_IDS.embed}. Forcing full re-index.`);
+                        (0, logger_1.log)("index", `Reset: model changed (${stored === null || stored === void 0 ? void 0 : stored.embedModel} → ${config_1.MODEL_IDS.embed})`);
+                    }
+                    else {
+                        (0, logger_1.log)("index", "Reset: --reset flag");
                     }
                     // Only delete this project's data from the centralized store
                     yield vectorDb.deletePathsWithPrefix(rootPrefix);
@@ -230,6 +242,9 @@ function initialSync(options) {
             let processed = 0;
             let indexed = 0;
             let failedFiles = 0;
+            let cacheHits = 0;
+            let walkedFiles = 0;
+            const walkTimer = (0, logger_1.timer)("index", "Walk");
             let shouldSkipCleanup = false;
             let flushError;
             let flushPromise = null;
@@ -326,6 +341,7 @@ function initialSync(options) {
                     }
                     if (!(0, file_utils_1.isIndexableFile)(absPath))
                         continue;
+                    walkedFiles++;
                     yield schedule(() => __awaiter(this, void 0, void 0, function* () {
                         if (signal === null || signal === void 0 ? void 0 : signal.aborted) {
                             shouldSkipCleanup = true;
@@ -343,11 +359,14 @@ function initialSync(options) {
                             if (cached &&
                                 cached.mtimeMs === stats.mtimeMs &&
                                 cached.size === stats.size) {
+                                cacheHits++;
+                                (0, logger_1.debug)("index", `SKIP ${relPath} (cached)`);
                                 processed += 1;
                                 seenPaths.add(absPath);
                                 markProgress(relPath);
                                 return;
                             }
+                            (0, logger_1.debug)("index", `EMBED ${relPath}`);
                             const result = yield processFileWithRetry(absPath);
                             const metaEntry = {
                                 hash: result.hash,
@@ -426,6 +445,9 @@ function initialSync(options) {
                 finally { if (e_1) throw e_1.error; }
             }
             yield Promise.allSettled(activeTasks);
+            walkTimer();
+            (0, logger_1.log)("index", `Walk: ${walkedFiles} files`);
+            (0, logger_1.log)("index", `Embed: ${indexed} new, ${cacheHits} cached, ${failedFiles} failed`);
             if (signal === null || signal === void 0 ? void 0 : signal.aborted) {
                 shouldSkipCleanup = true;
             }
@@ -436,6 +458,7 @@ function initialSync(options) {
                     : new Error(String(flushError));
             }
             if (!dryRun) {
+                const ftsTimer = (0, logger_1.timer)("index", "FTS");
                 onProgress === null || onProgress === void 0 ? void 0 : onProgress({
                     processed,
                     indexed,
@@ -443,40 +466,18 @@ function initialSync(options) {
                     filePath: "Creating FTS index...",
                 });
                 yield vectorDb.createFTSIndex();
+                ftsTimer();
             }
             // Stale cleanup: only remove paths scoped to this project's root
             const stale = Array.from(cachedPaths).filter((p) => !seenPaths.has(p));
             if (!dryRun && stale.length > 0 && !shouldSkipCleanup) {
+                (0, logger_1.log)("index", `Stale cleanup: ${stale.length} paths`);
                 yield vectorDb.deletePaths(stale);
                 stale.forEach((p) => {
                     metaCache.delete(p);
                 });
             }
-            // --- Summary post-processing (sequential, single process) ---
-            if (!dryRun && indexed > 0) {
-                onProgress === null || onProgress === void 0 ? void 0 : onProgress({
-                    processed,
-                    indexed,
-                    total,
-                    filePath: "Generating summaries...",
-                });
-                const summarized = yield generateSummaries(vectorDb, rootPrefix, (count, chunkTotal) => {
-                    onProgress === null || onProgress === void 0 ? void 0 : onProgress({
-                        processed: count,
-                        indexed,
-                        total: chunkTotal,
-                        filePath: `Summarizing... (${count}/${chunkTotal})`,
-                    });
-                });
-                if (summarized > 0) {
-                    onProgress === null || onProgress === void 0 ? void 0 : onProgress({
-                        processed,
-                        indexed,
-                        total,
-                        filePath: `Summarized ${summarized} chunks`,
-                    });
-                }
-            }
+            syncTimer();
             // Write model config so future runs can detect model changes
             if (!dryRun) {
                 (0, index_config_1.writeIndexConfig)(paths.configPath);

package/dist/lib/index/watcher.js CHANGED Viewed

@@ -48,6 +48,7 @@ const fs = __importStar(require("node:fs"));
 const path = __importStar(require("node:path"));
 const chokidar_1 = require("chokidar");
 const file_utils_1 = require("../utils/file-utils");
+const logger_1 = require("../utils/logger");
 const lock_1 = require("../utils/lock");
 const pool_1 = require("../workers/pool");
 const llm_client_1 = require("../workers/summarize/llm-client");
@@ -102,6 +103,7 @@ function startWatcher(opts) {
         processing = true;
         const batch = new Map(pending);
         pending.clear();
+        (0, logger_1.log)("watch", `Processing ${batch.size} changed files`);
         const start = Date.now();
         let reindexed = 0;
         const changedIds = [];

package/dist/lib/store/vector-db.js CHANGED Viewed

@@ -47,6 +47,7 @@ const fs = __importStar(require("node:fs"));
 const lancedb = __importStar(require("@lancedb/lancedb"));
 const apache_arrow_1 = require("apache-arrow");
 const config_1 = require("../../config");
+const logger_1 = require("../utils/logger");
 const cleanup_1 = require("../utils/cleanup");
 const TABLE_NAME = "chunks";
 class VectorDB {
@@ -151,6 +152,7 @@ class VectorDB {
                 return table;
             }
             catch (_err) {
+                (0, logger_1.log)("db", `Creating table (${this.vectorDim}d)`);
                 const schema = this.buildSchema();
                 const table = yield db.createTable(TABLE_NAME, [this.seedRow()], {
                     schema,

package/dist/lib/utils/logger.js ADDED Viewed

@@ -0,0 +1,23 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.log = log;
+exports.debug = debug;
+exports.timer = timer;
+const VERBOSE = process.env.GMAX_DEBUG === "1" || process.env.GMAX_VERBOSE === "1"; // read once at module load; enables debug() output when either variable is exactly "1"
+function log(tag, msg) { // always writes one "[tag] msg" line to stderr
+    process.stderr.write(`[${tag}] ${msg}\n`);
+}
+function debug(tag, msg) { // same line format as log(), but only emitted when VERBOSE is true
+    if (VERBOSE)
+        process.stderr.write(`[${tag}] ${msg}\n`);
+}
+function timer(tag, label) { // starts timing now; returns a function that logs "label: <elapsed>" under tag each time it is called
+    const start = Date.now();
+    return () => {
+        const ms = Date.now() - start; // wall-clock milliseconds since timer() was called
+        const elapsed = ms > 60000 // above one minute, report minutes; otherwise seconds (both with one decimal)
+            ? `${(ms / 60000).toFixed(1)}min`
+            : `${(ms / 1000).toFixed(1)}s`;
+        log(tag, `${label}: ${elapsed}`);
+    };
+}

package/dist/lib/workers/pool.js CHANGED Viewed

@@ -51,6 +51,7 @@ exports.isWorkerPoolInitialized = isWorkerPoolInitialized;
  * to ensure the ONNX Runtime segfaults do not crash the main process.
  */
 const childProcess = __importStar(require("node:child_process"));
+const logger_1 = require("../utils/logger");
 const fs = __importStar(require("node:fs"));
 const path = __importStar(require("node:path"));
 const config_1 = require("../../config");
@@ -149,6 +150,7 @@ class WorkerPool {
             task.reject(new Error(`Worker exited unexpectedly${code ? ` (code ${code})` : ""}${signal ? ` signal ${signal}` : ""}`));
             this.completeTask(task, null);
         }
+        (0, logger_1.log)("pool", `Worker PID:${worker.child.pid} exited (code:${code} signal:${signal})`);
         this.workers = this.workers.filter((w) => w !== worker);
         if (!this.destroyed) {
             this.spawnWorker();
@@ -157,6 +159,7 @@ class WorkerPool {
     }
     spawnWorker() {
         const worker = new ProcessWorker(this.modulePath, this.execArgv);
+        (0, logger_1.debug)("pool", `Spawned worker PID:${worker.child.pid}`);
         const onMessage = (msg) => {
             const task = this.tasks.get(msg.id);
             if (!task)

package/dist/lib/workers/summarize/llm-client.js CHANGED Viewed

@@ -100,6 +100,7 @@ function summarizeChunks(chunks) {
             return [];
         const { ok, data } = yield postJSON("/summarize", { chunks });
         if (!ok || !(data === null || data === void 0 ? void 0 : data.summaries)) {
+            process.stderr.write("[summarizer] Request failed or server unavailable\n");
             return null;
         }
         return data.summaries;

package/mlx-embed-server/summarizer.py CHANGED Viewed

@@ -11,6 +11,7 @@ endpoints run on the event loop thread, avoiding Metal thread-safety crashes.
 import asyncio
 import logging
 import os
+import re
 import signal
 import socket
 import time
@@ -38,7 +39,7 @@ MODEL_ID = os.environ.get(
 )
 PORT = int(os.environ.get("MLX_SUMMARY_PORT", "8101"))
 IDLE_TIMEOUT_S = int(os.environ.get("MLX_SUMMARY_IDLE_TIMEOUT", "1800"))  # 30 min
-MAX_TOKENS = 100  # summaries should be one line
+MAX_TOKENS = 40  # summaries are ~20 tokens, one line
 model = None
 tokenizer = None
@@ -48,7 +49,7 @@ _mlx_lock = asyncio.Lock()
 SYSTEM_PROMPT = """You are a code summarizer. Given a code chunk, produce exactly one line describing what it does.
 Be specific about business logic, services, and side effects. Do not describe syntax.
-Do not use phrases like "This function" or "This code". Start with a verb."""
+Do not use phrases like "This function" or "This code". Start with a verb. /no_think"""
 def build_prompt(code: str, language: str, file: str, symbols: list[str] | None = None) -> str:
     parts = [f"Language: {language}", f"File: {file}"]
@@ -79,8 +80,12 @@ def summarize_chunk(code: str, language: str, file: str, symbols: list[str] | No
         max_tokens=MAX_TOKENS,
         verbose=False,
     )
+    # Strip thinking tokens if present
+    text = re.sub(r"<think>.*?</think>", "", response, flags=re.DOTALL).strip()
+    if not text:
+        text = response.strip()
     # Take first line only, strip whitespace
-    summary = response.strip().split("\n")[0].strip()
+    summary = text.split("\n")[0].strip()
     # Remove common prefixes the model might add
     for prefix in ["Summary: ", "summary: ", "- "]:
         if summary.startswith(prefix):

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "grepmax",
-  "version": "0.5.0",
+  "version": "0.5.2",
   "author": "Robert Owens <robowens@me.com>",
   "homepage": "https://github.com/reowens/grepmax",
   "bugs": {

package/plugins/grepmax/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "grepmax",
-  "version": "0.5.0",
+  "version": "0.5.2",
   "description": "Semantic code search for Claude Code. Automatically indexes your project and provides intelligent search capabilities.",
   "author": {
     "name": "Robert Owens",

package/plugins/grepmax/skills/gmax/SKILL.md CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 name: gmax
 description: Semantic code search. Use alongside grep - grep for exact strings, gmax for concepts.
-allowed-tools: "mcp__grepmax__semantic_search, mcp__grepmax__search_all, mcp__grepmax__code_skeleton, mcp__grepmax__trace_calls, mcp__grepmax__list_symbols, mcp__grepmax__index_status, Bash(gmax:*), Read"
+allowed-tools: "mcp__grepmax__semantic_search, mcp__grepmax__search_all, mcp__grepmax__code_skeleton, mcp__grepmax__trace_calls, mcp__grepmax__list_symbols, mcp__grepmax__index_status, mcp__grepmax__summarize_directory, Bash(gmax:*), Read"
 ---
 ## What gmax does
@@ -67,6 +67,10 @@ List indexed symbols with definition locations.
 ### index_status
 Check centralized index health — chunks, files, indexed directories, model info.
+### summarize_directory
+Generate LLM summaries for indexed code in a directory. Summaries are stored and returned in search results. Run after indexing a new directory.
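+- `path` (optional): Directory to summarize. Defaults to project root.
+- `limit` (optional): Max chunks to summarize per call (default 200, max 5000). Run again to continue.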
 ## Workflow
 1. **Search** — `semantic_search` to find relevant code (pointers by default)