grepmax 0.5.0 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -194,6 +194,23 @@ const TOOLS = [
194
194
  properties: {},
195
195
  },
196
196
  },
197
+ {
198
+ name: "summarize_directory",
199
+ description: "Generate LLM summaries for indexed code in a directory. Run after indexing. Summaries are stored and returned in search results. Requires the summarizer server on port 8101.",
200
+ inputSchema: {
201
+ type: "object",
202
+ properties: {
203
+ path: {
204
+ type: "string",
205
+ description: "Directory to summarize (absolute or relative). Defaults to current project root.",
206
+ },
207
+ limit: {
208
+ type: "number",
209
+ description: "Max chunks to summarize per call (default 200, max 5000). Run again to continue.",
210
+ },
211
+ },
212
+ },
213
+ },
197
214
  ];
198
215
  // ---------------------------------------------------------------------------
199
216
  // Helpers
@@ -625,6 +642,32 @@ exports.mcp = new commander_1.Command("mcp")
625
642
  }
626
643
  });
627
644
  }
645
/**
 * MCP tool handler for `summarize_directory`.
 *
 * Asks `generateSummaries` to summarize indexed chunks whose stored path
 * starts with the requested directory, then reports how many were written.
 *
 * @param {{ path?: unknown, limit?: unknown }} args Raw tool arguments.
 *   - `path`: directory to summarize, resolved with `path.resolve`. A missing,
 *     non-string, or blank value falls back to `projectRoot`.
 *   - `limit`: max chunks for this call; coerced to a whole number in
 *     [1, 5000], defaulting to 200 when absent or not numeric.
 * @returns {Promise<*>} Result of `ok(message)` on completion, or of
 *   `err(message)` when summarization throws.
 */
function handleSummarizeDirectory(args) {
    return __awaiter(this, void 0, void 0, function* () {
        const { path: requestedPath, limit: requestedLimit } = args || {};
        // A blank string would resolve to the process cwd; treat it like an
        // omitted path so the documented default (project root) applies.
        const hasPath = typeof requestedPath === "string" && requestedPath.trim() !== "";
        const dir = hasPath ? path.resolve(requestedPath) : projectRoot;
        // The prefix is used for a `path LIKE '<prefix>%'` match, so the trailing
        // slash keeps sibling directories sharing a name prefix out of the result.
        const prefix = dir.endsWith("/") ? dir : `${dir}/`;
        // Clamp to [1, 5000] and drop any fraction so a whole row count is passed on.
        const limit = Math.floor(Math.min(Math.max(Number(requestedLimit) || 200, 1), 5000));
        try {
            const db = getVectorDb();
            const { summarized, remaining } = yield (0, syncer_1.generateSummaries)(db, prefix, (done, total) => {
                // Progress goes to stderr: this server talks over a stdio
                // transport, so stdout has to carry protocol messages only.
                console.error(`[summarize] ${done}/${total} chunks`);
            }, limit);
            if (summarized === 0) {
                return ok("No chunks to summarize (all have summaries or summarizer unavailable)");
            }
            const remainMsg = remaining > 0
                ? ` (${remaining}+ remaining — run again to continue)`
                : "";
            return ok(`Summarized ${summarized} chunks in ${path.basename(dir)}/${remainMsg}`);
        }
        catch (e) {
            const msg = e instanceof Error ? e.message : String(e);
            return err(`Summarization failed: ${msg}`);
        }
    });
}
628
671
  // --- MCP server setup ---
629
672
  const transport = new stdio_js_1.StdioServerTransport();
630
673
  const server = new index_js_1.Server({
@@ -656,6 +699,8 @@ exports.mcp = new commander_1.Command("mcp")
656
699
  return handleListSymbols(toolArgs);
657
700
  case "index_status":
658
701
  return handleIndexStatus();
702
+ case "summarize_directory":
703
+ return handleSummarizeDirectory(toolArgs);
659
704
  default:
660
705
  return err(`Unknown tool: ${name}`);
661
706
  }
@@ -61,11 +61,12 @@ exports.summarize = new commander_1.Command("summarize")
61
61
  : "";
62
62
  const { spinner } = (0, sync_helpers_1.createIndexingSpinner)("", "Summarizing...");
63
63
  try {
64
- const count = yield (0, syncer_1.generateSummaries)(vectorDb, rootPrefix, (done, total) => {
64
+ const { summarized, remaining } = yield (0, syncer_1.generateSummaries)(vectorDb, rootPrefix, (done, total) => {
65
65
  spinner.text = `Summarizing... (${done}/${total})`;
66
66
  });
67
- if (count > 0) {
68
- spinner.succeed(`Summarized ${count} chunks`);
67
+ if (summarized > 0) {
68
+ const remainMsg = remaining > 0 ? ` (${remaining}+ remaining — run again)` : "";
69
+ spinner.succeed(`Summarized ${summarized} chunks${remainMsg}`);
69
70
  }
70
71
  else {
71
72
  spinner.succeed("All chunks already have summaries (or summarizer unavailable)");
@@ -54,6 +54,7 @@ exports.initialSync = initialSync;
54
54
  const fs = __importStar(require("node:fs"));
55
55
  const path = __importStar(require("node:path"));
56
56
  const config_1 = require("../../config");
57
+ const logger_1 = require("../utils/logger");
57
58
  const meta_cache_1 = require("../store/meta-cache");
58
59
  const vector_db_1 = require("../store/vector-db");
59
60
  const file_utils_1 = require("../utils/file-utils");
@@ -63,7 +64,7 @@ const project_root_1 = require("../utils/project-root");
63
64
  const pool_1 = require("../workers/pool");
64
65
  const index_config_1 = require("./index-config");
65
66
  const walker_1 = require("./walker");
66
- function generateSummaries(db, pathPrefix, onProgress) {
67
+ function generateSummaries(db, pathPrefix, onProgress, maxChunks) {
67
68
  return __awaiter(this, void 0, void 0, function* () {
68
69
  let summarizeChunks;
69
70
  try {
@@ -71,23 +72,24 @@ function generateSummaries(db, pathPrefix, onProgress) {
71
72
  summarizeChunks = mod.summarizeChunks;
72
73
  }
73
74
  catch (_a) {
74
- return 0;
75
+ return { summarized: 0, remaining: 0 };
75
76
  }
76
77
  // Quick availability check
77
78
  const test = yield summarizeChunks([
78
79
  { code: "test", language: "ts", file: "test" },
79
80
  ]);
80
81
  if (!test)
81
- return 0;
82
+ return { summarized: 0, remaining: 0 };
83
+ const queryLimit = maxChunks !== null && maxChunks !== void 0 ? maxChunks : 50000;
82
84
  const table = yield db.ensureTable();
83
85
  const rows = yield table
84
86
  .query()
85
87
  .select(["id", "path", "content", "defined_symbols"])
86
88
  .where(`path LIKE '${pathPrefix}%' AND (summary IS NULL OR summary = '')`)
87
- .limit(50000)
89
+ .limit(queryLimit)
88
90
  .toArray();
89
91
  if (rows.length === 0)
90
- return 0;
92
+ return { summarized: 0, remaining: 0 };
91
93
  let summarized = 0;
92
94
  const BATCH_SIZE = 5;
93
95
  for (let i = 0; i < rows.length; i += BATCH_SIZE) {
@@ -123,7 +125,11 @@ function generateSummaries(db, pathPrefix, onProgress) {
123
125
  }
124
126
  onProgress === null || onProgress === void 0 ? void 0 : onProgress(summarized, rows.length);
125
127
  }
126
- return summarized;
128
+ // Estimate remaining (rows.length was capped by queryLimit)
129
+ const remaining = rows.length === queryLimit
130
+ ? queryLimit - summarized // at least this many more
131
+ : 0;
132
+ return { summarized, remaining };
127
133
  });
128
134
  }
129
135
  function flushBatch(db, meta, vectors, pendingMeta, pendingDeletes, dryRun) {
@@ -183,6 +189,8 @@ function initialSync(options) {
183
189
  : `${resolvedRoot}/`;
184
190
  // Propagate project root to worker processes
185
191
  process.env.GMAX_PROJECT_ROOT = paths.root;
192
+ const syncTimer = (0, logger_1.timer)("index", "Total");
193
+ (0, logger_1.log)("index", `Root: ${resolvedRoot}`);
186
194
  let lock = null;
187
195
  const vectorDb = new vector_db_1.VectorDB(paths.lancedbDir);
188
196
  const treatAsEmptyCache = reset && dryRun;
@@ -199,11 +207,15 @@ function initialSync(options) {
199
207
  if (!dryRun) {
200
208
  // Scope checks to this project's paths only
201
209
  const projectKeys = yield metaCache.getKeysWithPrefix(rootPrefix);
210
+ (0, logger_1.log)("index", `Cached files: ${projectKeys.size}`);
202
211
  const modelChanged = (0, index_config_1.checkModelMismatch)(paths.configPath);
203
212
  if (reset || modelChanged) {
204
213
  if (modelChanged) {
205
214
  const stored = (0, index_config_1.readIndexConfig)(paths.configPath);
206
- console.warn(`[syncer] Embedding model changed: ${stored === null || stored === void 0 ? void 0 : stored.embedModel} → ${config_1.MODEL_IDS.embed}. Forcing full re-index.`);
215
+ (0, logger_1.log)("index", `Reset: model changed (${stored === null || stored === void 0 ? void 0 : stored.embedModel} → ${config_1.MODEL_IDS.embed})`);
216
+ }
217
+ else {
218
+ (0, logger_1.log)("index", "Reset: --reset flag");
207
219
  }
208
220
  // Only delete this project's data from the centralized store
209
221
  yield vectorDb.deletePathsWithPrefix(rootPrefix);
@@ -230,6 +242,9 @@ function initialSync(options) {
230
242
  let processed = 0;
231
243
  let indexed = 0;
232
244
  let failedFiles = 0;
245
+ let cacheHits = 0;
246
+ let walkedFiles = 0;
247
+ const walkTimer = (0, logger_1.timer)("index", "Walk");
233
248
  let shouldSkipCleanup = false;
234
249
  let flushError;
235
250
  let flushPromise = null;
@@ -326,6 +341,7 @@ function initialSync(options) {
326
341
  }
327
342
  if (!(0, file_utils_1.isIndexableFile)(absPath))
328
343
  continue;
344
+ walkedFiles++;
329
345
  yield schedule(() => __awaiter(this, void 0, void 0, function* () {
330
346
  if (signal === null || signal === void 0 ? void 0 : signal.aborted) {
331
347
  shouldSkipCleanup = true;
@@ -343,11 +359,14 @@ function initialSync(options) {
343
359
  if (cached &&
344
360
  cached.mtimeMs === stats.mtimeMs &&
345
361
  cached.size === stats.size) {
362
+ cacheHits++;
363
+ (0, logger_1.debug)("index", `SKIP ${relPath} (cached)`);
346
364
  processed += 1;
347
365
  seenPaths.add(absPath);
348
366
  markProgress(relPath);
349
367
  return;
350
368
  }
369
+ (0, logger_1.debug)("index", `EMBED ${relPath}`);
351
370
  const result = yield processFileWithRetry(absPath);
352
371
  const metaEntry = {
353
372
  hash: result.hash,
@@ -426,6 +445,9 @@ function initialSync(options) {
426
445
  finally { if (e_1) throw e_1.error; }
427
446
  }
428
447
  yield Promise.allSettled(activeTasks);
448
+ walkTimer();
449
+ (0, logger_1.log)("index", `Walk: ${walkedFiles} files`);
450
+ (0, logger_1.log)("index", `Embed: ${indexed} new, ${cacheHits} cached, ${failedFiles} failed`);
429
451
  if (signal === null || signal === void 0 ? void 0 : signal.aborted) {
430
452
  shouldSkipCleanup = true;
431
453
  }
@@ -436,6 +458,7 @@ function initialSync(options) {
436
458
  : new Error(String(flushError));
437
459
  }
438
460
  if (!dryRun) {
461
+ const ftsTimer = (0, logger_1.timer)("index", "FTS");
439
462
  onProgress === null || onProgress === void 0 ? void 0 : onProgress({
440
463
  processed,
441
464
  indexed,
@@ -443,40 +466,18 @@ function initialSync(options) {
443
466
  filePath: "Creating FTS index...",
444
467
  });
445
468
  yield vectorDb.createFTSIndex();
469
+ ftsTimer();
446
470
  }
447
471
  // Stale cleanup: only remove paths scoped to this project's root
448
472
  const stale = Array.from(cachedPaths).filter((p) => !seenPaths.has(p));
449
473
  if (!dryRun && stale.length > 0 && !shouldSkipCleanup) {
474
+ (0, logger_1.log)("index", `Stale cleanup: ${stale.length} paths`);
450
475
  yield vectorDb.deletePaths(stale);
451
476
  stale.forEach((p) => {
452
477
  metaCache.delete(p);
453
478
  });
454
479
  }
455
- // --- Summary post-processing (sequential, single process) ---
456
- if (!dryRun && indexed > 0) {
457
- onProgress === null || onProgress === void 0 ? void 0 : onProgress({
458
- processed,
459
- indexed,
460
- total,
461
- filePath: "Generating summaries...",
462
- });
463
- const summarized = yield generateSummaries(vectorDb, rootPrefix, (count, chunkTotal) => {
464
- onProgress === null || onProgress === void 0 ? void 0 : onProgress({
465
- processed: count,
466
- indexed,
467
- total: chunkTotal,
468
- filePath: `Summarizing... (${count}/${chunkTotal})`,
469
- });
470
- });
471
- if (summarized > 0) {
472
- onProgress === null || onProgress === void 0 ? void 0 : onProgress({
473
- processed,
474
- indexed,
475
- total,
476
- filePath: `Summarized ${summarized} chunks`,
477
- });
478
- }
479
- }
480
+ syncTimer();
480
481
  // Write model config so future runs can detect model changes
481
482
  if (!dryRun) {
482
483
  (0, index_config_1.writeIndexConfig)(paths.configPath);
@@ -48,6 +48,7 @@ const fs = __importStar(require("node:fs"));
48
48
  const path = __importStar(require("node:path"));
49
49
  const chokidar_1 = require("chokidar");
50
50
  const file_utils_1 = require("../utils/file-utils");
51
+ const logger_1 = require("../utils/logger");
51
52
  const lock_1 = require("../utils/lock");
52
53
  const pool_1 = require("../workers/pool");
53
54
  const llm_client_1 = require("../workers/summarize/llm-client");
@@ -102,6 +103,7 @@ function startWatcher(opts) {
102
103
  processing = true;
103
104
  const batch = new Map(pending);
104
105
  pending.clear();
106
+ (0, logger_1.log)("watch", `Processing ${batch.size} changed files`);
105
107
  const start = Date.now();
106
108
  let reindexed = 0;
107
109
  const changedIds = [];
@@ -47,6 +47,7 @@ const fs = __importStar(require("node:fs"));
47
47
  const lancedb = __importStar(require("@lancedb/lancedb"));
48
48
  const apache_arrow_1 = require("apache-arrow");
49
49
  const config_1 = require("../../config");
50
+ const logger_1 = require("../utils/logger");
50
51
  const cleanup_1 = require("../utils/cleanup");
51
52
  const TABLE_NAME = "chunks";
52
53
  class VectorDB {
@@ -151,6 +152,7 @@ class VectorDB {
151
152
  return table;
152
153
  }
153
154
  catch (_err) {
155
+ (0, logger_1.log)("db", `Creating table (${this.vectorDim}d)`);
154
156
  const schema = this.buildSchema();
155
157
  const table = yield db.createTable(TABLE_NAME, [this.seedRow()], {
156
158
  schema,
@@ -0,0 +1,23 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.log = log;
4
+ exports.debug = debug;
5
+ exports.timer = timer;
6
+ const VERBOSE = process.env.GMAX_DEBUG === "1" || process.env.GMAX_VERBOSE === "1";
7
/**
 * Write one tagged line to stderr, unconditionally.
 *
 * @param tag Label rendered inside square brackets at the start of the line.
 * @param msg Message text that follows the tag.
 */
function log(tag, msg) {
    const line = `[${tag}] ${msg}\n`;
    process.stderr.write(line);
}
10
/**
 * Write one tagged line to stderr, but only when the module-level `VERBOSE`
 * flag is set; otherwise do nothing.
 *
 * @param tag Label rendered inside square brackets at the start of the line.
 * @param msg Message text that follows the tag.
 */
function debug(tag, msg) {
    if (!VERBOSE) {
        return;
    }
    process.stderr.write(`[${tag}] ${msg}\n`);
}
14
/**
 * Start a wall-clock timer and return a function that logs the elapsed time.
 *
 * Calling the returned function logs `<label>: <elapsed>` under `tag` via
 * `log`. Durations above 60000 ms are shown in minutes, anything else in
 * seconds, both with one decimal place.
 *
 * @param tag Tag passed through to `log`.
 * @param label Text placed before the elapsed time.
 * @returns {() => void} Function that logs the time elapsed since `timer` was called.
 */
function timer(tag, label) {
    const startedAt = Date.now();
    return () => {
        const elapsedMs = Date.now() - startedAt;
        let formatted;
        if (elapsedMs > 60000) {
            formatted = `${(elapsedMs / 60000).toFixed(1)}min`;
        }
        else {
            formatted = `${(elapsedMs / 1000).toFixed(1)}s`;
        }
        log(tag, `${label}: ${formatted}`);
    };
}
@@ -51,6 +51,7 @@ exports.isWorkerPoolInitialized = isWorkerPoolInitialized;
51
51
  * to ensure the ONNX Runtime segfaults do not crash the main process.
52
52
  */
53
53
  const childProcess = __importStar(require("node:child_process"));
54
+ const logger_1 = require("../utils/logger");
54
55
  const fs = __importStar(require("node:fs"));
55
56
  const path = __importStar(require("node:path"));
56
57
  const config_1 = require("../../config");
@@ -149,6 +150,7 @@ class WorkerPool {
149
150
  task.reject(new Error(`Worker exited unexpectedly${code ? ` (code ${code})` : ""}${signal ? ` signal ${signal}` : ""}`));
150
151
  this.completeTask(task, null);
151
152
  }
153
+ (0, logger_1.log)("pool", `Worker PID:${worker.child.pid} exited (code:${code} signal:${signal})`);
152
154
  this.workers = this.workers.filter((w) => w !== worker);
153
155
  if (!this.destroyed) {
154
156
  this.spawnWorker();
@@ -157,6 +159,7 @@ class WorkerPool {
157
159
  }
158
160
  spawnWorker() {
159
161
  const worker = new ProcessWorker(this.modulePath, this.execArgv);
162
+ (0, logger_1.debug)("pool", `Spawned worker PID:${worker.child.pid}`);
160
163
  const onMessage = (msg) => {
161
164
  const task = this.tasks.get(msg.id);
162
165
  if (!task)
@@ -100,6 +100,7 @@ function summarizeChunks(chunks) {
100
100
  return [];
101
101
  const { ok, data } = yield postJSON("/summarize", { chunks });
102
102
  if (!ok || !(data === null || data === void 0 ? void 0 : data.summaries)) {
103
+ process.stderr.write("[summarizer] Request failed or server unavailable\n");
103
104
  return null;
104
105
  }
105
106
  return data.summaries;
@@ -11,6 +11,7 @@ endpoints run on the event loop thread, avoiding Metal thread-safety crashes.
11
11
  import asyncio
12
12
  import logging
13
13
  import os
14
+ import re
14
15
  import signal
15
16
  import socket
16
17
  import time
@@ -38,7 +39,7 @@ MODEL_ID = os.environ.get(
38
39
  )
39
40
  PORT = int(os.environ.get("MLX_SUMMARY_PORT", "8101"))
40
41
  IDLE_TIMEOUT_S = int(os.environ.get("MLX_SUMMARY_IDLE_TIMEOUT", "1800")) # 30 min
41
- MAX_TOKENS = 100 # summaries should be one line
42
+ MAX_TOKENS = 40 # summaries are ~20 tokens, one line
42
43
 
43
44
  model = None
44
45
  tokenizer = None
@@ -48,7 +49,7 @@ _mlx_lock = asyncio.Lock()
48
49
 
49
50
  SYSTEM_PROMPT = """You are a code summarizer. Given a code chunk, produce exactly one line describing what it does.
50
51
  Be specific about business logic, services, and side effects. Do not describe syntax.
51
- Do not use phrases like "This function" or "This code". Start with a verb."""
52
+ Do not use phrases like "This function" or "This code". Start with a verb. /no_think"""
52
53
 
53
54
  def build_prompt(code: str, language: str, file: str, symbols: list[str] | None = None) -> str:
54
55
  parts = [f"Language: {language}", f"File: {file}"]
@@ -79,8 +80,12 @@ def summarize_chunk(code: str, language: str, file: str, symbols: list[str] | No
79
80
  max_tokens=MAX_TOKENS,
80
81
  verbose=False,
81
82
  )
83
+ # Strip thinking tokens if present
84
+ text = re.sub(r"<think>.*?</think>", "", response, flags=re.DOTALL).strip()
85
+ if not text:
86
+ text = response.strip()
82
87
  # Take first line only, strip whitespace
83
- summary = response.strip().split("\n")[0].strip()
88
+ summary = text.split("\n")[0].strip()
84
89
  # Remove common prefixes the model might add
85
90
  for prefix in ["Summary: ", "summary: ", "- "]:
86
91
  if summary.startswith(prefix):
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "grepmax",
3
- "version": "0.5.0",
3
+ "version": "0.5.2",
4
4
  "author": "Robert Owens <robowens@me.com>",
5
5
  "homepage": "https://github.com/reowens/grepmax",
6
6
  "bugs": {
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "grepmax",
3
- "version": "0.5.0",
3
+ "version": "0.5.2",
4
4
  "description": "Semantic code search for Claude Code. Automatically indexes your project and provides intelligent search capabilities.",
5
5
  "author": {
6
6
  "name": "Robert Owens",
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  name: gmax
3
3
  description: Semantic code search. Use alongside grep - grep for exact strings, gmax for concepts.
4
- allowed-tools: "mcp__grepmax__semantic_search, mcp__grepmax__search_all, mcp__grepmax__code_skeleton, mcp__grepmax__trace_calls, mcp__grepmax__list_symbols, mcp__grepmax__index_status, Bash(gmax:*), Read"
4
+ allowed-tools: "mcp__grepmax__semantic_search, mcp__grepmax__search_all, mcp__grepmax__code_skeleton, mcp__grepmax__trace_calls, mcp__grepmax__list_symbols, mcp__grepmax__index_status, mcp__grepmax__summarize_directory, Bash(gmax:*), Read"
5
5
  ---
6
6
 
7
7
  ## What gmax does
@@ -67,6 +67,10 @@ List indexed symbols with definition locations.
67
67
  ### index_status
68
68
  Check centralized index health — chunks, files, indexed directories, model info.
69
69
 
70
+ ### summarize_directory
71
+ Generate LLM summaries for indexed code in a directory. Summaries are stored and returned in search results. Run after indexing a new directory.
72
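+ - `path` (optional): Directory to summarize (absolute or relative). Defaults to project root.
+ - `limit` (optional): Max chunks to summarize per call (default 200, max 5000). Run again to continue.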
73
+
70
74
  ## Workflow
71
75
 
72
76
  1. **Search** — `semantic_search` to find relevant code (pointers by default)