grepmax 0.7.28 → 0.7.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -50,6 +50,7 @@ var __asyncValues = (this && this.__asyncValues) || function (o) {
50
50
  };
51
51
  Object.defineProperty(exports, "__esModule", { value: true });
52
52
  exports.generateSummaries = generateSummaries;
53
+ exports.computeStaleFiles = computeStaleFiles;
53
54
  exports.initialSync = initialSync;
54
55
  const fs = __importStar(require("node:fs"));
55
56
  const path = __importStar(require("node:path"));
@@ -178,6 +179,9 @@ function createNoopMetaCache() {
178
179
  close: () => __awaiter(this, void 0, void 0, function* () { }),
179
180
  };
180
181
  }
182
/**
 * Returns the entries of `cachedPaths` that are not present in `seenPaths`.
 *
 * @param cachedPaths - Anything `Array.from` accepts (iterable or array-like) holding the cached paths.
 * @param seenPaths - Object exposing `has(path)`; presumably a Set of paths seen during the scan — verify against caller.
 * @returns A new array with every cached path for which `seenPaths.has(path)` is falsy.
 */
+ function computeStaleFiles(cachedPaths, seenPaths) {
183
+ return Array.from(cachedPaths).filter((p) => !seenPaths.has(p));
184
+ }
181
185
  function initialSync(options) {
182
186
  return __awaiter(this, void 0, void 0, function* () {
183
187
  var _a, e_1, _b, _c;
@@ -465,7 +469,7 @@ function initialSync(options) {
465
469
  : new Error(String(flushError));
466
470
  }
467
471
  // Stale cleanup: only remove paths scoped to this project's root
468
- const stale = Array.from(cachedPaths).filter((p) => !seenPaths.has(p));
472
+ const stale = computeStaleFiles(cachedPaths, seenPaths);
469
473
  if (!dryRun && stale.length > 0 && !shouldSkipCleanup) {
470
474
  (0, logger_1.log)("index", `Stale cleanup: ${stale.length} paths`);
471
475
  yield vectorDb.deletePaths(stale);
@@ -0,0 +1,149 @@
1
+ "use strict";
2
// Compiler-emitted helper: exposes property `k` of module `m` on object `o` under the name `k2`
// (`k2` defaults to `k`). An existing helper on `this` is reused when available.
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { // descriptor unusable as-is: fall back to a forwarding getter
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) { // branch used when Object.create is unavailable: plain value copy
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
// Compiler-emitted helper: stores `v` as the enumerable "default" property of `o`
// (via defineProperty when Object.create exists, otherwise by plain assignment).
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
// Compiler-emitted helper used for the `require(...)` calls wrapped in `__importStar(...)`.
// A module flagged `__esModule` is returned unchanged; any other value is wrapped in a new
// object that re-exposes its own non-"default" keys and carries the value itself as `default`.
+ var __importStar = (this && this.__importStar) || (function () {
19
+ var ownKeys = function(o) { // replaces itself on first call with getOwnPropertyNames or the for-in fallback
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod;
29
+ var result = {};
30
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
+ __setModuleDefault(result, mod);
32
+ return result;
33
+ };
34
+ })();
35
// Compiler-emitted helper: runs `generator` with `thisArg`/`_arguments` and returns a promise
// (constructor `P`, defaulting to Promise). Each yielded value is wrapped in a `P` and awaited;
// its fulfilment is fed back through `next`, its rejection through `throw`. The returned promise
// resolves with the generator's final value and rejects if stepping the generator throws.
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
36
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
37
+ return new (P || (P = Promise))(function (resolve, reject) {
38
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
39
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
40
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
41
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
42
+ });
43
+ };
44
+ Object.defineProperty(exports, "__esModule", { value: true });
45
+ exports.processBatchCore = processBatchCore;
46
+ exports.flushBatchToDb = flushBatchToDb;
47
+ exports.computeRetryAction = computeRetryAction;
48
+ const fs = __importStar(require("node:fs"));
49
+ const cache_check_1 = require("../utils/cache-check");
50
+ const file_utils_1 = require("../utils/file-utils");
51
/**
 * Processes one batch of file events and collects the resulting index changes.
 * Nothing is written to a database here; the function only fills and returns the lists below.
 *
 * @param batch - Iterable of `[absPath, event]` pairs.
 * @param metaCache - Only read here, via `metaCache.get(absPath)`.
 * @param pool - Provides `processFile({ path, absolutePath })`; its result is read for
 *   `hash`, `mtimeMs`, `size`, `shouldDelete` and `vectors`.
 * @returns Promise resolving to `{ reindexed, changedIds, vectors, deletes, metaUpdates, metaDeletes }`,
 *   where `metaUpdates` is a Map keyed by path and the other collections are arrays.
 */
+ function processBatchCore(batch, metaCache, pool) {
52
+ return __awaiter(this, void 0, void 0, function* () {
53
+ let reindexed = 0;
54
+ const changedIds = [];
55
+ const deletes = [];
56
+ const vectors = [];
57
+ const metaUpdates = new Map();
58
+ const metaDeletes = [];
59
+ for (const [absPath, event] of batch) {
60
+ if (event === "unlink") { // removal event: queue the path in both delete lists and skip the file checks
61
+ deletes.push(absPath);
62
+ metaDeletes.push(absPath);
63
+ reindexed++;
64
+ continue;
65
+ }
66
+ try {
67
+ const stats = yield fs.promises.stat(absPath);
68
+ if (!(0, file_utils_1.isIndexableFile)(absPath, stats.size))
69
+ continue;
70
+ const cached = metaCache.get(absPath);
71
+ if ((0, cache_check_1.isFileCached)(cached, stats)) { // cache check passed: nothing is queued for this path
72
+ continue;
73
+ }
74
+ const result = yield pool.processFile({
75
+ path: absPath,
76
+ absolutePath: absPath,
77
+ });
78
+ const metaEntry = {
79
+ hash: result.hash,
80
+ mtimeMs: result.mtimeMs,
81
+ size: result.size,
82
+ };
83
+ if (cached && cached.hash === result.hash) { // same hash as the cached entry: only the meta entry is refreshed, not counted as reindexed
84
+ metaUpdates.set(absPath, metaEntry);
85
+ continue;
86
+ }
87
+ if (result.shouldDelete) { // delete-only outcome: no vectors are collected for this path
88
+ deletes.push(absPath);
89
+ metaUpdates.set(absPath, metaEntry);
90
+ reindexed++;
91
+ continue;
92
+ }
93
+ deletes.push(absPath); // the path is queued for deletion even when new vectors are collected just below
94
+ if (result.vectors.length > 0) {
95
+ vectors.push(...result.vectors);
96
+ for (const v of result.vectors) {
97
+ changedIds.push(v.id);
98
+ }
99
+ }
100
+ metaUpdates.set(absPath, metaEntry);
101
+ reindexed++;
102
+ }
103
+ catch (err) { // only ENOENT is handled below; any other error is silently ignored and the path is skipped
104
+ const code = err === null || err === void 0 ? void 0 : err.code;
105
+ if (code === "ENOENT") { // file no longer exists: handled the same way as an "unlink" event
106
+ deletes.push(absPath);
107
+ metaDeletes.push(absPath);
108
+ reindexed++;
109
+ }
110
+ }
111
+ }
112
+ return { reindexed, changedIds, vectors, deletes, metaUpdates, metaDeletes };
113
+ });
114
+ }
115
/**
 * Writes a collected batch result to the vector database: inserts first, then deletes.
 *
 * @param result - Object with `vectors` (array of items carrying an `id`) and `deletes` (array of paths).
 * @param vectorDb - Provides `insertBatch(vectors)`, `deletePaths(paths)` and
 *   `deletePathsExcludingIds(paths, ids)`.
 * @returns Promise that resolves once the insert and delete calls made here have completed.
 */
+ function flushBatchToDb(result, vectorDb) {
116
+ return __awaiter(this, void 0, void 0, function* () {
117
+ const newIds = result.vectors.map((v) => v.id);
118
+ if (result.vectors.length > 0) {
119
+ yield vectorDb.insertBatch(result.vectors);
120
+ }
121
+ if (result.deletes.length > 0) {
122
+ if (newIds.length > 0) { // NOTE(review): presumably excludes the just-inserted ids so the delete does not remove them — confirm in vectorDb
123
+ yield vectorDb.deletePathsExcludingIds(result.deletes, newIds);
124
+ }
125
+ else {
126
+ yield vectorDb.deletePaths(result.deletes);
127
+ }
128
+ }
129
+ });
130
+ }
131
/**
 * Decides, for each entry of a failed batch, whether to requeue it or drop it, and computes the backoff delay.
 * Mutates `retryCount`: requeued paths get their incremented count stored, dropped paths are removed.
 *
 * @param batch - Iterable of `[absPath, event]` pairs.
 * @param retryCount - Map-like object (`get`/`set`/`delete`) from path to retry count; a missing entry counts as 0.
 * @param maxRetries - An entry is dropped once its incremented count reaches this value.
 * @param isLockError - When truthy, the backoff exponent is `consecutiveLockFailures + 1`; otherwise it is 0.
 * @param consecutiveLockFailures - Number used for the exponent when `isLockError` is truthy.
 * @param debounceMs - Base delay that is multiplied by 2 to the power of the exponent.
 * @returns `{ requeued, dropped, backoffMs }`: a Map of requeued entries, the number of dropped entries,
 *   and the delay capped at 30000.
 */
+ function computeRetryAction(batch, retryCount, maxRetries, isLockError, consecutiveLockFailures, debounceMs) {
132
+ var _a;
133
+ const requeued = new Map();
134
+ let dropped = 0;
135
+ for (const [absPath, event] of batch) {
136
+ const count = ((_a = retryCount.get(absPath)) !== null && _a !== void 0 ? _a : 0) + 1; // previous count (0 when absent) plus this attempt
137
+ if (count >= maxRetries) {
138
+ retryCount.delete(absPath);
139
+ dropped++;
140
+ }
141
+ else {
142
+ requeued.set(absPath, event);
143
+ retryCount.set(absPath, count);
144
+ }
145
+ }
146
+ const effectiveFailures = isLockError ? consecutiveLockFailures + 1 : 0;
147
+ const backoffMs = Math.min(debounceMs * Math.pow(2, effectiveFailures), 30000); // without a lock error this is debounceMs (or 30000 if smaller)
148
+ return { requeued, dropped, backoffMs };
149
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "grepmax",
3
- "version": "0.7.28",
3
+ "version": "0.7.30",
4
4
  "author": "Robert Owens <robowens@me.com>",
5
5
  "homepage": "https://github.com/reowens/grepmax",
6
6
  "bugs": {
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "grepmax",
3
- "version": "0.7.28",
3
+ "version": "0.7.30",
4
4
  "description": "Semantic code search for Claude Code. Automatically indexes your project and provides intelligent search capabilities.",
5
5
  "author": {
6
6
  "name": "Robert Owens",
@@ -89,7 +89,7 @@ async function main() {
89
89
  hookSpecificOutput: {
90
90
  hookEventName: "SessionStart",
91
91
  additionalContext:
92
- "gmax MCP ready. Use semantic_search for concept-based code search (5+ words recommended). Use code_skeleton before reading large files. index_status to check health.",
92
+ 'gmax ready. PREFER CLI over MCP tools — use Bash(gmax "query" --plain) for search, Bash(gmax trace <symbol>) for call graphs, Bash(gmax skeleton <path>) for file structure. CLI is 2x more token-efficient than MCP tool calls. Always add --plain flag.',
93
93
  },
94
94
  };
95
95
  process.stdout.write(JSON.stringify(response));
@@ -11,150 +11,99 @@ Semantic code search — finds code by meaning, not just strings.
11
11
  - grep/ripgrep: exact string match
12
12
  - gmax: concept match ("where do we handle auth?", "how does booking flow work?")
13
13
 
14
- ## MCP tools
15
-
16
- ### semantic_search
17
- Search code by meaning. Two output modes:
18
-
19
- **Pointer mode (default)** — returns metadata + LLM-generated summary per result:
20
- ```
21
- handleAuth [exported ORCH C:8] src/auth/handler.ts:45-90
22
- Validates JWT from Authorization header, checks RBAC permissions, returns 401 on failure
23
- parent:AuthController calls:validateToken,checkRole,respond
24
- ```
25
-
26
- **Code mode (`detail: "code"`)** — includes 4-line numbered code snippets:
27
- ```
28
- handleAuth [exported ORCH C:8] src/auth/handler.ts:45-90
29
- Validates JWT from Authorization header, checks RBAC permissions, returns 401 on failure
30
- parent:AuthController calls:validateToken,checkRole,respond
31
- 45│ const token = req.headers.get("Authorization");
32
- 46│ const claims = await validateToken(token);
33
- 47│ if (!claims) return unauthorized();
34
- 48│ const allowed = await checkRole(claims.role, req.path);
35
- ```
36
-
37
- Parameters:
38
- - `query` (required): Natural language. Be specific — 5+ words gives much better results than 1-2 words.
39
- - `limit` (optional): Max results (default 3, max 50)
40
- - `root` (optional): Absolute path to search a different indexed directory.
41
- - `path` (optional): Restrict to path prefix (e.g. "src/auth/"). Relative to the search root.
42
- - `detail` (optional): `"pointer"` (default), `"code"` (4-line snippets), or `"full"` (complete chunk with line numbers)
43
- - `context_lines` (optional): Include N lines before/after the chunk (like grep -C). Only with detail "code" or "full". Max 20.
44
- - `min_score` (optional): Filter by minimum relevance score (0-1)
45
- - `max_per_file` (optional): Cap results per file for diversity
46
- - `file` (optional): Filter to files matching this name (e.g. "syncer.ts"). Matches filename, not full path.
47
- - `exclude` (optional): Exclude files under this path prefix (e.g. "tests/" or "dist/")
48
- - `language` (optional): Filter by file extension (e.g. "ts", "py", "go"). Omit the dot.
49
- - `role` (optional): Filter by chunk role: "ORCHESTRATION" (logic/flow), "DEFINITION" (types), or "IMPLEMENTATION"
50
- - `mode` (optional): `"default"` (semantic only) or `"symbol"` (semantic + call graph appended). Use "symbol" when query is a function or class name — gets search results + callers/callees in one call.
51
- - `include_imports` (optional): Prepend file's import/require statements to each result. Deduped per file — see dependencies at a glance.
52
- - `name_pattern` (optional): Regex to filter by symbol name (e.g. "handle.*Auth"). Case-insensitive. Applied after search.
53
-
54
- **When to use which mode:**
55
- - `pointer` — navigation, finding locations, understanding architecture
56
- - `code` — comparing implementations, finding duplicates, checking syntax
57
-
58
- ### search_all
59
- Search ALL indexed code across every directory. Same parameters as semantic_search (query, limit, detail, min_score, max_per_file, file, exclude, language, role) but without `root` or `path`.
60
-
61
- Additional parameters:
62
- - `projects` (optional): Comma-separated project names to include (e.g. "platform,osgrep"). Use `index_status` to see names.
63
- - `exclude_projects` (optional): Comma-separated project names to exclude (e.g. "capstone,power")
64
-
65
- Use sparingly. Prefer `semantic_search` when you know which directory to search.
66
-
67
- ### code_skeleton
68
- File or directory structure — signatures with bodies collapsed (~4x fewer tokens).
69
- - `target` (required): File path, directory path (e.g. "src/lib/search/"), or comma-separated files
70
- - `limit` (optional): Max files for directory mode (default 10, max 20)
71
- - `format` (optional): `"text"` (default) or `"json"` (structured symbol list with name, line, signature, type, exported)
72
-
73
- ### trace_calls
74
- Call graph — who imports a symbol, who calls it, and what it calls. Includes file:line locations. Unscoped — follows calls across all indexed directories.
75
- - `symbol` (required): Function/method/class name
76
- - `depth` (optional): Traversal depth for callers (default 1, max 3). depth: 2 shows callers-of-callers with indentation.
77
-
78
- Output: definition, "Imported by" (files with import statements), "Callers" (functions that call it), "Calls" (what it calls).
79
-
80
- ### list_symbols
81
- List indexed symbols with definition locations, role, and export status.
82
- - `pattern` (optional): Filter by name (case-insensitive substring match)
83
- - `limit` (optional): Max results (default 20, max 100)
84
- - `path` (optional): Only symbols under this path prefix
85
-
86
- Output: `symbolName [ORCH] exported src/path/file.ts:42`
87
-
88
- ### summarize_project
89
- High-level project overview — languages, directory structure, role distribution, key symbols, entry points. Use when first exploring a new codebase.
90
- - `root` (optional): Project root path. Defaults to current project.
91
-
92
- ### related_files
93
- Find files related to a given file by shared symbol references. Shows dependencies (what this file calls) and dependents (what calls this file).
94
- - `file` (required): File path relative to project root
95
- - `limit` (optional): Max results per direction (default 10)
96
-
97
- ### recent_changes
98
- Show recently modified files in the index. Useful after pulls or merges to see what changed.
99
- - `limit` (optional): Max files (default 20)
100
- - `root` (optional): Project root (defaults to current project)
101
-
102
- ### index_status
103
- Check centralized index health — chunks, files, indexed directories, model info, watcher status.
104
-
105
- ### summarize_directory
106
- Generate LLM summaries for indexed code in a directory. Summaries are stored and returned in search results. Requires the summarizer server (auto-started by the plugin hook).
107
- - `path` (optional): Directory to summarize. Defaults to project root.
108
- - `limit` (optional): Max chunks to summarize per call (default 200, max 5000). Run again to continue.
14
+ ## IMPORTANT: Use CLI, not MCP tools
109
15
 
110
- ## Workflow
16
+ **Always prefer `Bash(gmax ...)` over MCP tool calls.** The CLI is ~2x more token-efficient because MCP tool schemas add ~800 tokens of overhead per call. The CLI has full feature parity with every MCP tool.
111
17
 
112
- 1. **Explore** — `summarize_project` for high-level overview of a new codebase
113
- 2. **Search** — `semantic_search` to find relevant code (pointers by default). Use `mode: "symbol"` for function/class names.
114
- 3. **Read** — `Read file:line` for the specific ranges you need
115
- 4. **Skeleton** — `code_skeleton` before reading large files or directories
116
- 5. **Trace** — `trace_calls` to understand call flow, imports, and callers (use `depth: 2` for full chains)
117
- 6. **Context** — `related_files` to see what else you need to look at when editing
118
- 7. **Changes** — `recent_changes` after pulls to see what's been modified
18
+ ```
19
+ Bash(gmax "auth handler" --role ORCHESTRATION --lang ts --plain -m 3)
20
+ ```
119
21
 
120
- ## If results seem stale
22
+ **Only use MCP tools** for `index_status` (quick health check) or `summarize_directory` (LLM summaries). For everything else, use CLI.
121
23
 
122
- The watcher auto-starts when the MCP server connects — it detects file changes and re-indexes in the background. Usually results are fresh without manual intervention.
24
+ ## CLI commands (use these)
123
25
 
124
- 1. Check `index_status` — if watcher shows "syncing", wait for it to finish.
125
- 2. To force a full re-index: `Bash(gmax index)` (indexes current directory)
126
- 3. To add summaries without re-indexing: `Bash(gmax summarize)`
127
- 4. Do NOT use `gmax reindex` — it doesn't exist.
26
+ ### Search — `gmax "query" --plain`
27
+ ```
28
+ gmax "where do we handle authentication" --plain
29
+ gmax "database connection pooling" --role ORCHESTRATION --plain -m 5
30
+ gmax "error handling" --lang ts --exclude tests/ --plain
31
+ gmax "VectorDB" --symbol --plain # search + call graph in one shot
32
+ gmax "handler" --name "handle.*" --plain # regex filter on symbol names
33
+ gmax "auth" --file handler.ts --plain # filter by filename
34
+ gmax "query" -C 5 --plain # include context lines
35
+ gmax "query" --imports --plain # show file imports
36
+ ```
128
37
 
129
- ## Search warnings
38
+ All flags: `--plain -m <n> --per-file <n> --min-score <n> --root <dir> --file <name> --exclude <prefix> --lang <ext> --role <role> --symbol --imports --name <regex> -C <n> --compact --content --scores --skeleton`
39
+
40
+ ### Trace — `gmax trace <symbol>`
41
+ ```
42
+ gmax trace handleAuth # 1-hop: callers + callees
43
+ gmax trace handleAuth -d 2 # 2-hop: callers-of-callers
44
+ ```
45
+
46
+ ### Skeleton — `gmax skeleton <target>`
47
+ ```
48
+ gmax skeleton src/lib/auth.ts # single file
49
+ gmax skeleton src/lib/search/ # entire directory
50
+ gmax skeleton src/a.ts,src/b.ts # batch
51
+ gmax skeleton src/lib/auth.ts --json # structured JSON output
52
+ ```
130
53
 
131
- If search results include a warning like "Full-text search unavailable", results may be less precise. This resolves automatically — the index retries FTS every 5 minutes.
54
+ ### Project overview — `gmax project`
55
+ ```
56
+ gmax project # languages, structure, key symbols
57
+ ```
132
58
 
133
- ## CLI vs MCP — when to use which
59
+ ### Related files — `gmax related <file>`
60
+ ```
61
+ gmax related src/lib/index/syncer.ts # dependencies + dependents
62
+ ```
134
63
 
135
- **Prefer CLI (`Bash(gmax ...)`) for repeated searches.** The CLI is ~2x more token-efficient because MCP tool schemas add ~800 tokens of overhead per call. Every CLI flag maps to an MCP param:
64
+ ### Recent changes — `gmax recent`
65
+ ```
66
+ gmax recent # recently modified files
67
+ ```
136
68
 
69
+ ### Other
137
70
  ```
138
- Bash(gmax "auth handler" --role ORCHESTRATION --lang ts --plain -m 3)
71
+ gmax symbols # list indexed symbols
72
+ gmax symbols auth -p src/ # filter by name and path
73
+ gmax index # reindex current directory
74
+ gmax config # view/change settings
75
+ gmax doctor # health check
139
76
  ```
140
77
 
141
- is equivalent to `semantic_search` with `role: "ORCHESTRATION", language: "ts", limit: 3` — but costs half the tokens.
78
+ ## Workflow
79
+
80
+ 1. **Explore** — `Bash(gmax project)` for overview of a new codebase
81
+ 2. **Search** — `Bash(gmax "query" --plain)` to find code. Add `--symbol` for function/class names.
82
+ 3. **Read** — `Read file:line` for specific ranges
83
+ 4. **Skeleton** — `Bash(gmax skeleton <path>)` before reading large files
84
+ 5. **Trace** — `Bash(gmax trace <symbol> -d 2)` for call flow
85
+ 6. **Context** — `Bash(gmax related <file>)` to see what else to look at
86
+ 7. **Changes** — `Bash(gmax recent)` after pulls
87
+
88
+ ## MCP tools (only when CLI isn't suitable)
142
89
 
143
- **CLI commands for all MCP tools:**
144
- - `gmax "query" --plain` → `semantic_search`
145
- - `gmax trace <symbol> -d 2` → `trace_calls` with depth
146
- - `gmax skeleton <target> --json` → `code_skeleton`
147
- - `gmax project` → `summarize_project`
148
- - `gmax related <file>` → `related_files`
149
- - `gmax recent` → `recent_changes`
90
+ MCP tools are available but consume more tokens. Use them only for:
91
+ - `index_status` — quick health check (no CLI equivalent that's cheaper)
92
+ - `summarize_directory` — LLM summary generation
93
+ - `semantic_search` with `detail: "pointer"` — when you need the structured pointer format
150
94
 
151
- **Use MCP tools when:** first exploring (tool descriptions guide usage), or when you need pointer mode output (more structured than CLI).
95
+ Full MCP tool documentation: semantic_search (16 params), search_all, code_skeleton, trace_calls, list_symbols, index_status, summarize_project, related_files, recent_changes, summarize_directory.
152
96
 
153
97
  ## Tips
154
98
 
155
- - **Be specific.** "auth" returns noise. "where does the server validate JWT tokens from the Authorization header" returns exactly what you need. Aim for 5+ words.
156
- - **Use `--plain` for CLI searches** — agent-friendly output without ANSI codes.
157
- - **ORCH results contain the logic** — use `--role ORCHESTRATION` to filter noise.
158
- - **Summaries tell you what the code does** without reading it. Use them to decide what to `Read`.
159
- - **Use `--symbol` on CLI** to get search results + call graph in one shot.
160
- - **Don't search for exact strings** — use grep/Grep for that. gmax finds concepts, not literals.
99
+ - **Always use `--plain`** on CLI searches — agent-friendly output without ANSI codes.
100
+ - **Be specific.** 5+ words. "auth" returns noise. "where does the server validate JWT tokens" is specific.
101
+ - **Use `--role ORCHESTRATION`** to skip type definitions and find the actual logic.
102
+ - **Use `--symbol`** when the query is a function/class name — gets search + trace in one call.
103
+ - **Don't search for exact strings** — use grep/Grep for that. gmax finds concepts.
104
+
105
+ ## If results seem stale
106
+
107
+ The watcher auto-starts on first CLI search. Usually results are fresh without manual intervention.
108
+ 1. `Bash(gmax index)` to force re-index
109
+ 2. Do NOT use `gmax reindex` — it doesn't exist.