grepmax 0.5.1 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/commands/mcp.js
CHANGED
|
@@ -194,6 +194,23 @@ const TOOLS = [
|
|
|
194
194
|
properties: {},
|
|
195
195
|
},
|
|
196
196
|
},
|
|
197
|
+
{
|
|
198
|
+
name: "summarize_directory",
|
|
199
|
+
description: "Generate LLM summaries for indexed code in a directory. Run after indexing. Summaries are stored and returned in search results. Requires the summarizer server on port 8101.",
|
|
200
|
+
inputSchema: {
|
|
201
|
+
type: "object",
|
|
202
|
+
properties: {
|
|
203
|
+
path: {
|
|
204
|
+
type: "string",
|
|
205
|
+
description: "Directory to summarize (absolute or relative). Defaults to current project root.",
|
|
206
|
+
},
|
|
207
|
+
limit: {
|
|
208
|
+
type: "number",
|
|
209
|
+
description: "Max chunks to summarize per call (default 200, max 5000). Run again to continue.",
|
|
210
|
+
},
|
|
211
|
+
},
|
|
212
|
+
},
|
|
213
|
+
},
|
|
197
214
|
];
|
|
198
215
|
// ---------------------------------------------------------------------------
|
|
199
216
|
// Helpers
|
|
@@ -625,6 +642,32 @@ exports.mcp = new commander_1.Command("mcp")
|
|
|
625
642
|
}
|
|
626
643
|
});
|
|
627
644
|
}
|
|
645
|
+
/**
 * MCP tool handler for `summarize_directory`.
 *
 * Resolves the target directory, clamps the per-call chunk budget and
 * delegates to `generateSummaries`, then reports how many chunks were
 * summarized and whether another call is needed.
 *
 * @param {{ path?: unknown, limit?: unknown }} args - Raw tool arguments.
 *   `path` is used only when it is a string (resolved with `path.resolve`);
 *   otherwise the enclosing `projectRoot` is used. `limit` is coerced to a
 *   whole number in the range 1..5000 and defaults to 200.
 * @returns {Promise<*>} Whatever the enclosing `ok(...)` / `err(...)` helpers
 *   produce; failures are reported through `err`, never thrown.
 */
function handleSummarizeDirectory(args) {
    return __awaiter(this, void 0, void 0, function* () {
        const dir = typeof args.path === "string"
            ? path.resolve(args.path)
            : projectRoot;
        // Trailing slash so the prefix match cannot spill into sibling
        // directories that merely share a name prefix.
        const prefix = dir.endsWith("/") ? dir : `${dir}/`;
        // Whole chunks only: floor fractional input, fall back to 200 when the
        // value is missing / NaN / 0, then clamp to [1, 5000].
        const limit = Math.min(Math.max(Math.floor(Number(args.limit)) || 200, 1), 5000);
        try {
            const db = getVectorDb();
            const { summarized, remaining } = yield (0, syncer_1.generateSummaries)(db, prefix, (done, total) => {
                // Progress goes to stderr: this server talks to its client over
                // the stdio transport, so stdout must stay free of log output.
                console.error(`[summarize] ${done}/${total} chunks`);
            }, limit);
            if (summarized === 0) {
                return ok("No chunks to summarize (all have summaries or summarizer unavailable)");
            }
            const remainMsg = remaining > 0
                ? ` (${remaining}+ remaining — run again to continue)`
                : "";
            return ok(`Summarized ${summarized} chunks in ${path.basename(dir)}/${remainMsg}`);
        }
        catch (e) {
            const msg = e instanceof Error ? e.message : String(e);
            return err(`Summarization failed: ${msg}`);
        }
    });
}
|
|
628
671
|
// --- MCP server setup ---
|
|
629
672
|
const transport = new stdio_js_1.StdioServerTransport();
|
|
630
673
|
const server = new index_js_1.Server({
|
|
@@ -656,6 +699,8 @@ exports.mcp = new commander_1.Command("mcp")
|
|
|
656
699
|
return handleListSymbols(toolArgs);
|
|
657
700
|
case "index_status":
|
|
658
701
|
return handleIndexStatus();
|
|
702
|
+
case "summarize_directory":
|
|
703
|
+
return handleSummarizeDirectory(toolArgs);
|
|
659
704
|
default:
|
|
660
705
|
return err(`Unknown tool: ${name}`);
|
|
661
706
|
}
|
|
@@ -61,11 +61,12 @@ exports.summarize = new commander_1.Command("summarize")
|
|
|
61
61
|
: "";
|
|
62
62
|
const { spinner } = (0, sync_helpers_1.createIndexingSpinner)("", "Summarizing...");
|
|
63
63
|
try {
|
|
64
|
-
const
|
|
64
|
+
const { summarized, remaining } = yield (0, syncer_1.generateSummaries)(vectorDb, rootPrefix, (done, total) => {
|
|
65
65
|
spinner.text = `Summarizing... (${done}/${total})`;
|
|
66
66
|
});
|
|
67
|
-
if (
|
|
68
|
-
|
|
67
|
+
if (summarized > 0) {
|
|
68
|
+
const remainMsg = remaining > 0 ? ` (${remaining}+ remaining — run again)` : "";
|
|
69
|
+
spinner.succeed(`Summarized ${summarized} chunks${remainMsg}`);
|
|
69
70
|
}
|
|
70
71
|
else {
|
|
71
72
|
spinner.succeed("All chunks already have summaries (or summarizer unavailable)");
|
package/dist/lib/index/syncer.js
CHANGED
|
@@ -64,7 +64,7 @@ const project_root_1 = require("../utils/project-root");
|
|
|
64
64
|
const pool_1 = require("../workers/pool");
|
|
65
65
|
const index_config_1 = require("./index-config");
|
|
66
66
|
const walker_1 = require("./walker");
|
|
67
|
-
function generateSummaries(db, pathPrefix, onProgress) {
|
|
67
|
+
function generateSummaries(db, pathPrefix, onProgress, maxChunks) {
|
|
68
68
|
return __awaiter(this, void 0, void 0, function* () {
|
|
69
69
|
let summarizeChunks;
|
|
70
70
|
try {
|
|
@@ -72,23 +72,24 @@ function generateSummaries(db, pathPrefix, onProgress) {
|
|
|
72
72
|
summarizeChunks = mod.summarizeChunks;
|
|
73
73
|
}
|
|
74
74
|
catch (_a) {
|
|
75
|
-
return 0;
|
|
75
|
+
return { summarized: 0, remaining: 0 };
|
|
76
76
|
}
|
|
77
77
|
// Quick availability check
|
|
78
78
|
const test = yield summarizeChunks([
|
|
79
79
|
{ code: "test", language: "ts", file: "test" },
|
|
80
80
|
]);
|
|
81
81
|
if (!test)
|
|
82
|
-
return 0;
|
|
82
|
+
return { summarized: 0, remaining: 0 };
|
|
83
|
+
const queryLimit = maxChunks !== null && maxChunks !== void 0 ? maxChunks : 50000;
|
|
83
84
|
const table = yield db.ensureTable();
|
|
84
85
|
const rows = yield table
|
|
85
86
|
.query()
|
|
86
87
|
.select(["id", "path", "content", "defined_symbols"])
|
|
87
88
|
.where(`path LIKE '${pathPrefix}%' AND (summary IS NULL OR summary = '')`)
|
|
88
|
-
.limit(
|
|
89
|
+
.limit(queryLimit)
|
|
89
90
|
.toArray();
|
|
90
91
|
if (rows.length === 0)
|
|
91
|
-
return 0;
|
|
92
|
+
return { summarized: 0, remaining: 0 };
|
|
92
93
|
let summarized = 0;
|
|
93
94
|
const BATCH_SIZE = 5;
|
|
94
95
|
for (let i = 0; i < rows.length; i += BATCH_SIZE) {
|
|
@@ -124,7 +125,11 @@ function generateSummaries(db, pathPrefix, onProgress) {
|
|
|
124
125
|
}
|
|
125
126
|
onProgress === null || onProgress === void 0 ? void 0 : onProgress(summarized, rows.length);
|
|
126
127
|
}
|
|
127
|
-
|
|
128
|
+
// Estimate remaining (rows.length was capped by queryLimit)
|
|
129
|
+
const remaining = rows.length === queryLimit
|
|
130
|
+
? queryLimit - summarized // at least this many more
|
|
131
|
+
: 0;
|
|
132
|
+
return { summarized, remaining };
|
|
128
133
|
});
|
|
129
134
|
}
|
|
130
135
|
function flushBatch(db, meta, vectors, pendingMeta, pendingDeletes, dryRun) {
|
|
@@ -472,26 +477,6 @@ function initialSync(options) {
|
|
|
472
477
|
metaCache.delete(p);
|
|
473
478
|
});
|
|
474
479
|
}
|
|
475
|
-
// --- Summary post-processing (sequential, single process) ---
|
|
476
|
-
if (!dryRun && indexed > 0) {
|
|
477
|
-
const sumTimer = (0, logger_1.timer)("index", "Summarize");
|
|
478
|
-
onProgress === null || onProgress === void 0 ? void 0 : onProgress({
|
|
479
|
-
processed,
|
|
480
|
-
indexed,
|
|
481
|
-
total,
|
|
482
|
-
filePath: "Generating summaries...",
|
|
483
|
-
});
|
|
484
|
-
const summarized = yield generateSummaries(vectorDb, rootPrefix, (count, chunkTotal) => {
|
|
485
|
-
onProgress === null || onProgress === void 0 ? void 0 : onProgress({
|
|
486
|
-
processed: count,
|
|
487
|
-
indexed,
|
|
488
|
-
total: chunkTotal,
|
|
489
|
-
filePath: `Summarizing... (${count}/${chunkTotal})`,
|
|
490
|
-
});
|
|
491
|
-
});
|
|
492
|
-
sumTimer();
|
|
493
|
-
(0, logger_1.log)("index", `Summarize: ${summarized} chunks`);
|
|
494
|
-
}
|
|
495
480
|
syncTimer();
|
|
496
481
|
// Write model config so future runs can detect model changes
|
|
497
482
|
if (!dryRun) {
|
|
@@ -11,6 +11,7 @@ endpoints run on the event loop thread, avoiding Metal thread-safety crashes.
|
|
|
11
11
|
import asyncio
|
|
12
12
|
import logging
|
|
13
13
|
import os
|
|
14
|
+
import re
|
|
14
15
|
import signal
|
|
15
16
|
import socket
|
|
16
17
|
import time
|
|
@@ -38,7 +39,7 @@ MODEL_ID = os.environ.get(
|
|
|
38
39
|
)
|
|
39
40
|
PORT = int(os.environ.get("MLX_SUMMARY_PORT", "8101"))
|
|
40
41
|
IDLE_TIMEOUT_S = int(os.environ.get("MLX_SUMMARY_IDLE_TIMEOUT", "1800")) # 30 min
|
|
41
|
-
MAX_TOKENS =
|
|
42
|
+
MAX_TOKENS = 40 # summaries are ~20 tokens, one line
|
|
42
43
|
|
|
43
44
|
model = None
|
|
44
45
|
tokenizer = None
|
|
@@ -48,7 +49,7 @@ _mlx_lock = asyncio.Lock()
|
|
|
48
49
|
|
|
49
50
|
SYSTEM_PROMPT = """You are a code summarizer. Given a code chunk, produce exactly one line describing what it does.
|
|
50
51
|
Be specific about business logic, services, and side effects. Do not describe syntax.
|
|
51
|
-
Do not use phrases like "This function" or "This code". Start with a verb."""
|
|
52
|
+
Do not use phrases like "This function" or "This code". Start with a verb. /no_think"""
|
|
52
53
|
|
|
53
54
|
def build_prompt(code: str, language: str, file: str, symbols: list[str] | None = None) -> str:
|
|
54
55
|
parts = [f"Language: {language}", f"File: {file}"]
|
|
@@ -79,8 +80,12 @@ def summarize_chunk(code: str, language: str, file: str, symbols: list[str] | No
|
|
|
79
80
|
max_tokens=MAX_TOKENS,
|
|
80
81
|
verbose=False,
|
|
81
82
|
)
|
|
83
|
+
# Strip thinking tokens if present
|
|
84
|
+
text = re.sub(r"<think>.*?</think>", "", response, flags=re.DOTALL).strip()
|
|
85
|
+
if not text:
|
|
86
|
+
text = response.strip()
|
|
82
87
|
# Take first line only, strip whitespace
|
|
83
|
-
summary =
|
|
88
|
+
summary = text.split("\n")[0].strip()
|
|
84
89
|
# Remove common prefixes the model might add
|
|
85
90
|
for prefix in ["Summary: ", "summary: ", "- "]:
|
|
86
91
|
if summary.startswith(prefix):
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: gmax
|
|
3
3
|
description: Semantic code search. Use alongside grep - grep for exact strings, gmax for concepts.
|
|
4
|
-
allowed-tools: "mcp__grepmax__semantic_search, mcp__grepmax__search_all, mcp__grepmax__code_skeleton, mcp__grepmax__trace_calls, mcp__grepmax__list_symbols, mcp__grepmax__index_status, Bash(gmax:*), Read"
|
|
4
|
+
allowed-tools: "mcp__grepmax__semantic_search, mcp__grepmax__search_all, mcp__grepmax__code_skeleton, mcp__grepmax__trace_calls, mcp__grepmax__list_symbols, mcp__grepmax__index_status, mcp__grepmax__summarize_directory, Bash(gmax:*), Read"
|
|
5
5
|
---
|
|
6
6
|
|
|
7
7
|
## What gmax does
|
|
@@ -67,6 +67,10 @@ List indexed symbols with definition locations.
|
|
|
67
67
|
### index_status
|
|
68
68
|
Check centralized index health — chunks, files, indexed directories, model info.
|
|
69
69
|
|
|
70
|
+
### summarize_directory
|
|
71
|
+
Generate LLM summaries for indexed code in a directory. Summaries are stored and returned in search results. Run after indexing a new directory.
|
|
72
|
+
- `path` (optional): Directory to summarize. Defaults to project root.
|
|
73
|
+
|
|
70
74
|
## Workflow
|
|
71
75
|
|
|
72
76
|
1. **Search** — `semantic_search` to find relevant code (pointers by default)
|