codedeep-mcp 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +23 -8
- package/dist/config.js +1 -1
- package/dist/fs-util.js +48 -0
- package/dist/git/git-service.js +27 -0
- package/dist/index.js +15 -1
- package/dist/indexer/code-index.js +91 -22
- package/dist/indexer/parser.js +100 -25
- package/dist/indexer/pipeline.js +64 -4
- package/dist/indexer/scanner.js +6 -4
- package/dist/indexer/watcher.js +9 -0
- package/dist/notes/note-store.js +513 -0
- package/dist/notes/staleness.js +168 -0
- package/dist/notes/types.js +19 -0
- package/dist/server.js +105 -16
- package/dist/tools/common.js +51 -41
- package/dist/tools/find-references.js +9 -11
- package/dist/tools/forget.js +26 -0
- package/dist/tools/get-context.js +149 -18
- package/dist/tools/impact.js +18 -5
- package/dist/tools/note-render.js +57 -0
- package/dist/tools/overview.js +76 -3
- package/dist/tools/recall.js +165 -0
- package/dist/tools/remember.js +207 -0
- package/dist/tools/search-structure.js +3 -2
- package/package.json +4 -2
package/dist/indexer/pipeline.js
CHANGED
|
@@ -24,7 +24,10 @@ function isUnchanged(prev, mtimeMs, size, language) {
|
|
|
24
24
|
prev.size === size &&
|
|
25
25
|
prev.language === language);
|
|
26
26
|
}
|
|
27
|
-
|
|
27
|
+
// Exported so the note store's staleness check can re-hash an anchored file
|
|
28
|
+
// from disk with the SAME algorithm the indexer records in FileInfo.contentHash
|
|
29
|
+
// — staleness compares against DISK, not the (possibly-lagging) live index.
|
|
30
|
+
export function hashContent(content) {
|
|
28
31
|
return createHash('sha1').update(content).digest('hex').slice(0, 16);
|
|
29
32
|
}
|
|
30
33
|
export class Indexer {
|
|
@@ -41,6 +44,10 @@ export class Indexer {
|
|
|
41
44
|
// preserves unseen cached entries — the watcher must know the rescan
|
|
42
45
|
// it requested may not have covered everything.
|
|
43
46
|
lastScanCompleteFlag = true;
|
|
47
|
+
// Languages whose grammar-load failure was already warned — dedupes the
|
|
48
|
+
// per-file warn in processFile (one line per failure EPISODE per language,
|
|
49
|
+
// not one per file). Entries clear on the next successful load.
|
|
50
|
+
grammarWarnedLangs = new Set();
|
|
44
51
|
get lastScanComplete() {
|
|
45
52
|
return this.lastScanCompleteFlag;
|
|
46
53
|
}
|
|
@@ -62,8 +69,10 @@ export class Indexer {
|
|
|
62
69
|
// work and retry what deserves retrying.
|
|
63
70
|
async indexAll() {
|
|
64
71
|
return this.runGuarded(async () => {
|
|
65
|
-
|
|
72
|
+
// Scan FIRST, then load only the grammars the repo actually contains —
|
|
73
|
+
// loading all 16 up front costs ~95MB RSS on a repo that needs one.
|
|
66
74
|
const { files: current, complete } = await scanProject(this.config);
|
|
75
|
+
await this.warmUpGrammars(current);
|
|
67
76
|
this.lastScanCompleteFlag = complete;
|
|
68
77
|
this.total = current.length;
|
|
69
78
|
await this.processBatched(current);
|
|
@@ -87,7 +96,6 @@ export class Indexer {
|
|
|
87
96
|
}
|
|
88
97
|
async indexChanged() {
|
|
89
98
|
return this.runGuarded(async () => {
|
|
90
|
-
await initParser();
|
|
91
99
|
const { files: current, complete } = await scanProject(this.config);
|
|
92
100
|
this.lastScanCompleteFlag = complete;
|
|
93
101
|
const previous = new Map(this.index.getAllFiles().map((f) => [f.path, f]));
|
|
@@ -114,6 +122,9 @@ export class Indexer {
|
|
|
114
122
|
this.ready = true;
|
|
115
123
|
return;
|
|
116
124
|
}
|
|
125
|
+
// Load only the grammars the CHANGED files need — the common warm start
|
|
126
|
+
// (few or no changes) then loads few or no grammars at all.
|
|
127
|
+
await this.warmUpGrammars(toIndex);
|
|
117
128
|
this.total = toIndex.length;
|
|
118
129
|
await this.processBatched(toIndex);
|
|
119
130
|
await this.persist();
|
|
@@ -129,7 +140,9 @@ export class Indexer {
|
|
|
129
140
|
return ran ? outcome : 'dropped';
|
|
130
141
|
}
|
|
131
142
|
async indexFileInner(rawPath) {
|
|
132
|
-
|
|
143
|
+
// No up-front initParser: processFile ensures the ONE grammar this file
|
|
144
|
+
// needs right before parsing (a watcher event on an unchanged file then
|
|
145
|
+
// loads nothing at all).
|
|
133
146
|
// Canonicalize to a project-relative POSIX path so the cache key
|
|
134
147
|
// aligns with the scanner's `src/a.ts` form regardless of whether
|
|
135
148
|
// the watcher emits an absolute path, a `./`-prefix, or Windows
|
|
@@ -247,6 +260,15 @@ export class Indexer {
|
|
|
247
260
|
this.done = 1;
|
|
248
261
|
return result;
|
|
249
262
|
}
|
|
263
|
+
// Parallel bulk load of the grammars `files` need. A WARM-UP, not a
|
|
264
|
+
// correctness requirement (processFile re-ensures per-file), so failure is
|
|
265
|
+
// TOLERATED: one missing/corrupt .wasm must degrade that one language
|
|
266
|
+
// (per-file 'transient' + deduped warns), not abort indexing for every
|
|
267
|
+
// other language.
|
|
268
|
+
async warmUpGrammars(files) {
|
|
269
|
+
await initParser(files.map((f) => f.language)).catch((err) => log.warn(`Indexer: bulk grammar warm-up failed (${errMsg(err)}); ` +
|
|
270
|
+
`grammars will load per-file`));
|
|
271
|
+
}
|
|
250
272
|
// Resolves `false` when a run is already in flight (the request is
|
|
251
273
|
// dropped, not queued); `true` when the work ran to completion.
|
|
252
274
|
async runGuarded(work) {
|
|
@@ -286,6 +308,44 @@ export class Indexer {
|
|
|
286
308
|
}
|
|
287
309
|
const absPath = join(this.config.projectRoot, file.path);
|
|
288
310
|
const removed = () => this.index.removeFile(file.path) ? 'removed' : 'noop';
|
|
311
|
+
// Memoized per-language ensure — a resolved-promise await after the first
|
|
312
|
+
// load. Covers the watcher path (a NEW language can appear after the
|
|
313
|
+
// startup scan chose the initial grammar set). Runs BEFORE the content
|
|
314
|
+
// read (no point reading bytes a failed grammar can't parse — a
|
|
315
|
+
// permanently corrupt .wasm would otherwise cost one full-file read per
|
|
316
|
+
// affected file per rescan) and OUTSIDE the parse try/catch below: a
|
|
317
|
+
// grammar-LOAD failure says nothing about the FILE, so it returns
|
|
318
|
+
// 'transient' (existing symbols kept — genuinely-transient causes were
|
|
319
|
+
// already retried in place by ensureLanguage's bounded backoff), never
|
|
320
|
+
// cascade-deletes them the way an unparseable file does. The warn is
|
|
321
|
+
// deduped per LANGUAGE (5,000 Python files must not produce 5,000
|
|
322
|
+
// identical lines); the dedup entry clears on the next successful load so
|
|
323
|
+
// a NEW failure episode warns again.
|
|
324
|
+
try {
|
|
325
|
+
await initParser([file.language]);
|
|
326
|
+
this.grammarWarnedLangs.delete(file.language);
|
|
327
|
+
}
|
|
328
|
+
catch (err) {
|
|
329
|
+
// Preserve the pre-existing precedence "unreadable bytes always prune
|
|
330
|
+
// the entry" even when the grammar is down: without this, a file that
|
|
331
|
+
// is itself gone/unreadable would keep serving stale symbols from the
|
|
332
|
+
// persisted cache indefinitely. One cheap access() probe, only on the
|
|
333
|
+
// (rare) grammar-failure path.
|
|
334
|
+
try {
|
|
335
|
+
await fs.access(absPath, fs.constants.R_OK);
|
|
336
|
+
}
|
|
337
|
+
catch {
|
|
338
|
+
return removed();
|
|
339
|
+
}
|
|
340
|
+
if (!this.grammarWarnedLangs.has(file.language)) {
|
|
341
|
+
this.grammarWarnedLangs.add(file.language);
|
|
342
|
+
log.warn(`Indexer: grammar load failed for ${file.language} (first: ${file.path}): ` +
|
|
343
|
+
`${errMsg(err)}. Files of this language are missing or stale in the ` +
|
|
344
|
+
`index until the grammar loads (fix the installation; probed again on ` +
|
|
345
|
+
`the next change or rescan). Existing symbols are kept.`);
|
|
346
|
+
}
|
|
347
|
+
return 'transient';
|
|
348
|
+
}
|
|
289
349
|
let content;
|
|
290
350
|
try {
|
|
291
351
|
content = await fs.readFile(absPath, 'utf8');
|
package/dist/indexer/scanner.js
CHANGED
|
@@ -1,8 +1,13 @@
|
|
|
1
1
|
import { open, readdir, stat } from 'node:fs/promises';
|
|
2
|
-
import { join, relative, sep, posix } from 'node:path';
|
|
2
|
+
import { join, relative, posix } from 'node:path';
|
|
3
3
|
import picomatch from 'picomatch';
|
|
4
|
+
import { toPosix } from '../fs-util.js';
|
|
4
5
|
import { LANGUAGE_UNKNOWN } from '../types.js';
|
|
5
6
|
import { log } from '../logger.js';
|
|
7
|
+
// Re-exported for the scanner's indexer siblings (pipeline/watcher) — the
|
|
8
|
+
// implementation lives in the neutral fs-util module so config.ts can use it
|
|
9
|
+
// without importing the indexer layer.
|
|
10
|
+
export { toPosix };
|
|
6
11
|
const BYTE_CHECK_BUF_SIZE = 8192;
|
|
7
12
|
const LANGUAGE_BY_EXT = {
|
|
8
13
|
'.ts': 'typescript',
|
|
@@ -77,9 +82,6 @@ const BINARY_EXT = new Set([
|
|
|
77
82
|
'.class', '.pyc', '.pyo', '.jar', '.war',
|
|
78
83
|
]);
|
|
79
84
|
const GLOB_CHARS = /[*?[\]{}!]/;
|
|
80
|
-
export function toPosix(p) {
|
|
81
|
-
return sep === '/' ? p : p.split(sep).join('/');
|
|
82
|
-
}
|
|
83
85
|
export function detectLanguage(filename) {
|
|
84
86
|
const ext = posix.extname(toPosix(filename)).toLowerCase();
|
|
85
87
|
return LANGUAGE_BY_EXT[ext] ?? null;
|
package/dist/indexer/watcher.js
CHANGED
|
@@ -320,6 +320,15 @@ export class Watcher {
|
|
|
320
320
|
this.pending.add(rel); // guard drop — retry
|
|
321
321
|
else if (outcome === 'cap-skipped')
|
|
322
322
|
capSkipped.push(rel);
|
|
323
|
+
// 'transient' (grammar-load failure) is deliberately NOT re-queued:
|
|
324
|
+
// the loader already retried in place (parser.ts's bounded
|
|
325
|
+
// backoff inside ensureLanguage covers the genuinely-transient case
|
|
326
|
+
// for EVERY caller), so a failure surviving that is durable — a
|
|
327
|
+
// corrupt/missing .wasm re-queued here would just cycle the retry
|
|
328
|
+
// tick. The file's existing symbols were kept; recovery rides the
|
|
329
|
+
// next fs event or rescan (langLoads self-resets). A per-path retry
|
|
330
|
+
// budget was tried and removed — it swallowed edits landing
|
|
331
|
+
// mid-budget and its counters leaked across interleaved outcomes.
|
|
323
332
|
}
|
|
324
333
|
catch (err) {
|
|
325
334
|
log.warn(`watcher: failed to index ${rel}: ${errMsg(err)}`);
|