codedeep-mcp 0.1.0 → 0.2.0

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -24,7 +24,10 @@ function isUnchanged(prev, mtimeMs, size, language) {
24
24
  prev.size === size &&
25
25
  prev.language === language);
26
26
  }
27
- function hashContent(content) {
27
+ // Exported so the note store's staleness check can re-hash an anchored file
28
+ // from disk with the SAME algorithm the indexer records in FileInfo.contentHash
29
+ // — staleness compares against DISK, not the (possibly-lagging) live index.
30
+ export function hashContent(content) {
28
31
  return createHash('sha1').update(content).digest('hex').slice(0, 16);
29
32
  }
30
33
  export class Indexer {
@@ -41,6 +44,10 @@ export class Indexer {
41
44
  // preserves unseen cached entries — the watcher must know the rescan
42
45
  // it requested may not have covered everything.
43
46
  lastScanCompleteFlag = true;
47
+ // Languages whose grammar-load failure was already warned — dedupes the
48
+ // per-file warn in processFile (one line per failure EPISODE per language,
49
+ // not one per file). Entries clear on the next successful load.
50
+ grammarWarnedLangs = new Set();
44
51
  get lastScanComplete() {
45
52
  return this.lastScanCompleteFlag;
46
53
  }
@@ -62,8 +69,10 @@ export class Indexer {
62
69
  // work and retry what deserves retrying.
63
70
  async indexAll() {
64
71
  return this.runGuarded(async () => {
65
- await initParser();
72
+ // Scan FIRST, then load only the grammars the repo actually contains —
73
+ // loading all 16 up front costs ~95MB RSS on a repo that needs one.
66
74
  const { files: current, complete } = await scanProject(this.config);
75
+ await this.warmUpGrammars(current);
67
76
  this.lastScanCompleteFlag = complete;
68
77
  this.total = current.length;
69
78
  await this.processBatched(current);
@@ -87,7 +96,6 @@ export class Indexer {
87
96
  }
88
97
  async indexChanged() {
89
98
  return this.runGuarded(async () => {
90
- await initParser();
91
99
  const { files: current, complete } = await scanProject(this.config);
92
100
  this.lastScanCompleteFlag = complete;
93
101
  const previous = new Map(this.index.getAllFiles().map((f) => [f.path, f]));
@@ -114,6 +122,9 @@ export class Indexer {
114
122
  this.ready = true;
115
123
  return;
116
124
  }
125
+ // Load only the grammars the CHANGED files need — the common warm start
126
+ // (few or no changes) then loads few or no grammars at all.
127
+ await this.warmUpGrammars(toIndex);
117
128
  this.total = toIndex.length;
118
129
  await this.processBatched(toIndex);
119
130
  await this.persist();
@@ -129,7 +140,9 @@ export class Indexer {
129
140
  return ran ? outcome : 'dropped';
130
141
  }
131
142
  async indexFileInner(rawPath) {
132
- await initParser();
143
+ // No up-front initParser: processFile ensures the ONE grammar this file
144
+ // needs right before parsing (a watcher event on an unchanged file then
145
+ // loads nothing at all).
133
146
  // Canonicalize to a project-relative POSIX path so the cache key
134
147
  // aligns with the scanner's `src/a.ts` form regardless of whether
135
148
  // the watcher emits an absolute path, a `./`-prefix, or Windows
@@ -247,6 +260,15 @@ export class Indexer {
247
260
  this.done = 1;
248
261
  return result;
249
262
  }
263
+ // Parallel bulk load of the grammars `files` need. A WARM-UP, not a
264
+ // correctness requirement (processFile re-ensures per-file), so failure is
265
+ // TOLERATED: one missing/corrupt .wasm must degrade that one language
266
+ // (per-file 'transient' + deduped warns), not abort indexing for every
267
+ // other language.
268
+ async warmUpGrammars(files) {
269
+ await initParser(files.map((f) => f.language)).catch((err) => log.warn(`Indexer: bulk grammar warm-up failed (${errMsg(err)}); ` +
270
+ `grammars will load per-file`));
271
+ }
250
272
  // Resolves `false` when a run is already in flight (the request is
251
273
  // dropped, not queued); `true` when the work ran to completion.
252
274
  async runGuarded(work) {
@@ -286,6 +308,44 @@ export class Indexer {
286
308
  }
287
309
  const absPath = join(this.config.projectRoot, file.path);
288
310
  const removed = () => this.index.removeFile(file.path) ? 'removed' : 'noop';
311
+ // Memoized per-language ensure — a resolved-promise await after the first
312
+ // load. Covers the watcher path (a NEW language can appear after the
313
+ // startup scan chose the initial grammar set). Runs BEFORE the content
314
+ // read (no point reading bytes a failed grammar can't parse — a
315
+ // permanently corrupt .wasm would otherwise cost one full-file read per
316
+ // affected file per rescan) and OUTSIDE the parse try/catch below: a
317
+ // grammar-LOAD failure says nothing about the FILE, so it returns
318
+ // 'transient' (existing symbols kept — genuinely-transient causes were
319
+ // already retried in place by ensureLanguage's bounded backoff), never
320
+ // cascade-deletes them the way an unparseable file does. The warn is
321
+ // deduped per LANGUAGE (5,000 Python files must not produce 5,000
322
+ // identical lines); the dedup entry clears on the next successful load so
323
+ // a NEW failure episode warns again.
324
+ try {
325
+ await initParser([file.language]);
326
+ this.grammarWarnedLangs.delete(file.language);
327
+ }
328
+ catch (err) {
329
+ // Preserve the pre-existing precedence "unreadable bytes always prune
330
+ // the entry" even when the grammar is down: without this, a file that
331
+ // is itself gone/unreadable would keep serving stale symbols from the
332
+ // persisted cache indefinitely. One cheap access() probe, only on the
333
+ // (rare) grammar-failure path.
334
+ try {
335
+ await fs.access(absPath, fs.constants.R_OK);
336
+ }
337
+ catch {
338
+ return removed();
339
+ }
340
+ if (!this.grammarWarnedLangs.has(file.language)) {
341
+ this.grammarWarnedLangs.add(file.language);
342
+ log.warn(`Indexer: grammar load failed for ${file.language} (first: ${file.path}): ` +
343
+ `${errMsg(err)}. Files of this language are missing or stale in the ` +
344
+ `index until the grammar loads (fix the installation; probed again on ` +
345
+ `the next change or rescan). Existing symbols are kept.`);
346
+ }
347
+ return 'transient';
348
+ }
289
349
  let content;
290
350
  try {
291
351
  content = await fs.readFile(absPath, 'utf8');
@@ -1,8 +1,13 @@
1
1
  import { open, readdir, stat } from 'node:fs/promises';
2
- import { join, relative, sep, posix } from 'node:path';
2
+ import { join, relative, posix } from 'node:path';
3
3
  import picomatch from 'picomatch';
4
+ import { toPosix } from '../fs-util.js';
4
5
  import { LANGUAGE_UNKNOWN } from '../types.js';
5
6
  import { log } from '../logger.js';
7
+ // Re-exported for the scanner's indexer siblings (pipeline/watcher) — the
8
+ // implementation lives in the neutral fs-util module so config.ts can use it
9
+ // without importing the indexer layer.
10
+ export { toPosix };
6
11
  const BYTE_CHECK_BUF_SIZE = 8192;
7
12
  const LANGUAGE_BY_EXT = {
8
13
  '.ts': 'typescript',
@@ -77,9 +82,6 @@ const BINARY_EXT = new Set([
77
82
  '.class', '.pyc', '.pyo', '.jar', '.war',
78
83
  ]);
79
84
  const GLOB_CHARS = /[*?[\]{}!]/;
80
- export function toPosix(p) {
81
- return sep === '/' ? p : p.split(sep).join('/');
82
- }
83
85
  export function detectLanguage(filename) {
84
86
  const ext = posix.extname(toPosix(filename)).toLowerCase();
85
87
  return LANGUAGE_BY_EXT[ext] ?? null;
@@ -320,6 +320,15 @@ export class Watcher {
320
320
  this.pending.add(rel); // guard drop — retry
321
321
  else if (outcome === 'cap-skipped')
322
322
  capSkipped.push(rel);
323
+ // 'transient' (grammar-load failure) is deliberately NOT re-queued:
324
+ // the loader already retried in place (parser.ts's bounded
325
+ // backoff inside ensureLanguage covers the genuinely-transient case
326
+ // for EVERY caller), so a failure surviving that is durable — a
327
+ // corrupt/missing .wasm re-queued here would just cycle the retry
328
+ // tick. The file's existing symbols were kept; recovery rides the
329
+ // next fs event or rescan (langLoads self-resets). A per-path retry
330
+ // budget was tried and removed — it swallowed edits landing
331
+ // mid-budget and its counters leaked across interleaved outcomes.
323
332
  }
324
333
  catch (err) {
325
334
  log.warn(`watcher: failed to index ${rel}: ${errMsg(err)}`);