openlore 2.0.4 → 2.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. package/README.md +16 -7
  2. package/dist/cli/commands/analyze.d.ts.map +1 -1
  3. package/dist/cli/commands/analyze.js +38 -37
  4. package/dist/cli/commands/analyze.js.map +1 -1
  5. package/dist/cli/commands/mcp.d.ts.map +1 -1
  6. package/dist/cli/commands/mcp.js +4 -0
  7. package/dist/cli/commands/mcp.js.map +1 -1
  8. package/dist/cli/commands/orient.d.ts +15 -0
  9. package/dist/cli/commands/orient.d.ts.map +1 -0
  10. package/dist/cli/commands/orient.js +156 -0
  11. package/dist/cli/commands/orient.js.map +1 -0
  12. package/dist/cli/index.js +2 -0
  13. package/dist/cli/index.js.map +1 -1
  14. package/dist/cli/install/index.d.ts +6 -0
  15. package/dist/cli/install/index.d.ts.map +1 -1
  16. package/dist/cli/install/index.js +71 -1
  17. package/dist/cli/install/index.js.map +1 -1
  18. package/dist/constants.d.ts +22 -0
  19. package/dist/constants.d.ts.map +1 -1
  20. package/dist/constants.js +27 -0
  21. package/dist/constants.js.map +1 -1
  22. package/dist/core/analyzer/vector-index.d.ts +20 -0
  23. package/dist/core/analyzer/vector-index.d.ts.map +1 -1
  24. package/dist/core/analyzer/vector-index.js +173 -0
  25. package/dist/core/analyzer/vector-index.js.map +1 -1
  26. package/dist/core/services/mcp-handlers/utils.d.ts +16 -0
  27. package/dist/core/services/mcp-handlers/utils.d.ts.map +1 -1
  28. package/dist/core/services/mcp-handlers/utils.js +32 -0
  29. package/dist/core/services/mcp-handlers/utils.js.map +1 -1
  30. package/dist/core/services/mcp-watcher.d.ts +99 -6
  31. package/dist/core/services/mcp-watcher.d.ts.map +1 -1
  32. package/dist/core/services/mcp-watcher.js +438 -111
  33. package/dist/core/services/mcp-watcher.js.map +1 -1
  34. package/examples/drift-demo/package.json +1 -1
  35. package/package.json +2 -1
@@ -8,31 +8,87 @@
8
8
  * The call graph is deliberately excluded — rebuilding it requires full
9
9
  * tree-sitter analysis of all call sites and is too expensive for a watch loop.
10
10
  * It stays current via the post-commit hook (openlore analyze --force --embed).
11
+ *
12
+ * Spec 13.1 (watch-mode performance): freshness is O(change), not O(repo).
13
+ * • Per-file events COALESCE into one batched flush (single debounce timer +
14
+ * hard max-batch ceiling), so a burst / branch-switch runs the pipeline once,
15
+ * not once per file.
16
+ * • The patched llm-context is handed to the MCP read cache in place
17
+ * (primeContextCache), so the next tool call is a cache HIT — no 2.1 MB
18
+ * cold re-parse — even after the disk write.
19
+ * • Vector updates are row-level (VectorIndex.updateFiles), not a full-corpus
20
+ * read+overwrite, and run on a separate lower-priority lane so signature
21
+ * freshness never blocks on embedding.
22
+ * • VCS-flood / bulk batches are detected and collapsed to a single refresh.
23
+ * • stderr emits one summary line per batch by default (per-file detail behind
24
+ * OPENLORE_WATCH_DEBUG).
11
25
  */
12
- import { readFile, writeFile } from 'node:fs/promises';
26
+ import { readFile, writeFile, readdir } from 'node:fs/promises';
13
27
  import { createHash } from 'node:crypto';
14
28
  import { join, relative } from 'node:path';
15
29
  import chokidar from 'chokidar';
16
30
  import { extractSignatures, detectLanguage } from '../analyzer/signature-extractor.js';
17
31
  import { EdgeStore } from './edge-store.js';
18
- import { OPENLORE_DIR, OPENLORE_ANALYSIS_SUBDIR, ARTIFACT_LLM_CONTEXT, } from '../../constants.js';
32
+ import { primeContextCache } from './mcp-handlers/utils.js';
33
+ import { OPENLORE_DIR, OPENLORE_ANALYSIS_SUBDIR, ARTIFACT_LLM_CONTEXT, WATCH_DEBOUNCE_MS, WATCH_MAX_BATCH_MS, WATCH_BULK_THRESHOLD, WATCH_EMBED_FILE_CEILING, WATCH_VCS_SETTLE_MS, } from '../../constants.js';
19
34
  const CALL_GRAPH_LANGS = new Set([
20
35
  'Python', 'TypeScript', 'JavaScript', 'Go', 'Rust', 'Ruby', 'Java', 'C++', 'Swift',
21
36
  ]);
22
37
  /** Max callerFiles to re-parse in a single watch event (guards against high-fanIn renames). */
23
38
  const CALLER_REPARSE_LIMIT = 10;
24
39
  const SOURCE_EXTENSIONS = /\.(ts|tsx|js|jsx|py|go|rs|rb|java|kt|php|cs|cpp|cc|cxx|h|hpp|c|swift)$/;
25
- // String-segment checks evaluated before kqueue/inotify FDs are opened — avoids
26
- // EMFILE on macOS when chokidar opens FDs for all directories before applying globs.
27
- const IGNORED_SEGMENTS = ['/node_modules/', '/.openlore/', '/dist/', '/.git/'];
40
+ // Directory NAMES that must never be watched. Build-output and dependency
41
+ // directories can hold hundreds of thousands of files (a Rust `target/` is
42
+ // routinely tens of GB), so watching them is both wasteful and a hard EMFILE
43
+ // trigger on the first tool call.
44
+ //
45
+ // Matched against root-RELATIVE path segments (see isIgnoredRelPath), which is
46
+ // what makes this robust:
47
+ // • The ignored directory ITSELF matches (not just its children), so chokidar
48
+ // prunes the whole subtree and never opens FDs inside it — the actual EMFILE
49
+ // fix. A naive `path.includes('/target/')` check only matches descendants,
50
+ // so chokidar still descends into target/ and readdir-storms before pruning.
51
+ // • Only segments BELOW the watch root are considered, so a repo that happens
52
+ // to live under e.g. /home/user/dist/myapp is not wrongly ignored.
53
+ const IGNORED_DIR_NAMES = new Set([
54
+ // VCS / openlore
55
+ '.git', '.hg', '.svn', '.openlore',
56
+ // JS / TS
57
+ 'node_modules', 'dist', 'build', '.next', '.nuxt', '.svelte-kit',
58
+ '.turbo', '.parcel-cache', '.cache', 'coverage', '.vite',
59
+ // Rust
60
+ 'target',
61
+ // Python
62
+ '.venv', 'venv', '__pycache__', '.mypy_cache', '.pytest_cache',
63
+ '.tox', '.ruff_cache',
64
+ // Go / vendored deps
65
+ 'vendor',
66
+ // JVM
67
+ '.gradle',
68
+ // .NET
69
+ 'obj',
70
+ // Editor metadata
71
+ '.idea',
72
+ ]);
28
73
  const IGNORED_SUFFIXES = ['.test.ts', '.test.js', '.spec.ts', '.spec.js'];
29
- function isIgnoredPath(filePath) {
30
- for (const seg of IGNORED_SEGMENTS) {
31
- if (filePath.includes(seg))
74
+ /**
75
+ * True if a root-relative path should never be watched. Evaluated as a cheap
76
+ * segment scan before any FD is opened, so it stays allocation-light. A path is
77
+ * ignored if ANY of its segments is a known build/dependency/VCS directory
78
+ * name, or it has a test-file suffix. Exported for testing.
79
+ *
80
+ * @param relPath path relative to the watch root (forward- or back-slashed)
81
+ */
82
+ export function isIgnoredRelPath(relPath) {
83
+ if (!relPath || relPath === '.')
84
+ return false;
85
+ const segments = relPath.split(/[/\\]/);
86
+ for (const seg of segments) {
87
+ if (IGNORED_DIR_NAMES.has(seg))
32
88
  return true;
33
89
  }
34
90
  for (const suf of IGNORED_SUFFIXES) {
35
- if (filePath.endsWith(suf))
91
+ if (relPath.endsWith(suf))
36
92
  return true;
37
93
  }
38
94
  return false;
@@ -41,25 +97,66 @@ function isIgnoredPath(filePath) {
41
97
  export class McpWatcher {
42
98
  rootPath;
43
99
  outputPath;
100
+ contextPath;
44
101
  debounceMs;
102
+ maxBatchMs;
103
+ bulkThreshold;
104
+ embedFileCeiling;
45
105
  extraIgnore;
106
+ debug;
46
107
  fsWatcher;
47
- timers = new Map();
48
- running = false;
108
+ gitWatcher;
109
+ // ── Coalescing queue (Step 1) ──────────────────────────────────────────────
110
+ pending = new Set(); // absolute paths awaiting a flush
111
+ debounceTimer;
112
+ maxBatchTimer;
113
+ running = false; // single-flight for the signature flush
114
+ vcsBulkFlag = false; // set by the .git ref watcher
115
+ // ── Embedding lane (Step 4 — decoupled, lower priority) ─────────────────────
116
+ embed;
117
+ embedDegraded = false; // auto-degraded on a too-large tree
118
+ embedFiles = new Map(); // rel → content awaiting embed
119
+ embedNodes = new Map(); // id → node awaiting embed
120
+ embedTimer;
121
+ embedRunning = false;
122
+ lastEmbedContext;
49
123
  constructor(options) {
50
124
  this.rootPath = options.rootPath;
51
125
  this.outputPath = options.outputPath
52
126
  ?? join(options.rootPath, OPENLORE_DIR, OPENLORE_ANALYSIS_SUBDIR);
53
- this.debounceMs = options.debounceMs ?? 400;
127
+ this.contextPath = join(this.outputPath, ARTIFACT_LLM_CONTEXT);
128
+ this.debounceMs = options.debounceMs ?? WATCH_DEBOUNCE_MS;
129
+ this.maxBatchMs = options.maxBatchMs ?? WATCH_MAX_BATCH_MS;
130
+ this.bulkThreshold = options.bulkThreshold ?? WATCH_BULK_THRESHOLD;
131
+ this.embedFileCeiling = options.embedFileCeiling ?? WATCH_EMBED_FILE_CEILING;
132
+ this.embed = options.embed ?? true;
54
133
  this.extraIgnore = options.ignore ?? [];
134
+ this.debug = !!process.env.OPENLORE_WATCH_DEBUG;
55
135
  }
56
136
  // ── Lifecycle ──────────────────────────────────────────────────────────────
57
137
  async start() {
138
+ // Auto-degrade live embedding on very large trees (Step 4). Counting is
139
+ // bounded — it stops as soon as the ceiling is exceeded.
140
+ if (this.embed) {
141
+ const count = await this.countSourceFiles(this.embedFileCeiling + 1);
142
+ if (count > this.embedFileCeiling) {
143
+ this.embedDegraded = true;
144
+ process.stderr.write(`[mcp-watcher] ${count}+ source files exceed the live-embed ceiling ` +
145
+ `(${this.embedFileCeiling}); running signatures-only — embeddings refresh at commit\n`);
146
+ }
147
+ }
58
148
  await new Promise((resolve, reject) => {
59
149
  const extraIgnore = this.extraIgnore;
60
- this.fsWatcher = chokidar.watch(this.rootPath, {
61
- ignored: (filePath) => isIgnoredPath(filePath) ||
62
- extraIgnore.some((p) => filePath.includes(p)),
150
+ const rootPath = this.rootPath;
151
+ this.fsWatcher = chokidar.watch(rootPath, {
152
+ // Resolve each candidate to a root-relative path first, then prune by
153
+ // directory name. This prunes the ignored directory itself (chokidar
154
+ // never opens FDs inside it — the EMFILE fix) without false-matching on
155
+ // parent path components above the watch root.
156
+ ignored: (filePath) => {
157
+ const rel = relative(rootPath, filePath);
158
+ return isIgnoredRelPath(rel) || extraIgnore.some((p) => rel.includes(p));
159
+ },
63
160
  persistent: true,
64
161
  ignoreInitial: true,
65
162
  followSymlinks: false,
@@ -67,125 +164,315 @@ export class McpWatcher {
67
164
  });
68
165
  this.fsWatcher.on('change', (absPath) => {
69
166
  if (SOURCE_EXTENSIONS.test(absPath)) {
70
- this.scheduleChange(absPath);
167
+ this.enqueue(absPath);
71
168
  }
72
169
  });
73
170
  this.fsWatcher.on('ready', () => resolve());
74
171
  this.fsWatcher.on('error', (err) => reject(err));
75
172
  });
76
- process.stderr.write(`[mcp-watcher] watching ${this.rootPath}\n`);
173
+ // Best-effort VCS-flood detection (Step 5): a branch switch / rebase / merge
174
+ // bumps these refs. We never recurse into .git (it stays ignored above); we
175
+ // watch only these specific files, then collapse the churn into one refresh.
176
+ try {
177
+ const gitDir = join(this.rootPath, '.git');
178
+ const refs = ['HEAD', 'index', 'MERGE_HEAD', 'ORIG_HEAD'].map((f) => join(gitDir, f));
179
+ this.gitWatcher = chokidar.watch(refs, {
180
+ persistent: true,
181
+ ignoreInitial: true,
182
+ followSymlinks: false,
183
+ });
184
+ this.gitWatcher.on('all', () => this.onVcsEvent());
185
+ }
186
+ catch {
187
+ // no .git, or watch failed — VCS detection falls back to the batch-size
188
+ // threshold in handleBatch, which is enough for G3.
189
+ }
190
+ process.stderr.write(`[mcp-watcher] watching ${this.rootPath}` +
191
+ `${this.embed && !this.embedDegraded ? '' : ' (signatures-only)'}\n`);
77
192
  }
78
193
  async stop() {
79
- for (const t of this.timers.values())
80
- clearTimeout(t);
81
- this.timers.clear();
194
+ if (this.debounceTimer)
195
+ clearTimeout(this.debounceTimer);
196
+ if (this.maxBatchTimer)
197
+ clearTimeout(this.maxBatchTimer);
198
+ if (this.embedTimer)
199
+ clearTimeout(this.embedTimer);
200
+ this.debounceTimer = this.maxBatchTimer = this.embedTimer = undefined;
201
+ // Best-effort: persist anything still queued so a save right before shutdown
202
+ // is not lost.
203
+ if (this.pending.size > 0 && !this.running) {
204
+ const batch = Array.from(this.pending);
205
+ this.pending.clear();
206
+ try {
207
+ await this.handleBatch(batch, { syncFlush: true });
208
+ }
209
+ catch { /* ignore */ }
210
+ }
82
211
  await this.fsWatcher?.close();
212
+ await this.gitWatcher?.close();
83
213
  process.stderr.write('[mcp-watcher] stopped\n');
84
214
  }
85
- // ── Debounce ───────────────────────────────────────────────────────────────
86
- scheduleChange(absPath) {
87
- const existing = this.timers.get(absPath);
88
- if (existing)
89
- clearTimeout(existing);
90
- const t = setTimeout(() => {
91
- this.timers.delete(absPath);
92
- if (this.running) {
93
- // Re-schedule instead of dropping — ensures no changes are lost
94
- this.scheduleChange(absPath);
95
- return;
215
+ // ── Coalescing (Step 1) ──────────────────────────────────────────────────────
216
+ /**
217
+ * Add a changed path to the pending set and (re)arm a single debounce timer,
218
+ * plus a one-shot hard ceiling so a continuous stream still flushes.
219
+ */
220
+ enqueue(absPath) {
221
+ this.pending.add(absPath);
222
+ if (this.debounceTimer)
223
+ clearTimeout(this.debounceTimer);
224
+ this.debounceTimer = setTimeout(() => this.flush(), this.debounceMs);
225
+ if (!this.maxBatchTimer) {
226
+ this.maxBatchTimer = setTimeout(() => this.flush(), this.maxBatchMs);
227
+ }
228
+ }
229
+ /** A .git ref changed — settle, then flush whatever changed as one bulk batch. */
230
+ onVcsEvent() {
231
+ this.vcsBulkFlag = true;
232
+ if (this.debounceTimer)
233
+ clearTimeout(this.debounceTimer);
234
+ this.debounceTimer = setTimeout(() => this.flush(), WATCH_VCS_SETTLE_MS);
235
+ if (this.debug) {
236
+ process.stderr.write('[mcp-watcher] VCS operation detected — coalescing into one refresh\n');
237
+ }
238
+ }
239
+ /**
240
+ * Drain the pending set into a single batch. Single-flight: if a flush is
241
+ * already running, leave the new paths in `pending` and reschedule once it
242
+ * finishes — never interleave two flushes.
243
+ */
244
+ flush() {
245
+ if (this.debounceTimer) {
246
+ clearTimeout(this.debounceTimer);
247
+ this.debounceTimer = undefined;
248
+ }
249
+ if (this.maxBatchTimer) {
250
+ clearTimeout(this.maxBatchTimer);
251
+ this.maxBatchTimer = undefined;
252
+ }
253
+ if (this.running)
254
+ return; // a follow-up is scheduled in finally{}
255
+ if (this.pending.size === 0)
256
+ return;
257
+ const batch = Array.from(this.pending);
258
+ this.pending.clear();
259
+ this.running = true;
260
+ this.handleBatch(batch)
261
+ .catch((err) => process.stderr.write(`[mcp-watcher] error: ${err.message}\n`))
262
+ .finally(() => {
263
+ this.running = false;
264
+ if (this.pending.size > 0) {
265
+ this.debounceTimer = setTimeout(() => this.flush(), this.debounceMs);
96
266
  }
97
- this.running = true;
98
- this.handleChange(absPath)
99
- .catch(err => process.stderr.write(`[mcp-watcher] error: ${err.message}\n`))
100
- .finally(() => { this.running = false; });
101
- }, this.debounceMs);
102
- this.timers.set(absPath, t);
267
+ });
103
268
  }
104
269
  // ── Core re-index ──────────────────────────────────────────────────────────
105
270
  /**
106
- * Re-index a single changed file.
107
- * Exposed for unit testing without needing a real file watcher.
271
+ * Re-index a single changed file. Exposed for unit testing without needing a
272
+ * real file watcher; flushes synchronously so callers observe the update on
273
+ * disk immediately. Internally this is just a batch of one.
108
274
  */
109
275
  async handleChange(absPath) {
110
- const rel = relative(this.rootPath, absPath);
111
- // Skip test files and unsupported languages
112
- if (isTestFile(rel))
113
- return;
114
- if (detectLanguage(rel) === 'unknown')
115
- return;
116
- // Read new file content (needed for hash check and re-parse)
117
- let content;
118
- try {
119
- content = await readFile(absPath, 'utf-8');
120
- }
121
- catch {
122
- return; // file may have been deleted between the event and now
276
+ await this.handleBatch([absPath], { syncFlush: true });
277
+ }
278
+ /**
279
+ * Process a coalesced batch of changed files as ONE pipeline pass:
280
+ * • per-file incremental edge update (content-hash skip), all under one open
281
+ * EdgeStore;
282
+ * • ONE signature patch + ONE llm-context persist + ONE read-cache handoff;
283
+ * • ONE vector update (inline when syncFlush, else on the embed lane).
284
+ */
285
+ async handleBatch(absPaths, opts = {}) {
286
+ const t0 = Date.now();
287
+ const consumedVcsBulk = this.vcsBulkFlag;
288
+ this.vcsBulkFlag = false;
289
+ // 1. Resolve + read candidate files (skip tests / unknown langs / deleted).
290
+ const files = [];
291
+ for (const abs of absPaths) {
292
+ const rel = relative(this.rootPath, abs);
293
+ if (isTestFile(rel))
294
+ continue;
295
+ if (detectLanguage(rel) === 'unknown')
296
+ continue;
297
+ let content;
298
+ try {
299
+ content = await readFile(abs, 'utf-8');
300
+ }
301
+ catch {
302
+ continue; // file may have been deleted between the event and now
303
+ }
304
+ files.push({ rel, abs, content });
123
305
  }
124
- // ── Incremental edge update (CGC _handle_modification algorithm) ──────────
306
+ if (files.length === 0)
307
+ return;
308
+ // 2. Incremental edge update (CGC _handle_modification algorithm), one open
309
+ // store for the whole batch. Content-hash skip drops no-op autosaves.
310
+ const changedFiles = [];
311
+ const changedNodes = [];
125
312
  if (EdgeStore.exists(this.outputPath)) {
126
313
  const store = EdgeStore.open(EdgeStore.dbPath(this.outputPath));
127
314
  try {
128
- // Content hash — skip entirely on no-op IDE autosaves
129
- const newHash = createHash('sha256').update(content).digest('hex');
130
- if (store.getFileHash(rel) === newHash)
131
- return;
132
- // Reverse lookup BEFORE delete so we know which files call into this one
133
- // callerFiles are relative paths (DB stores relative paths)
134
- const callerFiles = store.getCallerFiles(rel);
135
- // Re-parse BEFORE mutating DB — graph stays readable (old state) during parse.
136
- // Seed resolution with all known nodes so the re-parsed caller files'
137
- // calls into other files don't degrade to `external::` (they would
138
- // otherwise, since the subset trie only holds the re-parsed files).
139
- const resolutionNodes = store.getAllInternalNodes();
140
- const { edges: newEdges, nodes: newNodes } = await buildGraphSubset(rel, content, callerFiles, this.rootPath, resolutionNodes);
141
- // Atomic swap: delete stale data and insert fresh data in one transaction
142
- // so concurrent MCP reads never see a torn graph
143
- store.transaction(() => {
144
- store.deleteEdgesForFile(rel);
145
- for (const cf of callerFiles.slice(0, CALLER_REPARSE_LIMIT)) {
146
- store.deleteOutgoingEdgesForFile(cf);
315
+ for (const f of files) {
316
+ const newHash = createHash('sha256').update(f.content).digest('hex');
317
+ if (store.getFileHash(f.rel) === newHash)
318
+ continue; // no-op autosave
319
+ // Reverse lookup BEFORE delete so we know which files call into this one.
320
+ const callerFiles = store.getCallerFiles(f.rel);
321
+ // Re-parse BEFORE mutating DB — graph stays readable (old state) during
322
+ // parse. Seed resolution with all known nodes so re-parsed callers'
323
+ // cross-file calls don't degrade to `external::`.
324
+ const resolutionNodes = store.getAllInternalNodes();
325
+ const { edges: newEdges, nodes: newNodes } = await buildGraphSubset(f.rel, f.content, callerFiles, this.rootPath, resolutionNodes);
326
+ // Atomic swap so concurrent MCP reads never see a torn graph.
327
+ store.transaction(() => {
328
+ store.deleteEdgesForFile(f.rel);
329
+ for (const cf of callerFiles.slice(0, CALLER_REPARSE_LIMIT)) {
330
+ store.deleteOutgoingEdgesForFile(cf);
331
+ }
332
+ store.deleteNodesForFile(f.rel);
333
+ store.insertNodes(newNodes);
334
+ store.insertEdges(newEdges);
335
+ store.setFileHash(f.rel, newHash);
336
+ });
337
+ changedFiles.push({ rel: f.rel, content: f.content });
338
+ for (const n of newNodes)
339
+ changedNodes.push(n);
340
+ if (this.debug) {
341
+ process.stderr.write(`[mcp-watcher] graph: ${f.rel} (+${newNodes.length} nodes, +${newEdges.length} edges, ${callerFiles.length} callers)\n`);
147
342
  }
148
- store.deleteNodesForFile(rel);
149
- store.insertNodes(newNodes);
150
- store.insertEdges(newEdges);
151
- store.setFileHash(rel, newHash);
152
- });
153
- process.stderr.write(`[mcp-watcher] updated graph: ${rel} (+${newNodes.length} nodes, +${newEdges.length} edges, ${callerFiles.length} callers re-parsed)\n`);
343
+ }
154
344
  }
155
345
  finally {
156
346
  store.close();
157
347
  }
158
348
  }
159
- // ── Signature patch ───────────────────────────────────────────────────────
160
- const contextPath = join(this.outputPath, ARTIFACT_LLM_CONTEXT);
161
- let context;
162
- try {
163
- const raw = await readFile(contextPath, 'utf-8');
164
- context = JSON.parse(raw);
349
+ else {
350
+ // No edge store yet — still refresh signatures for every candidate.
351
+ for (const f of files)
352
+ changedFiles.push({ rel: f.rel, content: f.content });
165
353
  }
166
- catch {
167
- process.stderr.write(`[mcp-watcher] no context at ${contextPath} run analyze first\n`);
354
+ if (changedFiles.length === 0)
355
+ return; // every event was a no-op autosave
356
+ // 3. Signatures: load context (shared in-memory cache), patch all changed
357
+ // files, then ONE persist + read-cache handoff (Step 2). The handoff
358
+ // means the next tool call is a cache HIT — no cold 2.1 MB re-parse.
359
+ const context = await this.loadContext();
360
+ if (!context) {
361
+ process.stderr.write(`[mcp-watcher] no context at ${this.contextPath} — run analyze first\n`);
168
362
  return;
169
363
  }
170
- const newMap = extractSignatures(rel, content);
171
364
  if (!context.signatures)
172
365
  context.signatures = [];
173
- const idx = context.signatures.findIndex(m => m.path === rel);
174
- if (idx >= 0) {
175
- context.signatures[idx] = newMap;
366
+ for (const f of changedFiles) {
367
+ const newMap = extractSignatures(f.rel, f.content);
368
+ const idx = context.signatures.findIndex((m) => m.path === f.rel);
369
+ if (idx >= 0)
370
+ context.signatures[idx] = newMap;
371
+ else
372
+ context.signatures.push(newMap);
176
373
  }
177
- else {
178
- context.signatures.push(newMap);
374
+ await this.persistContext(context);
375
+ // 4. Vector update — decoupled from signature freshness (Step 4).
376
+ const isBulk = consumedVcsBulk || changedFiles.length >= this.bulkThreshold;
377
+ if (this.embed && !this.embedDegraded && context.callGraph) {
378
+ if (opts.syncFlush) {
379
+ // Direct handleChange path: inline so callers/tests observe it.
380
+ await this.updateVectors(context, changedFiles, changedNodes);
381
+ }
382
+ else {
383
+ // Watcher path: schedule on the lower-priority embed lane. On a bulk
384
+ // event this still collapses to a single deferred pass.
385
+ this.scheduleEmbed(context, changedFiles, changedNodes);
386
+ }
387
+ }
388
+ // 5. One summary line per batch (Step 6). Per-file detail is behind debug.
389
+ const n = changedFiles.length;
390
+ process.stderr.write(`[mcp-watcher] ${isBulk ? `coalesced ${n} changes` : `updated ${n} file${n === 1 ? '' : 's'}`} (${Date.now() - t0}ms)\n`);
391
+ }
392
+ // ── llm-context load + persistence + read-cache handoff (Step 2) ─────────────
393
+ /**
394
+ * True when this watcher writes to the canonical `<root>/.openlore/analysis`
395
+ * layout that the MCP read handlers cache against. Only then is the shared
396
+ * in-memory read cache (primeContextCache) the right channel to prime; a custom
397
+ * `outputPath` (tests / non-standard installs) writes only to disk.
398
+ */
399
+ get usesStandardLayout() {
400
+ return this.outputPath === join(this.rootPath, OPENLORE_DIR, OPENLORE_ANALYSIS_SUBDIR);
401
+ }
402
+ /**
403
+ * Load the context the watcher is about to patch. This ALWAYS reads fresh from
404
+ * disk — never through the shared read cache — because the cache is a read-path
405
+ * (tool-call) optimization, and patching a possibly-stale cached object could
406
+ * silently drop signatures written by a concurrent `analyze` between events.
407
+ * The writer reads ground truth; persistContext then primes the read cache with
408
+ * the result so the next tool call is still a hit (Step 2a, G1).
409
+ */
410
+ async loadContext() {
411
+ try {
412
+ const raw = await readFile(this.contextPath, 'utf-8');
413
+ return JSON.parse(raw);
414
+ }
415
+ catch {
416
+ return null;
417
+ }
418
+ }
419
+ async persistContext(context) {
420
+ // Strip the runtime-only EdgeStore handle before serializing.
421
+ const { edgeStore: _edgeStore, ...serializable } = context;
422
+ void _edgeStore;
423
+ await writeFile(this.contextPath, JSON.stringify(serializable, null, 2), 'utf-8');
424
+ // Hand the patched object back to the read cache, aligned to the new on-disk
425
+ // mtime, so the next tool call is a cache hit (no cold re-parse). This is the
426
+ // fix for root-cause item 2 (mtime bump forcing a full re-read). Only valid
427
+ // for the canonical layout the read handlers cache against.
428
+ if (this.usesStandardLayout)
429
+ await primeContextCache(this.rootPath, context);
430
+ }
431
+ // ── Embedding lane (Step 4) ──────────────────────────────────────────────────
432
+ scheduleEmbed(context, changedFiles, nodes) {
433
+ for (const f of changedFiles)
434
+ this.embedFiles.set(f.rel, f.content);
435
+ for (const node of nodes)
436
+ this.embedNodes.set(node.id, node);
437
+ this.lastEmbedContext = context;
438
+ if (this.embedTimer)
439
+ clearTimeout(this.embedTimer);
440
+ // Slightly behind the signature debounce so structural freshness always lands
441
+ // first and multiple flushes batch into one embed pass.
442
+ this.embedTimer = setTimeout(() => void this.runEmbedLane(), this.debounceMs);
443
+ }
444
+ async runEmbedLane() {
445
+ if (this.embedRunning) {
446
+ // Re-arm: drain again once the in-flight pass finishes.
447
+ this.embedTimer = setTimeout(() => void this.runEmbedLane(), this.debounceMs);
448
+ return;
179
449
  }
180
- await writeFile(contextPath, JSON.stringify(context, null, 2), 'utf-8');
181
- process.stderr.write(`[mcp-watcher] re-indexed signatures: ${rel}\n`);
182
- // Incremental vector re-embed — silently skipped if no embedding service available
183
- if (context.callGraph) {
184
- await this.reEmbed(context, rel, content);
450
+ if (this.embedFiles.size === 0 || !this.lastEmbedContext)
451
+ return;
452
+ const changedFiles = Array.from(this.embedFiles, ([rel, content]) => ({ rel, content }));
453
+ const nodes = Array.from(this.embedNodes.values());
454
+ const context = this.lastEmbedContext;
455
+ this.embedFiles.clear();
456
+ this.embedNodes.clear();
457
+ this.embedRunning = true;
458
+ try {
459
+ await this.updateVectors(context, changedFiles, nodes);
460
+ }
461
+ catch (err) {
462
+ process.stderr.write(`[mcp-watcher] embed error: ${err.message}\n`);
463
+ }
464
+ finally {
465
+ this.embedRunning = false;
466
+ if (this.embedFiles.size > 0) {
467
+ this.embedTimer = setTimeout(() => void this.runEmbedLane(), this.debounceMs);
468
+ }
185
469
  }
186
470
  }
187
- // ── Embed step ─────────────────────────────────────────────────────────────
188
- async reEmbed(context, rel, content) {
471
+ /**
472
+ * Row-level vector update for the changed files only (Step 3). Falls back to a
473
+ * silent no-op when no embedding service and no index are available.
474
+ */
475
+ async updateVectors(context, changedFiles, changedNodes) {
189
476
  try {
190
477
  const { VectorIndex } = await import('../analyzer/vector-index.js');
191
478
  const { EmbeddingService } = await import('../analyzer/embedding-service.js');
@@ -200,24 +487,64 @@ export class McpWatcher {
200
487
  const cfg = await readOpenLoreConfig(this.rootPath);
201
488
  embedSvc = cfg ? EmbeddingService.fromConfig(cfg) : null;
202
489
  }
203
- // embedSvc may be null: VectorIndex.build then refreshes the BM25-only
204
- // corpus rather than re-embedding. Keeps the keyword index live in watch mode.
490
+ // embedSvc may be null: updateFiles then refreshes the BM25-only corpus
491
+ // rather than re-embedding, keeping the keyword index live in watch mode.
205
492
  const cg = context.callGraph;
206
- const hubIds = new Set((cg.hubFunctions ?? []).map(f => f.id));
207
- const entryIds = new Set((cg.entryPoints ?? []).map(f => f.id));
208
- const fileContents = new Map([[rel, content]]);
209
- const { embedded, reused, total, hasEmbeddings } = await VectorIndex.build(this.outputPath, cg.nodes, context.signatures ?? [], hubIds, entryIds, embedSvc, fileContents,
210
- /* incremental */ true);
211
- process.stderr.write(hasEmbeddings
212
- ? `[mcp-watcher] re-embedded ${rel}: ${embedded} new, ${reused} reused\n`
213
- : `[mcp-watcher] refreshed BM25 index for ${rel}: ${total} functions\n`);
493
+ if (!cg)
494
+ return;
495
+ const hubIds = new Set((cg.hubFunctions ?? []).map((f) => f.id));
496
+ const entryIds = new Set((cg.entryPoints ?? []).map((f) => f.id));
497
+ const changedFilePaths = new Set(changedFiles.map((f) => f.rel));
498
+ const fileContents = new Map(changedFiles.map((f) => [f.rel, f.content]));
499
+ // Prefer the freshly-parsed nodes; fall back to the (possibly stale)
500
+ // call-graph nodes for the changed files when no edge store seeded them.
501
+ const nodes = changedNodes.length > 0
502
+ ? changedNodes
503
+ : (cg.nodes ?? []).filter((n) => changedFilePaths.has(n.filePath));
504
+ const { embedded, reused, total, hasEmbeddings } = await VectorIndex.updateFiles(this.outputPath, nodes, changedFilePaths, context.signatures ?? [], hubIds, entryIds, embedSvc, fileContents);
505
+ if (this.debug) {
506
+ process.stderr.write(hasEmbeddings
507
+ ? `[mcp-watcher] re-embedded ${changedFilePaths.size} file(s): ${embedded} new, ${reused} reused\n`
508
+ : `[mcp-watcher] refreshed BM25 index for ${changedFilePaths.size} file(s): ${total} functions\n`);
509
+ }
214
510
  }
215
511
  catch (err) {
216
512
  process.stderr.write(`[mcp-watcher] embed error: ${err.message}\n`);
217
513
  }
218
514
  }
515
+ // ── Helpers ──────────────────────────────────────────────────────────────────
516
+ /** Bounded count of watched source files; stops early once `cap` is exceeded. */
517
+ async countSourceFiles(cap) {
518
+ let count = 0;
519
+ const walk = async (dir) => {
520
+ if (count > cap)
521
+ return;
522
+ let entries;
523
+ try {
524
+ entries = await readdir(dir, { withFileTypes: true });
525
+ }
526
+ catch {
527
+ return;
528
+ }
529
+ for (const entry of entries) {
530
+ if (count > cap)
531
+ return;
532
+ const abs = join(dir, entry.name);
533
+ const rel = relative(this.rootPath, abs);
534
+ if (entry.isDirectory()) {
535
+ if (!isIgnoredRelPath(rel))
536
+ await walk(abs);
537
+ }
538
+ else if (entry.isFile() && SOURCE_EXTENSIONS.test(entry.name) && !isIgnoredRelPath(rel)) {
539
+ count++;
540
+ }
541
+ }
542
+ };
543
+ await walk(this.rootPath);
544
+ return count;
545
+ }
219
546
  }
220
- // ── Helpers ───────────────────────────────────────────────────────────────────
547
+ // ── Module helpers ──────────────────────────────────────────────────────────────
221
548
  function isTestFile(relPath) {
222
549
  return (relPath.includes('.test.') ||
223
550
  relPath.includes('.spec.') ||
@@ -252,7 +579,7 @@ async function buildGraphSubset(changedRel, changedContent, callerFiles, rootDir
252
579
  const builder = new CallGraphBuilder();
253
580
  const result = await builder.build(files, undefined, undefined, resolutionNodes);
254
581
  // Only return nodes from changedFile — callerFiles nodes are already in DB and unchanged
255
- const changedNodes = Array.from(result.nodes.values()).filter(n => n.filePath === changedRel);
582
+ const changedNodes = Array.from(result.nodes.values()).filter((n) => n.filePath === changedRel);
256
583
  return { edges: result.edges, nodes: changedNodes };
257
584
  }
258
585
  //# sourceMappingURL=mcp-watcher.js.map