openlore 2.0.5 → 2.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,14 +8,29 @@
8
8
  * The call graph is deliberately excluded — rebuilding it requires full
9
9
  * tree-sitter analysis of all call sites and is too expensive for a watch loop.
10
10
  * It stays current via the post-commit hook (openlore analyze --force --embed).
11
+ *
12
+ * Spec 13.1 (watch-mode performance): freshness is O(change), not O(repo).
13
+ * • Per-file events COALESCE into one batched flush (single debounce timer +
14
+ * hard max-batch ceiling), so a burst / branch-switch runs the pipeline once,
15
+ * not once per file.
16
+ * • The patched llm-context is handed to the MCP read cache in place
17
+ * (primeContextCache), so the next tool call is a cache HIT — no 2.1 MB
18
+ * cold re-parse — even after the disk write.
19
+ * • Vector updates are row-level (VectorIndex.updateFiles), not a full-corpus
20
+ * read+overwrite, and run on a separate lower-priority lane so signature
21
+ * freshness never blocks on embedding.
22
+ * • VCS-flood / bulk batches are detected and collapsed to a single refresh.
23
+ * • stderr emits one summary line per batch by default (per-file detail behind
24
+ * OPENLORE_WATCH_DEBUG).
11
25
  */
12
- import { readFile, writeFile } from 'node:fs/promises';
26
+ import { readFile, writeFile, readdir } from 'node:fs/promises';
13
27
  import { createHash } from 'node:crypto';
14
28
  import { join, relative } from 'node:path';
15
29
  import chokidar from 'chokidar';
16
30
  import { extractSignatures, detectLanguage } from '../analyzer/signature-extractor.js';
17
31
  import { EdgeStore } from './edge-store.js';
18
- import { OPENLORE_DIR, OPENLORE_ANALYSIS_SUBDIR, ARTIFACT_LLM_CONTEXT, } from '../../constants.js';
32
+ import { primeContextCache } from './mcp-handlers/utils.js';
33
+ import { OPENLORE_DIR, OPENLORE_ANALYSIS_SUBDIR, ARTIFACT_LLM_CONTEXT, WATCH_DEBOUNCE_MS, WATCH_MAX_BATCH_MS, WATCH_BULK_THRESHOLD, WATCH_EMBED_FILE_CEILING, WATCH_VCS_SETTLE_MS, } from '../../constants.js';
19
34
  const CALL_GRAPH_LANGS = new Set([
20
35
  'Python', 'TypeScript', 'JavaScript', 'Go', 'Rust', 'Ruby', 'Java', 'C++', 'Swift',
21
36
  ]);
@@ -82,20 +97,54 @@ export function isIgnoredRelPath(relPath) {
82
97
  export class McpWatcher {
83
98
  rootPath;
84
99
  outputPath;
100
+ contextPath;
85
101
  debounceMs;
102
+ maxBatchMs;
103
+ bulkThreshold;
104
+ embedFileCeiling;
86
105
  extraIgnore;
106
+ debug;
87
107
  fsWatcher;
88
- timers = new Map();
89
- running = false;
108
+ gitWatcher;
109
+ // ── Coalescing queue (Step 1) ──────────────────────────────────────────────
110
+ pending = new Set(); // absolute paths awaiting a flush
111
+ debounceTimer;
112
+ maxBatchTimer;
113
+ running = false; // single-flight for the signature flush
114
+ vcsBulkFlag = false; // set by the .git ref watcher
115
+ // ── Embedding lane (Step 4 — decoupled, lower priority) ─────────────────────
116
+ embed;
117
+ embedDegraded = false; // auto-degraded on a too-large tree
118
+ embedFiles = new Map(); // rel → content awaiting embed
119
+ embedNodes = new Map(); // id → node awaiting embed
120
+ embedTimer;
121
+ embedRunning = false;
122
+ lastEmbedContext;
90
123
  constructor(options) {
91
124
  this.rootPath = options.rootPath;
92
125
  this.outputPath = options.outputPath
93
126
  ?? join(options.rootPath, OPENLORE_DIR, OPENLORE_ANALYSIS_SUBDIR);
94
- this.debounceMs = options.debounceMs ?? 400;
127
+ this.contextPath = join(this.outputPath, ARTIFACT_LLM_CONTEXT);
128
+ this.debounceMs = options.debounceMs ?? WATCH_DEBOUNCE_MS;
129
+ this.maxBatchMs = options.maxBatchMs ?? WATCH_MAX_BATCH_MS;
130
+ this.bulkThreshold = options.bulkThreshold ?? WATCH_BULK_THRESHOLD;
131
+ this.embedFileCeiling = options.embedFileCeiling ?? WATCH_EMBED_FILE_CEILING;
132
+ this.embed = options.embed ?? true;
95
133
  this.extraIgnore = options.ignore ?? [];
134
+ this.debug = !!process.env.OPENLORE_WATCH_DEBUG;
96
135
  }
97
136
  // ── Lifecycle ──────────────────────────────────────────────────────────────
98
137
  async start() {
138
+ // Auto-degrade live embedding on very large trees (Step 4). Counting is
139
+ // bounded — it stops as soon as the ceiling is exceeded.
140
+ if (this.embed) {
141
+ const count = await this.countSourceFiles(this.embedFileCeiling + 1);
142
+ if (count > this.embedFileCeiling) {
143
+ this.embedDegraded = true;
144
+ process.stderr.write(`[mcp-watcher] ${count}+ source files exceed the live-embed ceiling ` +
145
+ `(${this.embedFileCeiling}); running signatures-only — embeddings refresh at commit\n`);
146
+ }
147
+ }
99
148
  await new Promise((resolve, reject) => {
100
149
  const extraIgnore = this.extraIgnore;
101
150
  const rootPath = this.rootPath;
@@ -115,125 +164,315 @@ export class McpWatcher {
115
164
  });
116
165
  this.fsWatcher.on('change', (absPath) => {
117
166
  if (SOURCE_EXTENSIONS.test(absPath)) {
118
- this.scheduleChange(absPath);
167
+ this.enqueue(absPath);
119
168
  }
120
169
  });
121
170
  this.fsWatcher.on('ready', () => resolve());
122
171
  this.fsWatcher.on('error', (err) => reject(err));
123
172
  });
124
- process.stderr.write(`[mcp-watcher] watching ${this.rootPath}\n`);
173
+ // Best-effort VCS-flood detection (Step 5): a branch switch / rebase / merge
174
+ // bumps these refs. We never recurse into .git (it stays ignored above); we
175
+ // watch only these specific files, then collapse the churn into one refresh.
176
+ try {
177
+ const gitDir = join(this.rootPath, '.git');
178
+ const refs = ['HEAD', 'index', 'MERGE_HEAD', 'ORIG_HEAD'].map((f) => join(gitDir, f));
179
+ this.gitWatcher = chokidar.watch(refs, {
180
+ persistent: true,
181
+ ignoreInitial: true,
182
+ followSymlinks: false,
183
+ });
184
+ this.gitWatcher.on('all', () => this.onVcsEvent());
185
+ }
186
+ catch {
187
+ // no .git, or watch failed — VCS detection falls back to the batch-size
188
+ // threshold in handleBatch, which is enough for G3.
189
+ }
190
+ process.stderr.write(`[mcp-watcher] watching ${this.rootPath}` +
191
+ `${this.embed && !this.embedDegraded ? '' : ' (signatures-only)'}\n`);
125
192
  }
126
193
  async stop() {
127
- for (const t of this.timers.values())
128
- clearTimeout(t);
129
- this.timers.clear();
194
+ if (this.debounceTimer)
195
+ clearTimeout(this.debounceTimer);
196
+ if (this.maxBatchTimer)
197
+ clearTimeout(this.maxBatchTimer);
198
+ if (this.embedTimer)
199
+ clearTimeout(this.embedTimer);
200
+ this.debounceTimer = this.maxBatchTimer = this.embedTimer = undefined;
201
+ // Best-effort: persist anything still queued so a save right before shutdown
202
+ // is not lost.
203
+ if (this.pending.size > 0 && !this.running) {
204
+ const batch = Array.from(this.pending);
205
+ this.pending.clear();
206
+ try {
207
+ await this.handleBatch(batch, { syncFlush: true });
208
+ }
209
+ catch { /* ignore */ }
210
+ }
130
211
  await this.fsWatcher?.close();
212
+ await this.gitWatcher?.close();
131
213
  process.stderr.write('[mcp-watcher] stopped\n');
132
214
  }
133
- // ── Debounce ───────────────────────────────────────────────────────────────
134
- scheduleChange(absPath) {
135
- const existing = this.timers.get(absPath);
136
- if (existing)
137
- clearTimeout(existing);
138
- const t = setTimeout(() => {
139
- this.timers.delete(absPath);
140
- if (this.running) {
141
- // Re-schedule instead of dropping — ensures no changes are lost
142
- this.scheduleChange(absPath);
143
- return;
215
+ // ── Coalescing (Step 1) ──────────────────────────────────────────────────────
216
+ /**
217
+ * Add a changed path to the pending set and (re)arm a single debounce timer,
218
+ * plus a one-shot hard ceiling so a continuous stream still flushes.
219
+ */
220
+ enqueue(absPath) {
221
+ this.pending.add(absPath);
222
+ if (this.debounceTimer)
223
+ clearTimeout(this.debounceTimer);
224
+ this.debounceTimer = setTimeout(() => this.flush(), this.debounceMs);
225
+ if (!this.maxBatchTimer) {
226
+ this.maxBatchTimer = setTimeout(() => this.flush(), this.maxBatchMs);
227
+ }
228
+ }
229
+ /** A .git ref changed — settle, then flush whatever changed as one bulk batch. */
230
+ onVcsEvent() {
231
+ this.vcsBulkFlag = true;
232
+ if (this.debounceTimer)
233
+ clearTimeout(this.debounceTimer);
234
+ this.debounceTimer = setTimeout(() => this.flush(), WATCH_VCS_SETTLE_MS);
235
+ if (this.debug) {
236
+ process.stderr.write('[mcp-watcher] VCS operation detected — coalescing into one refresh\n');
237
+ }
238
+ }
239
+ /**
240
+ * Drain the pending set into a single batch. Single-flight: if a flush is
241
+ * already running, leave the new paths in `pending` and reschedule once it
242
+ * finishes — never interleave two flushes.
243
+ */
244
+ flush() {
245
+ if (this.debounceTimer) {
246
+ clearTimeout(this.debounceTimer);
247
+ this.debounceTimer = undefined;
248
+ }
249
+ if (this.maxBatchTimer) {
250
+ clearTimeout(this.maxBatchTimer);
251
+ this.maxBatchTimer = undefined;
252
+ }
253
+ if (this.running)
254
+ return; // a follow-up is scheduled in finally{}
255
+ if (this.pending.size === 0)
256
+ return;
257
+ const batch = Array.from(this.pending);
258
+ this.pending.clear();
259
+ this.running = true;
260
+ this.handleBatch(batch)
261
+ .catch((err) => process.stderr.write(`[mcp-watcher] error: ${err.message}\n`))
262
+ .finally(() => {
263
+ this.running = false;
264
+ if (this.pending.size > 0) {
265
+ this.debounceTimer = setTimeout(() => this.flush(), this.debounceMs);
144
266
  }
145
- this.running = true;
146
- this.handleChange(absPath)
147
- .catch(err => process.stderr.write(`[mcp-watcher] error: ${err.message}\n`))
148
- .finally(() => { this.running = false; });
149
- }, this.debounceMs);
150
- this.timers.set(absPath, t);
267
+ });
151
268
  }
152
269
  // ── Core re-index ──────────────────────────────────────────────────────────
153
270
  /**
154
- * Re-index a single changed file.
155
- * Exposed for unit testing without needing a real file watcher.
271
+ * Re-index a single changed file. Exposed for unit testing without needing a
272
+ * real file watcher; flushes synchronously so callers observe the update on
273
+ * disk immediately. Internally this is just a batch of one.
156
274
  */
157
275
  async handleChange(absPath) {
158
- const rel = relative(this.rootPath, absPath);
159
- // Skip test files and unsupported languages
160
- if (isTestFile(rel))
161
- return;
162
- if (detectLanguage(rel) === 'unknown')
163
- return;
164
- // Read new file content (needed for hash check and re-parse)
165
- let content;
166
- try {
167
- content = await readFile(absPath, 'utf-8');
168
- }
169
- catch {
170
- return; // file may have been deleted between the event and now
276
+ await this.handleBatch([absPath], { syncFlush: true });
277
+ }
278
+ /**
279
+ * Process a coalesced batch of changed files as ONE pipeline pass:
280
+ * • per-file incremental edge update (content-hash skip), all under one open
281
+ * EdgeStore;
282
+ * • ONE signature patch + ONE llm-context persist + ONE read-cache handoff;
283
+ * • ONE vector update (inline when syncFlush, else on the embed lane).
284
+ */
285
+ async handleBatch(absPaths, opts = {}) {
286
+ const t0 = Date.now();
287
+ const consumedVcsBulk = this.vcsBulkFlag;
288
+ this.vcsBulkFlag = false;
289
+ // 1. Resolve + read candidate files (skip tests / unknown langs / deleted).
290
+ const files = [];
291
+ for (const abs of absPaths) {
292
+ const rel = relative(this.rootPath, abs);
293
+ if (isTestFile(rel))
294
+ continue;
295
+ if (detectLanguage(rel) === 'unknown')
296
+ continue;
297
+ let content;
298
+ try {
299
+ content = await readFile(abs, 'utf-8');
300
+ }
301
+ catch {
302
+ continue; // file may have been deleted between the event and now
303
+ }
304
+ files.push({ rel, abs, content });
171
305
  }
172
- // ── Incremental edge update (CGC _handle_modification algorithm) ──────────
306
+ if (files.length === 0)
307
+ return;
308
+ // 2. Incremental edge update (CGC _handle_modification algorithm), one open
309
+ // store for the whole batch. Content-hash skip drops no-op autosaves.
310
+ const changedFiles = [];
311
+ const changedNodes = [];
173
312
  if (EdgeStore.exists(this.outputPath)) {
174
313
  const store = EdgeStore.open(EdgeStore.dbPath(this.outputPath));
175
314
  try {
176
- // Content hash — skip entirely on no-op IDE autosaves
177
- const newHash = createHash('sha256').update(content).digest('hex');
178
- if (store.getFileHash(rel) === newHash)
179
- return;
180
- // Reverse lookup BEFORE delete so we know which files call into this one
181
- // callerFiles are relative paths (DB stores relative paths)
182
- const callerFiles = store.getCallerFiles(rel);
183
- // Re-parse BEFORE mutating DB — graph stays readable (old state) during parse.
184
- // Seed resolution with all known nodes so the re-parsed caller files'
185
- // calls into other files don't degrade to `external::` (they would
186
- // otherwise, since the subset trie only holds the re-parsed files).
187
- const resolutionNodes = store.getAllInternalNodes();
188
- const { edges: newEdges, nodes: newNodes } = await buildGraphSubset(rel, content, callerFiles, this.rootPath, resolutionNodes);
189
- // Atomic swap: delete stale data and insert fresh data in one transaction
190
- // so concurrent MCP reads never see a torn graph
191
- store.transaction(() => {
192
- store.deleteEdgesForFile(rel);
193
- for (const cf of callerFiles.slice(0, CALLER_REPARSE_LIMIT)) {
194
- store.deleteOutgoingEdgesForFile(cf);
315
+ for (const f of files) {
316
+ const newHash = createHash('sha256').update(f.content).digest('hex');
317
+ if (store.getFileHash(f.rel) === newHash)
318
+ continue; // no-op autosave
319
+ // Reverse lookup BEFORE delete so we know which files call into this one.
320
+ const callerFiles = store.getCallerFiles(f.rel);
321
+ // Re-parse BEFORE mutating DB — graph stays readable (old state) during
322
+ // parse. Seed resolution with all known nodes so re-parsed callers'
323
+ // cross-file calls don't degrade to `external::`.
324
+ const resolutionNodes = store.getAllInternalNodes();
325
+ const { edges: newEdges, nodes: newNodes } = await buildGraphSubset(f.rel, f.content, callerFiles, this.rootPath, resolutionNodes);
326
+ // Atomic swap so concurrent MCP reads never see a torn graph.
327
+ store.transaction(() => {
328
+ store.deleteEdgesForFile(f.rel);
329
+ for (const cf of callerFiles.slice(0, CALLER_REPARSE_LIMIT)) {
330
+ store.deleteOutgoingEdgesForFile(cf);
331
+ }
332
+ store.deleteNodesForFile(f.rel);
333
+ store.insertNodes(newNodes);
334
+ store.insertEdges(newEdges);
335
+ store.setFileHash(f.rel, newHash);
336
+ });
337
+ changedFiles.push({ rel: f.rel, content: f.content });
338
+ for (const n of newNodes)
339
+ changedNodes.push(n);
340
+ if (this.debug) {
341
+ process.stderr.write(`[mcp-watcher] graph: ${f.rel} (+${newNodes.length} nodes, +${newEdges.length} edges, ${callerFiles.length} callers)\n`);
195
342
  }
196
- store.deleteNodesForFile(rel);
197
- store.insertNodes(newNodes);
198
- store.insertEdges(newEdges);
199
- store.setFileHash(rel, newHash);
200
- });
201
- process.stderr.write(`[mcp-watcher] updated graph: ${rel} (+${newNodes.length} nodes, +${newEdges.length} edges, ${callerFiles.length} callers re-parsed)\n`);
343
+ }
202
344
  }
203
345
  finally {
204
346
  store.close();
205
347
  }
206
348
  }
207
- // ── Signature patch ───────────────────────────────────────────────────────
208
- const contextPath = join(this.outputPath, ARTIFACT_LLM_CONTEXT);
209
- let context;
210
- try {
211
- const raw = await readFile(contextPath, 'utf-8');
212
- context = JSON.parse(raw);
349
+ else {
350
+ // No edge store yet — still refresh signatures for every candidate.
351
+ for (const f of files)
352
+ changedFiles.push({ rel: f.rel, content: f.content });
213
353
  }
214
- catch {
215
- process.stderr.write(`[mcp-watcher] no context at ${contextPath} run analyze first\n`);
354
+ if (changedFiles.length === 0)
355
+ return; // every event was a no-op autosave
356
+ // 3. Signatures: load context (shared in-memory cache), patch all changed
357
+ // files, then ONE persist + read-cache handoff (Step 2). The handoff
358
+ // means the next tool call is a cache HIT — no cold 2.1 MB re-parse.
359
+ const context = await this.loadContext();
360
+ if (!context) {
361
+ process.stderr.write(`[mcp-watcher] no context at ${this.contextPath} — run analyze first\n`);
216
362
  return;
217
363
  }
218
- const newMap = extractSignatures(rel, content);
219
364
  if (!context.signatures)
220
365
  context.signatures = [];
221
- const idx = context.signatures.findIndex(m => m.path === rel);
222
- if (idx >= 0) {
223
- context.signatures[idx] = newMap;
366
+ for (const f of changedFiles) {
367
+ const newMap = extractSignatures(f.rel, f.content);
368
+ const idx = context.signatures.findIndex((m) => m.path === f.rel);
369
+ if (idx >= 0)
370
+ context.signatures[idx] = newMap;
371
+ else
372
+ context.signatures.push(newMap);
224
373
  }
225
- else {
226
- context.signatures.push(newMap);
374
+ await this.persistContext(context);
375
+ // 4. Vector update — decoupled from signature freshness (Step 4).
376
+ const isBulk = consumedVcsBulk || changedFiles.length >= this.bulkThreshold;
377
+ if (this.embed && !this.embedDegraded && context.callGraph) {
378
+ if (opts.syncFlush) {
379
+ // Direct handleChange path: inline so callers/tests observe it.
380
+ await this.updateVectors(context, changedFiles, changedNodes);
381
+ }
382
+ else {
383
+ // Watcher path: schedule on the lower-priority embed lane. On a bulk
384
+ // event this still collapses to a single deferred pass.
385
+ this.scheduleEmbed(context, changedFiles, changedNodes);
386
+ }
387
+ }
388
+ // 5. One summary line per batch (Step 6). Per-file detail is behind debug.
389
+ const n = changedFiles.length;
390
+ process.stderr.write(`[mcp-watcher] ${isBulk ? `coalesced ${n} changes` : `updated ${n} file${n === 1 ? '' : 's'}`} (${Date.now() - t0}ms)\n`);
391
+ }
392
+ // ── llm-context load + persistence + read-cache handoff (Step 2) ─────────────
393
+ /**
394
+ * True when this watcher writes to the canonical `<root>/.openlore/analysis`
395
+ * layout that the MCP read handlers cache against. Only then is the shared
396
+ * in-memory read cache (primeContextCache) the right channel to prime; a custom
397
+ * `outputPath` (tests / non-standard installs) writes only to disk.
398
+ */
399
+ get usesStandardLayout() {
400
+ return this.outputPath === join(this.rootPath, OPENLORE_DIR, OPENLORE_ANALYSIS_SUBDIR);
401
+ }
402
+ /**
403
+ * Load the context the watcher is about to patch. This ALWAYS reads fresh from
404
+ * disk — never through the shared read cache — because the cache is a read-path
405
+ * (tool-call) optimization, and patching a possibly-stale cached object could
406
+ * silently drop signatures written by a concurrent `analyze` between events.
407
+ * The writer reads ground truth; persistContext then primes the read cache with
408
+ * the result so the next tool call is still a hit (Step 2a, G1).
409
+ */
410
+ async loadContext() {
411
+ try {
412
+ const raw = await readFile(this.contextPath, 'utf-8');
413
+ return JSON.parse(raw);
414
+ }
415
+ catch {
416
+ return null;
417
+ }
418
+ }
419
+ async persistContext(context) {
420
+ // Strip the runtime-only EdgeStore handle before serializing.
421
+ const { edgeStore: _edgeStore, ...serializable } = context;
422
+ void _edgeStore;
423
+ await writeFile(this.contextPath, JSON.stringify(serializable, null, 2), 'utf-8');
424
+ // Hand the patched object back to the read cache, aligned to the new on-disk
425
+ // mtime, so the next tool call is a cache hit (no cold re-parse). This is the
426
+ // fix for root-cause item 2 (mtime bump forcing a full re-read). Only valid
427
+ // for the canonical layout the read handlers cache against.
428
+ if (this.usesStandardLayout)
429
+ await primeContextCache(this.rootPath, context);
430
+ }
431
+ // ── Embedding lane (Step 4) ──────────────────────────────────────────────────
432
+ scheduleEmbed(context, changedFiles, nodes) {
433
+ for (const f of changedFiles)
434
+ this.embedFiles.set(f.rel, f.content);
435
+ for (const node of nodes)
436
+ this.embedNodes.set(node.id, node);
437
+ this.lastEmbedContext = context;
438
+ if (this.embedTimer)
439
+ clearTimeout(this.embedTimer);
440
+ // Slightly behind the signature debounce so structural freshness always lands
441
+ // first and multiple flushes batch into one embed pass.
442
+ this.embedTimer = setTimeout(() => void this.runEmbedLane(), this.debounceMs);
443
+ }
444
+ async runEmbedLane() {
445
+ if (this.embedRunning) {
446
+ // Re-arm: drain again once the in-flight pass finishes.
447
+ this.embedTimer = setTimeout(() => void this.runEmbedLane(), this.debounceMs);
448
+ return;
449
+ }
450
+ if (this.embedFiles.size === 0 || !this.lastEmbedContext)
451
+ return;
452
+ const changedFiles = Array.from(this.embedFiles, ([rel, content]) => ({ rel, content }));
453
+ const nodes = Array.from(this.embedNodes.values());
454
+ const context = this.lastEmbedContext;
455
+ this.embedFiles.clear();
456
+ this.embedNodes.clear();
457
+ this.embedRunning = true;
458
+ try {
459
+ await this.updateVectors(context, changedFiles, nodes);
460
+ }
461
+ catch (err) {
462
+ process.stderr.write(`[mcp-watcher] embed error: ${err.message}\n`);
227
463
  }
228
- await writeFile(contextPath, JSON.stringify(context, null, 2), 'utf-8');
229
- process.stderr.write(`[mcp-watcher] re-indexed signatures: ${rel}\n`);
230
- // Incremental vector re-embed — silently skipped if no embedding service available
231
- if (context.callGraph) {
232
- await this.reEmbed(context, rel, content);
464
+ finally {
465
+ this.embedRunning = false;
466
+ if (this.embedFiles.size > 0) {
467
+ this.embedTimer = setTimeout(() => void this.runEmbedLane(), this.debounceMs);
468
+ }
233
469
  }
234
470
  }
235
- // ── Embed step ─────────────────────────────────────────────────────────────
236
- async reEmbed(context, rel, content) {
471
+ /**
472
+ * Row-level vector update for the changed files only (Step 3). Falls back to a
473
+ * silent no-op when no embedding service and no index are available.
474
+ */
475
+ async updateVectors(context, changedFiles, changedNodes) {
237
476
  try {
238
477
  const { VectorIndex } = await import('../analyzer/vector-index.js');
239
478
  const { EmbeddingService } = await import('../analyzer/embedding-service.js');
@@ -248,24 +487,64 @@ export class McpWatcher {
248
487
  const cfg = await readOpenLoreConfig(this.rootPath);
249
488
  embedSvc = cfg ? EmbeddingService.fromConfig(cfg) : null;
250
489
  }
251
- // embedSvc may be null: VectorIndex.build then refreshes the BM25-only
252
- // corpus rather than re-embedding. Keeps the keyword index live in watch mode.
490
+ // embedSvc may be null: updateFiles then refreshes the BM25-only corpus
491
+ // rather than re-embedding, keeping the keyword index live in watch mode.
253
492
  const cg = context.callGraph;
254
- const hubIds = new Set((cg.hubFunctions ?? []).map(f => f.id));
255
- const entryIds = new Set((cg.entryPoints ?? []).map(f => f.id));
256
- const fileContents = new Map([[rel, content]]);
257
- const { embedded, reused, total, hasEmbeddings } = await VectorIndex.build(this.outputPath, cg.nodes, context.signatures ?? [], hubIds, entryIds, embedSvc, fileContents,
258
- /* incremental */ true);
259
- process.stderr.write(hasEmbeddings
260
- ? `[mcp-watcher] re-embedded ${rel}: ${embedded} new, ${reused} reused\n`
261
- : `[mcp-watcher] refreshed BM25 index for ${rel}: ${total} functions\n`);
493
+ if (!cg)
494
+ return;
495
+ const hubIds = new Set((cg.hubFunctions ?? []).map((f) => f.id));
496
+ const entryIds = new Set((cg.entryPoints ?? []).map((f) => f.id));
497
+ const changedFilePaths = new Set(changedFiles.map((f) => f.rel));
498
+ const fileContents = new Map(changedFiles.map((f) => [f.rel, f.content]));
499
+ // Prefer the freshly-parsed nodes; fall back to the (possibly stale)
500
+ // call-graph nodes for the changed files when no edge store seeded them.
501
+ const nodes = changedNodes.length > 0
502
+ ? changedNodes
503
+ : (cg.nodes ?? []).filter((n) => changedFilePaths.has(n.filePath));
504
+ const { embedded, reused, total, hasEmbeddings } = await VectorIndex.updateFiles(this.outputPath, nodes, changedFilePaths, context.signatures ?? [], hubIds, entryIds, embedSvc, fileContents);
505
+ if (this.debug) {
506
+ process.stderr.write(hasEmbeddings
507
+ ? `[mcp-watcher] re-embedded ${changedFilePaths.size} file(s): ${embedded} new, ${reused} reused\n`
508
+ : `[mcp-watcher] refreshed BM25 index for ${changedFilePaths.size} file(s): ${total} functions\n`);
509
+ }
262
510
  }
263
511
  catch (err) {
264
512
  process.stderr.write(`[mcp-watcher] embed error: ${err.message}\n`);
265
513
  }
266
514
  }
515
+ // ── Helpers ──────────────────────────────────────────────────────────────────
516
+ /** Bounded count of watched source files; stops early once `cap` is exceeded. */
517
+ async countSourceFiles(cap) {
518
+ let count = 0;
519
+ const walk = async (dir) => {
520
+ if (count > cap)
521
+ return;
522
+ let entries;
523
+ try {
524
+ entries = await readdir(dir, { withFileTypes: true });
525
+ }
526
+ catch {
527
+ return;
528
+ }
529
+ for (const entry of entries) {
530
+ if (count > cap)
531
+ return;
532
+ const abs = join(dir, entry.name);
533
+ const rel = relative(this.rootPath, abs);
534
+ if (entry.isDirectory()) {
535
+ if (!isIgnoredRelPath(rel))
536
+ await walk(abs);
537
+ }
538
+ else if (entry.isFile() && SOURCE_EXTENSIONS.test(entry.name) && !isIgnoredRelPath(rel)) {
539
+ count++;
540
+ }
541
+ }
542
+ };
543
+ await walk(this.rootPath);
544
+ return count;
545
+ }
267
546
  }
268
- // ── Helpers ───────────────────────────────────────────────────────────────────
547
+ // ── Module helpers ──────────────────────────────────────────────────────────────
269
548
  function isTestFile(relPath) {
270
549
  return (relPath.includes('.test.') ||
271
550
  relPath.includes('.spec.') ||
@@ -300,7 +579,7 @@ async function buildGraphSubset(changedRel, changedContent, callerFiles, rootDir
300
579
  const builder = new CallGraphBuilder();
301
580
  const result = await builder.build(files, undefined, undefined, resolutionNodes);
302
581
  // Only return nodes from changedFile — callerFiles nodes are already in DB and unchanged
303
- const changedNodes = Array.from(result.nodes.values()).filter(n => n.filePath === changedRel);
582
+ const changedNodes = Array.from(result.nodes.values()).filter((n) => n.filePath === changedRel);
304
583
  return { edges: result.edges, nodes: changedNodes };
305
584
  }
306
585
  //# sourceMappingURL=mcp-watcher.js.map