openlore 2.0.5 → 2.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/commands/mcp.d.ts.map +1 -1
- package/dist/cli/commands/mcp.js +3 -0
- package/dist/cli/commands/mcp.js.map +1 -1
- package/dist/constants.d.ts +22 -0
- package/dist/constants.d.ts.map +1 -1
- package/dist/constants.js +27 -0
- package/dist/constants.js.map +1 -1
- package/dist/core/analyzer/vector-index.d.ts +20 -0
- package/dist/core/analyzer/vector-index.d.ts.map +1 -1
- package/dist/core/analyzer/vector-index.js +173 -0
- package/dist/core/analyzer/vector-index.js.map +1 -1
- package/dist/core/services/mcp-handlers/utils.d.ts +16 -0
- package/dist/core/services/mcp-handlers/utils.d.ts.map +1 -1
- package/dist/core/services/mcp-handlers/utils.js +32 -0
- package/dist/core/services/mcp-handlers/utils.js.map +1 -1
- package/dist/core/services/mcp-watcher.d.ts +90 -6
- package/dist/core/services/mcp-watcher.d.ts.map +1 -1
- package/dist/core/services/mcp-watcher.js +380 -101
- package/dist/core/services/mcp-watcher.js.map +1 -1
- package/examples/drift-demo/package.json +1 -1
- package/package.json +2 -1
|
@@ -8,14 +8,29 @@
|
|
|
8
8
|
* The call graph is deliberately excluded — rebuilding it requires full
|
|
9
9
|
* tree-sitter analysis of all call sites and is too expensive for a watch loop.
|
|
10
10
|
* It stays current via the post-commit hook (openlore analyze --force --embed).
|
|
11
|
+
*
|
|
12
|
+
* Spec 13.1 (watch-mode performance): freshness is O(change), not O(repo).
|
|
13
|
+
* • Per-file events COALESCE into one batched flush (single debounce timer +
|
|
14
|
+
* hard max-batch ceiling), so a burst / branch-switch runs the pipeline once,
|
|
15
|
+
* not once per file.
|
|
16
|
+
* • The patched llm-context is handed to the MCP read cache in place
|
|
17
|
+
* (primeContextCache), so the next tool call is a cache HIT — no 2.1 MB
|
|
18
|
+
* cold re-parse — even after the disk write.
|
|
19
|
+
* • Vector updates are row-level (VectorIndex.updateFiles), not a full-corpus
|
|
20
|
+
* read+overwrite, and run on a separate lower-priority lane so signature
|
|
21
|
+
* freshness never blocks on embedding.
|
|
22
|
+
* • VCS-flood / bulk batches are detected and collapsed to a single refresh.
|
|
23
|
+
* • stderr emits one summary line per batch by default (per-file detail behind
|
|
24
|
+
* OPENLORE_WATCH_DEBUG).
|
|
11
25
|
*/
|
|
12
|
-
import { readFile, writeFile } from 'node:fs/promises';
|
|
26
|
+
import { readFile, writeFile, readdir } from 'node:fs/promises';
|
|
13
27
|
import { createHash } from 'node:crypto';
|
|
14
28
|
import { join, relative } from 'node:path';
|
|
15
29
|
import chokidar from 'chokidar';
|
|
16
30
|
import { extractSignatures, detectLanguage } from '../analyzer/signature-extractor.js';
|
|
17
31
|
import { EdgeStore } from './edge-store.js';
|
|
18
|
-
import {
|
|
32
|
+
import { primeContextCache } from './mcp-handlers/utils.js';
|
|
33
|
+
import { OPENLORE_DIR, OPENLORE_ANALYSIS_SUBDIR, ARTIFACT_LLM_CONTEXT, WATCH_DEBOUNCE_MS, WATCH_MAX_BATCH_MS, WATCH_BULK_THRESHOLD, WATCH_EMBED_FILE_CEILING, WATCH_VCS_SETTLE_MS, } from '../../constants.js';
|
|
19
34
|
// Language names treated as call-graph capable.
// NOTE(review): the consumer of this set is outside the visible hunk — confirm
// the names match what detectLanguage() reports before adding entries.
const CALL_GRAPH_LANGS = new Set([
    'Python',
    'TypeScript',
    'JavaScript',
    'Go',
    'Rust',
    'Ruby',
    'Java',
    'C++',
    'Swift',
]);
|
|
@@ -82,20 +97,54 @@ export function isIgnoredRelPath(relPath) {
|
|
|
82
97
|
export class McpWatcher {
|
|
83
98
|
rootPath;
|
|
84
99
|
outputPath;
|
|
100
|
+
contextPath;
|
|
85
101
|
debounceMs;
|
|
102
|
+
maxBatchMs;
|
|
103
|
+
bulkThreshold;
|
|
104
|
+
embedFileCeiling;
|
|
86
105
|
extraIgnore;
|
|
106
|
+
debug;
|
|
87
107
|
fsWatcher;
|
|
88
|
-
|
|
89
|
-
|
|
108
|
+
gitWatcher;
|
|
109
|
+
// ── Coalescing queue (Step 1) ──────────────────────────────────────────────
|
|
110
|
+
pending = new Set(); // absolute paths awaiting a flush
|
|
111
|
+
debounceTimer;
|
|
112
|
+
maxBatchTimer;
|
|
113
|
+
running = false; // single-flight for the signature flush
|
|
114
|
+
vcsBulkFlag = false; // set by the .git ref watcher
|
|
115
|
+
// ── Embedding lane (Step 4 — decoupled, lower priority) ─────────────────────
|
|
116
|
+
embed;
|
|
117
|
+
embedDegraded = false; // auto-degraded on a too-large tree
|
|
118
|
+
embedFiles = new Map(); // rel → content awaiting embed
|
|
119
|
+
embedNodes = new Map(); // id → node awaiting embed
|
|
120
|
+
embedTimer;
|
|
121
|
+
embedRunning = false;
|
|
122
|
+
lastEmbedContext;
|
|
90
123
|
constructor(options) {
|
|
91
124
|
this.rootPath = options.rootPath;
|
|
92
125
|
this.outputPath = options.outputPath
|
|
93
126
|
?? join(options.rootPath, OPENLORE_DIR, OPENLORE_ANALYSIS_SUBDIR);
|
|
94
|
-
this.
|
|
127
|
+
this.contextPath = join(this.outputPath, ARTIFACT_LLM_CONTEXT);
|
|
128
|
+
this.debounceMs = options.debounceMs ?? WATCH_DEBOUNCE_MS;
|
|
129
|
+
this.maxBatchMs = options.maxBatchMs ?? WATCH_MAX_BATCH_MS;
|
|
130
|
+
this.bulkThreshold = options.bulkThreshold ?? WATCH_BULK_THRESHOLD;
|
|
131
|
+
this.embedFileCeiling = options.embedFileCeiling ?? WATCH_EMBED_FILE_CEILING;
|
|
132
|
+
this.embed = options.embed ?? true;
|
|
95
133
|
this.extraIgnore = options.ignore ?? [];
|
|
134
|
+
this.debug = !!process.env.OPENLORE_WATCH_DEBUG;
|
|
96
135
|
}
|
|
97
136
|
// ── Lifecycle ──────────────────────────────────────────────────────────────
|
|
98
137
|
async start() {
|
|
138
|
+
// Auto-degrade live embedding on very large trees (Step 4). Counting is
|
|
139
|
+
// bounded — it stops as soon as the ceiling is exceeded.
|
|
140
|
+
if (this.embed) {
|
|
141
|
+
const count = await this.countSourceFiles(this.embedFileCeiling + 1);
|
|
142
|
+
if (count > this.embedFileCeiling) {
|
|
143
|
+
this.embedDegraded = true;
|
|
144
|
+
process.stderr.write(`[mcp-watcher] ${count}+ source files exceed the live-embed ceiling ` +
|
|
145
|
+
`(${this.embedFileCeiling}); running signatures-only — embeddings refresh at commit\n`);
|
|
146
|
+
}
|
|
147
|
+
}
|
|
99
148
|
await new Promise((resolve, reject) => {
|
|
100
149
|
const extraIgnore = this.extraIgnore;
|
|
101
150
|
const rootPath = this.rootPath;
|
|
@@ -115,125 +164,315 @@ export class McpWatcher {
|
|
|
115
164
|
});
|
|
116
165
|
this.fsWatcher.on('change', (absPath) => {
|
|
117
166
|
if (SOURCE_EXTENSIONS.test(absPath)) {
|
|
118
|
-
this.
|
|
167
|
+
this.enqueue(absPath);
|
|
119
168
|
}
|
|
120
169
|
});
|
|
121
170
|
this.fsWatcher.on('ready', () => resolve());
|
|
122
171
|
this.fsWatcher.on('error', (err) => reject(err));
|
|
123
172
|
});
|
|
124
|
-
|
|
173
|
+
// Best-effort VCS-flood detection (Step 5): a branch switch / rebase / merge
|
|
174
|
+
// bumps these refs. We never recurse into .git (it stays ignored above); we
|
|
175
|
+
// watch only these specific files, then collapse the churn into one refresh.
|
|
176
|
+
try {
|
|
177
|
+
const gitDir = join(this.rootPath, '.git');
|
|
178
|
+
const refs = ['HEAD', 'index', 'MERGE_HEAD', 'ORIG_HEAD'].map((f) => join(gitDir, f));
|
|
179
|
+
this.gitWatcher = chokidar.watch(refs, {
|
|
180
|
+
persistent: true,
|
|
181
|
+
ignoreInitial: true,
|
|
182
|
+
followSymlinks: false,
|
|
183
|
+
});
|
|
184
|
+
this.gitWatcher.on('all', () => this.onVcsEvent());
|
|
185
|
+
}
|
|
186
|
+
catch {
|
|
187
|
+
// no .git, or watch failed — VCS detection falls back to the batch-size
|
|
188
|
+
// threshold in handleBatch, which is enough for G3.
|
|
189
|
+
}
|
|
190
|
+
process.stderr.write(`[mcp-watcher] watching ${this.rootPath}` +
|
|
191
|
+
`${this.embed && !this.embedDegraded ? '' : ' (signatures-only)'}\n`);
|
|
125
192
|
}
|
|
126
193
|
async stop() {
|
|
127
|
-
|
|
128
|
-
clearTimeout(
|
|
129
|
-
this.
|
|
194
|
+
if (this.debounceTimer)
|
|
195
|
+
clearTimeout(this.debounceTimer);
|
|
196
|
+
if (this.maxBatchTimer)
|
|
197
|
+
clearTimeout(this.maxBatchTimer);
|
|
198
|
+
if (this.embedTimer)
|
|
199
|
+
clearTimeout(this.embedTimer);
|
|
200
|
+
this.debounceTimer = this.maxBatchTimer = this.embedTimer = undefined;
|
|
201
|
+
// Best-effort: persist anything still queued so a save right before shutdown
|
|
202
|
+
// is not lost.
|
|
203
|
+
if (this.pending.size > 0 && !this.running) {
|
|
204
|
+
const batch = Array.from(this.pending);
|
|
205
|
+
this.pending.clear();
|
|
206
|
+
try {
|
|
207
|
+
await this.handleBatch(batch, { syncFlush: true });
|
|
208
|
+
}
|
|
209
|
+
catch { /* ignore */ }
|
|
210
|
+
}
|
|
130
211
|
await this.fsWatcher?.close();
|
|
212
|
+
await this.gitWatcher?.close();
|
|
131
213
|
process.stderr.write('[mcp-watcher] stopped\n');
|
|
132
214
|
}
|
|
133
|
-
// ──
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
215
|
+
// ── Coalescing (Step 1) ──────────────────────────────────────────────────────
|
|
216
|
+
/**
|
|
217
|
+
* Add a changed path to the pending set and (re)arm a single debounce timer,
|
|
218
|
+
* plus a one-shot hard ceiling so a continuous stream still flushes.
|
|
219
|
+
*/
|
|
220
|
+
enqueue(absPath) {
|
|
221
|
+
this.pending.add(absPath);
|
|
222
|
+
if (this.debounceTimer)
|
|
223
|
+
clearTimeout(this.debounceTimer);
|
|
224
|
+
this.debounceTimer = setTimeout(() => this.flush(), this.debounceMs);
|
|
225
|
+
if (!this.maxBatchTimer) {
|
|
226
|
+
this.maxBatchTimer = setTimeout(() => this.flush(), this.maxBatchMs);
|
|
227
|
+
}
|
|
228
|
+
}
|
|
229
|
+
/** A .git ref changed — settle, then flush whatever changed as one bulk batch. */
|
|
230
|
+
onVcsEvent() {
|
|
231
|
+
this.vcsBulkFlag = true;
|
|
232
|
+
if (this.debounceTimer)
|
|
233
|
+
clearTimeout(this.debounceTimer);
|
|
234
|
+
this.debounceTimer = setTimeout(() => this.flush(), WATCH_VCS_SETTLE_MS);
|
|
235
|
+
if (this.debug) {
|
|
236
|
+
process.stderr.write('[mcp-watcher] VCS operation detected — coalescing into one refresh\n');
|
|
237
|
+
}
|
|
238
|
+
}
|
|
239
|
+
/**
|
|
240
|
+
* Drain the pending set into a single batch. Single-flight: if a flush is
|
|
241
|
+
* already running, leave the new paths in `pending` and reschedule once it
|
|
242
|
+
* finishes — never interleave two flushes.
|
|
243
|
+
*/
|
|
244
|
+
flush() {
|
|
245
|
+
if (this.debounceTimer) {
|
|
246
|
+
clearTimeout(this.debounceTimer);
|
|
247
|
+
this.debounceTimer = undefined;
|
|
248
|
+
}
|
|
249
|
+
if (this.maxBatchTimer) {
|
|
250
|
+
clearTimeout(this.maxBatchTimer);
|
|
251
|
+
this.maxBatchTimer = undefined;
|
|
252
|
+
}
|
|
253
|
+
if (this.running)
|
|
254
|
+
return; // a follow-up is scheduled in finally{}
|
|
255
|
+
if (this.pending.size === 0)
|
|
256
|
+
return;
|
|
257
|
+
const batch = Array.from(this.pending);
|
|
258
|
+
this.pending.clear();
|
|
259
|
+
this.running = true;
|
|
260
|
+
this.handleBatch(batch)
|
|
261
|
+
.catch((err) => process.stderr.write(`[mcp-watcher] error: ${err.message}\n`))
|
|
262
|
+
.finally(() => {
|
|
263
|
+
this.running = false;
|
|
264
|
+
if (this.pending.size > 0) {
|
|
265
|
+
this.debounceTimer = setTimeout(() => this.flush(), this.debounceMs);
|
|
144
266
|
}
|
|
145
|
-
|
|
146
|
-
this.handleChange(absPath)
|
|
147
|
-
.catch(err => process.stderr.write(`[mcp-watcher] error: ${err.message}\n`))
|
|
148
|
-
.finally(() => { this.running = false; });
|
|
149
|
-
}, this.debounceMs);
|
|
150
|
-
this.timers.set(absPath, t);
|
|
267
|
+
});
|
|
151
268
|
}
|
|
152
269
|
// ── Core re-index ──────────────────────────────────────────────────────────
|
|
153
270
|
/**
|
|
154
|
-
* Re-index a single changed file.
|
|
155
|
-
*
|
|
271
|
+
* Re-index a single changed file. Exposed for unit testing without needing a
|
|
272
|
+
* real file watcher; flushes synchronously so callers observe the update on
|
|
273
|
+
* disk immediately. Internally this is just a batch of one.
|
|
156
274
|
*/
|
|
157
275
|
async handleChange(absPath) {
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
276
|
+
await this.handleBatch([absPath], { syncFlush: true });
|
|
277
|
+
}
|
|
278
|
+
/**
|
|
279
|
+
* Process a coalesced batch of changed files as ONE pipeline pass:
|
|
280
|
+
* • per-file incremental edge update (content-hash skip), all under one open
|
|
281
|
+
* EdgeStore;
|
|
282
|
+
* • ONE signature patch + ONE llm-context persist + ONE read-cache handoff;
|
|
283
|
+
* • ONE vector update (inline when syncFlush, else on the embed lane).
|
|
284
|
+
*/
|
|
285
|
+
async handleBatch(absPaths, opts = {}) {
|
|
286
|
+
const t0 = Date.now();
|
|
287
|
+
const consumedVcsBulk = this.vcsBulkFlag;
|
|
288
|
+
this.vcsBulkFlag = false;
|
|
289
|
+
// 1. Resolve + read candidate files (skip tests / unknown langs / deleted).
|
|
290
|
+
const files = [];
|
|
291
|
+
for (const abs of absPaths) {
|
|
292
|
+
const rel = relative(this.rootPath, abs);
|
|
293
|
+
if (isTestFile(rel))
|
|
294
|
+
continue;
|
|
295
|
+
if (detectLanguage(rel) === 'unknown')
|
|
296
|
+
continue;
|
|
297
|
+
let content;
|
|
298
|
+
try {
|
|
299
|
+
content = await readFile(abs, 'utf-8');
|
|
300
|
+
}
|
|
301
|
+
catch {
|
|
302
|
+
continue; // file may have been deleted between the event and now
|
|
303
|
+
}
|
|
304
|
+
files.push({ rel, abs, content });
|
|
171
305
|
}
|
|
172
|
-
|
|
306
|
+
if (files.length === 0)
|
|
307
|
+
return;
|
|
308
|
+
// 2. Incremental edge update (CGC _handle_modification algorithm), one open
|
|
309
|
+
// store for the whole batch. Content-hash skip drops no-op autosaves.
|
|
310
|
+
const changedFiles = [];
|
|
311
|
+
const changedNodes = [];
|
|
173
312
|
if (EdgeStore.exists(this.outputPath)) {
|
|
174
313
|
const store = EdgeStore.open(EdgeStore.dbPath(this.outputPath));
|
|
175
314
|
try {
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
store.
|
|
315
|
+
for (const f of files) {
|
|
316
|
+
const newHash = createHash('sha256').update(f.content).digest('hex');
|
|
317
|
+
if (store.getFileHash(f.rel) === newHash)
|
|
318
|
+
continue; // no-op autosave
|
|
319
|
+
// Reverse lookup BEFORE delete so we know which files call into this one.
|
|
320
|
+
const callerFiles = store.getCallerFiles(f.rel);
|
|
321
|
+
// Re-parse BEFORE mutating DB — graph stays readable (old state) during
|
|
322
|
+
// parse. Seed resolution with all known nodes so re-parsed callers'
|
|
323
|
+
// cross-file calls don't degrade to `external::`.
|
|
324
|
+
const resolutionNodes = store.getAllInternalNodes();
|
|
325
|
+
const { edges: newEdges, nodes: newNodes } = await buildGraphSubset(f.rel, f.content, callerFiles, this.rootPath, resolutionNodes);
|
|
326
|
+
// Atomic swap so concurrent MCP reads never see a torn graph.
|
|
327
|
+
store.transaction(() => {
|
|
328
|
+
store.deleteEdgesForFile(f.rel);
|
|
329
|
+
for (const cf of callerFiles.slice(0, CALLER_REPARSE_LIMIT)) {
|
|
330
|
+
store.deleteOutgoingEdgesForFile(cf);
|
|
331
|
+
}
|
|
332
|
+
store.deleteNodesForFile(f.rel);
|
|
333
|
+
store.insertNodes(newNodes);
|
|
334
|
+
store.insertEdges(newEdges);
|
|
335
|
+
store.setFileHash(f.rel, newHash);
|
|
336
|
+
});
|
|
337
|
+
changedFiles.push({ rel: f.rel, content: f.content });
|
|
338
|
+
for (const n of newNodes)
|
|
339
|
+
changedNodes.push(n);
|
|
340
|
+
if (this.debug) {
|
|
341
|
+
process.stderr.write(`[mcp-watcher] graph: ${f.rel} (+${newNodes.length} nodes, +${newEdges.length} edges, ${callerFiles.length} callers)\n`);
|
|
195
342
|
}
|
|
196
|
-
|
|
197
|
-
store.insertNodes(newNodes);
|
|
198
|
-
store.insertEdges(newEdges);
|
|
199
|
-
store.setFileHash(rel, newHash);
|
|
200
|
-
});
|
|
201
|
-
process.stderr.write(`[mcp-watcher] updated graph: ${rel} (+${newNodes.length} nodes, +${newEdges.length} edges, ${callerFiles.length} callers re-parsed)\n`);
|
|
343
|
+
}
|
|
202
344
|
}
|
|
203
345
|
finally {
|
|
204
346
|
store.close();
|
|
205
347
|
}
|
|
206
348
|
}
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
const raw = await readFile(contextPath, 'utf-8');
|
|
212
|
-
context = JSON.parse(raw);
|
|
349
|
+
else {
|
|
350
|
+
// No edge store yet — still refresh signatures for every candidate.
|
|
351
|
+
for (const f of files)
|
|
352
|
+
changedFiles.push({ rel: f.rel, content: f.content });
|
|
213
353
|
}
|
|
214
|
-
|
|
215
|
-
|
|
354
|
+
if (changedFiles.length === 0)
|
|
355
|
+
return; // every event was a no-op autosave
|
|
356
|
+
// 3. Signatures: load context (shared in-memory cache), patch all changed
|
|
357
|
+
// files, then ONE persist + read-cache handoff (Step 2). The handoff
|
|
358
|
+
// means the next tool call is a cache HIT — no cold 2.1 MB re-parse.
|
|
359
|
+
const context = await this.loadContext();
|
|
360
|
+
if (!context) {
|
|
361
|
+
process.stderr.write(`[mcp-watcher] no context at ${this.contextPath} — run analyze first\n`);
|
|
216
362
|
return;
|
|
217
363
|
}
|
|
218
|
-
const newMap = extractSignatures(rel, content);
|
|
219
364
|
if (!context.signatures)
|
|
220
365
|
context.signatures = [];
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
context.signatures
|
|
366
|
+
for (const f of changedFiles) {
|
|
367
|
+
const newMap = extractSignatures(f.rel, f.content);
|
|
368
|
+
const idx = context.signatures.findIndex((m) => m.path === f.rel);
|
|
369
|
+
if (idx >= 0)
|
|
370
|
+
context.signatures[idx] = newMap;
|
|
371
|
+
else
|
|
372
|
+
context.signatures.push(newMap);
|
|
224
373
|
}
|
|
225
|
-
|
|
226
|
-
|
|
374
|
+
await this.persistContext(context);
|
|
375
|
+
// 4. Vector update — decoupled from signature freshness (Step 4).
|
|
376
|
+
const isBulk = consumedVcsBulk || changedFiles.length >= this.bulkThreshold;
|
|
377
|
+
if (this.embed && !this.embedDegraded && context.callGraph) {
|
|
378
|
+
if (opts.syncFlush) {
|
|
379
|
+
// Direct handleChange path: inline so callers/tests observe it.
|
|
380
|
+
await this.updateVectors(context, changedFiles, changedNodes);
|
|
381
|
+
}
|
|
382
|
+
else {
|
|
383
|
+
// Watcher path: schedule on the lower-priority embed lane. On a bulk
|
|
384
|
+
// event this still collapses to a single deferred pass.
|
|
385
|
+
this.scheduleEmbed(context, changedFiles, changedNodes);
|
|
386
|
+
}
|
|
387
|
+
}
|
|
388
|
+
// 5. One summary line per batch (Step 6). Per-file detail is behind debug.
|
|
389
|
+
const n = changedFiles.length;
|
|
390
|
+
process.stderr.write(`[mcp-watcher] ${isBulk ? `coalesced ${n} changes` : `updated ${n} file${n === 1 ? '' : 's'}`} (${Date.now() - t0}ms)\n`);
|
|
391
|
+
}
|
|
392
|
+
// ── llm-context load + persistence + read-cache handoff (Step 2) ─────────────
|
|
393
|
+
/**
|
|
394
|
+
* True when this watcher writes to the canonical `<root>/.openlore/analysis`
|
|
395
|
+
* layout that the MCP read handlers cache against. Only then is the shared
|
|
396
|
+
* in-memory read cache (primeContextCache) the right channel to prime; a custom
|
|
397
|
+
* `outputPath` (tests / non-standard installs) writes only to disk.
|
|
398
|
+
*/
|
|
399
|
+
get usesStandardLayout() {
|
|
400
|
+
return this.outputPath === join(this.rootPath, OPENLORE_DIR, OPENLORE_ANALYSIS_SUBDIR);
|
|
401
|
+
}
|
|
402
|
+
/**
|
|
403
|
+
* Load the context the watcher is about to patch. This ALWAYS reads fresh from
|
|
404
|
+
* disk — never through the shared read cache — because the cache is a read-path
|
|
405
|
+
* (tool-call) optimization, and patching a possibly-stale cached object could
|
|
406
|
+
* silently drop signatures written by a concurrent `analyze` between events.
|
|
407
|
+
* The writer reads ground truth; persistContext then primes the read cache with
|
|
408
|
+
* the result so the next tool call is still a hit (Step 2a, G1).
|
|
409
|
+
*/
|
|
410
|
+
async loadContext() {
|
|
411
|
+
try {
|
|
412
|
+
const raw = await readFile(this.contextPath, 'utf-8');
|
|
413
|
+
return JSON.parse(raw);
|
|
414
|
+
}
|
|
415
|
+
catch {
|
|
416
|
+
return null;
|
|
417
|
+
}
|
|
418
|
+
}
|
|
419
|
+
async persistContext(context) {
|
|
420
|
+
// Strip the runtime-only EdgeStore handle before serializing.
|
|
421
|
+
const { edgeStore: _edgeStore, ...serializable } = context;
|
|
422
|
+
void _edgeStore;
|
|
423
|
+
await writeFile(this.contextPath, JSON.stringify(serializable, null, 2), 'utf-8');
|
|
424
|
+
// Hand the patched object back to the read cache, aligned to the new on-disk
|
|
425
|
+
// mtime, so the next tool call is a cache hit (no cold re-parse). This is the
|
|
426
|
+
// fix for root-cause item 2 (mtime bump forcing a full re-read). Only valid
|
|
427
|
+
// for the canonical layout the read handlers cache against.
|
|
428
|
+
if (this.usesStandardLayout)
|
|
429
|
+
await primeContextCache(this.rootPath, context);
|
|
430
|
+
}
|
|
431
|
+
// ── Embedding lane (Step 4) ──────────────────────────────────────────────────
|
|
432
|
+
scheduleEmbed(context, changedFiles, nodes) {
|
|
433
|
+
for (const f of changedFiles)
|
|
434
|
+
this.embedFiles.set(f.rel, f.content);
|
|
435
|
+
for (const node of nodes)
|
|
436
|
+
this.embedNodes.set(node.id, node);
|
|
437
|
+
this.lastEmbedContext = context;
|
|
438
|
+
if (this.embedTimer)
|
|
439
|
+
clearTimeout(this.embedTimer);
|
|
440
|
+
// Slightly behind the signature debounce so structural freshness always lands
|
|
441
|
+
// first and multiple flushes batch into one embed pass.
|
|
442
|
+
this.embedTimer = setTimeout(() => void this.runEmbedLane(), this.debounceMs);
|
|
443
|
+
}
|
|
444
|
+
async runEmbedLane() {
|
|
445
|
+
if (this.embedRunning) {
|
|
446
|
+
// Re-arm: drain again once the in-flight pass finishes.
|
|
447
|
+
this.embedTimer = setTimeout(() => void this.runEmbedLane(), this.debounceMs);
|
|
448
|
+
return;
|
|
449
|
+
}
|
|
450
|
+
if (this.embedFiles.size === 0 || !this.lastEmbedContext)
|
|
451
|
+
return;
|
|
452
|
+
const changedFiles = Array.from(this.embedFiles, ([rel, content]) => ({ rel, content }));
|
|
453
|
+
const nodes = Array.from(this.embedNodes.values());
|
|
454
|
+
const context = this.lastEmbedContext;
|
|
455
|
+
this.embedFiles.clear();
|
|
456
|
+
this.embedNodes.clear();
|
|
457
|
+
this.embedRunning = true;
|
|
458
|
+
try {
|
|
459
|
+
await this.updateVectors(context, changedFiles, nodes);
|
|
460
|
+
}
|
|
461
|
+
catch (err) {
|
|
462
|
+
process.stderr.write(`[mcp-watcher] embed error: ${err.message}\n`);
|
|
227
463
|
}
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
464
|
+
finally {
|
|
465
|
+
this.embedRunning = false;
|
|
466
|
+
if (this.embedFiles.size > 0) {
|
|
467
|
+
this.embedTimer = setTimeout(() => void this.runEmbedLane(), this.debounceMs);
|
|
468
|
+
}
|
|
233
469
|
}
|
|
234
470
|
}
|
|
235
|
-
|
|
236
|
-
|
|
471
|
+
/**
|
|
472
|
+
* Row-level vector update for the changed files only (Step 3). Falls back to a
|
|
473
|
+
* silent no-op when no embedding service and no index are available.
|
|
474
|
+
*/
|
|
475
|
+
async updateVectors(context, changedFiles, changedNodes) {
|
|
237
476
|
try {
|
|
238
477
|
const { VectorIndex } = await import('../analyzer/vector-index.js');
|
|
239
478
|
const { EmbeddingService } = await import('../analyzer/embedding-service.js');
|
|
@@ -248,24 +487,64 @@ export class McpWatcher {
|
|
|
248
487
|
const cfg = await readOpenLoreConfig(this.rootPath);
|
|
249
488
|
embedSvc = cfg ? EmbeddingService.fromConfig(cfg) : null;
|
|
250
489
|
}
|
|
251
|
-
// embedSvc may be null:
|
|
252
|
-
//
|
|
490
|
+
// embedSvc may be null: updateFiles then refreshes the BM25-only corpus
|
|
491
|
+
// rather than re-embedding, keeping the keyword index live in watch mode.
|
|
253
492
|
const cg = context.callGraph;
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
const
|
|
257
|
-
const
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
493
|
+
if (!cg)
|
|
494
|
+
return;
|
|
495
|
+
const hubIds = new Set((cg.hubFunctions ?? []).map((f) => f.id));
|
|
496
|
+
const entryIds = new Set((cg.entryPoints ?? []).map((f) => f.id));
|
|
497
|
+
const changedFilePaths = new Set(changedFiles.map((f) => f.rel));
|
|
498
|
+
const fileContents = new Map(changedFiles.map((f) => [f.rel, f.content]));
|
|
499
|
+
// Prefer the freshly-parsed nodes; fall back to the (possibly stale)
|
|
500
|
+
// call-graph nodes for the changed files when no edge store seeded them.
|
|
501
|
+
const nodes = changedNodes.length > 0
|
|
502
|
+
? changedNodes
|
|
503
|
+
: (cg.nodes ?? []).filter((n) => changedFilePaths.has(n.filePath));
|
|
504
|
+
const { embedded, reused, total, hasEmbeddings } = await VectorIndex.updateFiles(this.outputPath, nodes, changedFilePaths, context.signatures ?? [], hubIds, entryIds, embedSvc, fileContents);
|
|
505
|
+
if (this.debug) {
|
|
506
|
+
process.stderr.write(hasEmbeddings
|
|
507
|
+
? `[mcp-watcher] re-embedded ${changedFilePaths.size} file(s): ${embedded} new, ${reused} reused\n`
|
|
508
|
+
: `[mcp-watcher] refreshed BM25 index for ${changedFilePaths.size} file(s): ${total} functions\n`);
|
|
509
|
+
}
|
|
262
510
|
}
|
|
263
511
|
catch (err) {
|
|
264
512
|
process.stderr.write(`[mcp-watcher] embed error: ${err.message}\n`);
|
|
265
513
|
}
|
|
266
514
|
}
|
|
515
|
+
// ── Helpers ──────────────────────────────────────────────────────────────────
|
|
516
|
+
/** Bounded count of watched source files; stops early once `cap` is exceeded. */
|
|
517
|
+
async countSourceFiles(cap) {
|
|
518
|
+
let count = 0;
|
|
519
|
+
const walk = async (dir) => {
|
|
520
|
+
if (count > cap)
|
|
521
|
+
return;
|
|
522
|
+
let entries;
|
|
523
|
+
try {
|
|
524
|
+
entries = await readdir(dir, { withFileTypes: true });
|
|
525
|
+
}
|
|
526
|
+
catch {
|
|
527
|
+
return;
|
|
528
|
+
}
|
|
529
|
+
for (const entry of entries) {
|
|
530
|
+
if (count > cap)
|
|
531
|
+
return;
|
|
532
|
+
const abs = join(dir, entry.name);
|
|
533
|
+
const rel = relative(this.rootPath, abs);
|
|
534
|
+
if (entry.isDirectory()) {
|
|
535
|
+
if (!isIgnoredRelPath(rel))
|
|
536
|
+
await walk(abs);
|
|
537
|
+
}
|
|
538
|
+
else if (entry.isFile() && SOURCE_EXTENSIONS.test(entry.name) && !isIgnoredRelPath(rel)) {
|
|
539
|
+
count++;
|
|
540
|
+
}
|
|
541
|
+
}
|
|
542
|
+
};
|
|
543
|
+
await walk(this.rootPath);
|
|
544
|
+
return count;
|
|
545
|
+
}
|
|
267
546
|
}
|
|
268
|
-
// ──
|
|
547
|
+
// ── Module helpers ──────────────────────────────────────────────────────────────
|
|
269
548
|
function isTestFile(relPath) {
|
|
270
549
|
return (relPath.includes('.test.') ||
|
|
271
550
|
relPath.includes('.spec.') ||
|
|
@@ -300,7 +579,7 @@ async function buildGraphSubset(changedRel, changedContent, callerFiles, rootDir
|
|
|
300
579
|
const builder = new CallGraphBuilder();
|
|
301
580
|
const result = await builder.build(files, undefined, undefined, resolutionNodes);
|
|
302
581
|
// Only return nodes from changedFile — callerFiles nodes are already in DB and unchanged
|
|
303
|
-
const changedNodes = Array.from(result.nodes.values()).filter(n => n.filePath === changedRel);
|
|
582
|
+
const changedNodes = Array.from(result.nodes.values()).filter((n) => n.filePath === changedRel);
|
|
304
583
|
return { edges: result.edges, nodes: changedNodes };
|
|
305
584
|
}
|
|
306
585
|
//# sourceMappingURL=mcp-watcher.js.map
|