openlore 2.0.4 → 2.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -7
- package/dist/cli/commands/analyze.d.ts.map +1 -1
- package/dist/cli/commands/analyze.js +38 -37
- package/dist/cli/commands/analyze.js.map +1 -1
- package/dist/cli/commands/mcp.d.ts.map +1 -1
- package/dist/cli/commands/mcp.js +4 -0
- package/dist/cli/commands/mcp.js.map +1 -1
- package/dist/cli/commands/orient.d.ts +15 -0
- package/dist/cli/commands/orient.d.ts.map +1 -0
- package/dist/cli/commands/orient.js +156 -0
- package/dist/cli/commands/orient.js.map +1 -0
- package/dist/cli/index.js +2 -0
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/install/index.d.ts +6 -0
- package/dist/cli/install/index.d.ts.map +1 -1
- package/dist/cli/install/index.js +71 -1
- package/dist/cli/install/index.js.map +1 -1
- package/dist/constants.d.ts +22 -0
- package/dist/constants.d.ts.map +1 -1
- package/dist/constants.js +27 -0
- package/dist/constants.js.map +1 -1
- package/dist/core/analyzer/vector-index.d.ts +20 -0
- package/dist/core/analyzer/vector-index.d.ts.map +1 -1
- package/dist/core/analyzer/vector-index.js +173 -0
- package/dist/core/analyzer/vector-index.js.map +1 -1
- package/dist/core/services/mcp-handlers/utils.d.ts +16 -0
- package/dist/core/services/mcp-handlers/utils.d.ts.map +1 -1
- package/dist/core/services/mcp-handlers/utils.js +32 -0
- package/dist/core/services/mcp-handlers/utils.js.map +1 -1
- package/dist/core/services/mcp-watcher.d.ts +99 -6
- package/dist/core/services/mcp-watcher.d.ts.map +1 -1
- package/dist/core/services/mcp-watcher.js +438 -111
- package/dist/core/services/mcp-watcher.js.map +1 -1
- package/examples/drift-demo/package.json +1 -1
- package/package.json +2 -1
|
@@ -8,31 +8,87 @@
|
|
|
8
8
|
* The call graph is deliberately excluded — rebuilding it requires full
|
|
9
9
|
* tree-sitter analysis of all call sites and is too expensive for a watch loop.
|
|
10
10
|
* It stays current via the post-commit hook (openlore analyze --force --embed).
|
|
11
|
+
*
|
|
12
|
+
* Spec 13.1 (watch-mode performance): freshness is O(change), not O(repo).
|
|
13
|
+
* • Per-file events COALESCE into one batched flush (single debounce timer +
|
|
14
|
+
* hard max-batch ceiling), so a burst / branch-switch runs the pipeline once,
|
|
15
|
+
* not once per file.
|
|
16
|
+
* • The patched llm-context is handed to the MCP read cache in place
|
|
17
|
+
* (primeContextCache), so the next tool call is a cache HIT — no 2.1 MB
|
|
18
|
+
* cold re-parse — even after the disk write.
|
|
19
|
+
* • Vector updates are row-level (VectorIndex.updateFiles), not a full-corpus
|
|
20
|
+
* read+overwrite, and run on a separate lower-priority lane so signature
|
|
21
|
+
* freshness never blocks on embedding.
|
|
22
|
+
* • VCS-flood / bulk batches are detected and collapsed to a single refresh.
|
|
23
|
+
* • stderr emits one summary line per batch by default (per-file detail behind
|
|
24
|
+
* OPENLORE_WATCH_DEBUG).
|
|
11
25
|
*/
|
|
12
|
-
import { readFile, writeFile } from 'node:fs/promises';
|
|
26
|
+
import { readFile, writeFile, readdir } from 'node:fs/promises';
|
|
13
27
|
import { createHash } from 'node:crypto';
|
|
14
28
|
import { join, relative } from 'node:path';
|
|
15
29
|
import chokidar from 'chokidar';
|
|
16
30
|
import { extractSignatures, detectLanguage } from '../analyzer/signature-extractor.js';
|
|
17
31
|
import { EdgeStore } from './edge-store.js';
|
|
18
|
-
import {
|
|
32
|
+
import { primeContextCache } from './mcp-handlers/utils.js';
|
|
33
|
+
import { OPENLORE_DIR, OPENLORE_ANALYSIS_SUBDIR, ARTIFACT_LLM_CONTEXT, WATCH_DEBOUNCE_MS, WATCH_MAX_BATCH_MS, WATCH_BULK_THRESHOLD, WATCH_EMBED_FILE_CEILING, WATCH_VCS_SETTLE_MS, } from '../../constants.js';
|
|
19
34
|
const CALL_GRAPH_LANGS = new Set([
|
|
20
35
|
'Python', 'TypeScript', 'JavaScript', 'Go', 'Rust', 'Ruby', 'Java', 'C++', 'Swift',
|
|
21
36
|
]);
|
|
22
37
|
/** Max callerFiles to re-parse in a single watch event (guards against high-fanIn renames). */
|
|
23
38
|
const CALLER_REPARSE_LIMIT = 10;
|
|
24
39
|
const SOURCE_EXTENSIONS = /\.(ts|tsx|js|jsx|py|go|rs|rb|java|kt|php|cs|cpp|cc|cxx|h|hpp|c|swift)$/;
|
|
25
|
-
//
|
|
26
|
-
//
|
|
27
|
-
|
|
40
|
+
// Directory NAMES that must never be watched. Build-output and dependency
|
|
41
|
+
// directories can hold hundreds of thousands of files (a Rust `target/` is
|
|
42
|
+
// routinely tens of GB), so watching them is both wasteful and a hard EMFILE
|
|
43
|
+
// trigger on the first tool call.
|
|
44
|
+
//
|
|
45
|
+
// Matched against root-RELATIVE path segments (see isIgnoredRelPath), which is
|
|
46
|
+
// what makes this robust:
|
|
47
|
+
// • The ignored directory ITSELF matches (not just its children), so chokidar
|
|
48
|
+
// prunes the whole subtree and never opens FDs inside it — the actual EMFILE
|
|
49
|
+
// fix. A naive `path.includes('/target/')` check only matches descendants,
|
|
50
|
+
// so chokidar still descends into target/ and readdir-storms before pruning.
|
|
51
|
+
// • Only segments BELOW the watch root are considered, so a repo that happens
|
|
52
|
+
// to live under e.g. /home/user/dist/myapp is not wrongly ignored.
|
|
53
|
+
const IGNORED_DIR_NAMES = new Set([
|
|
54
|
+
// VCS / openlore
|
|
55
|
+
'.git', '.hg', '.svn', '.openlore',
|
|
56
|
+
// JS / TS
|
|
57
|
+
'node_modules', 'dist', 'build', '.next', '.nuxt', '.svelte-kit',
|
|
58
|
+
'.turbo', '.parcel-cache', '.cache', 'coverage', '.vite',
|
|
59
|
+
// Rust
|
|
60
|
+
'target',
|
|
61
|
+
// Python
|
|
62
|
+
'.venv', 'venv', '__pycache__', '.mypy_cache', '.pytest_cache',
|
|
63
|
+
'.tox', '.ruff_cache',
|
|
64
|
+
// Go / vendored deps
|
|
65
|
+
'vendor',
|
|
66
|
+
// JVM
|
|
67
|
+
'.gradle',
|
|
68
|
+
// .NET
|
|
69
|
+
'obj',
|
|
70
|
+
// Editor metadata
|
|
71
|
+
'.idea',
|
|
72
|
+
]);
|
|
28
73
|
const IGNORED_SUFFIXES = ['.test.ts', '.test.js', '.spec.ts', '.spec.js'];
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
74
|
+
/**
|
|
75
|
+
* True if a root-relative path should never be watched. Evaluated as a cheap
|
|
76
|
+
* segment scan before any FD is opened, so it stays allocation-light. A path is
|
|
77
|
+
* ignored if ANY of its segments is a known build/dependency/VCS directory
|
|
78
|
+
* name, or it has a test-file suffix. Exported for testing.
|
|
79
|
+
*
|
|
80
|
+
* @param relPath path relative to the watch root (forward- or back-slashed)
|
|
81
|
+
*/
|
|
82
|
+
export function isIgnoredRelPath(relPath) {
|
|
83
|
+
if (!relPath || relPath === '.')
|
|
84
|
+
return false;
|
|
85
|
+
const segments = relPath.split(/[/\\]/);
|
|
86
|
+
for (const seg of segments) {
|
|
87
|
+
if (IGNORED_DIR_NAMES.has(seg))
|
|
32
88
|
return true;
|
|
33
89
|
}
|
|
34
90
|
for (const suf of IGNORED_SUFFIXES) {
|
|
35
|
-
if (
|
|
91
|
+
if (relPath.endsWith(suf))
|
|
36
92
|
return true;
|
|
37
93
|
}
|
|
38
94
|
return false;
|
|
@@ -41,25 +97,66 @@ function isIgnoredPath(filePath) {
|
|
|
41
97
|
export class McpWatcher {
|
|
42
98
|
rootPath;
|
|
43
99
|
outputPath;
|
|
100
|
+
contextPath;
|
|
44
101
|
debounceMs;
|
|
102
|
+
maxBatchMs;
|
|
103
|
+
bulkThreshold;
|
|
104
|
+
embedFileCeiling;
|
|
45
105
|
extraIgnore;
|
|
106
|
+
debug;
|
|
46
107
|
fsWatcher;
|
|
47
|
-
|
|
48
|
-
|
|
108
|
+
gitWatcher;
|
|
109
|
+
// ── Coalescing queue (Step 1) ──────────────────────────────────────────────
|
|
110
|
+
pending = new Set(); // absolute paths awaiting a flush
|
|
111
|
+
debounceTimer;
|
|
112
|
+
maxBatchTimer;
|
|
113
|
+
running = false; // single-flight for the signature flush
|
|
114
|
+
vcsBulkFlag = false; // set by the .git ref watcher
|
|
115
|
+
// ── Embedding lane (Step 4 — decoupled, lower priority) ─────────────────────
|
|
116
|
+
embed;
|
|
117
|
+
embedDegraded = false; // auto-degraded on a too-large tree
|
|
118
|
+
embedFiles = new Map(); // rel → content awaiting embed
|
|
119
|
+
embedNodes = new Map(); // id → node awaiting embed
|
|
120
|
+
embedTimer;
|
|
121
|
+
embedRunning = false;
|
|
122
|
+
lastEmbedContext;
|
|
49
123
|
constructor(options) {
|
|
50
124
|
this.rootPath = options.rootPath;
|
|
51
125
|
this.outputPath = options.outputPath
|
|
52
126
|
?? join(options.rootPath, OPENLORE_DIR, OPENLORE_ANALYSIS_SUBDIR);
|
|
53
|
-
this.
|
|
127
|
+
this.contextPath = join(this.outputPath, ARTIFACT_LLM_CONTEXT);
|
|
128
|
+
this.debounceMs = options.debounceMs ?? WATCH_DEBOUNCE_MS;
|
|
129
|
+
this.maxBatchMs = options.maxBatchMs ?? WATCH_MAX_BATCH_MS;
|
|
130
|
+
this.bulkThreshold = options.bulkThreshold ?? WATCH_BULK_THRESHOLD;
|
|
131
|
+
this.embedFileCeiling = options.embedFileCeiling ?? WATCH_EMBED_FILE_CEILING;
|
|
132
|
+
this.embed = options.embed ?? true;
|
|
54
133
|
this.extraIgnore = options.ignore ?? [];
|
|
134
|
+
this.debug = !!process.env.OPENLORE_WATCH_DEBUG;
|
|
55
135
|
}
|
|
56
136
|
// ── Lifecycle ──────────────────────────────────────────────────────────────
|
|
57
137
|
async start() {
|
|
138
|
+
// Auto-degrade live embedding on very large trees (Step 4). Counting is
|
|
139
|
+
// bounded — it stops as soon as the ceiling is exceeded.
|
|
140
|
+
if (this.embed) {
|
|
141
|
+
const count = await this.countSourceFiles(this.embedFileCeiling + 1);
|
|
142
|
+
if (count > this.embedFileCeiling) {
|
|
143
|
+
this.embedDegraded = true;
|
|
144
|
+
process.stderr.write(`[mcp-watcher] ${count}+ source files exceed the live-embed ceiling ` +
|
|
145
|
+
`(${this.embedFileCeiling}); running signatures-only — embeddings refresh at commit\n`);
|
|
146
|
+
}
|
|
147
|
+
}
|
|
58
148
|
await new Promise((resolve, reject) => {
|
|
59
149
|
const extraIgnore = this.extraIgnore;
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
150
|
+
const rootPath = this.rootPath;
|
|
151
|
+
this.fsWatcher = chokidar.watch(rootPath, {
|
|
152
|
+
// Resolve each candidate to a root-relative path first, then prune by
|
|
153
|
+
// directory name. This prunes the ignored directory itself (chokidar
|
|
154
|
+
// never opens FDs inside it — the EMFILE fix) without false-matching on
|
|
155
|
+
// parent path components above the watch root.
|
|
156
|
+
ignored: (filePath) => {
|
|
157
|
+
const rel = relative(rootPath, filePath);
|
|
158
|
+
return isIgnoredRelPath(rel) || extraIgnore.some((p) => rel.includes(p));
|
|
159
|
+
},
|
|
63
160
|
persistent: true,
|
|
64
161
|
ignoreInitial: true,
|
|
65
162
|
followSymlinks: false,
|
|
@@ -67,125 +164,315 @@ export class McpWatcher {
|
|
|
67
164
|
});
|
|
68
165
|
this.fsWatcher.on('change', (absPath) => {
|
|
69
166
|
if (SOURCE_EXTENSIONS.test(absPath)) {
|
|
70
|
-
this.
|
|
167
|
+
this.enqueue(absPath);
|
|
71
168
|
}
|
|
72
169
|
});
|
|
73
170
|
this.fsWatcher.on('ready', () => resolve());
|
|
74
171
|
this.fsWatcher.on('error', (err) => reject(err));
|
|
75
172
|
});
|
|
76
|
-
|
|
173
|
+
// Best-effort VCS-flood detection (Step 5): a branch switch / rebase / merge
|
|
174
|
+
// bumps these refs. We never recurse into .git (it stays ignored above); we
|
|
175
|
+
// watch only these specific files, then collapse the churn into one refresh.
|
|
176
|
+
try {
|
|
177
|
+
const gitDir = join(this.rootPath, '.git');
|
|
178
|
+
const refs = ['HEAD', 'index', 'MERGE_HEAD', 'ORIG_HEAD'].map((f) => join(gitDir, f));
|
|
179
|
+
this.gitWatcher = chokidar.watch(refs, {
|
|
180
|
+
persistent: true,
|
|
181
|
+
ignoreInitial: true,
|
|
182
|
+
followSymlinks: false,
|
|
183
|
+
});
|
|
184
|
+
this.gitWatcher.on('all', () => this.onVcsEvent());
|
|
185
|
+
}
|
|
186
|
+
catch {
|
|
187
|
+
// no .git, or watch failed — VCS detection falls back to the batch-size
|
|
188
|
+
// threshold in handleBatch, which is enough for G3.
|
|
189
|
+
}
|
|
190
|
+
process.stderr.write(`[mcp-watcher] watching ${this.rootPath}` +
|
|
191
|
+
`${this.embed && !this.embedDegraded ? '' : ' (signatures-only)'}\n`);
|
|
77
192
|
}
|
|
78
193
|
async stop() {
|
|
79
|
-
|
|
80
|
-
clearTimeout(
|
|
81
|
-
this.
|
|
194
|
+
if (this.debounceTimer)
|
|
195
|
+
clearTimeout(this.debounceTimer);
|
|
196
|
+
if (this.maxBatchTimer)
|
|
197
|
+
clearTimeout(this.maxBatchTimer);
|
|
198
|
+
if (this.embedTimer)
|
|
199
|
+
clearTimeout(this.embedTimer);
|
|
200
|
+
this.debounceTimer = this.maxBatchTimer = this.embedTimer = undefined;
|
|
201
|
+
// Best-effort: persist anything still queued so a save right before shutdown
|
|
202
|
+
// is not lost.
|
|
203
|
+
if (this.pending.size > 0 && !this.running) {
|
|
204
|
+
const batch = Array.from(this.pending);
|
|
205
|
+
this.pending.clear();
|
|
206
|
+
try {
|
|
207
|
+
await this.handleBatch(batch, { syncFlush: true });
|
|
208
|
+
}
|
|
209
|
+
catch { /* ignore */ }
|
|
210
|
+
}
|
|
82
211
|
await this.fsWatcher?.close();
|
|
212
|
+
await this.gitWatcher?.close();
|
|
83
213
|
process.stderr.write('[mcp-watcher] stopped\n');
|
|
84
214
|
}
|
|
85
|
-
// ──
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
215
|
+
// ── Coalescing (Step 1) ──────────────────────────────────────────────────────
|
|
216
|
+
/**
|
|
217
|
+
* Add a changed path to the pending set and (re)arm a single debounce timer,
|
|
218
|
+
* plus a one-shot hard ceiling so a continuous stream still flushes.
|
|
219
|
+
*/
|
|
220
|
+
enqueue(absPath) {
|
|
221
|
+
this.pending.add(absPath);
|
|
222
|
+
if (this.debounceTimer)
|
|
223
|
+
clearTimeout(this.debounceTimer);
|
|
224
|
+
this.debounceTimer = setTimeout(() => this.flush(), this.debounceMs);
|
|
225
|
+
if (!this.maxBatchTimer) {
|
|
226
|
+
this.maxBatchTimer = setTimeout(() => this.flush(), this.maxBatchMs);
|
|
227
|
+
}
|
|
228
|
+
}
|
|
229
|
+
/** A .git ref changed — settle, then flush whatever changed as one bulk batch. */
|
|
230
|
+
onVcsEvent() {
|
|
231
|
+
this.vcsBulkFlag = true;
|
|
232
|
+
if (this.debounceTimer)
|
|
233
|
+
clearTimeout(this.debounceTimer);
|
|
234
|
+
this.debounceTimer = setTimeout(() => this.flush(), WATCH_VCS_SETTLE_MS);
|
|
235
|
+
if (this.debug) {
|
|
236
|
+
process.stderr.write('[mcp-watcher] VCS operation detected — coalescing into one refresh\n');
|
|
237
|
+
}
|
|
238
|
+
}
|
|
239
|
+
/**
|
|
240
|
+
* Drain the pending set into a single batch. Single-flight: if a flush is
|
|
241
|
+
* already running, leave the new paths in `pending` and reschedule once it
|
|
242
|
+
* finishes — never interleave two flushes.
|
|
243
|
+
*/
|
|
244
|
+
flush() {
|
|
245
|
+
if (this.debounceTimer) {
|
|
246
|
+
clearTimeout(this.debounceTimer);
|
|
247
|
+
this.debounceTimer = undefined;
|
|
248
|
+
}
|
|
249
|
+
if (this.maxBatchTimer) {
|
|
250
|
+
clearTimeout(this.maxBatchTimer);
|
|
251
|
+
this.maxBatchTimer = undefined;
|
|
252
|
+
}
|
|
253
|
+
if (this.running)
|
|
254
|
+
return; // a follow-up is scheduled in finally{}
|
|
255
|
+
if (this.pending.size === 0)
|
|
256
|
+
return;
|
|
257
|
+
const batch = Array.from(this.pending);
|
|
258
|
+
this.pending.clear();
|
|
259
|
+
this.running = true;
|
|
260
|
+
this.handleBatch(batch)
|
|
261
|
+
.catch((err) => process.stderr.write(`[mcp-watcher] error: ${err.message}\n`))
|
|
262
|
+
.finally(() => {
|
|
263
|
+
this.running = false;
|
|
264
|
+
if (this.pending.size > 0) {
|
|
265
|
+
this.debounceTimer = setTimeout(() => this.flush(), this.debounceMs);
|
|
96
266
|
}
|
|
97
|
-
|
|
98
|
-
this.handleChange(absPath)
|
|
99
|
-
.catch(err => process.stderr.write(`[mcp-watcher] error: ${err.message}\n`))
|
|
100
|
-
.finally(() => { this.running = false; });
|
|
101
|
-
}, this.debounceMs);
|
|
102
|
-
this.timers.set(absPath, t);
|
|
267
|
+
});
|
|
103
268
|
}
|
|
104
269
|
// ── Core re-index ──────────────────────────────────────────────────────────
|
|
105
270
|
/**
|
|
106
|
-
* Re-index a single changed file.
|
|
107
|
-
*
|
|
271
|
+
* Re-index a single changed file. Exposed for unit testing without needing a
|
|
272
|
+
* real file watcher; flushes synchronously so callers observe the update on
|
|
273
|
+
* disk immediately. Internally this is just a batch of one.
|
|
108
274
|
*/
|
|
109
275
|
async handleChange(absPath) {
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
276
|
+
await this.handleBatch([absPath], { syncFlush: true });
|
|
277
|
+
}
|
|
278
|
+
/**
|
|
279
|
+
* Process a coalesced batch of changed files as ONE pipeline pass:
|
|
280
|
+
* • per-file incremental edge update (content-hash skip), all under one open
|
|
281
|
+
* EdgeStore;
|
|
282
|
+
* • ONE signature patch + ONE llm-context persist + ONE read-cache handoff;
|
|
283
|
+
* • ONE vector update (inline when syncFlush, else on the embed lane).
|
|
284
|
+
*/
|
|
285
|
+
async handleBatch(absPaths, opts = {}) {
|
|
286
|
+
const t0 = Date.now();
|
|
287
|
+
const consumedVcsBulk = this.vcsBulkFlag;
|
|
288
|
+
this.vcsBulkFlag = false;
|
|
289
|
+
// 1. Resolve + read candidate files (skip tests / unknown langs / deleted).
|
|
290
|
+
const files = [];
|
|
291
|
+
for (const abs of absPaths) {
|
|
292
|
+
const rel = relative(this.rootPath, abs);
|
|
293
|
+
if (isTestFile(rel))
|
|
294
|
+
continue;
|
|
295
|
+
if (detectLanguage(rel) === 'unknown')
|
|
296
|
+
continue;
|
|
297
|
+
let content;
|
|
298
|
+
try {
|
|
299
|
+
content = await readFile(abs, 'utf-8');
|
|
300
|
+
}
|
|
301
|
+
catch {
|
|
302
|
+
continue; // file may have been deleted between the event and now
|
|
303
|
+
}
|
|
304
|
+
files.push({ rel, abs, content });
|
|
123
305
|
}
|
|
124
|
-
|
|
306
|
+
if (files.length === 0)
|
|
307
|
+
return;
|
|
308
|
+
// 2. Incremental edge update (CGC _handle_modification algorithm), one open
|
|
309
|
+
// store for the whole batch. Content-hash skip drops no-op autosaves.
|
|
310
|
+
const changedFiles = [];
|
|
311
|
+
const changedNodes = [];
|
|
125
312
|
if (EdgeStore.exists(this.outputPath)) {
|
|
126
313
|
const store = EdgeStore.open(EdgeStore.dbPath(this.outputPath));
|
|
127
314
|
try {
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
store.
|
|
315
|
+
for (const f of files) {
|
|
316
|
+
const newHash = createHash('sha256').update(f.content).digest('hex');
|
|
317
|
+
if (store.getFileHash(f.rel) === newHash)
|
|
318
|
+
continue; // no-op autosave
|
|
319
|
+
// Reverse lookup BEFORE delete so we know which files call into this one.
|
|
320
|
+
const callerFiles = store.getCallerFiles(f.rel);
|
|
321
|
+
// Re-parse BEFORE mutating DB — graph stays readable (old state) during
|
|
322
|
+
// parse. Seed resolution with all known nodes so re-parsed callers'
|
|
323
|
+
// cross-file calls don't degrade to `external::`.
|
|
324
|
+
const resolutionNodes = store.getAllInternalNodes();
|
|
325
|
+
const { edges: newEdges, nodes: newNodes } = await buildGraphSubset(f.rel, f.content, callerFiles, this.rootPath, resolutionNodes);
|
|
326
|
+
// Atomic swap so concurrent MCP reads never see a torn graph.
|
|
327
|
+
store.transaction(() => {
|
|
328
|
+
store.deleteEdgesForFile(f.rel);
|
|
329
|
+
for (const cf of callerFiles.slice(0, CALLER_REPARSE_LIMIT)) {
|
|
330
|
+
store.deleteOutgoingEdgesForFile(cf);
|
|
331
|
+
}
|
|
332
|
+
store.deleteNodesForFile(f.rel);
|
|
333
|
+
store.insertNodes(newNodes);
|
|
334
|
+
store.insertEdges(newEdges);
|
|
335
|
+
store.setFileHash(f.rel, newHash);
|
|
336
|
+
});
|
|
337
|
+
changedFiles.push({ rel: f.rel, content: f.content });
|
|
338
|
+
for (const n of newNodes)
|
|
339
|
+
changedNodes.push(n);
|
|
340
|
+
if (this.debug) {
|
|
341
|
+
process.stderr.write(`[mcp-watcher] graph: ${f.rel} (+${newNodes.length} nodes, +${newEdges.length} edges, ${callerFiles.length} callers)\n`);
|
|
147
342
|
}
|
|
148
|
-
|
|
149
|
-
store.insertNodes(newNodes);
|
|
150
|
-
store.insertEdges(newEdges);
|
|
151
|
-
store.setFileHash(rel, newHash);
|
|
152
|
-
});
|
|
153
|
-
process.stderr.write(`[mcp-watcher] updated graph: ${rel} (+${newNodes.length} nodes, +${newEdges.length} edges, ${callerFiles.length} callers re-parsed)\n`);
|
|
343
|
+
}
|
|
154
344
|
}
|
|
155
345
|
finally {
|
|
156
346
|
store.close();
|
|
157
347
|
}
|
|
158
348
|
}
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
const raw = await readFile(contextPath, 'utf-8');
|
|
164
|
-
context = JSON.parse(raw);
|
|
349
|
+
else {
|
|
350
|
+
// No edge store yet — still refresh signatures for every candidate.
|
|
351
|
+
for (const f of files)
|
|
352
|
+
changedFiles.push({ rel: f.rel, content: f.content });
|
|
165
353
|
}
|
|
166
|
-
|
|
167
|
-
|
|
354
|
+
if (changedFiles.length === 0)
|
|
355
|
+
return; // every event was a no-op autosave
|
|
356
|
+
// 3. Signatures: load context (shared in-memory cache), patch all changed
|
|
357
|
+
// files, then ONE persist + read-cache handoff (Step 2). The handoff
|
|
358
|
+
// means the next tool call is a cache HIT — no cold 2.1 MB re-parse.
|
|
359
|
+
const context = await this.loadContext();
|
|
360
|
+
if (!context) {
|
|
361
|
+
process.stderr.write(`[mcp-watcher] no context at ${this.contextPath} — run analyze first\n`);
|
|
168
362
|
return;
|
|
169
363
|
}
|
|
170
|
-
const newMap = extractSignatures(rel, content);
|
|
171
364
|
if (!context.signatures)
|
|
172
365
|
context.signatures = [];
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
context.signatures
|
|
366
|
+
for (const f of changedFiles) {
|
|
367
|
+
const newMap = extractSignatures(f.rel, f.content);
|
|
368
|
+
const idx = context.signatures.findIndex((m) => m.path === f.rel);
|
|
369
|
+
if (idx >= 0)
|
|
370
|
+
context.signatures[idx] = newMap;
|
|
371
|
+
else
|
|
372
|
+
context.signatures.push(newMap);
|
|
176
373
|
}
|
|
177
|
-
|
|
178
|
-
|
|
374
|
+
await this.persistContext(context);
|
|
375
|
+
// 4. Vector update — decoupled from signature freshness (Step 4).
|
|
376
|
+
const isBulk = consumedVcsBulk || changedFiles.length >= this.bulkThreshold;
|
|
377
|
+
if (this.embed && !this.embedDegraded && context.callGraph) {
|
|
378
|
+
if (opts.syncFlush) {
|
|
379
|
+
// Direct handleChange path: inline so callers/tests observe it.
|
|
380
|
+
await this.updateVectors(context, changedFiles, changedNodes);
|
|
381
|
+
}
|
|
382
|
+
else {
|
|
383
|
+
// Watcher path: schedule on the lower-priority embed lane. On a bulk
|
|
384
|
+
// event this still collapses to a single deferred pass.
|
|
385
|
+
this.scheduleEmbed(context, changedFiles, changedNodes);
|
|
386
|
+
}
|
|
387
|
+
}
|
|
388
|
+
// 5. One summary line per batch (Step 6). Per-file detail is behind debug.
|
|
389
|
+
const n = changedFiles.length;
|
|
390
|
+
process.stderr.write(`[mcp-watcher] ${isBulk ? `coalesced ${n} changes` : `updated ${n} file${n === 1 ? '' : 's'}`} (${Date.now() - t0}ms)\n`);
|
|
391
|
+
}
|
|
392
|
+
// ── llm-context load + persistence + read-cache handoff (Step 2) ─────────────
|
|
393
|
+
/**
|
|
394
|
+
* True when this watcher writes to the canonical `<root>/.openlore/analysis`
|
|
395
|
+
* layout that the MCP read handlers cache against. Only then is the shared
|
|
396
|
+
* in-memory read cache (primeContextCache) the right channel to prime; a custom
|
|
397
|
+
* `outputPath` (tests / non-standard installs) writes only to disk.
|
|
398
|
+
*/
|
|
399
|
+
get usesStandardLayout() {
|
|
400
|
+
return this.outputPath === join(this.rootPath, OPENLORE_DIR, OPENLORE_ANALYSIS_SUBDIR);
|
|
401
|
+
}
|
|
402
|
+
/**
|
|
403
|
+
* Load the context the watcher is about to patch. This ALWAYS reads fresh from
|
|
404
|
+
* disk — never through the shared read cache — because the cache is a read-path
|
|
405
|
+
* (tool-call) optimization, and patching a possibly-stale cached object could
|
|
406
|
+
* silently drop signatures written by a concurrent `analyze` between events.
|
|
407
|
+
* The writer reads ground truth; persistContext then primes the read cache with
|
|
408
|
+
* the result so the next tool call is still a hit (Step 2a, G1).
|
|
409
|
+
*/
|
|
410
|
+
async loadContext() {
|
|
411
|
+
try {
|
|
412
|
+
const raw = await readFile(this.contextPath, 'utf-8');
|
|
413
|
+
return JSON.parse(raw);
|
|
414
|
+
}
|
|
415
|
+
catch {
|
|
416
|
+
return null;
|
|
417
|
+
}
|
|
418
|
+
}
|
|
419
|
+
async persistContext(context) {
|
|
420
|
+
// Strip the runtime-only EdgeStore handle before serializing.
|
|
421
|
+
const { edgeStore: _edgeStore, ...serializable } = context;
|
|
422
|
+
void _edgeStore;
|
|
423
|
+
await writeFile(this.contextPath, JSON.stringify(serializable, null, 2), 'utf-8');
|
|
424
|
+
// Hand the patched object back to the read cache, aligned to the new on-disk
|
|
425
|
+
// mtime, so the next tool call is a cache hit (no cold re-parse). This is the
|
|
426
|
+
// fix for root-cause item 2 (mtime bump forcing a full re-read). Only valid
|
|
427
|
+
// for the canonical layout the read handlers cache against.
|
|
428
|
+
if (this.usesStandardLayout)
|
|
429
|
+
await primeContextCache(this.rootPath, context);
|
|
430
|
+
}
|
|
431
|
+
// ── Embedding lane (Step 4) ──────────────────────────────────────────────────
|
|
432
|
+
scheduleEmbed(context, changedFiles, nodes) {
|
|
433
|
+
for (const f of changedFiles)
|
|
434
|
+
this.embedFiles.set(f.rel, f.content);
|
|
435
|
+
for (const node of nodes)
|
|
436
|
+
this.embedNodes.set(node.id, node);
|
|
437
|
+
this.lastEmbedContext = context;
|
|
438
|
+
if (this.embedTimer)
|
|
439
|
+
clearTimeout(this.embedTimer);
|
|
440
|
+
// Slightly behind the signature debounce so structural freshness always lands
|
|
441
|
+
// first and multiple flushes batch into one embed pass.
|
|
442
|
+
this.embedTimer = setTimeout(() => void this.runEmbedLane(), this.debounceMs);
|
|
443
|
+
}
|
|
444
|
+
async runEmbedLane() {
|
|
445
|
+
if (this.embedRunning) {
|
|
446
|
+
// Re-arm: drain again once the in-flight pass finishes.
|
|
447
|
+
this.embedTimer = setTimeout(() => void this.runEmbedLane(), this.debounceMs);
|
|
448
|
+
return;
|
|
179
449
|
}
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
450
|
+
if (this.embedFiles.size === 0 || !this.lastEmbedContext)
|
|
451
|
+
return;
|
|
452
|
+
const changedFiles = Array.from(this.embedFiles, ([rel, content]) => ({ rel, content }));
|
|
453
|
+
const nodes = Array.from(this.embedNodes.values());
|
|
454
|
+
const context = this.lastEmbedContext;
|
|
455
|
+
this.embedFiles.clear();
|
|
456
|
+
this.embedNodes.clear();
|
|
457
|
+
this.embedRunning = true;
|
|
458
|
+
try {
|
|
459
|
+
await this.updateVectors(context, changedFiles, nodes);
|
|
460
|
+
}
|
|
461
|
+
catch (err) {
|
|
462
|
+
process.stderr.write(`[mcp-watcher] embed error: ${err.message}\n`);
|
|
463
|
+
}
|
|
464
|
+
finally {
|
|
465
|
+
this.embedRunning = false;
|
|
466
|
+
if (this.embedFiles.size > 0) {
|
|
467
|
+
this.embedTimer = setTimeout(() => void this.runEmbedLane(), this.debounceMs);
|
|
468
|
+
}
|
|
185
469
|
}
|
|
186
470
|
}
|
|
187
|
-
|
|
188
|
-
|
|
471
|
+
/**
|
|
472
|
+
* Row-level vector update for the changed files only (Step 3). Falls back to a
|
|
473
|
+
* silent no-op when no embedding service and no index are available.
|
|
474
|
+
*/
|
|
475
|
+
async updateVectors(context, changedFiles, changedNodes) {
|
|
189
476
|
try {
|
|
190
477
|
const { VectorIndex } = await import('../analyzer/vector-index.js');
|
|
191
478
|
const { EmbeddingService } = await import('../analyzer/embedding-service.js');
|
|
@@ -200,24 +487,64 @@ export class McpWatcher {
|
|
|
200
487
|
const cfg = await readOpenLoreConfig(this.rootPath);
|
|
201
488
|
embedSvc = cfg ? EmbeddingService.fromConfig(cfg) : null;
|
|
202
489
|
}
|
|
203
|
-
// embedSvc may be null:
|
|
204
|
-
//
|
|
490
|
+
// embedSvc may be null: updateFiles then refreshes the BM25-only corpus
|
|
491
|
+
// rather than re-embedding, keeping the keyword index live in watch mode.
|
|
205
492
|
const cg = context.callGraph;
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
const
|
|
209
|
-
const
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
493
|
+
if (!cg)
|
|
494
|
+
return;
|
|
495
|
+
const hubIds = new Set((cg.hubFunctions ?? []).map((f) => f.id));
|
|
496
|
+
const entryIds = new Set((cg.entryPoints ?? []).map((f) => f.id));
|
|
497
|
+
const changedFilePaths = new Set(changedFiles.map((f) => f.rel));
|
|
498
|
+
const fileContents = new Map(changedFiles.map((f) => [f.rel, f.content]));
|
|
499
|
+
// Prefer the freshly-parsed nodes; fall back to the (possibly stale)
|
|
500
|
+
// call-graph nodes for the changed files when no edge store seeded them.
|
|
501
|
+
const nodes = changedNodes.length > 0
|
|
502
|
+
? changedNodes
|
|
503
|
+
: (cg.nodes ?? []).filter((n) => changedFilePaths.has(n.filePath));
|
|
504
|
+
const { embedded, reused, total, hasEmbeddings } = await VectorIndex.updateFiles(this.outputPath, nodes, changedFilePaths, context.signatures ?? [], hubIds, entryIds, embedSvc, fileContents);
|
|
505
|
+
if (this.debug) {
|
|
506
|
+
process.stderr.write(hasEmbeddings
|
|
507
|
+
? `[mcp-watcher] re-embedded ${changedFilePaths.size} file(s): ${embedded} new, ${reused} reused\n`
|
|
508
|
+
: `[mcp-watcher] refreshed BM25 index for ${changedFilePaths.size} file(s): ${total} functions\n`);
|
|
509
|
+
}
|
|
214
510
|
}
|
|
215
511
|
catch (err) {
|
|
216
512
|
process.stderr.write(`[mcp-watcher] embed error: ${err.message}\n`);
|
|
217
513
|
}
|
|
218
514
|
}
|
|
515
|
+
// ── Helpers ──────────────────────────────────────────────────────────────────
|
|
516
|
+
/** Bounded count of watched source files; stops early once `cap` is exceeded. */
|
|
517
|
+
async countSourceFiles(cap) {
|
|
518
|
+
let count = 0;
|
|
519
|
+
const walk = async (dir) => {
|
|
520
|
+
if (count > cap)
|
|
521
|
+
return;
|
|
522
|
+
let entries;
|
|
523
|
+
try {
|
|
524
|
+
entries = await readdir(dir, { withFileTypes: true });
|
|
525
|
+
}
|
|
526
|
+
catch {
|
|
527
|
+
return;
|
|
528
|
+
}
|
|
529
|
+
for (const entry of entries) {
|
|
530
|
+
if (count > cap)
|
|
531
|
+
return;
|
|
532
|
+
const abs = join(dir, entry.name);
|
|
533
|
+
const rel = relative(this.rootPath, abs);
|
|
534
|
+
if (entry.isDirectory()) {
|
|
535
|
+
if (!isIgnoredRelPath(rel))
|
|
536
|
+
await walk(abs);
|
|
537
|
+
}
|
|
538
|
+
else if (entry.isFile() && SOURCE_EXTENSIONS.test(entry.name) && !isIgnoredRelPath(rel)) {
|
|
539
|
+
count++;
|
|
540
|
+
}
|
|
541
|
+
}
|
|
542
|
+
};
|
|
543
|
+
await walk(this.rootPath);
|
|
544
|
+
return count;
|
|
545
|
+
}
|
|
219
546
|
}
|
|
220
|
-
// ──
|
|
547
|
+
// ── Module helpers ──────────────────────────────────────────────────────────────
|
|
221
548
|
function isTestFile(relPath) {
|
|
222
549
|
return (relPath.includes('.test.') ||
|
|
223
550
|
relPath.includes('.spec.') ||
|
|
@@ -252,7 +579,7 @@ async function buildGraphSubset(changedRel, changedContent, callerFiles, rootDir
|
|
|
252
579
|
const builder = new CallGraphBuilder();
|
|
253
580
|
const result = await builder.build(files, undefined, undefined, resolutionNodes);
|
|
254
581
|
// Only return nodes from changedFile — callerFiles nodes are already in DB and unchanged
|
|
255
|
-
const changedNodes = Array.from(result.nodes.values()).filter(n => n.filePath === changedRel);
|
|
582
|
+
const changedNodes = Array.from(result.nodes.values()).filter((n) => n.filePath === changedRel);
|
|
256
583
|
return { edges: result.edges, nodes: changedNodes };
|
|
257
584
|
}
|
|
258
585
|
//# sourceMappingURL=mcp-watcher.js.map
|