carto-md 2.0.1 → 2.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/BENCHMARK_RESULTS.md +34 -0
- package/CONTRIBUTING.md +28 -2
- package/README.md +1 -0
- package/package.json +3 -2
- package/src/agents/formatter.js +3 -0
- package/src/agents/scan-structure.js +69 -0
- package/src/mcp/change-plan.js +737 -0
- package/src/mcp/server-v2.js +2 -86
- package/src/store/sync-v2.js +5 -3
- package/src/sync.js +1 -18
- package/acp-strategy.md +0 -480
|
@@ -0,0 +1,737 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* change-plan.js — pure module backing the `get_change_plan` MCP tool.
|
|
5
|
+
*
|
|
6
|
+
* Replaces the prior substring-grep implementation with a structural
|
|
7
|
+
* pipeline that uses the real SQLite graph:
|
|
8
|
+
*
|
|
9
|
+
* tokenize(intent) ──► tokens { content, verbs, paths }
|
|
10
|
+
* └──► IDF over indexed corpus (basenames + symbol names)
|
|
11
|
+
* └──► 4-tier anchor selection
|
|
12
|
+
* A. route path/method (searchRoutes)
|
|
13
|
+
* B. file path tokens (pathTokens × IDF)
|
|
14
|
+
* C. exported symbol names (camelTokens × IDF)
|
|
15
|
+
* D. domain name match (domain assignments)
|
|
16
|
+
* └──► graph expansion
|
|
17
|
+
* forward 1-hop imports (getNeighbors)
|
|
18
|
+
* backward 1-hop imports
|
|
19
|
+
* transitive blast radius (getBlastRadius)
|
|
20
|
+
* cross-domain edges
|
|
21
|
+
* conventions (same-domain peers)
|
|
22
|
+
* └──► structured plan
|
|
23
|
+
*
|
|
24
|
+
* formatPlanMarkdown(plan) renders the plan with the historical section
|
|
25
|
+
* headers preserved (`## Relevant Routes`, `## Files to Touch`,
|
|
26
|
+
* `## Affected Domains`, `## Blast Radius`, `## Similar Patterns to Follow`)
|
|
27
|
+
* plus new optional sections `## Files to Review (Callers)` and
|
|
28
|
+
* `## Cross-Domain Edges` when non-empty.
|
|
29
|
+
*/
|
|
30
|
+
|
|
31
|
+
// ─── Tokenization ─────────────────────────────────────────────────────
|
|
32
|
+
|
|
33
|
+
// Common English/dev prose stopwords that flood matches if kept.
|
|
34
|
+
// Note: deliberately does NOT contain meaningful 3-char dev tokens like
|
|
35
|
+
// "log", "url", "csv", "jwt", "ssl", "mcp", "sql", "api", "git", "env".
|
|
36
|
+
// Filter those out via IDF, not by length.
|
|
37
|
+
const STOPWORDS = new Set([
  'the and for with from into that this your you have',
  'will would should could can add fix make use using',
  'when then want need get set put let what how',
  'why who where these those some also only just',
  'every each any all they their them our its it to',
  'in on of as at is an be do if or no not but',
  'by via about around my me we us was were been',
  'so such than too very more most less least',
].flatMap((row) => row.split(' ')));

// HTTP methods recognised in an intent; VERB_RE matches any of them as a
// whole word, PATH_RE matches URL-path-like runs that start with "/".
const HTTP_VERBS = ['get', 'post', 'put', 'patch', 'delete', 'head', 'options'];
const VERB_RE = new RegExp('\\b(' + HTTP_VERBS.join('|') + ')\\b', 'g');
const PATH_RE = /\/[a-z0-9_\-\/{}:]+/g;

// Short dev abbreviations that are allowed to prefix-match longer path
// tokens. Tokens of four or more characters always may; three-character
// tokens must be listed here, otherwise e.g. "sit" would prefix-match
// "sitter" in "tree-sitter-parser.js" and yield a false-positive anchor.
// NOTE(review): "s3" is only two characters, so a caller that gates on
// `length === 3` before consulting this set will never reach it.
const ABBREV3 = new Set([
  'sql jwt mcp csv ssl api env orm',
  'dns jpa tcp udp xml oop cli cdn',
  'aws gcp kms iam s3 rpc dao dto',
].flatMap((row) => row.split(' ')));

// Upper bound on how many content tokens a single intent may produce.
const MAX_CONTENT_TOKENS = 50;

/**
 * Split a free-text intent into three token families.
 *
 * - `paths`   — unique URL-path-like substrings (lowercased), in order of
 *               first appearance.
 * - `verbs`   — unique HTTP methods (uppercased) found in the text that
 *               remains after the paths were blanked out, so "post" inside
 *               "/api/post" is not reported as a verb.
 * - `content` — unique lowercase alphanumeric words of length >= 2 that are
 *               not stopwords: first those from the path-stripped text, then
 *               the segments of each captured path. Collection stops once
 *               MAX_CONTENT_TOKENS words have been kept.
 *
 * @param {*} intent - The user's intent; anything that is not a non-empty
 *   string produces three empty arrays.
 * @returns {{content: string[], verbs: string[], paths: string[]}}
 */
function tokenize(intent) {
  if (typeof intent !== 'string' || intent === '') {
    return { content: [], verbs: [], paths: [] };
  }

  const text = intent.toLowerCase();

  // Paths are captured first and then replaced by a space so that neither
  // verb detection nor word splitting sees them as ordinary prose.
  const paths = Array.from(new Set(text.match(PATH_RE) || []));
  const prose = text.replace(PATH_RE, ' ');

  const verbs = Array.from(
    new Set((prose.match(VERB_RE) || []).map((verb) => verb.toUpperCase()))
  );

  // A Set keeps first-seen order and handles de-duplication in one go.
  const kept = new Set();
  const candidates = [
    ...prose.split(/[^a-z0-9]+/),
    ...paths.flatMap((urlPath) => urlPath.split(/[^a-z0-9]+/)),
  ];
  for (const word of candidates) {
    if (kept.size >= MAX_CONTENT_TOKENS) break;
    if (word.length >= 2 && !STOPWORDS.has(word)) kept.add(word);
  }

  return { content: [...kept], verbs, paths };
}
|
|
108
|
+
|
|
109
|
+
// ─── Path / symbol token extraction ──────────────────────────────────
|
|
110
|
+
|
|
111
|
+
/**
 * Derive the unique lowercase tokens carried by a file path.
 *
 * The path is cut on "/", ".", "-" and "_". Each resulting segment is then
 * cut again at camelCase boundaries (a lowercase letter or digit followed by
 * an uppercase letter), so "rateLimitMiddleware.ts" contributes
 * rate, limit, middleware, ts. Afterwards every whole segment is added in
 * lowercase too, so the unsplit form ("ratelimitmiddleware") is searchable.
 *
 * @param {string} filePath - Path to tokenize; a falsy value yields [].
 * @returns {string[]} Tokens in first-seen order: camel parts first, then
 *   any whole segments not already present.
 */
function pathTokens(filePath) {
  if (!filePath) return [];

  const lowered = filePath.toLowerCase();
  const separators = /[\/\.\-_]/;
  const camelBoundary = /(?<=[a-z0-9])(?=[A-Z])/;

  const tokens = new Set();
  const keep = (piece) => {
    if (piece) tokens.add(piece);
  };

  // Pass 1: camelCase parts, taken from the original-case path because the
  // boundary detection needs the uppercase letters.
  filePath
    .split(separators)
    .filter(Boolean)
    .flatMap((segment) => segment.split(camelBoundary))
    .forEach((part) => keep(part.toLowerCase()));

  // Pass 2: every whole segment of the lowercased path.
  lowered.split(separators).forEach((segment) => keep(segment));

  return Array.from(tokens);
}
|
|
135
|
+
|
|
136
|
+
/**
 * Split an identifier into unique lowercase tokens.
 *
 * The name is cut at camelCase boundaries (a lowercase letter or digit
 * followed by an uppercase letter) and at "_" / "-". The whole lowercased
 * name is added as well, but only when the name has no "_" or "-" and no
 * lowercase-letter-then-uppercase pair. In practice that extra token only
 * differs from the parts when the sole boundary follows a digit, e.g.
 * "v2Api" yields v2, api, v2api.
 *
 * @param {string} name - Identifier to tokenize; a falsy value yields [].
 * @returns {string[]} Tokens in first-seen order.
 */
function camelTokens(name) {
  if (!name) return [];

  const tokens = new Set(
    name
      .split(/(?<=[a-z0-9])(?=[A-Z])|[_\-]/)
      .map((part) => part.toLowerCase())
      .filter((part) => part !== '')
  );

  const hasSeparator = /[_\-]/.test(name);
  const hasLowerUpperPair = /[a-z][A-Z]/.test(name);
  const whole = name.toLowerCase();
  if (whole && !hasSeparator && !hasLowerUpperPair) tokens.add(whole);

  return [...tokens];
}
|
|
151
|
+
|
|
152
|
+
// ─── IDF over indexed corpus ─────────────────────────────────────────
|
|
153
|
+
|
|
154
|
+
/**
 * Return the inverse-document-frequency table for the store's index.
 *
 * This is a thin accessor over `buildCorpusIndex(store)`: it hands back the
 * `idf` member of that result. Tokens that appear in many indexed documents
 * carry a low weight and rare tokens a high one.
 *
 * @param {object} store - Index store, passed straight to buildCorpusIndex.
 * @returns {Map<string, number>} token → weight. Tokens absent from the map
 *   are treated as weight 1 by `idfWeight` at lookup time.
 */
function computeIdf(store) {
  const { idf } = buildCorpusIndex(store);
  return idf;
}
|
|
165
|
+
|
|
166
|
+
// Name of the non-enumerable property under which the memoized corpus
// index is stored on the store object.
const CACHE_KEY = '__cartoChangePlanCache';

/**
 * buildCorpusIndex(store)
 *   → { idf:     Map<token, weight>,
 *       files:   [{ id, path, language, tokens, tokenSet }],
 *       symbols: [{ name, path, tokenSet }] }
 *
 * Tokenizes every indexed file path (pathTokens) and every exported symbol
 * name (camelTokens), treats each as one "document", and derives a smoothed
 * IDF weight per token: ln((N + 1) / (df + 1)) + 1, where N is the number of
 * documents and df the number of documents containing the token.
 *
 * The result is memoized on the store object, stamped with the store's
 * `last_full_sync` meta value, and reused while that stamp is unchanged.
 * According to the original notes, re-indexing creates a fresh store
 * instance, so the cache does not outlive the index it was built from.
 *
 * All store access is best-effort: a throwing store yields an empty (or
 * partial) index instead of an exception. Such a degraded result is NOT
 * memoized, so the next call retries rather than serving an empty corpus
 * for the lifetime of the store.
 *
 * @param {object} store - Index store; expected to offer getAllFiles() and
 *   optionally getMeta(key) and a `db` handle with prepare(sql).all().
 * @returns {{idf: Map<string, number>, files: object[], symbols: object[]}}
 */
function buildCorpusIndex(store) {
  if (!store) return { idf: new Map(), files: [], symbols: [] };

  // `last_full_sync` is the coarse cache stamp: a full re-sync changes it
  // and thereby invalidates whatever was memoized before.
  let stamp = '';
  try { stamp = (store.getMeta && store.getMeta('last_full_sync')) || ''; } catch {}

  const cached = store[CACHE_KEY];
  if (cached && cached.stamp === stamp) return cached.value;

  // Set when any store read throws; suppresses memoization further down.
  let loadFailed = false;
  const docs = [];

  const files = [];
  let allFiles = [];
  try {
    // `|| []` guards against a store that reports "no files" as null or
    // undefined, which would otherwise crash the for...of below.
    allFiles = store.getAllFiles() || [];
  } catch {
    allFiles = [];
    loadFailed = true;
  }
  for (const f of allFiles) {
    if (!f || !f.path) continue;
    const tokens = pathTokens(f.path);
    const tokenSet = new Set(tokens);
    files.push({ id: f.id, path: f.path, language: f.language, tokens, tokenSet });
    docs.push(tokens);
  }

  const symbols = [];
  let symRows = [];
  try {
    if (store.db) {
      symRows = store.db.prepare(`
        SELECT s.name, f.path
        FROM symbols s JOIN files f ON s.file_id = f.id
        WHERE s.exported = 1
      `).all() || [];
    }
  } catch {
    symRows = [];
    loadFailed = true;
  }
  for (const s of symRows) {
    if (!s || !s.name) continue;
    const tokens = camelTokens(s.name);
    symbols.push({ name: s.name, path: s.path, tokenSet: new Set(tokens) });
    docs.push(tokens);
  }

  // Document frequency: in how many documents does each token occur?
  const df = new Map();
  for (const tokens of docs) {
    for (const t of new Set(tokens)) {
      df.set(t, (df.get(t) || 0) + 1);
    }
  }
  const N = docs.length || 1;
  const idf = new Map();
  for (const [t, n] of df) {
    idf.set(t, Math.log((N + 1) / (n + 1)) + 1);
  }

  const value = { idf, files, symbols };
  if (!loadFailed) {
    // Stash the cache on the store. Non-enumerable so it doesn't leak
    // through Object.keys / JSON.stringify if the store is serialized.
    try {
      Object.defineProperty(store, CACHE_KEY, {
        value: { stamp, value },
        writable: true, configurable: true, enumerable: false
      });
    } catch {}
  }
  return value;
}
|
|
242
|
+
|
|
243
|
+
/**
 * Look up a token's IDF weight, falling back to the neutral weight 1.
 *
 * @param {Map<string, number>|null|undefined} idf - Weight table.
 * @param {string} token - Token to look up.
 * @returns {number} The stored weight when the table exists and has an
 *   entry for the token; otherwise 1.
 */
function idfWeight(idf, token) {
  return idf && idf.has(token) ? idf.get(token) : 1;
}
|
|
247
|
+
|
|
248
|
+
// ─── Anchor selection ────────────────────────────────────────────────
|
|
249
|
+
|
|
250
|
+
/**
 * Choose the files an intent most plausibly starts from ("anchors").
 *
 * Candidates come from four tiers, each with its own score:
 *   A. routes   — 100 when a route matches a URL path from the intent;
 *                 60 × IDF when (and only when the intent carries no URL
 *                 path) a route matches a content token of length >= 3.
 *                 Routes are dropped when the intent names HTTP verbs and
 *                 the route's method is not among them.
 *   B. files    — per content token: 30 × IDF for an exact path-token hit,
 *                 else 10 × IDF when the token is a strict prefix of a path
 *                 token (tokens of length >= 4, or length 3 if in ABBREV3).
 *   C. symbols  — 25 × IDF per content token found in an exported symbol.
 *   D. domains  — flat 15 for up to three files of every domain whose name
 *                 equals, contains, or is contained in a content token.
 *
 * Candidates are then collapsed to one entry per file: the highest-scoring
 * candidate wins and the distinct reasons of the others are appended.
 *
 * @param {object} store - Index store (searchRoutes, getDomainsList,
 *   getDomain; also read by buildCorpusIndex). Store failures are treated
 *   as "no results".
 * @param {{content: string[], verbs: string[], paths: string[]}} tokens -
 *   Output of tokenize().
 * @param {Map<string, number>} idf - Weight table used via idfWeight().
 * @param {number} [maxAnchors=8] - Maximum number of anchors returned.
 * @returns {object[]} Anchors sorted by score descending, each shaped
 *   `{ kind, value, file, score, reason, reasons }` where `reason` is
 *   `reasons` joined with " | ".
 */
function selectAnchors(store, tokens, idf, maxAnchors = 8) {
  const candidates = [];
  // The corpus index is memoized on the store, so this is cheap on reuse.
  const { files: indexedFiles, symbols: indexedSymbols } = buildCorpusIndex(store);

  // ── Tier A — route path/method ────────────────────────────────────
  const seenRoutes = new Set();
  const verbAllowed = (method) =>
    tokens.verbs.length === 0 || tokens.verbs.includes(method);

  // Shared by both route passes. `scoreOf` is a thunk so the weight is only
  // computed for routes that are actually accepted.
  const addRouteMatches = (query, scoreOf, reason) => {
    let matches = [];
    try { matches = store.searchRoutes(query) || []; } catch { matches = []; }
    for (const route of matches) {
      const routeKey = `${route.method} ${route.path} ${route.file}`;
      if (seenRoutes.has(routeKey)) continue;
      // Marked as seen before the verb filter, so a route rejected once is
      // not reconsidered for a later query.
      seenRoutes.add(routeKey);
      if (!verbAllowed(route.method)) continue;
      candidates.push({
        kind: 'route',
        value: `${route.method} ${route.path}`,
        file: route.file,
        score: scoreOf(),
        reason
      });
    }
  };

  for (const urlPath of tokens.paths) {
    addRouteMatches(urlPath, () => 100, `route path matches "${urlPath}"`);
  }

  // Without an explicit "/path", fall back to content tokens — catches
  // intents like "users endpoint".
  if (tokens.paths.length === 0 && tokens.content.length > 0) {
    for (const word of tokens.content) {
      if (word.length < 3) continue; // avoid 2-char route flooding
      addRouteMatches(
        word,
        () => 60 * idfWeight(idf, word),
        `route path contains "${word}"`
      );
    }
  }

  // ── Tier B — file path tokens (IDF-weighted) ──────────────────────
  for (const entry of indexedFiles) {
    const exact = [];
    const prefixed = [];
    let total = 0;
    for (const word of tokens.content) {
      if (entry.tokenSet.has(word)) {
        total += 30 * idfWeight(idf, word);
        exact.push(word);
        continue;
      }
      // Prefix fallback — e.g. "sql" ⊂ "sqlite", "auth" ⊂ "authentication".
      // Three-character tokens must be allow-listed to keep noise out.
      const prefixEligible =
        word.length >= 4 || (word.length === 3 && ABBREV3.has(word));
      if (!prefixEligible) continue;
      const longer = entry.tokens.find(
        (pt) => pt.length > word.length && pt.startsWith(word)
      );
      if (longer) {
        total += 10 * idfWeight(idf, word);
        prefixed.push(`${word}~${longer}`);
      }
    }
    if (!(total > 0)) continue;

    const why = [];
    if (exact.length) why.push(`path tokens match: ${exact.join(', ')}`);
    if (prefixed.length) why.push(`prefix match: ${prefixed.join(', ')}`);
    candidates.push({
      kind: 'file',
      value: entry.path,
      file: entry.path,
      score: total,
      reason: why.join('; ')
    });
  }

  // ── Tier C — exported symbol names (camelCase split + IDF) ────────
  for (const sym of indexedSymbols) {
    const found = [];
    let total = 0;
    for (const word of tokens.content) {
      if (!sym.tokenSet.has(word)) continue;
      total += 25 * idfWeight(idf, word);
      found.push(word);
    }
    if (!(total > 0)) continue;
    candidates.push({
      kind: 'symbol',
      value: sym.name,
      file: sym.path,
      score: total,
      reason: `symbol "${sym.name}" contains: ${found.join(', ')}`
    });
  }

  // ── Tier D — domain name match ────────────────────────────────────
  let domainList = [];
  try { domainList = store.getDomainsList() || []; } catch { domainList = []; }
  for (const dom of domainList) {
    const lowered = (dom.name || '').toLowerCase();
    if (!lowered) continue;
    const matched = tokens.content.filter(
      (word) => lowered === word || lowered.includes(word) || word.includes(lowered)
    );
    if (matched.length === 0) continue;

    let detail = null;
    try { detail = store.getDomain(dom.name); } catch { detail = null; }
    if (!detail) continue;

    for (const file of (detail.files || []).slice(0, 3)) {
      candidates.push({
        kind: 'domain',
        value: dom.name,
        file,
        score: 15,
        reason: `domain "${dom.name}" matches: ${matched.join(', ')}`
      });
    }
  }

  // ── Dedupe by file: keep highest-scoring entry, accumulate reasons ─
  const byScoreDesc = (left, right) => right.score - left.score;
  candidates.sort(byScoreDesc);

  const perFile = new Map();
  for (const cand of candidates) {
    const kept = perFile.get(cand.file);
    if (kept === undefined) {
      perFile.set(cand.file, { ...cand, reasons: [cand.reason] });
    } else if (!kept.reasons.includes(cand.reason)) {
      kept.reasons.push(cand.reason);
    }
  }

  const selected = Array.from(perFile.values())
    .sort(byScoreDesc)
    .slice(0, maxAnchors);

  // Keep a single flattened `reason` string alongside `reasons`.
  for (const anchor of selected) {
    anchor.reason = anchor.reasons.join(' | ');
  }
  return selected;
}
|
|
405
|
+
|
|
406
|
+
// ─── Graph expansion ─────────────────────────────────────────────────
|
|
407
|
+
|
|
408
|
+
/**
 * Grow the anchor set along the import graph and summarise the result.
 *
 * For every anchor file the 1-hop neighbourhood is fetched
 * (store.getNeighbors): edges leaving the anchor give forward dependencies,
 * edges entering it give backward dependencies (callers), and neighbour
 * nodes whose `domain` differs from the anchor's domain are recorded as
 * cross-domain edges (first occurrence per from→to pair).
 *
 * The transitive blast radius (store.getBlastRadius) is collected for the
 * first five anchors only, keeping the smallest hop distance per file and
 * leaving out files that are anchors themselves.
 *
 * Every store call is best-effort; a throwing call contributes nothing.
 *
 * @param {object} store - Index store (getNeighbors, getDomainForFile,
 *   getBlastRadius; also handed to findConventions).
 * @param {{file: string}[]} anchors - Anchors as produced by selectAnchors.
 * @param {{maxBlast?: number, maxBlastHops?: number}} [opts] - Limits;
 *   falsy values fall back to 25 entries and 5 hops respectively.
 * @returns {{forwardDeps: string[], backwardDeps: string[],
 *   blastRadius: {file: string, hop: number}[], affectedDomains: Array,
 *   crossDomainEdges: object[], conventions: object[]}}
 */
function expandGraph(store, anchors, opts = {}) {
  const blastLimit = opts.maxBlast || 25;
  const hopLimit = opts.maxBlastHops || 5;
  const anchorFiles = anchors.map((anchor) => anchor.file);
  const anchorSet = new Set(anchorFiles);

  const imported = new Set();   // forward 1-hop: files the anchors import
  const importers = new Set();  // backward 1-hop: files importing an anchor
  const boundaryEdges = [];     // cross-domain edges, de-duplicated on entry
  const boundarySeen = new Set();

  for (const anchorFile of anchorFiles) {
    let hood = { nodes: [], edges: [] };
    try { hood = store.getNeighbors(anchorFile, 1) || hood; } catch {}
    let homeDomain = null;
    try { homeDomain = store.getDomainForFile(anchorFile); } catch {}

    for (const edge of hood.edges) {
      const leavesAnchor = edge.source === anchorFile && edge.target !== anchorFile;
      const entersAnchor = edge.target === anchorFile && edge.source !== anchorFile;
      if (leavesAnchor) imported.add(edge.target);
      if (entersAnchor) importers.add(edge.source);
    }

    // Cross-domain detection needs the anchor's own domain to compare with.
    if (!homeDomain) continue;
    for (const node of hood.nodes) {
      if (node.id === anchorFile) continue;
      const neighborDomain = node.domain;
      if (!neighborDomain || neighborDomain === homeDomain) continue;
      const edgeKey = `${anchorFile}->${node.id}`;
      if (boundarySeen.has(edgeKey)) continue;
      boundarySeen.add(edgeKey);
      boundaryEdges.push({
        from: anchorFile,
        to: node.id,
        fromDomain: homeDomain,
        toDomain: neighborDomain
      });
    }
  }

  // Transitive blast radius — merge per-anchor results, nearest hop wins.
  const nearestHop = new Map();
  for (const anchorFile of anchorFiles.slice(0, 5)) {
    let reached = [];
    try { reached = store.getBlastRadius(anchorFile, hopLimit) || []; } catch {}
    for (const hit of reached) {
      if (anchorSet.has(hit.file)) continue; // anchors aren't blast targets
      const best = nearestHop.get(hit.file);
      if (best === undefined || hit.hop_distance < best) {
        nearestHop.set(hit.file, hit.hop_distance);
      }
    }
  }
  const blastRadius = Array.from(nearestHop, ([file, hop]) => ({ file, hop }))
    .sort((a, b) => a.hop - b.hop || a.file.localeCompare(b.file))
    .slice(0, blastLimit);

  // Affected domains — anchors plus their 1-hop neighbourhood.
  const affected = new Set();
  for (const file of new Set([...anchorFiles, ...imported, ...importers])) {
    let owner = null;
    try { owner = store.getDomainForFile(file); } catch {}
    if (owner) affected.add(owner);
  }

  // Conventions — peer files from the affected domains.
  const conventions = findConventions(store, anchorFiles, [...affected]);

  return {
    forwardDeps: [...imported].sort(),
    backwardDeps: [...importers].sort(),
    blastRadius,
    affectedDomains: [...affected].sort(),
    crossDomainEdges: boundaryEdges,
    conventions
  };
}
|
|
497
|
+
|
|
498
|
+
/**
 * findConventions — peer files from the given domains that can serve as
 * examples of local conventions.
 *
 * For each of the first three domains the store's database is asked for up
 * to eight files assigned to that domain, files with routes first and then
 * by descending import count. Anchor files and files already collected are
 * skipped. Collection stops as soon as five peers have been gathered.
 *
 * The SQL statement is prepared once and re-run per domain. A failing
 * prepare yields []; a failing query for one domain just skips that domain.
 *
 * @param {object} store - Index store exposing a `db` handle with
 *   prepare(sql) returning a statement with all(param).
 * @param {string[]} anchorFiles - Paths to exclude from the result.
 * @param {string[]} domains - Domain names to draw peers from.
 * @returns {{file: string, domain: string, imports: number,
 *   routes: number}[]} At most five entries; [] when the store, its db,
 *   the anchors or the domains are missing or empty.
 */
function findConventions(store, anchorFiles, domains) {
  if (!store || !store.db || !anchorFiles.length || !domains.length) return [];

  // The statement text does not depend on the domain, so compile it once
  // instead of once per loop iteration.
  let peerQuery;
  try {
    peerQuery = store.db.prepare(`
      SELECT f.path, f.language,
      (SELECT COUNT(*) FROM imports WHERE from_file_id = f.id) as import_count,
      (SELECT COUNT(*) FROM routes WHERE file_id = f.id) as route_count
      FROM files f
      JOIN domain_assignments da ON da.file_id = f.id
      JOIN domains d ON da.domain_id = d.id
      WHERE d.name = ?
      ORDER BY (route_count > 0) DESC, import_count DESC
      LIMIT 8
    `);
  } catch {
    return [];
  }

  const anchorSet = new Set(anchorFiles);
  const seen = new Set();
  const out = [];

  for (const domain of domains.slice(0, 3)) {
    let rows = [];
    try { rows = peerQuery.all(domain) || []; } catch { rows = []; }
    for (const r of rows) {
      if (anchorSet.has(r.path) || seen.has(r.path)) continue;
      seen.add(r.path);
      out.push({
        file: r.path,
        domain,
        imports: r.import_count,
        routes: r.route_count
      });
      if (out.length >= 5) return out;
    }
  }
  return out;
}
|
|
537
|
+
|
|
538
|
+
// ─── Top-level entry: planChange ─────────────────────────────────────
|
|
539
|
+
|
|
540
|
+
/**
 * Build a structured change plan for a free-text intent.
 *
 * Pipeline: tokenize the intent → compute IDF weights → select anchors →
 * expand along the import graph. Three situations end early with an
 * otherwise empty plan whose `guidance` explains what to do next: the store
 * reports zero files, the intent yields no content tokens and no paths, or
 * no anchor matched.
 *
 * @param {object} store - Index store; a missing store or one without
 *   getFileCount() is treated as an empty index.
 * @param {*} intentRaw - The intent; coerced to a trimmed string.
 * @returns {{intent: string, tokens: object, anchors: object[],
 *   filesToTouch: string[], filesToReview: string[], blastRadius: object[],
 *   affectedDomains: Array, crossDomainEdges: object[],
 *   conventions: object[], guidance: (string|null)}}
 */
function planChange(store, intentRaw) {
  const intent = String(intentRaw || '').trim();
  const tokens = tokenize(intent);

  // All early exits share this shape and differ only in the guidance text.
  const emptyPlan = (guidance) => ({
    intent,
    tokens,
    anchors: [],
    filesToTouch: [],
    filesToReview: [],
    blastRadius: [],
    affectedDomains: [],
    crossDomainEdges: [],
    conventions: [],
    guidance
  });

  // Empty corpus / cold start — bail out cleanly.
  let fileCount = 0;
  try {
    fileCount = store && store.getFileCount ? store.getFileCount() : 0;
  } catch {
    fileCount = 0;
  }
  if (fileCount === 0) {
    return emptyPlan('Index is empty. Run `carto sync` first.');
  }

  if (tokens.content.length === 0 && tokens.paths.length === 0) {
    return emptyPlan('No searchable tokens in intent. Try a more specific phrase, or use `get_routes` / `get_domains_list` / `get_high_impact_files` to browse.');
  }

  const idf = computeIdf(store);
  const anchors = selectAnchors(store, tokens, idf);
  if (anchors.length === 0) {
    return emptyPlan('No anchor matched. Try `get_routes` to browse routes, `get_domains_list` to explore domains, or `get_high_impact_files` to see central files.');
  }

  const expansion = expandGraph(store, anchors);

  // Files to touch = anchors plus what they import, unique and sorted.
  const touched = new Set(anchors.map((anchor) => anchor.file));
  for (const dep of expansion.forwardDeps) touched.add(dep);
  const filesToTouch = [...touched].sort();

  return {
    intent,
    tokens,
    anchors,
    filesToTouch,
    filesToReview: expansion.backwardDeps,
    blastRadius: expansion.blastRadius,
    affectedDomains: expansion.affectedDomains,
    crossDomainEdges: expansion.crossDomainEdges,
    conventions: expansion.conventions,
    guidance: null
  };
}
|
|
618
|
+
|
|
619
|
+
// ─── Markdown formatter ──────────────────────────────────────────────
|
|
620
|
+
|
|
621
|
+
/**
 * Render a change plan (as returned by planChange) as Markdown.
 *
 * Sections are emitted only when they have content, in this order:
 * Relevant Routes, Relevant Symbols, Files to Touch, Files to Review
 * (Callers), Affected Domains, Blast Radius, Cross-Domain Edges, Similar
 * Patterns to Follow. A plan without anchors renders just the title and
 * its guidance text (or a generic hint when no guidance is set).
 *
 * Values placed in table cells have "|" escaped as "\|". Anchor reasons
 * are joined with " | " upstream, and an unescaped pipe would split the
 * row into extra columns.
 *
 * @param {object} plan - Plan with `intent`, `anchors`, and the optional
 *   list fields filesToTouch, filesToReview, affectedDomains, blastRadius,
 *   crossDomainEdges, conventions, plus `guidance`.
 * @returns {string} Markdown text.
 */
function formatPlanMarkdown(plan) {
  // Escape pipes so a value cannot terminate its table cell early. Values
  // without a pipe are rendered exactly as plain interpolation would.
  const cell = (value) => String(value).replace(/\|/g, '\\|');

  const lines = [`# Change Plan: "${plan.intent}"\n`];

  // Empty / fallback case
  if (!plan.anchors || plan.anchors.length === 0) {
    if (plan.guidance) {
      lines.push(plan.guidance);
    } else {
      lines.push('_No matching routes or files found for this intent._');
      lines.push('Try `get_routes` to browse all routes, or `get_domains_list` to explore by domain.');
    }
    return lines.join('\n');
  }

  // ── Relevant Routes ───────────────────────────────────────────────
  const routeAnchors = plan.anchors.filter((a) => a.kind === 'route');
  if (routeAnchors.length > 0) {
    lines.push('## Relevant Routes\n');
    lines.push('| Method | Path | File | Why |');
    lines.push('|--------|------|------|-----|');
    for (const a of routeAnchors.slice(0, 8)) {
      // value = "METHOD /path"
      const space = a.value.indexOf(' ');
      const method = space > 0 ? a.value.slice(0, space) : '';
      const p = space > 0 ? a.value.slice(space + 1) : a.value;
      lines.push(`| ${cell(method)} | ${cell(p)} | \`${cell(a.file)}\` | ${cell(a.reason)} |`);
    }
    lines.push('');
  }

  // Symbol anchors get their own subsection so users see WHY a file was
  // chosen even when it has no route.
  const symbolAnchors = plan.anchors.filter((a) => a.kind === 'symbol');
  if (symbolAnchors.length > 0) {
    lines.push('## Relevant Symbols\n');
    lines.push('| Symbol | File | Why |');
    lines.push('|--------|------|-----|');
    for (const a of symbolAnchors.slice(0, 8)) {
      lines.push(`| \`${cell(a.value)}\` | \`${cell(a.file)}\` | ${cell(a.reason)} |`);
    }
    lines.push('');
  }

  // ── Files to Touch (anchors + forward 1-hop) ──────────────────────
  if (plan.filesToTouch && plan.filesToTouch.length > 0) {
    lines.push('## Files to Touch\n');
    const anchorFiles = new Set(plan.anchors.map((a) => a.file));
    for (const f of plan.filesToTouch) {
      const tag = anchorFiles.has(f) ? ' _(anchor)_' : ' _(forward import)_';
      lines.push(`- \`${f}\`${tag}`);
    }
    lines.push('');
  }

  // ── Files to Review (Callers) — only when non-empty ───────────────
  if (plan.filesToReview && plan.filesToReview.length > 0) {
    lines.push('## Files to Review (Callers)\n');
    lines.push('_These files import an anchor — verify their behavior after the change:_\n');
    for (const f of plan.filesToReview) lines.push(`- \`${f}\``);
    lines.push('');
  }

  // ── Affected Domains ──────────────────────────────────────────────
  if (plan.affectedDomains && plan.affectedDomains.length > 0) {
    lines.push('## Affected Domains\n');
    lines.push(plan.affectedDomains.map((d) => `**${d}**`).join(', '));
    lines.push('');
  }

  // ── Blast Radius ──────────────────────────────────────────────────
  if (plan.blastRadius && plan.blastRadius.length > 0) {
    lines.push('## Blast Radius (files that may break)\n');
    lines.push('| File | Hops |');
    lines.push('|------|------|');
    for (const b of plan.blastRadius) {
      lines.push(`| \`${cell(b.file)}\` | ${cell(b.hop)} |`);
    }
    lines.push('');
  }

  // ── Cross-Domain Edges — only when non-empty ──────────────────────
  if (plan.crossDomainEdges && plan.crossDomainEdges.length > 0) {
    lines.push('## Cross-Domain Edges\n');
    lines.push('_Anchors touch files across domain boundaries. Audit these carefully:_\n');
    lines.push('| From | From Domain | To | To Domain |');
    lines.push('|------|-------------|-----|----------|');
    for (const e of plan.crossDomainEdges) {
      lines.push(`| \`${cell(e.from)}\` | ${cell(e.fromDomain)} | \`${cell(e.to)}\` | ${cell(e.toDomain)} |`);
    }
    lines.push('');
  }

  // ── Similar Patterns to Follow ────────────────────────────────────
  if (plan.conventions && plan.conventions.length > 0) {
    lines.push('## Similar Patterns to Follow\n');
    lines.push('_Same-domain peers — use these as conventions:_\n');
    for (const c of plan.conventions) {
      lines.push(`- \`${c.file}\` _(${c.domain}, ${c.imports} imports, ${c.routes} routes)_`);
    }
    lines.push('');
  }

  return lines.join('\n').trimEnd();
}
|
|
725
|
+
|
|
726
|
+
module.exports = {
|
|
727
|
+
planChange,
|
|
728
|
+
formatPlanMarkdown,
|
|
729
|
+
// Exposed for unit tests
|
|
730
|
+
tokenize,
|
|
731
|
+
pathTokens,
|
|
732
|
+
camelTokens,
|
|
733
|
+
computeIdf,
|
|
734
|
+
selectAnchors,
|
|
735
|
+
expandGraph,
|
|
736
|
+
STOPWORDS
|
|
737
|
+
};
|