carto-md 2.0.1 → 2.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,737 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * change-plan.js — pure module backing the `get_change_plan` MCP tool.
5
+ *
6
+ * Replaces the prior substring-grep implementation with a structural
7
+ * pipeline that uses the real SQLite graph:
8
+ *
9
+ * tokenize(intent) ──► tokens { content, verbs, paths }
10
+ * └──► IDF over indexed corpus (basenames + symbol names)
11
+ * └──► 4-tier anchor selection
12
+ * A. route path/method (searchRoutes)
13
+ * B. file path tokens (pathTokens × IDF)
14
+ * C. exported symbol names (camelTokens × IDF)
15
+ * D. domain name match (domain assignments)
16
+ * └──► graph expansion
17
+ * forward 1-hop imports (getNeighbors)
18
+ * backward 1-hop imports
19
+ * transitive blast radius (getBlastRadius)
20
+ * cross-domain edges
21
+ * conventions (same-domain peers)
22
+ * └──► structured plan
23
+ *
24
+ * formatPlanMarkdown(plan) renders the plan with the historical section
25
+ * headers preserved (`## Relevant Routes`, `## Files to Touch`,
26
+ * `## Affected Domains`, `## Blast Radius`, `## Similar Patterns to Follow`)
27
+ * plus new optional sections `## Files to Review (Callers)` and
28
+ * `## Cross-Domain Edges` when non-empty.
29
+ */
30
+
31
+ // ─── Tokenization ─────────────────────────────────────────────────────
32
+
// Filler words from English prose and dev chatter that would match almost
// everything if they were kept as content tokens. Short but meaningful dev
// tokens ("log", "url", "csv", "jwt", "ssl", "mcp", "sql", "api", "git",
// "env") are deliberately NOT listed here: they are demoted through IDF
// weighting rather than dropped by this list or by a length cutoff.
const STOPWORDS = new Set(`
  the and for with from into that this your you have
  will would should could can add fix make use using
  when then want need get set put let what how
  why who where these those some also only just
  every each any all they their them our its it to
  in on of as at is an be do if or no not but
  by via about around my me we us was were been
  so such than too very more most less least
`.trim().split(/\s+/));

// HTTP methods recognised in intent text, and the word-bounded matcher
// derived from them. The regex is global so String#match yields every hit.
const HTTP_VERBS = ['get', 'post', 'put', 'patch', 'delete', 'head', 'options'];
const VERB_RE = new RegExp(String.raw`\b(${HTTP_VERBS.join('|')})\b`, 'g');

// URL-path-like runs: a slash followed by path characters, which includes
// `{param}` and `:param` style placeholders.
const PATH_RE = /\/[a-z0-9_\-\/{}:]+/g;

// Dev abbreviations that are allowed to prefix-match longer path tokens
// even though they are only 3 characters long. Tokens of length 4+ always
// take part in prefix matching; 3-char tokens must appear here, otherwise
// e.g. "sit" would prefix-match "sitter" in "tree-sitter-parser.js" and
// produce a false-positive anchor.
const ABBREV3 = new Set(`
  sql jwt mcp csv ssl api env orm
  dns jpa tcp udp xml oop cli cdn
  aws gcp kms iam s3 rpc dao dto
`.trim().split(/\s+/));

// Upper bound on the number of content tokens tokenize() will emit.
const MAX_CONTENT_TOKENS = 50;
63
+
/**
 * Split a free-text intent into the three signals the planner consumes.
 *
 * Returns `{ content, verbs, paths }` where:
 *   - `paths`   are the unique URL-path-like runs found in the lowercased
 *               intent, in order of first appearance;
 *   - `verbs`   are the unique HTTP verbs (upper-cased) found in the text
 *               that remains once those paths are blanked out, so "post"
 *               inside "/api/post" is not reported as a verb;
 *   - `content` are unique alphanumeric tokens of length >= 2 that are not
 *               stopwords, taken first from the path-stripped text and then
 *               from the segments of each path, capped at
 *               MAX_CONTENT_TOKENS.
 *
 * Anything that is not a non-empty string yields three empty arrays.
 */
function tokenize(intent) {
  if (typeof intent !== 'string' || intent === '') {
    return { content: [], verbs: [], paths: [] };
  }

  const text = intent.toLowerCase();

  // Paths are captured before verb detection and removed from the text the
  // verb matcher sees.
  const paths = Array.from(new Set(text.match(PATH_RE) || []));
  const prose = text.replace(PATH_RE, ' ');

  const verbHits = prose.match(VERB_RE) || [];
  const verbs = Array.from(new Set(verbHits.map(hit => hit.toUpperCase())));

  // Prose words come first, then the inner segments of every path. A Set
  // gives both dedupe and first-seen ordering.
  const words = [prose, ...paths].flatMap(chunk => chunk.split(/[^a-z0-9]+/));
  const kept = new Set();
  for (const word of words) {
    if (kept.size >= MAX_CONTENT_TOKENS) break;
    if (word.length < 2 || STOPWORDS.has(word)) continue;
    kept.add(word);
  }

  return { content: [...kept], verbs, paths };
}
108
+
109
+ // ─── Path / symbol token extraction ──────────────────────────────────
110
+
/**
 * Derive lowercase search tokens from a file path.
 *
 * The path is cut on `/`, `.`, `-` and `_`. Each resulting segment
 * contributes its camelCase pieces (split where an uppercase letter follows
 * a lowercase letter or digit) and, after all of those, the whole segment
 * lowercased. So "rateLimitMiddleware.ts" yields rate, limit, middleware,
 * ts and ratelimitmiddleware.
 *
 * Returns a de-duplicated array in first-seen order; a falsy path yields [].
 */
function pathTokens(filePath) {
  if (!filePath) return [];

  const separator = /[\/\.\-_]/;
  const camelBoundary = /(?<=[a-z0-9])(?=[A-Z])/;

  // Camel pieces need the original casing to find their boundaries.
  const camelPieces = filePath
    .split(separator)
    .flatMap(segment => segment.split(camelBoundary))
    .map(piece => piece.toLowerCase());

  // Whole segments, so a plain "store" still matches when there is no
  // camel boundary to split on.
  const wholeSegments = filePath.toLowerCase().split(separator);

  const unique = new Set([...camelPieces, ...wholeSegments]);
  return [...unique].filter(Boolean);
}
135
+
/**
 * Derive lowercase search tokens from a symbol name.
 *
 * The name is split on `_`, `-` and on camelCase boundaries (an uppercase
 * letter that follows a lowercase letter or digit). The whole lowercased
 * name is appended as well, but only when the name contains no `_`/`-` and
 * no lowercase-letter→uppercase boundary — in practice that adds something
 * new only for digit-led boundaries such as "v2Api" → v2, api, v2api.
 *
 * Returns a de-duplicated array in first-seen order; a falsy name yields [].
 */
function camelTokens(name) {
  if (!name) return [];

  const pieces = name
    .split(/(?<=[a-z0-9])(?=[A-Z])|[_\-]/)
    .map(piece => piece.toLowerCase())
    .filter(Boolean);
  const tokens = new Set(pieces);

  const hasSeparator = /[_\-]/.test(name);
  const hasLetterCamel = /[a-z][A-Z]/.test(name);
  if (!hasSeparator && !hasLetterCamel) {
    const whole = name.toLowerCase();
    if (whole) tokens.add(whole);
  }

  return [...tokens];
}
151
+
152
+ // ─── IDF over indexed corpus ─────────────────────────────────────────
153
+
/**
 * IDF weights for the indexed corpus (file path tokens plus exported
 * symbol name tokens). Tokens that occur in many documents — "src",
 * "store", "file", "index" — receive a low weight, while rare ones such
 * as "rate", "throttle" or "jwt" receive a high weight.
 *
 * This is a thin accessor over buildCorpusIndex(store).
 *
 * Returns Map<token, weight>. Tokens missing from the map are treated as
 * weight 1 by the lookup helper, not here.
 */
function computeIdf(store) {
  const { idf } = buildCorpusIndex(store);
  return idf;
}
165
+
/**
 * buildCorpusIndex(store)
 *   → { idf,
 *       files:   [{ id, path, language, tokens, tokenSet }],
 *       symbols: [{ name, path, tokenSet }] }
 *
 * Every indexed file (via its path tokens) and every exported symbol (via
 * its name tokens) counts as one document; `idf` maps each token to
 * ln((N + 1) / (df + 1)) + 1 over those N documents.
 *
 * The result is memoized on the store object itself, keyed by the store's
 * `last_full_sync` meta value, so a re-sync of the same store instance
 * busts the cache. The original note on this cache: on a 5K-file repo it
 * saves ~30ms per `planChange` call, and re-indexing creates a new store
 * instance, so the cache lives only as long as the index it was built from.
 *
 * All store access is best-effort: a missing method, a thrown error or a
 * null/undefined result is treated as "no rows" rather than failing the
 * whole plan. A falsy store yields an empty, un-cached index.
 */
const CACHE_KEY = '__cartoChangePlanCache';

function buildCorpusIndex(store) {
  if (!store) return { idf: new Map(), files: [], symbols: [] };

  // Coarse cache stamp: stays '' when the store exposes no usable
  // `last_full_sync`, in which case the cache lives as long as the store.
  let stamp = '';
  try { stamp = (store.getMeta && store.getMeta('last_full_sync')) || ''; } catch {}

  const cached = store[CACHE_KEY];
  if (cached && cached.stamp === stamp) {
    return cached.value;
  }

  const docs = [];

  const files = [];
  let allFiles = [];
  // `|| []` matches the other store calls in this module: a store that
  // answers null/undefined must not blow up the for...of below.
  try { allFiles = store.getAllFiles() || []; } catch { allFiles = []; }
  for (const f of allFiles) {
    if (!f || !f.path) continue;
    const tokens = pathTokens(f.path);
    files.push({
      id: f.id,
      path: f.path,
      language: f.language,
      tokens,
      tokenSet: new Set(tokens)
    });
    docs.push(tokens);
  }

  const symbols = [];
  let symRows = [];
  try {
    if (store.db) {
      symRows = store.db.prepare(`
        SELECT s.name, f.path
        FROM symbols s JOIN files f ON s.file_id = f.id
        WHERE s.exported = 1
      `).all() || [];
    }
  } catch { symRows = []; }
  for (const s of symRows) {
    if (!s || !s.name) continue;
    const tokens = camelTokens(s.name);
    symbols.push({ name: s.name, path: s.path, tokenSet: new Set(tokens) });
    docs.push(tokens);
  }

  // Document frequency: each token counts at most once per document.
  const df = new Map();
  for (const tokens of docs) {
    for (const t of new Set(tokens)) {
      df.set(t, (df.get(t) || 0) + 1);
    }
  }
  const N = docs.length || 1;
  const idf = new Map();
  for (const [t, n] of df) {
    idf.set(t, Math.log((N + 1) / (n + 1)) + 1);
  }

  const value = { idf, files, symbols };
  // Stash the cache on the store. Non-enumerable so it doesn't leak
  // through Object.keys / JSON.stringify if the store is serialized.
  // Failure to attach (e.g. a frozen store) just means no memoization.
  try {
    Object.defineProperty(store, CACHE_KEY, {
      value: { stamp, value },
      writable: true, configurable: true, enumerable: false
    });
  } catch {}
  return value;
}
242
+
/**
 * Look up a token's IDF weight. A missing map, or a token the map does not
 * contain, falls back to the neutral weight 1.
 */
function idfWeight(idf, token) {
  const known = Boolean(idf) && idf.has(token);
  return known ? idf.get(token) : 1;
}
247
+
248
+ // ─── Anchor selection ────────────────────────────────────────────────
249
+
/**
 * Pick the files a change most likely starts from.
 *
 * Candidates are gathered from four tiers, in this order:
 *   A. routes  — searchRoutes() on every URL-path token (score 100); when
 *                the intent carried no path, on every content token of
 *                length >= 3 instead (score 60 × IDF). Routes are filtered
 *                by HTTP verb when the intent named one.
 *   B. files   — exact path-token hits (30 × IDF each) and, for tokens of
 *                length >= 4 or listed in ABBREV3, prefix hits against a
 *                longer path token (10 × IDF each).
 *   C. symbols — exported symbol name token hits (25 × IDF each).
 *   D. domains — up to 3 files of every domain whose name contains, or is
 *                contained in, a content token (score 15).
 *
 * Candidates are then collapsed per file: the highest-scoring candidate
 * wins (ties keep the earliest tier), every distinct reason is retained in
 * `reasons`, and `reason` is those reasons joined with " | ".
 *
 * Returns at most `maxAnchors` entries sorted by score descending.
 */
function selectAnchors(store, tokens, idf, maxAnchors = 8) {
  // Reuse the cached corpus index — saves ~30ms p95 on cal.com.
  const corpus = buildCorpusIndex(store);
  const candidates = [];

  // ── Tier A — route path/method ────────────────────────────────────
  const seenRoutes = new Set();
  const verbAllowed = method =>
    tokens.verbs.length === 0 || tokens.verbs.includes(method);
  // `scoreOf` is a thunk so the weight is only computed for routes that
  // are actually kept.
  const collectRoutes = (query, scoreOf, reason) => {
    let found;
    try { found = store.searchRoutes(query) || []; } catch { found = []; }
    for (const route of found) {
      const routeKey = `${route.method} ${route.path} ${route.file}`;
      if (seenRoutes.has(routeKey)) continue;
      seenRoutes.add(routeKey);
      if (!verbAllowed(route.method)) continue;
      candidates.push({
        kind: 'route',
        value: `${route.method} ${route.path}`,
        file: route.file,
        score: scoreOf(),
        reason
      });
    }
  };

  for (const urlPath of tokens.paths) {
    collectRoutes(urlPath, () => 100, `route path matches "${urlPath}"`);
  }

  // No explicit "/path" in the intent: fall back to content tokens, which
  // catches phrasing like "users endpoint".
  if (tokens.paths.length === 0 && tokens.content.length > 0) {
    for (const word of tokens.content) {
      if (word.length < 3) continue; // avoid 2-char route flooding
      collectRoutes(
        word,
        () => 60 * idfWeight(idf, word),
        `route path contains "${word}"`
      );
    }
  }

  // ── Tier B — file path tokens (IDF-weighted) ──────────────────────
  for (const file of corpus.files) {
    const exactHits = [];
    const prefixHits = [];
    let score = 0;

    for (const word of tokens.content) {
      if (file.tokenSet.has(word)) {
        score += 30 * idfWeight(idf, word);
        exactHits.push(word);
        continue;
      }
      // Prefix fallback — e.g. "sql" ⊂ "sqlite", "auth" ⊂ "authentication".
      // 3-char tokens must be on the abbreviation allowlist to avoid noise
      // (e.g. "sit" should NOT match "sitter").
      const prefixEligible =
        word.length >= 4 || (word.length === 3 && ABBREV3.has(word));
      if (!prefixEligible) continue;
      const longer = file.tokens.find(
        pt => pt.length > word.length && pt.startsWith(word)
      );
      if (longer) {
        score += 10 * idfWeight(idf, word);
        prefixHits.push(`${word}~${longer}`);
      }
    }

    if (score > 0) {
      const reason = [
        exactHits.length ? `path tokens match: ${exactHits.join(', ')}` : '',
        prefixHits.length ? `prefix match: ${prefixHits.join(', ')}` : ''
      ].filter(Boolean).join('; ');
      candidates.push({
        kind: 'file',
        value: file.path,
        file: file.path,
        score,
        reason
      });
    }
  }

  // ── Tier C — exported symbol names (camelCase split + IDF) ────────
  for (const symbol of corpus.symbols) {
    const hits = tokens.content.filter(word => symbol.tokenSet.has(word));
    const score = hits.reduce(
      (sum, word) => sum + 25 * idfWeight(idf, word),
      0
    );
    if (score > 0) {
      candidates.push({
        kind: 'symbol',
        value: symbol.name,
        file: symbol.path,
        score,
        reason: `symbol "${symbol.name}" contains: ${hits.join(', ')}`
      });
    }
  }

  // ── Tier D — domain name match ────────────────────────────────────
  let domainList;
  try { domainList = store.getDomainsList() || []; } catch { domainList = []; }
  for (const entry of domainList) {
    const lowered = (entry.name || '').toLowerCase();
    if (!lowered) continue;
    // Equality is covered by either containment direction.
    const matched = tokens.content.filter(
      word => lowered.includes(word) || word.includes(lowered)
    );
    if (matched.length === 0) continue;

    let detail = null;
    try { detail = store.getDomain(entry.name); } catch { detail = null; }
    if (!detail) continue;

    const reason = `domain "${entry.name}" matches: ${matched.join(', ')}`;
    for (const file of (detail.files || []).slice(0, 3)) {
      candidates.push({ kind: 'domain', value: entry.name, file, score: 15, reason });
    }
  }

  // ── Dedupe by file: keep highest-scoring entry, accumulate reasons ─
  const byScoreDesc = (x, y) => y.score - x.score;
  candidates.sort(byScoreDesc);
  const perFile = new Map();
  for (const candidate of candidates) {
    const kept = perFile.get(candidate.file);
    if (!kept) {
      perFile.set(candidate.file, { ...candidate, reasons: [candidate.reason] });
    } else if (!kept.reasons.includes(candidate.reason)) {
      kept.reasons.push(candidate.reason);
    }
  }

  const ranked = [...perFile.values()].sort(byScoreDesc).slice(0, maxAnchors);
  // Project a single 'reason' string for backward-compat with tests/log
  for (const anchor of ranked) {
    anchor.reason = anchor.reasons.join(' | ');
  }
  return ranked;
}
405
+
406
+ // ─── Graph expansion ─────────────────────────────────────────────────
407
+
408
+ function expandGraph(store, anchors, opts = {}) {
409
+ const maxBlast = opts.maxBlast || 25;
410
+ const maxBlastHops = opts.maxBlastHops || 5;
411
+ const anchorFiles = anchors.map(a => a.file);
412
+ const anchorSet = new Set(anchorFiles);
413
+
414
+ // Forward 1-hop — files anchors import
415
+ const forwardDeps = new Set();
416
+ // Backward 1-hop — files that import anchors
417
+ const backwardDeps = new Set();
418
+ // Cross-domain edges introduced when walking the 1-hop neighborhood
419
+ const crossDomainEdges = [];
420
+
421
+ for (const af of anchorFiles) {
422
+ let neighbors = { nodes: [], edges: [] };
423
+ try { neighbors = store.getNeighbors(af, 1) || neighbors; } catch {}
424
+ let aDomain = null;
425
+ try { aDomain = store.getDomainForFile(af); } catch {}
426
+
427
+ for (const e of neighbors.edges) {
428
+ if (e.source === af && e.target !== af) forwardDeps.add(e.target);
429
+ if (e.target === af && e.source !== af) backwardDeps.add(e.source);
430
+ }
431
+ // Cross-domain detection
432
+ if (aDomain) {
433
+ for (const n of neighbors.nodes) {
434
+ if (n.id === af) continue;
435
+ const nDomain = n.domain;
436
+ if (nDomain && nDomain !== aDomain) {
437
+ crossDomainEdges.push({
438
+ from: af,
439
+ to: n.id,
440
+ fromDomain: aDomain,
441
+ toDomain: nDomain
442
+ });
443
+ }
444
+ }
445
+ }
446
+ }
447
+
448
+ // Transitive blast radius — merge per-anchor results
449
+ const blastByFile = new Map();
450
+ for (const af of anchorFiles.slice(0, 5)) {
451
+ let radius = [];
452
+ try { radius = store.getBlastRadius(af, maxBlastHops) || []; } catch {}
453
+ for (const r of radius) {
454
+ if (anchorSet.has(r.file)) continue; // anchors aren't blast targets
455
+ const cur = blastByFile.get(r.file);
456
+ if (cur === undefined || r.hop_distance < cur) {
457
+ blastByFile.set(r.file, r.hop_distance);
458
+ }
459
+ }
460
+ }
461
+ const blastRadius = [...blastByFile.entries()]
462
+ .map(([file, hop]) => ({ file, hop }))
463
+ .sort((a, b) => a.hop - b.hop || a.file.localeCompare(b.file))
464
+ .slice(0, maxBlast);
465
+
466
+ // Affected domains — anchors + 1-hop neighborhood
467
+ const affectedDomains = new Set();
468
+ const allInScope = new Set([...anchorFiles, ...forwardDeps, ...backwardDeps]);
469
+ for (const af of allInScope) {
470
+ let d = null;
471
+ try { d = store.getDomainForFile(af); } catch {}
472
+ if (d) affectedDomains.add(d);
473
+ }
474
+
475
+ // Conventions — same-domain peers with similar import/route count
476
+ const conventions = findConventions(store, anchorFiles, [...affectedDomains]);
477
+
478
+ // Dedupe cross-domain edges
479
+ const cdSeen = new Set();
480
+ const crossDomainDedup = [];
481
+ for (const e of crossDomainEdges) {
482
+ const k = `${e.from}->${e.to}`;
483
+ if (cdSeen.has(k)) continue;
484
+ cdSeen.add(k);
485
+ crossDomainDedup.push(e);
486
+ }
487
+
488
+ return {
489
+ forwardDeps: [...forwardDeps].sort(),
490
+ backwardDeps: [...backwardDeps].sort(),
491
+ blastRadius,
492
+ affectedDomains: [...affectedDomains].sort(),
493
+ crossDomainEdges: crossDomainDedup,
494
+ conventions
495
+ };
496
+ }
497
+
/**
 * findConventions — same-domain peer files to use as examples.
 *
 * For each of the first 3 `domains`, queries the files assigned to that
 * domain (files that declare routes first, then by import count
 * descending, 8 per domain) and collects those that are not anchors and
 * have not been collected already.
 *
 * Returns up to 5 entries of `{ file, domain, imports, routes }`. Returns
 * [] when there is no store / db handle, no anchors, or no domains, and
 * when the statement cannot be prepared. Mirrors small bits of
 * get_similar_patterns.
 */
function findConventions(store, anchorFiles, domains) {
  if (!store || !store.db || !anchorFiles.length || !domains.length) return [];

  // The statement is identical for every domain, so prepare it once and
  // only re-bind the domain name per iteration.
  let peersInDomain;
  try {
    peersInDomain = store.db.prepare(`
      SELECT f.path, f.language,
        (SELECT COUNT(*) FROM imports WHERE from_file_id = f.id) as import_count,
        (SELECT COUNT(*) FROM routes WHERE file_id = f.id) as route_count
      FROM files f
      JOIN domain_assignments da ON da.file_id = f.id
      JOIN domains d ON da.domain_id = d.id
      WHERE d.name = ?
      ORDER BY (route_count > 0) DESC, import_count DESC
      LIMIT 8
    `);
  } catch {
    return [];
  }

  // Seeded with the anchors so one set answers both "is an anchor" and
  // "already emitted".
  const excluded = new Set(anchorFiles);
  const out = [];

  for (const domain of domains.slice(0, 3)) {
    let rows = [];
    try { rows = peersInDomain.all(domain) || []; } catch { rows = []; }
    for (const r of rows) {
      if (excluded.has(r.path)) continue;
      excluded.add(r.path);
      out.push({
        file: r.path,
        domain,
        imports: r.import_count,
        routes: r.route_count
      });
      if (out.length >= 5) return out;
    }
  }
  return out;
}
537
+
538
+ // ─── Top-level entry: planChange ─────────────────────────────────────
539
+
/**
 * Build a structured change plan for a free-text intent.
 *
 * Pipeline: tokenize the intent → select anchors → expand them through
 * the graph. Three situations short-circuit to an otherwise empty plan
 * carrying a `guidance` string instead:
 *   - the store is missing or reports zero files;
 *   - the intent produced neither content tokens nor paths;
 *   - no anchor matched.
 *
 * On success `filesToTouch` is the sorted union of the anchor files and
 * their forward 1-hop imports, `filesToReview` is the backward 1-hop set,
 * and `guidance` is null.
 */
function planChange(store, intentRaw) {
  const intent = String(intentRaw || '').trim();
  const tokens = tokenize(intent);

  // Every bail-out shares the same shape; only the guidance text differs.
  const emptyPlan = guidance => ({
    intent,
    tokens,
    anchors: [],
    filesToTouch: [],
    filesToReview: [],
    blastRadius: [],
    affectedDomains: [],
    crossDomainEdges: [],
    conventions: [],
    guidance
  });

  // Empty corpus / cold start — bail out cleanly
  let fileCount;
  try {
    fileCount = store && store.getFileCount ? store.getFileCount() : 0;
  } catch {
    fileCount = 0;
  }
  if (fileCount === 0) {
    return emptyPlan('Index is empty. Run `carto sync` first.');
  }

  const hasSearchableInput = tokens.content.length > 0 || tokens.paths.length > 0;
  if (!hasSearchableInput) {
    return emptyPlan('No searchable tokens in intent. Try a more specific phrase, or use `get_routes` / `get_domains_list` / `get_high_impact_files` to browse.');
  }

  const anchors = selectAnchors(store, tokens, computeIdf(store));
  if (anchors.length === 0) {
    return emptyPlan('No anchor matched. Try `get_routes` to browse routes, `get_domains_list` to explore domains, or `get_high_impact_files` to see central files.');
  }

  const expansion = expandGraph(store, anchors);

  const touched = new Set(anchors.map(a => a.file));
  for (const dep of expansion.forwardDeps) touched.add(dep);
  const filesToTouch = [...touched].sort();

  return {
    intent,
    tokens,
    anchors,
    filesToTouch,
    filesToReview: expansion.backwardDeps,
    blastRadius: expansion.blastRadius,
    affectedDomains: expansion.affectedDomains,
    crossDomainEdges: expansion.crossDomainEdges,
    conventions: expansion.conventions,
    guidance: null
  };
}
618
+
619
+ // ─── Markdown formatter ──────────────────────────────────────────────
620
+
/**
 * Render a plan object (as produced by planChange) to Markdown.
 *
 * With no anchors the output is just the title followed by the plan's
 * `guidance`, or a generic "nothing found" hint when guidance is absent.
 * Otherwise each of these sections is emitted when it has content, in this
 * order: Relevant Routes, Relevant Symbols, Files to Touch, Files to
 * Review (Callers), Affected Domains, Blast Radius, Cross-Domain Edges,
 * Similar Patterns to Follow. Route and symbol tables show at most 8 rows.
 *
 * Values placed inside table rows are escaped so that a literal `|` — such
 * as the " | " separator used when several reasons are merged into one
 * anchor — stays inside its cell instead of opening extra columns.
 *
 * Returns the Markdown string, with trailing whitespace trimmed in the
 * non-empty case.
 */
function formatPlanMarkdown(plan) {
  // Markdown tables are pipe-delimited and single-line per row, so pipes
  // are backslash-escaped and line breaks are flattened to spaces.
  const cell = value =>
    String(value).replace(/\|/g, '\\|').replace(/\r?\n/g, ' ');

  const lines = [`# Change Plan: "${plan.intent}"\n`];

  // Empty / fallback case
  if (!plan.anchors || plan.anchors.length === 0) {
    if (plan.guidance) {
      lines.push(plan.guidance);
    } else {
      lines.push('_No matching routes or files found for this intent._');
      lines.push('Try `get_routes` to browse all routes, or `get_domains_list` to explore by domain.');
    }
    return lines.join('\n');
  }

  // ── Relevant Routes ───────────────────────────────────────────────
  const routeAnchors = plan.anchors.filter(a => a.kind === 'route');
  if (routeAnchors.length > 0) {
    lines.push('## Relevant Routes\n');
    lines.push('| Method | Path | File | Why |');
    lines.push('|--------|------|------|-----|');
    for (const a of routeAnchors.slice(0, 8)) {
      // value = "METHOD /path"; without a space the whole value is the path
      const space = a.value.indexOf(' ');
      const method = space > 0 ? a.value.slice(0, space) : '';
      const p = space > 0 ? a.value.slice(space + 1) : a.value;
      lines.push(`| ${cell(method)} | ${cell(p)} | \`${cell(a.file)}\` | ${cell(a.reason)} |`);
    }
    lines.push('');
  }

  // Symbol anchors get their own subsection so users see WHY a file was
  // chosen even when it has no route.
  const symbolAnchors = plan.anchors.filter(a => a.kind === 'symbol');
  if (symbolAnchors.length > 0) {
    lines.push('## Relevant Symbols\n');
    lines.push('| Symbol | File | Why |');
    lines.push('|--------|------|-----|');
    for (const a of symbolAnchors.slice(0, 8)) {
      lines.push(`| \`${cell(a.value)}\` | \`${cell(a.file)}\` | ${cell(a.reason)} |`);
    }
    lines.push('');
  }

  // ── Files to Touch (anchors + forward 1-hop) ──────────────────────
  if (plan.filesToTouch && plan.filesToTouch.length > 0) {
    lines.push('## Files to Touch\n');
    const anchorFiles = new Set(plan.anchors.map(a => a.file));
    for (const f of plan.filesToTouch) {
      const tag = anchorFiles.has(f) ? ' _(anchor)_' : ' _(forward import)_';
      lines.push(`- \`${f}\`${tag}`);
    }
    lines.push('');
  }

  // ── Files to Review (Callers) — only when non-empty ───────────────
  if (plan.filesToReview && plan.filesToReview.length > 0) {
    lines.push('## Files to Review (Callers)\n');
    lines.push('_These files import an anchor — verify their behavior after the change:_\n');
    for (const f of plan.filesToReview) lines.push(`- \`${f}\``);
    lines.push('');
  }

  // ── Affected Domains ──────────────────────────────────────────────
  if (plan.affectedDomains && plan.affectedDomains.length > 0) {
    lines.push('## Affected Domains\n');
    lines.push(plan.affectedDomains.map(d => `**${d}**`).join(', '));
    lines.push('');
  }

  // ── Blast Radius ──────────────────────────────────────────────────
  if (plan.blastRadius && plan.blastRadius.length > 0) {
    lines.push('## Blast Radius (files that may break)\n');
    lines.push('| File | Hops |');
    lines.push('|------|------|');
    for (const b of plan.blastRadius) {
      lines.push(`| \`${cell(b.file)}\` | ${cell(b.hop)} |`);
    }
    lines.push('');
  }

  // ── Cross-Domain Edges — only when non-empty ──────────────────────
  if (plan.crossDomainEdges && plan.crossDomainEdges.length > 0) {
    lines.push('## Cross-Domain Edges\n');
    lines.push('_Anchors touch files across domain boundaries. Audit these carefully:_\n');
    lines.push('| From | From Domain | To | To Domain |');
    lines.push('|------|-------------|-----|----------|');
    for (const e of plan.crossDomainEdges) {
      lines.push(`| \`${cell(e.from)}\` | ${cell(e.fromDomain)} | \`${cell(e.to)}\` | ${cell(e.toDomain)} |`);
    }
    lines.push('');
  }

  // ── Similar Patterns to Follow ────────────────────────────────────
  if (plan.conventions && plan.conventions.length > 0) {
    lines.push('## Similar Patterns to Follow\n');
    lines.push('_Same-domain peers — use these as conventions:_\n');
    for (const c of plan.conventions) {
      lines.push(`- \`${c.file}\` _(${c.domain}, ${c.imports} imports, ${c.routes} routes)_`);
    }
    lines.push('');
  }

  return lines.join('\n').trimEnd();
}
725
+
726
+ module.exports = {
727
+ planChange,
728
+ formatPlanMarkdown,
729
+ // Exposed for unit tests
730
+ tokenize,
731
+ pathTokens,
732
+ camelTokens,
733
+ computeIdf,
734
+ selectAnchors,
735
+ expandGraph,
736
+ STOPWORDS
737
+ };