ucn 3.8.23 → 3.8.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. package/.claude/skills/ucn/SKILL.md +127 -12
  2. package/README.md +152 -156
  3. package/cli/index.js +363 -37
  4. package/core/analysis.js +936 -32
  5. package/core/bridge.js +1095 -0
  6. package/core/brief.js +408 -0
  7. package/core/cache.js +105 -5
  8. package/core/callers.js +72 -18
  9. package/core/check.js +200 -0
  10. package/core/discovery.js +57 -34
  11. package/core/entrypoints.js +638 -4
  12. package/core/execute.js +304 -5
  13. package/core/git-enrich.js +130 -0
  14. package/core/graph.js +24 -2
  15. package/core/output/analysis.js +157 -25
  16. package/core/output/brief.js +100 -0
  17. package/core/output/check.js +79 -0
  18. package/core/output/doctor.js +85 -0
  19. package/core/output/endpoints.js +239 -0
  20. package/core/output/extraction.js +2 -0
  21. package/core/output/find.js +126 -39
  22. package/core/output/graph.js +48 -15
  23. package/core/output/refactoring.js +103 -5
  24. package/core/output/reporting.js +63 -23
  25. package/core/output/search.js +110 -17
  26. package/core/output/shared.js +56 -2
  27. package/core/output.js +4 -0
  28. package/core/parser.js +8 -2
  29. package/core/project.js +39 -3
  30. package/core/registry.js +30 -14
  31. package/core/reporting.js +465 -2
  32. package/core/search.js +130 -52
  33. package/core/shared.js +101 -5
  34. package/core/tracing.js +16 -6
  35. package/core/verify.js +982 -95
  36. package/languages/go.js +91 -6
  37. package/languages/html.js +10 -0
  38. package/languages/java.js +151 -35
  39. package/languages/javascript.js +290 -33
  40. package/languages/python.js +78 -11
  41. package/languages/rust.js +267 -12
  42. package/languages/utils.js +315 -3
  43. package/mcp/server.js +91 -16
  44. package/package.json +9 -1
package/core/brief.js ADDED
@@ -0,0 +1,408 @@
1
+ /**
2
+ * core/brief.js — Brief: AST-only one-screen summary for a symbol.
3
+ *
4
+ * Returns a compact "before-I-touch-this" snapshot:
5
+ * - typed signature
6
+ * - first-sentence docstring
7
+ * - side-effect classification (fs/network/global mutation/process)
8
+ * - complexity (branches, maxDepth, lineCount)
9
+ * - async/generator flags
10
+ *
11
+ * No LLM, no heuristics that pretend to "summarize" intent.
12
+ * Everything here is derivable from the AST and existing symbol fields.
13
+ */
14
+
15
+ 'use strict';
16
+
17
+ const fs = require('fs');
18
+ const path = require('path');
19
+ const { parse } = require('./parser');
20
+ const { detectLanguage, langTraits } = require('../languages');
21
+ const { formatSymbolHandle } = require('./shared');
22
+
23
+ // ============================================================================
24
+ // Side-effect signal sets (per-language, conservative)
25
+ // ============================================================================
26
+
27
// Module specifiers whose presence among a file's imports signals a
// side-effect category.
// Shape: language → { fs: Set, network: Set, process: Set }.
const SIDE_EFFECT_IMPORTS = (() => {
    // JavaScript and TypeScript list exactly the same modules. A factory is
    // used (rather than one shared object) so each language still owns its
    // own, independent Set instances.
    const nodeModuleBuckets = () => ({
        fs: new Set(['fs', 'fs/promises', 'graceful-fs', 'node:fs', 'node:fs/promises']),
        network: new Set([
            'http', 'https', 'net', 'tls', 'dgram',
            'axios', 'node-fetch', 'got', 'undici', 'ws',
            'node:http', 'node:https', 'node:net',
        ]),
        process: new Set([
            'child_process', 'cluster', 'worker_threads', 'os',
            'node:child_process', 'node:cluster', 'node:worker_threads', 'node:os',
        ]),
    });

    return {
        javascript: nodeModuleBuckets(),
        typescript: nodeModuleBuckets(),
        python: {
            fs: new Set(['os', 'os.path', 'pathlib', 'shutil', 'tempfile', 'io']),
            network: new Set(['urllib', 'urllib.request', 'http', 'http.client', 'socket', 'requests', 'httpx', 'aiohttp']),
            process: new Set(['subprocess', 'multiprocessing', 'os', 'signal', 'threading']),
        },
        go: {
            fs: new Set(['os', 'io', 'io/ioutil', 'path/filepath', 'embed']),
            network: new Set(['net', 'net/http', 'net/url', 'net/rpc']),
            process: new Set(['os/exec', 'syscall', 'runtime']),
        },
        java: {
            fs: new Set(['java.io', 'java.nio', 'java.nio.file']),
            network: new Set(['java.net', 'java.net.http']),
            process: new Set(['java.lang.Runtime', 'java.lang.ProcessBuilder']),
        },
        rust: {
            fs: new Set(['std::fs', 'std::path']),
            network: new Set(['std::net', 'reqwest', 'hyper', 'tokio::net']),
            process: new Set(['std::process']),
        },
    };
})();
61
+
62
// Bare function names whose call (`name(`) signals a side-effect category
// without needing any import context.
// Shape: language → { [category]: Set of callee names }.
const SIDE_EFFECT_CALLS_BY_LANG = (() => {
    // JavaScript and TypeScript use the same call names; the factory gives
    // each language its own Set instances.
    const scriptCallBuckets = () => ({
        network: new Set(['fetch', 'XMLHttpRequest']),
        process: new Set(['exit']),
    });

    return {
        javascript: scriptCallBuckets(),
        typescript: scriptCallBuckets(),
        python: {
            fs: new Set(['open']),
            process: new Set(['exit', 'system']),
        },
        // Nominal languages: imports already give a strong signal (e.g.
        // `java.io`), and bare tokens such as `exit` would produce false
        // positives on generic identifiers, so these stay deliberately empty.
        go: {},
        java: {},
        rust: {},
    };
})();
86
+
87
+ // ============================================================================
88
+ // brief()
89
+ // ============================================================================
90
+
91
/**
 * Compute a brief summary for a symbol.
 *
 * Type-like symbols (class/struct/interface/type/enum) produce
 * `{ symbol, kind: 'type', lineCount, memberCount }`. Every other symbol is
 * treated as callable and produces
 * `{ symbol, kind: 'function', lineCount, sideEffects, complexity, isAsync, isGenerator }`.
 * If the symbol's source file cannot be read, the callable result carries
 * zeroed metrics plus `error: 'Could not read source'` instead of throwing.
 * When `options.git` is set and git info is returned, it is attached as `git`.
 *
 * @param {object} index - ProjectIndex
 * @param {string} name - Symbol name (function/method/class)
 * @param {object} options - { file, className, line, git }
 * @returns {object|null} Summary object, or null when the symbol does not resolve
 */
function brief(index, name, options = {}) {
    index._beginOp();
    try {
        const { def } = index.resolveSymbol(name, {
            file: options.file,
            className: options.className,
            line: options.line,
        });
        if (!def) return null;

        // Prefer the project-relative path for display and language detection.
        const displayFile = def.relativePath || def.file;
        const language = detectLanguage(displayFile);
        const symbol = {
            name: def.name,
            type: def.type,
            file: displayFile,
            startLine: def.startLine,
            endLine: def.endLine,
            handle: formatSymbolHandle(def),
            language,
            // Optional fields are only copied when the definition carries them.
            ...(def.params != null && { params: def.params }),
            ...(def.paramsStructured && { paramsStructured: def.paramsStructured }),
            ...(def.paramTypes && { paramTypes: def.paramTypes }),
            ...(def.returnType && { returnType: def.returnType }),
            ...(def.modifiers && def.modifiers.length && { modifiers: def.modifiers }),
            ...(def.decorators && def.decorators.length && { decorators: def.decorators }),
            ...(def.docstring && { docstring: firstSentence(def.docstring) }),
            ...(def.className && { className: def.className }),
            ...(def.isAsync && { isAsync: true }),
            ...(def.isGenerator && { isGenerator: true }),
        };

        // Optional git enrichment for the primary symbol's file. The module is
        // required lazily so it is only loaded when the caller asks for it.
        let gitInfo = null;
        if (options.git) {
            const { getGitInfo } = require('./git-enrich');
            gitInfo = getGitInfo(index.root, displayFile);
        }

        // For non-callable types most body metrics don't apply.
        if (['class', 'struct', 'interface', 'type', 'enum'].includes(def.type)) {
            return {
                symbol,
                kind: 'type',
                lineCount: (def.endLine || def.startLine) - def.startLine + 1,
                memberCount: countMembers(index, def),
                ...(gitInfo && { git: gitInfo }),
            };
        }

        // For callable symbols, slice the body out of the source file.
        const filePath = path.isAbsolute(def.file) ? def.file : path.join(index.root, def.file);
        let bodyText = '';
        try {
            const content = fs.readFileSync(filePath, 'utf-8');
            const lines = content.split('\n');
            // startLine/endLine are 1-based and inclusive; slice() is 0-based, end-exclusive.
            const start = Math.max(0, (def.startLine || 1) - 1);
            const end = Math.min(lines.length, def.endLine || def.startLine || 1);
            bodyText = lines.slice(start, end).join('\n');
        } catch (e) {
            // Unreadable source: report the failure in-band with the same
            // shape as the success result, so consumers can rely on every
            // flag (including isGenerator) being present.
            return {
                symbol,
                kind: 'function',
                lineCount: 0,
                sideEffects: [],
                complexity: { branches: 0, maxDepth: 0, lineCount: 0 },
                isAsync: !!def.isAsync,
                isGenerator: !!def.isGenerator,
                error: 'Could not read source',
                ...(gitInfo && { git: gitInfo }),
            };
        }

        const fileEntry = index.files.get(def.file);
        const fileImports = collectImportNames(fileEntry);

        const sideEffects = classifySideEffects(bodyText, language, fileImports);
        const complexity = computeComplexity(bodyText, language);

        return {
            symbol,
            kind: 'function',
            lineCount: complexity.lineCount,
            sideEffects,
            complexity,
            isAsync: !!def.isAsync,
            isGenerator: !!def.isGenerator,
            ...(gitInfo && { git: gitInfo }),
        };
    } finally {
        index._endOp();
    }
}
187
+
188
+ // ============================================================================
189
+ // Helpers
190
+ // ============================================================================
191
+
192
/**
 * Extract the first sentence of a docstring as a single line.
 *
 * A sentence ends at the first `.`, `!` or `?` that is followed by whitespace
 * or the end of the text. The match may span line breaks, so a sentence that
 * is wrapped over several lines is still cut at its terminator instead of
 * falling back to the whole docstring. Runs of whitespace (including
 * newlines) are collapsed to one space so the result fits on one line.
 *
 * @param {string} text - Raw docstring text
 * @returns {string|null} First sentence capped at 200 chars, or null for empty input
 */
function firstSentence(text) {
    if (!text) return null;
    const trimmed = String(text).trim();
    // [\s\S] instead of `.` so the lazy match can cross newlines.
    const match = trimmed.match(/^[\s\S]+?[.!?](?=\s|$)/);
    const sentence = (match ? match[0] : trimmed).replace(/\s+/g, ' ');
    // Cap at 200 chars (197 + ellipsis) to avoid runaway output.
    return sentence.length > 200 ? `${sentence.slice(0, 197)}...` : sentence;
}
201
+
202
/**
 * Count the methods recorded in the index for a type definition.
 *
 * A symbol counts when it lives in the same file as `def`, its `className`
 * equals `def.name`, and it is flagged `isMethod`.
 *
 * @param {object} index - Object exposing `symbols`, a Map of name → symbol records
 * @param {object} def - Type definition with `file` and `name`
 * @returns {number} Number of matching methods (0 when `def` or `def.file` is missing)
 */
function countMembers(index, def) {
    if (!def || !def.file) return 0;

    const belongsToDef = (candidate) =>
        candidate.file === def.file && candidate.className === def.name && candidate.isMethod;

    let total = 0;
    for (const group of index.symbols.values()) {
        for (const candidate of group) {
            if (belongsToDef(candidate)) total += 1;
        }
    }
    return total;
}
212
+
213
/**
 * Collect the module specifiers a file imports.
 *
 * Reads two optional fields of the file entry:
 *  - `imports`: any object (or array) whose values are either a module string
 *    or a record carrying `source` or `from` (the first one present wins);
 *  - `importDetails`: an array of records; both `source` and `from` are added
 *    when present.
 *
 * @param {object|null|undefined} fileEntry - Index entry for one file
 * @returns {Set<string>} Distinct module specifiers (empty when there is no entry)
 */
function collectImportNames(fileEntry) {
    const names = new Set();
    if (!fileEntry) return names;

    const { imports, importDetails } = fileEntry;

    if (imports && typeof imports === 'object') {
        for (const value of Object.values(imports)) {
            if (typeof value === 'string') names.add(value);
            else if (value && value.source) names.add(value.source);
            else if (value && value.from) names.add(value.from);
        }
    }

    if (Array.isArray(importDetails)) {
        for (const detail of importDetails) {
            if (!detail) continue;
            if (detail.source) names.add(detail.source);
            if (detail.from) names.add(detail.from);
        }
    }

    return names;
}
235
+
236
/**
 * Classify side-effects of a function body using textual scans.
 *
 * Returns a sorted array of the categories the body appears to touch:
 *   'fs'              — filesystem
 *   'network'         — network calls
 *   'process'         — child processes / OS-level effects
 *   'global_mutation' — `module.exports.X =` / `exports.X =` (JS/TS) or a
 *                       `global X` statement (Python)
 *
 * Two signals feed the fs/network/process categories:
 *  1. Import signal: the file imports a module listed in SIDE_EFFECT_IMPORTS
 *     for the language AND the body mentions that module's base name as a
 *     whole word (`fs.readFile`, `requests.get(`, `fs::read`).
 *  2. Direct-call signal: the body calls a name listed in
 *     SIDE_EFFECT_CALLS_BY_LANG (`fetch(`, `open(`), no import needed.
 *
 * This is a plain text scan, not an AST walk: matches inside comments or
 * string literals count too.
 *
 * @param {string} bodyText - Source text of the function
 * @param {string} language - Language key into the signal tables
 * @param {Iterable<string>} [fileImports] - Module specifiers imported by the file
 * @returns {string[]} Sorted category names (empty for an empty body)
 */
function classifySideEffects(bodyText, language, fileImports) {
    if (!bodyText) return [];

    const found = new Set();
    const importBuckets = SIDE_EFFECT_IMPORTS[language] || {};
    const callBuckets = SIDE_EFFECT_CALLS_BY_LANG[language] || {};

    // Import signal. Specifiers are compared case-insensitively.
    const importedLower = new Set([...(fileImports || [])].map((s) => s.toLowerCase()));
    for (const [category, modules] of Object.entries(importBuckets)) {
        for (const mod of modules) {
            if (!importedLower.has(mod.toLowerCase())) continue;
            // Base name = last path segment. Besides `.` and `/`, split on
            // `:` / `::` so `node:fs` and `std::fs` reduce to `fs`, which is
            // the identifier the body actually uses.
            const baseName = mod.split(/::|[./:]/).pop();
            if (baseName && new RegExp(`\\b${escapeRegExp(baseName)}\\b`).test(bodyText)) {
                found.add(category);
                break; // one hit is enough for this category
            }
        }
    }

    // Direct-call signal: `name(` anywhere in the body.
    for (const [category, callees] of Object.entries(callBuckets)) {
        for (const callee of callees) {
            if (new RegExp(`\\b${escapeRegExp(callee)}\\s*\\(`).test(bodyText)) {
                found.add(category);
                break;
            }
        }
    }

    // Global-mutation heuristic (cheap):
    //  - JS/TS: assignment to a property of `module.exports` / `exports`
    //  - Python: a `global X` statement at the start of a line
    // Other languages are not covered.
    if (language === 'javascript' || language === 'typescript') {
        if (/\b(module\.exports|exports)\.[A-Za-z_]\w*\s*=/.test(bodyText)) {
            found.add('global_mutation');
        }
    } else if (language === 'python') {
        if (/^\s*global\s+\w/m.test(bodyText)) {
            found.add('global_mutation');
        }
    }

    return [...found].sort();
}
306
+
307
/**
 * Escape every regular-expression metacharacter in a string so it can be
 * embedded in a `new RegExp(...)` pattern and match literally.
 *
 * @param {string} s - Text to escape
 * @returns {string} Text with each metacharacter prefixed by a backslash
 */
function escapeRegExp(s) {
    const metaChars = /[.*+?^${}()|[\]\\]/g;
    return s.replace(metaChars, (ch) => `\\${ch}`);
}
310
+
311
/**
 * Compute complexity metrics from a function body.
 * Cheap, AST-free counts over the raw source text (comments and string
 * literals are scanned too).
 *
 * - `branches`: occurrences of branch-introducing constructs. One combined
 *   pattern is used so each construct is counted once: `if (x)` is one
 *   branch, and `else if (x)` is one branch (its `if`). A plain `else` is
 *   not counted. `??` and `?.` are not treated as ternaries.
 * - `maxDepth`: deepest indentation relative to the first non-blank line,
 *   converted to levels by assuming 2 columns per level.
 * - `lineCount`: number of `\n`-separated lines in the body.
 *
 * @param {string} bodyText - Source text of the function
 * @param {string} [language] - Currently unused; the patterns are language-agnostic
 * @returns {{branches: number, maxDepth: number, lineCount: number}}
 */
function computeComplexity(bodyText, language) {
    const lines = bodyText.split('\n');
    const lineCount = lines.length;

    const branchToken = new RegExp([
        '\\b(?:if|while)\\b(?=\\s*\\(|\\s)',   // if/while, C-like `if (` or Python `if x:`
        '\\bfor\\b(?=\\s*\\(|\\s+\\w)',        // for, C-like `for (` or `for x in`
        '\\b(?:elif|case|when|except)\\b',     // Python elif/except, switch case, `when`
        '\\bcatch\\s*\\(',                     // catch (e)
        '(?<!\\?)\\?(?![?.])',                 // ternary (rough); skips `??` and `?.`
    ].join('|'), 'g');
    const branches = (bodyText.match(branchToken) || []).length;

    // Indent-based depth proxy: widest indent beyond the first non-blank line.
    let baseIndent = -1;
    let widestDelta = 0;
    for (const line of lines) {
        if (!line.trim()) continue; // blank lines carry no depth information
        const indent = expandIndent(line.match(/^(\s*)/)[1]);
        if (baseIndent === -1) baseIndent = indent;
        widestDelta = Math.max(widestDelta, indent - baseIndent);
    }

    // Translate raw columns to depth levels (2-column indent baseline).
    return { branches, maxDepth: Math.round(widestDelta / 2), lineCount };
}

/**
 * Width of a leading-whitespace run in columns: a tab counts as 4, every
 * other character as 1.
 *
 * @param {string} s - Leading whitespace of a line
 * @returns {number} Column width
 */
function expandIndent(s) {
    let width = 0;
    for (const ch of s) width += (ch === '\t') ? 4 : 1;
    return width;
}
363
+
364
// Symbol types that have no callable body to scan.
const NON_CALLABLE_KIND = new Set(['class', 'struct', 'interface', 'type', 'enum', 'trait', 'impl', 'state', 'field']);

/**
 * Lazily compute the side-effect tags for an arbitrary symbol record.
 *
 * Results are memoized on the index in `_sideEffectCache`, keyed by
 * `<file>:<startLine>`; the map is created on first use. On a cache miss the
 * symbol's file is read, its line range is sliced out and passed to
 * `classifySideEffects`.
 *
 * @param {object} index - Project index (`root`, `files`, optional `_sideEffectCache`)
 * @param {object} symbol - Symbol record (`type`, `file`/`relativePath`, `startLine`, `endLine`)
 * @returns {string[]|null} Side-effect tags, or null when an argument is
 *   missing, the symbol type is non-callable, or the file cannot be read
 *   (the null result for an unreadable file is cached as well)
 */
function sideEffectsFor(index, symbol) {
    if (!index || !symbol || NON_CALLABLE_KIND.has(symbol.type)) return null;

    const cacheKey = `${symbol.file || symbol.relativePath}:${symbol.startLine || 0}`;
    if (!index._sideEffectCache) index._sideEffectCache = new Map();
    const cache = index._sideEffectCache;
    if (cache.has(cacheKey)) return cache.get(cacheKey);

    // Absolute paths are used as-is; anything else is resolved against the project root.
    const filePath = path.isAbsolute(symbol.file || '')
        ? symbol.file
        : path.join(index.root, symbol.file || symbol.relativePath || '');

    let source;
    try {
        source = fs.readFileSync(filePath, 'utf-8');
    } catch (e) {
        cache.set(cacheKey, null);
        return null;
    }

    // startLine/endLine are 1-based and inclusive; slice() is 0-based, end-exclusive.
    const sourceLines = source.split('\n');
    const from = Math.max(0, (symbol.startLine || 1) - 1);
    const to = Math.min(sourceLines.length, symbol.endLine || symbol.startLine || 1);
    const bodyText = sourceLines.slice(from, to).join('\n');

    const language = detectLanguage(symbol.relativePath || symbol.file);
    const fileImports = collectImportNames(index.files.get(symbol.file));
    const tags = classifySideEffects(bodyText, language, fileImports);
    cache.set(cacheKey, tags);
    return tags;
}
400
+
401
+ module.exports = {
402
+ brief,
403
+ sideEffectsFor,
404
+ // exposed for tests
405
+ classifySideEffects,
406
+ computeComplexity,
407
+ firstSentence,
408
+ };
package/core/cache.js CHANGED
@@ -82,11 +82,39 @@ function saveCache(index, cachePath) {
82
82
  // calleeIndex is NOT persisted in index.json — it's rebuilt lazily from callsCache
83
83
  // on first findCallers/buildCalleeIndex call. Removing it saves ~22MB (14%) on large projects.
84
84
 
85
+ // PERF-1: persist _reachableSymbols if computed. Set keys are
86
+ // "absolutePath:line"; we strip the root prefix on save and re-attach on
87
+ // load so paths stay portable. Sorted for stable output ordering.
88
+ //
89
+ // Also save a fingerprint so we can detect index drift on load: if the
90
+ // saved fingerprint matches the loaded index state, the cached set is
91
+ // still valid. If the index was rebuilt after load (stale cache → build),
92
+ // the fingerprint won't match and computeReachability will recompute.
93
+ let reachableSymbolsRel = undefined;
94
+ let reachableFingerprint = undefined;
95
+ if (index._reachableSymbols && index._reachableSymbols.size > 0) {
96
+ const rels = [];
97
+ for (const k of index._reachableSymbols) {
98
+ const colon = k.lastIndexOf(':');
99
+ if (colon < 0) continue;
100
+ const absFile = k.slice(0, colon);
101
+ const lineStr = k.slice(colon + 1);
102
+ const relFile = path.relative(root, absFile);
103
+ rels.push(`${relFile}:${lineStr}`);
104
+ }
105
+ rels.sort(); // stable ordering — output contract
106
+ reachableSymbolsRel = rels;
107
+ reachableFingerprint = _computeReachabilityFingerprint(index);
108
+ }
109
+
85
110
  const cacheData = {
86
- version: 8, // v8: remove calleeIndex from index.json (rebuilt from callsCache)
111
+ // v10: persist _reachableSymbols set (computed by entrypoints.computeReachability)
112
+ version: 10,
87
113
  ucnVersion: UCN_VERSION, // Invalidate cache when UCN is updated
88
114
  configHash,
89
115
  root,
116
+ // PERF-2: refresh buildTime on each save so partial rebuilds report
117
+ // accurate stats. Falls back to original on first save.
90
118
  buildTime: index.buildTime,
91
119
  timestamp: Date.now(),
92
120
  files: strippedFiles,
@@ -99,9 +127,24 @@ function saveCache(index, cachePath) {
99
127
  failedFiles: index.failedFiles
100
128
  ? Array.from(index.failedFiles).map(f => path.relative(root, f))
101
129
  : [],
130
+ ...(reachableSymbolsRel !== undefined && {
131
+ reachableSymbols: reachableSymbolsRel,
132
+ reachableFingerprint,
133
+ }),
102
134
  };
103
135
 
104
- fs.writeFileSync(cacheFile, JSON.stringify(cacheData));
136
+ // PERF-3: atomic write — tmp file + rename so concurrent readers/writers
137
+ // never see a torn JSON. The calls/ shard write below already does this.
138
+ const tmpFile = cacheFile + '.tmp';
139
+ fs.writeFileSync(tmpFile, JSON.stringify(cacheData));
140
+ fs.renameSync(tmpFile, cacheFile);
141
+
142
+ // MED-1 (Round 5): clear the reachabilityDirty flag now that the set is
143
+ // safely persisted. The cli/index.js cache-save guard checks this flag
144
+ // along with needsCacheSave/callsCacheDirty.
145
+ if (index.reachabilityDirty) {
146
+ index.reachabilityDirty = false;
147
+ }
105
148
 
106
149
  // Save callsCache sharded by directory for lazy loading.
107
150
  // Write to a temp directory first, then atomic swap to avoid data loss on crash.
@@ -170,8 +213,9 @@ function loadCache(index, cachePath) {
170
213
 
171
214
  // Check version compatibility
172
215
  // v7: symbols/bindings stripped from file entries (dedup)
173
- // v8: calleeIndex removed from index.json (rebuilt from callsCache)
174
- if (cacheData.version !== 7 && cacheData.version !== 8) {
216
+ // v9: addSymbol propagates isAsync/isGenerator/paramTypes (force rebuild for old)
217
+ // v10: persists _reachableSymbols set
218
+ if (cacheData.version !== 10) {
175
219
  return false;
176
220
  }
177
221
 
@@ -285,6 +329,29 @@ function loadCache(index, cachePath) {
285
329
  }
286
330
  }
287
331
 
332
+ // PERF-1: restore _reachableSymbols if persisted (v10+).
333
+ // Saved as relative-path keys; rehydrate to absolute keys here so the
334
+ // in-memory set matches what computeReachability would produce fresh.
335
+ // The fingerprint is checked by computeReachability before reuse — if
336
+ // the index drifts (e.g. a rebuild after stale cache), the cached set
337
+ // is dropped and recomputed.
338
+ if (Array.isArray(cacheData.reachableSymbols)) {
339
+ const reachable = new Set();
340
+ for (const k of cacheData.reachableSymbols) {
341
+ if (typeof k !== 'string') continue;
342
+ const colon = k.lastIndexOf(':');
343
+ if (colon < 0) continue;
344
+ const relFile = k.slice(0, colon);
345
+ const lineStr = k.slice(colon + 1);
346
+ const absFile = path.isAbsolute(relFile) ? relFile : toAbs(relFile);
347
+ reachable.add(`${absFile}:${lineStr}`);
348
+ }
349
+ index._reachableSymbols = reachable;
350
+ if (cacheData.reachableFingerprint) {
351
+ index._reachableFingerprint = cacheData.reachableFingerprint;
352
+ }
353
+ }
354
+
288
355
  // Only rebuild graphs if config changed (e.g., aliases modified)
289
356
  const currentConfigHash = crypto.createHash('md5')
290
357
  .update(JSON.stringify(index.config || {})).digest('hex');
@@ -467,4 +534,37 @@ function _loadCallsShard(index, hash) {
467
534
  }
468
535
  }
469
536
 
470
- module.exports = { saveCache, loadCache, loadCallsCache, isCacheStale, ensureCallsCacheLoaded };
537
/**
 * Compute a cheap fingerprint of the index, stored next to the persisted
 * reachability set so that later index drift can be detected.
 *
 * The fingerprint is `<fileCount>:<symbolCount>:<sample>`, where:
 *  - fileCount is `index.files.size` (0 when absent),
 *  - symbolCount is `index.symbols.size`, i.e. the number of keys in the
 *    symbol map (0 when absent),
 *  - sample concatenates `<name>:<defs.length>|` for the first 8 entries of
 *    the symbol map in iteration order (`0` when the value is not an array).
 *
 * It is a heuristic: changes that keep both counts and the first 8 entries
 * intact produce the same fingerprint.
 *
 * @param {object} index - ProjectIndex instance
 * @returns {string} compact fingerprint
 */
function _computeReachabilityFingerprint(index) {
    const { files, symbols } = index;
    const fileCount = files ? files.size : 0;
    const symbolCount = symbols ? symbols.size : 0;

    // Map iteration order is insertion order, so the sample is stable for an
    // unmodified map.
    const sampleParts = [];
    if (symbols && symbols.size > 0) {
        for (const [name, defs] of symbols) {
            const defCount = Array.isArray(defs) ? defs.length : 0;
            sampleParts.push(`${name}:${defCount}|`);
            if (sampleParts.length >= 8) break;
        }
    }

    return `${fileCount}:${symbolCount}:${sampleParts.join('')}`;
}
566
+
567
+ module.exports = {
568
+ saveCache, loadCache, loadCallsCache, isCacheStale, ensureCallsCacheLoaded,
569
+ _computeReachabilityFingerprint,
570
+ };
package/core/callers.js CHANGED
@@ -160,6 +160,11 @@ function findCallers(index, name, options = {}) {
160
160
  const pendingByFile = new Map(); // filePath -> [{ call, fileEntry, callerSymbol, isMethod, isFunctionReference, receiver }]
161
161
  let pendingCount = 0;
162
162
  const maxResults = options.maxResults;
163
+ // BUG-H1: when consumers (like `about`) need an accurate truncation header
164
+ // ("showing N of <total>"), they pass needsTotal:true so Phase 1 runs to
165
+ // completion. Phase 2 still only enriches the first `maxResults` items —
166
+ // file reads stay bounded, but the candidate count reflects the true total.
167
+ const needsTotal = !!options.needsTotal;
163
168
  const localTypeCache = new Map(); // `${filePath}:${startLine}` -> localTypes Map or null
164
169
 
165
170
  // Use inverted callee index to skip files that don't contain calls to this name
@@ -169,8 +174,8 @@ function findCallers(index, name, options = {}) {
169
174
  : index.files;
170
175
 
171
176
  for (const [filePath, fileEntry] of fileIterator) {
172
- // Early exit when maxResults is reached
173
- if (maxResults && pendingCount >= maxResults) break;
177
+ // Early exit when maxResults is reached (skip when caller needs the true total)
178
+ if (maxResults && !needsTotal && pendingCount >= maxResults) break;
174
179
  try {
175
180
  const calls = getCachedCalls(index, filePath);
176
181
  if (!calls) continue;
@@ -643,34 +648,83 @@ function findCallers(index, name, options = {}) {
643
648
  }
644
649
  }
645
650
 
651
+ // True total candidate count from Phase 1 (before any Phase 2 truncation).
652
+ // Used by callers that need accurate "showing N of <total>" headers.
653
+ const totalCount = pendingCount;
654
+ // When needsTotal is set with a maxResults cap, only enrich the first
655
+ // `maxResults` candidates in Phase 2 — file reads stay bounded.
656
+ const enrichLimit = (needsTotal && maxResults) ? maxResults : Infinity;
657
+ let enrichedCount = 0;
658
+
659
+ // BUG-H1: shadow records for un-enriched candidates so post-call filters
660
+ // (exclude / minConfidence) can produce an accurate total without forcing
661
+ // a Phase-2 file read for every candidate. Each shadow has just enough
662
+ // info to drive the filter predicates: relativePath + confidence.
663
+ const shadowEntries = [];
664
+
646
665
  // Phase 2: Read content only for files with matching calls (eliminates ~98% of file reads)
647
- for (const [filePath, pending] of pendingByFile) {
648
- try {
649
- const content = fs.readFileSync(filePath, 'utf-8');
650
- for (const { call, fileEntry, callerSymbol, isMethod, isFunctionReference, receiver, receiverType, _evidence } of pending) {
651
- const scored = scoreEdge(_evidence || {});
652
- callers.push({
666
+ outer: for (const [filePath, pending] of pendingByFile) {
667
+ let content = null;
668
+ for (const { call, fileEntry, callerSymbol, isMethod, isFunctionReference, receiver, receiverType, _evidence } of pending) {
669
+ const scored = scoreEdge(_evidence || {});
670
+ if (enrichedCount >= enrichLimit) {
671
+ // Push shadow only — no file read needed.
672
+ shadowEntries.push({
653
673
  file: filePath,
654
674
  relativePath: fileEntry.relativePath,
655
675
  line: call.line,
656
- content: getLine(content, call.line),
657
- callerName: callerSymbol ? callerSymbol.name : null,
658
- callerFile: callerSymbol ? filePath : null,
659
- callerStartLine: callerSymbol ? callerSymbol.startLine : null,
660
- callerEndLine: callerSymbol ? callerSymbol.endLine : null,
661
- isMethod,
676
+ confidence: scored.confidence,
677
+ resolution: scored.resolution,
678
+ isMethod: call.isMethod || false,
662
679
  ...(isFunctionReference && { isFunctionReference: true }),
663
680
  ...(receiver !== undefined && { receiver }),
664
681
  ...(receiverType && { receiverType }),
665
- confidence: scored.confidence,
666
- resolution: scored.resolution,
667
682
  });
683
+ continue;
668
684
  }
669
- } catch (e) {
670
- // File may have been deleted between Phase 1 and Phase 2
685
+ // First time we hit this file's enrichment loop — read the file once.
686
+ if (content === null) {
687
+ try { content = fs.readFileSync(filePath, 'utf-8'); }
688
+ catch (e) { content = ''; /* deleted/unreadable; skip enrichment for rest */ break; }
689
+ }
690
+ callers.push({
691
+ file: filePath,
692
+ relativePath: fileEntry.relativePath,
693
+ line: call.line,
694
+ content: getLine(content, call.line),
695
+ callerName: callerSymbol ? callerSymbol.name : null,
696
+ callerFile: callerSymbol ? filePath : null,
697
+ callerStartLine: callerSymbol ? callerSymbol.startLine : null,
698
+ callerEndLine: callerSymbol ? callerSymbol.endLine : null,
699
+ isMethod,
700
+ ...(isFunctionReference && { isFunctionReference: true }),
701
+ ...(receiver !== undefined && { receiver }),
702
+ ...(receiverType && { receiverType }),
703
+ confidence: scored.confidence,
704
+ resolution: scored.resolution,
705
+ });
706
+ enrichedCount++;
671
707
  }
672
708
  }
673
709
 
710
+ // Tag the returned array with the true total candidate count (only meaningful
711
+ // when needsTotal:true was passed). Defined as non-enumerable so JSON.stringify
712
+ // won't surprise consumers; defaults to callers.length when not set.
713
+ Object.defineProperty(callers, 'totalCount', {
714
+ value: needsTotal ? totalCount : callers.length,
715
+ enumerable: false,
716
+ writable: true,
717
+ configurable: true,
718
+ });
719
+ // Attach shadow entries so consumers can compute post-filter totals without
720
+ // re-running findCallers. Empty when needsTotal:false or all candidates fit.
721
+ Object.defineProperty(callers, 'shadowEntries', {
722
+ value: shadowEntries,
723
+ enumerable: false,
724
+ writable: true,
725
+ configurable: true,
726
+ });
727
+
674
728
  return callers;
675
729
  } finally { index._endOp(); }
676
730
  }