ucn 3.8.25 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,354 @@
1
+ /**
2
+ * core/account.js - Conservation accounting for caller queries
3
+ *
4
+ * Implements the "never silently hide an occurrence" contract: the answer to
5
+ * "who calls X" must be a PARTITION of the text-occurrence ground set, never a
6
+ * subset. Every line that word-boundary-matches the symbol name is assigned to
7
+ * exactly one bucket:
8
+ *
9
+ * confirmed - claimed by an engine caller edge with confirmed-tier evidence
10
+ * unverified - claimed with unverified-tier evidence, or an AST call line
11
+ * no engine candidate claimed (reason: call-not-resolved)
12
+ * excluded - engine positively determined the call targets a DIFFERENT
13
+ * symbol (receiver-type mismatch, other definition, ...)
14
+ * nonCall - import / definition / reference per AST usage type, plus
15
+ * unclassifiedText (no AST usage at the line: comments,
16
+ * strings, and scanner-skipped tokens such as JS builtins —
17
+ * deliberately named "unclassified", not "comment")
18
+ * unparsed - line in a file that failed to parse (still readable text)
19
+ * unaccounted - residual; 0 when the arithmetic is conserved
20
+ *
21
+ * Conservation invariant:
22
+ * groundTotal === confirmed + unverified + nonCall.total + excluded.total
23
+ * + unparsed.lines + unaccounted
24
+ *
25
+ * Engine finds that grep would MISS (alias-resolved call sites whose line does
26
+ * not word-boundary-match the name) are reported in `beyondText` — additive
27
+ * information OUTSIDE the invariant, like `unreadableFiles`.
28
+ *
29
+ * Ground-set semantics are grep `-n -w`: unit is the (file, line) pair, each
30
+ * line with >= 1 word-boundary match counts once, case-sensitive.
31
+ *
32
+ * Performance: the ground scan is one `includes()`-gated read per project file
33
+ * per caller-command — the same I/O profile as the existing `search`/`usages`
34
+ * commands. Deriving counts from callsCache (zero reads) was rejected because
35
+ * comments/strings/references are not in the calls cache and the contract's
36
+ * ground set is text-defined. AST parsing (the expensive part) is restricted
37
+ * to files containing UNCLAIMED ground lines, via the op-cached
38
+ * `index._getCachedUsages`. No file is read twice in one command because
39
+ * context/about/impact run inside `index._beginOp()`.
40
+ */
41
+
42
+ 'use strict';
43
+
44
+ const fs = require('fs');
45
+ const path = require('path');
46
+ const { escapeRegExp } = require('./shared');
47
+
48
+ /**
49
+ * Compute the text-occurrence ground set for a symbol name.
50
+ *
51
+ * @param {object} index - ProjectIndex
52
+ * @param {string} name - Symbol name (matched with \b word boundaries)
53
+ * @returns {{
54
+ * total: number, // matching lines incl. unparsed files
55
+ * fileCount: number, // files (indexed + unparsed) with >= 1 matching line
56
+ * perFile: Map<string, number[]>, // absPath -> sorted 1-indexed line numbers (indexed files only)
57
+ * unparsed: { fileCount: number, lines: number, files: string[] }, // relative paths
58
+ * unreadableFiles: string[] // relative paths; OUTSIDE the arithmetic
59
+ * }}
60
+ */
61
+ function computeGroundSet(index, name) {
62
+ const wordRe = new RegExp('\\b' + escapeRegExp(name) + '\\b');
63
+ const perFile = new Map();
64
+ let total = 0;
65
+ let fileCount = 0;
66
+
67
+ for (const [filePath] of index.files) {
68
+ let content;
69
+ try {
70
+ content = index._readFile(filePath);
71
+ } catch (e) {
72
+ continue; // deleted since indexing; not part of the universe anymore
73
+ }
74
+ if (!content.includes(name)) continue;
75
+ const lines = content.split('\n');
76
+ const matched = [];
77
+ for (let i = 0; i < lines.length; i++) {
78
+ if (wordRe.test(lines[i])) matched.push(i + 1);
79
+ }
80
+ if (matched.length > 0) {
81
+ perFile.set(filePath, matched);
82
+ total += matched.length;
83
+ fileCount++;
84
+ }
85
+ }
86
+
87
+ // Failed-to-parse files are still text: their matching lines are part of
88
+ // the ground set, classified as `unparsed` (loud degradation, not silence).
89
+ const unparsed = { fileCount: 0, lines: 0, files: [] };
90
+ const unreadableFiles = [];
91
+ if (index.failedFiles && index.failedFiles.size > 0) {
92
+ for (const failedPath of index.failedFiles) {
93
+ if (index.files.has(failedPath)) continue; // indexed despite earlier failure
94
+ let content;
95
+ try {
96
+ content = fs.readFileSync(failedPath, 'utf-8');
97
+ } catch (e) {
98
+ unreadableFiles.push(path.relative(index.root, failedPath));
99
+ continue;
100
+ }
101
+ if (!content.includes(name)) continue;
102
+ const lines = content.split('\n');
103
+ let matched = 0;
104
+ for (let i = 0; i < lines.length; i++) {
105
+ if (wordRe.test(lines[i])) matched++;
106
+ }
107
+ if (matched > 0) {
108
+ unparsed.fileCount++;
109
+ unparsed.lines += matched;
110
+ unparsed.files.push(path.relative(index.root, failedPath));
111
+ }
112
+ }
113
+ unparsed.files.sort();
114
+ }
115
+ unreadableFiles.sort();
116
+
117
+ return {
118
+ total: total + unparsed.lines,
119
+ fileCount: fileCount + unparsed.fileCount,
120
+ perFile,
121
+ unparsed,
122
+ unreadableFiles,
123
+ };
124
+ }
125
+
126
+ /**
127
+ * Classify ground lines not claimed by engine results.
128
+ *
129
+ * Precedence per line (first match wins):
130
+ * call -> unverified, reason `call-not-resolved`
131
+ * import -> nonCall.imports
132
+ * definition -> nonCall.definitions
133
+ * reference -> nonCall.references
134
+ * (none) -> nonCall.unclassifiedText
135
+ *
136
+ * @param {object} index - ProjectIndex
137
+ * @param {string} name - Symbol name
138
+ * @param {object} groundSet - from computeGroundSet
139
+ * @param {Set<string>} claimedKeys - `${absPath}:${line}` keys already claimed
140
+ * (confirmed + unverified + excluded engine lines)
141
+ * @returns {{
142
+ * nonCall: { imports: number, definitions: number, references: number, unclassifiedText: number, total: number },
143
+ * callNotResolved: Array<{file: string, relativePath: string, line: number}>
144
+ * }}
145
+ */
146
+ function classifyGroundLines(index, name, groundSet, claimedKeys) {
147
+ const nonCall = { imports: 0, definitions: 0, references: 0, unclassifiedText: 0, total: 0 };
148
+ const callNotResolved = [];
149
+
150
+ // Cheap-first sources that need NO parsing: call lines from the calls
151
+ // cache (populated at index build) and definition lines from the symbol
152
+ // table. Only lines neither source explains fall through to the AST
153
+ // usage scan — typically comment/string/reference lines in test files.
154
+ // Same output as parse-everything, far fewer tree-sitter parses.
155
+ const defLinesByFile = new Map();
156
+ for (const def of index.symbols.get(name) || []) {
157
+ if (!def.file) continue;
158
+ if (!defLinesByFile.has(def.file)) defLinesByFile.set(def.file, new Set());
159
+ defLinesByFile.get(def.file).add(def.startLine);
160
+ }
161
+ let getCachedCalls = null; // lazy require to avoid cycle at module load
162
+
163
+ for (const [filePath, lineNumbers] of groundSet.perFile) {
164
+ let unclaimed = null;
165
+ for (const line of lineNumbers) {
166
+ if (claimedKeys.has(`${filePath}:${line}`)) continue;
167
+ (unclaimed || (unclaimed = [])).push(line);
168
+ }
169
+ if (!unclaimed) continue;
170
+
171
+ const fileEntry = index.files.get(filePath);
172
+ const relativePath = fileEntry ? fileEntry.relativePath : path.relative(index.root, filePath);
173
+
174
+ // Cheap pass: calls cache + symbol table
175
+ if (!getCachedCalls) getCachedCalls = require('./callers').getCachedCalls;
176
+ const cachedCalls = getCachedCalls(index, filePath);
177
+ const callLines = new Set();
178
+ if (Array.isArray(cachedCalls)) {
179
+ for (const c of cachedCalls) {
180
+ if (c.name === name || c.resolvedName === name ||
181
+ (c.resolvedNames && c.resolvedNames.includes(name))) {
182
+ callLines.add(c.line);
183
+ }
184
+ }
185
+ }
186
+ const defLines = defLinesByFile.get(filePath);
187
+
188
+ let needsParse = null;
189
+ for (const line of unclaimed) {
190
+ if (callLines.has(line)) {
191
+ callNotResolved.push({ file: filePath, relativePath, line });
192
+ } else if (defLines && defLines.has(line)) {
193
+ nonCall.definitions++;
194
+ } else {
195
+ (needsParse || (needsParse = [])).push(line);
196
+ }
197
+ }
198
+ if (!needsParse) continue;
199
+
200
+ // Remainder: AST usage scan distinguishes import/definition/reference
201
+ // from comment/string/skipped-token lines.
202
+ const usages = index._getCachedUsages(filePath, name);
203
+ const byLine = new Map();
204
+ if (Array.isArray(usages)) {
205
+ for (const u of usages) {
206
+ const existing = byLine.get(u.line);
207
+ // call outranks import outranks definition outranks reference
208
+ if (!existing || RANK[u.usageType] < RANK[existing]) {
209
+ byLine.set(u.line, u.usageType);
210
+ }
211
+ }
212
+ }
213
+ for (const line of needsParse) {
214
+ const usageType = byLine.get(line);
215
+ if (usageType === 'call') {
216
+ callNotResolved.push({ file: filePath, relativePath, line });
217
+ } else if (usageType === 'import') {
218
+ nonCall.imports++;
219
+ } else if (usageType === 'definition') {
220
+ nonCall.definitions++;
221
+ } else if (usageType === 'reference') {
222
+ nonCall.references++;
223
+ } else {
224
+ // No AST usage at this line (or parse returned null): comment,
225
+ // string, or a token the usage scanner deliberately skips.
226
+ nonCall.unclassifiedText++;
227
+ }
228
+ }
229
+ }
230
+
231
+ nonCall.total = nonCall.imports + nonCall.definitions + nonCall.references + nonCall.unclassifiedText;
232
+ return { nonCall, callNotResolved };
233
+ }
234
+
235
+ const RANK = { call: 0, import: 1, definition: 2, reference: 3 };
236
+
/**
 * Assemble the conservation account for a symbol from the text ground set and
 * the engine's claims.
 *
 * Each engine entry claims its `${file}:${line}` key at most once (first claim
 * wins, in the order confirmed -> unverified -> excluded). A confirmed or
 * unverified claim on a line that is NOT in the ground set is a find grep
 * would miss (alias-resolved call site); it is tallied in `beyondText` and
 * stays out of the conservation arithmetic. An excluded claim on a non-ground
 * line is dropped entirely. Ground lines nobody claimed are classified by
 * `classifyGroundLines`; unclaimed call lines are added to `unverified`.
 *
 * @param {object} index - ProjectIndex
 * @param {string} name - Symbol name
 * @param {object} parts
 * @param {object} parts.groundSet - from computeGroundSet
 * @param {Array<{file: string, line: number}>} [parts.confirmedEntries] - engine confirmed-tier caller lines
 * @param {Array<{file: string, line: number, reason?: string}>} [parts.unverifiedEntries] - engine unverified-tier lines
 * @param {Array<{file: string, line: number, reason: string}>} [parts.excludedEntries] - engine excluded lines (FULL list, not samples)
 * @param {object} [parts.filtered] - display-level hide counts { total, byFlag } (explanatory, outside invariant)
 * @returns {object} account: symbol, groundTotal, fileCount, confirmed,
 *   unverified, nonCall, excluded, unparsed, unreadableFiles, beyondText,
 *   unaccounted, conserved, optional filtered, plus a non-enumerable
 *   `callNotResolved` list
 */
function buildAccount(index, name, parts) {
    const groundSet = parts.groundSet;
    const keyFor = (entry) => `${entry.file}:${entry.line}`;

    // Every (file, line) pair of the indexed ground set, as lookup keys.
    const groundKeys = new Set();
    groundSet.perFile.forEach((lineNumbers, filePath) => {
        lineNumbers.forEach((line) => {
            groundKeys.add(`${filePath}:${line}`);
        });
    });

    const claimedKeys = new Set();
    const beyondText = { count: 0, sample: [] };
    const tally = { confirmed: 0, unverified: 0 };

    // Claim each entry for the given tier; non-ground lines go to beyondText.
    const claimAll = (entries, tier) => {
        for (const entry of entries) {
            const key = keyFor(entry);
            if (claimedKeys.has(key)) {
                continue; // one bucket per line; first claim wins
            }
            if (groundKeys.has(key)) {
                claimedKeys.add(key);
                tally[tier] += 1;
                continue;
            }
            // Engine found a call site grep would miss (alias / indirect name).
            beyondText.count += 1;
            if (beyondText.sample.length < 3) {
                beyondText.sample.push({
                    file: relPath(index, entry.file),
                    line: entry.line,
                });
            }
            claimedKeys.add(key);
        }
    };

    claimAll(parts.confirmedEntries || [], 'confirmed');
    claimAll(parts.unverifiedEntries || [], 'unverified');

    const excludedByReason = {};
    let excludedTotal = 0;
    for (const entry of parts.excludedEntries || []) {
        const key = keyFor(entry);
        if (claimedKeys.has(key)) {
            continue;
        }
        claimedKeys.add(key);
        if (groundKeys.has(key)) {
            excludedTotal += 1;
            const reason = entry.reason || 'excluded';
            if (!excludedByReason[reason]) {
                excludedByReason[reason] = { count: 0, sample: [] };
            }
            const bucket = excludedByReason[reason];
            bucket.count += 1;
            if (bucket.sample.length < 3) {
                bucket.sample.push({ file: relPath(index, entry.file), line: entry.line });
            }
        }
        // else: excluded non-ground line, irrelevant to both grep and display
    }

    const classified = classifyGroundLines(index, name, groundSet, claimedKeys);
    const nonCall = classified.nonCall;
    const callNotResolved = classified.callNotResolved;

    // Unclaimed call-syntax lines are unverified by contract, so engine misses
    // show up as visible entries rather than silent gaps.
    const confirmed = tally.confirmed;
    const unverified = tally.unverified + callNotResolved.length;

    const accountedTotal =
        confirmed + unverified + nonCall.total + excludedTotal + groundSet.unparsed.lines;
    const unaccounted = groundSet.total - accountedTotal;

    const account = {
        symbol: name,
        groundTotal: groundSet.total,
        fileCount: groundSet.fileCount,
        confirmed,
        unverified,
        nonCall,
        excluded: { total: excludedTotal, byReason: excludedByReason },
        unparsed: groundSet.unparsed,
        unreadableFiles: groundSet.unreadableFiles,
        beyondText,
        unaccounted,
        conserved: unaccounted === 0,
    };
    if (parts.filtered && parts.filtered.total > 0) {
        account.filtered = parts.filtered;
    }

    // Non-enumerable on purpose: available for engine diagnostics and the
    // Phase-1 baseline gap report, but kept out of the JSON surface.
    Object.defineProperty(account, 'callNotResolved', {
        value: callNotResolved,
        enumerable: false,
        writable: true,
        configurable: true,
    });
    return account;
}
344
+
/**
 * Project-relative path for a file.
 *
 * Uses the `relativePath` recorded on the index entry when the file is
 * indexed; otherwise computes it from `index.root`.
 *
 * @param {object} index - ProjectIndex (reads `files` and `root`)
 * @param {string} filePath - Absolute file path
 * @returns {string} path relative to the project root
 */
function relPath(index, filePath) {
    const indexed = index.files.get(filePath);
    if (indexed) {
        return indexed.relativePath;
    }
    return path.relative(index.root, filePath);
}
349
+
// Public surface: ground-set scan, unclaimed-line classification, and the
// account builder that combines them with engine claims.
module.exports = { computeGroundSet, classifyGroundLines, buildAccount };