@snevins/repo-mapper 1.0.3 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,26 +1,61 @@
1
1
  #!/usr/bin/env node
2
- import { writeFile, stat } from "node:fs/promises";
3
- import { resolve, relative } from "node:path";
4
- import { parseCliArgs } from "./cli.js";
2
+ import { writeFile, stat, readFile } from "node:fs/promises";
3
+ import { resolve, relative, dirname, join } from "node:path";
4
+ import { fileURLToPath } from "node:url";
5
+ import { parseCliArgs, HelpRequestedError, VersionRequestedError } from "./cli.js";
5
6
  import { loadCache, getCachedTags, setCacheEntry, saveCache } from "./cache.js";
6
- import { discoverFiles, DEFAULT_IGNORED_PATTERNS } from "./files.js";
7
+ import { discoverFiles, DEFAULT_IGNORED_PATTERNS, isTestOrScriptPath } from "./files.js";
8
+ import { getExtensionsForLanguage } from "./languages.js";
7
9
  import { initParser, parseFile, shutdownParser } from "./parser.js";
8
- import { buildFileGraph } from "./graph.js";
10
+ import { buildFileGraph, buildImportGraph } from "./graph.js";
9
11
  import { computePageRank } from "./pagerank.js";
10
- import { buildPersonalization, rankDefinitions } from "./ranking.js";
12
+ import { buildFocusPersonalization, buildEntrypointPersonalization, rankDefinitions, adjustFileRanks, combineRanks } from "./ranking.js";
13
+ import { computeFileDegrees } from "./graph.js";
11
14
  import { fitToTokenBudget } from "./output.js";
15
+ import { detectDuplicates } from "./dedup.js";
12
16
  async function main() {
13
17
  const args = parseCliArgs(process.argv.slice(2));
14
18
  const { options } = args;
15
19
  // Determine root directory (first path or cwd)
16
20
  const firstPath = args.paths[0];
17
21
  const rootDir = firstPath !== undefined ? resolve(firstPath) : process.cwd();
18
- // Resolve focus files to relative paths
19
- const focusFiles = options.focus.map((f) => relative(rootDir, resolve(f)));
20
- const focusSet = new Set(focusFiles);
22
+ // Resolve focus files to relative paths (resolve relative to rootDir, not CWD)
23
+ const focusFiles = options.focus.map((f) => relative(rootDir, resolve(rootDir, f)));
24
+ // Validate focus files exist
25
+ const validFocusFiles = [];
26
+ for (const relPath of focusFiles) {
27
+ const absPath = join(rootDir, relPath);
28
+ try {
29
+ await stat(absPath);
30
+ validFocusFiles.push(relPath);
31
+ }
32
+ catch {
33
+ console.error(`Warning: Focus file "${relPath}" does not exist, ignoring`);
34
+ }
35
+ }
36
+ const focusSet = new Set(validFocusFiles);
21
37
  if (options.verbose) {
22
38
  console.error(`Root: ${rootDir}`);
23
- console.error(`Focus files: ${String(focusFiles.length)}`);
39
+ console.error(`Focus files: ${String(validFocusFiles.length)}`);
40
+ }
41
+ // Build extensions filter from --type flags
42
+ let extensions;
43
+ if (options.type.length > 0) {
44
+ extensions = new Set();
45
+ for (const langId of options.type) {
46
+ const langExts = getExtensionsForLanguage(langId);
47
+ if (!langExts) {
48
+ console.error(`Warning: Unknown language type "${langId}", ignoring`);
49
+ continue;
50
+ }
51
+ for (const ext of langExts) {
52
+ extensions.add(ext);
53
+ }
54
+ }
55
+ if (extensions.size === 0) {
56
+ console.error("Error: No valid language types specified");
57
+ process.exit(1);
58
+ }
24
59
  }
25
60
  // 1. Discover files (apply ignore patterns unless --no-ignore)
26
61
  const ignoredPatterns = options.noIgnore
@@ -28,8 +63,11 @@ async function main() {
28
63
  : [...DEFAULT_IGNORED_PATTERNS, ...options.ignore]; // defaults + custom
29
64
  const discoveryResult = await discoverFiles({
30
65
  rootDir,
66
+ extensions,
31
67
  ignoredPatterns: ignoredPatterns.length > 0 ? ignoredPatterns : undefined,
68
+ includePatterns: options.include.length > 0 ? options.include : undefined,
32
69
  maxFiles: options.maxFiles,
70
+ ignoredDirs: undefined,
33
71
  });
34
72
  const files = discoveryResult.files;
35
73
  if (discoveryResult.wasLimited) {
@@ -88,21 +126,73 @@ async function main() {
88
126
  console.error(`Cache: ${String(cacheHits)} hits, ${String(cacheMisses)} misses`);
89
127
  console.error(`Parsed ${String(defs.length)} definitions, ${String(refs.length)} references`);
90
128
  }
91
- // 3. Build graph
92
- const graph = buildFileGraph(allTags);
93
- // 4. Compute PageRank
94
- const personalization = focusFiles.length > 0
95
- ? buildPersonalization(focusFiles)
96
- : undefined;
97
- const fileRanks = computePageRank(graph, { personalization });
98
- // 5. Rank definitions (exclude focus files)
99
- const rankedDefs = rankDefinitions(graph, fileRanks, focusSet);
129
+ // 3. Build graphs (downweight edges from test/script files)
130
+ // Two-stage ranking: import graph for structure, ref graph for density
131
+ const TEST_WEIGHT = 0.2;
132
+ const edgeWeightMultiplier = (path) => isTestOrScriptPath(path) ? TEST_WEIGHT : 1.0;
133
+ // Import graph: binary edges (1 per file→file connection) for structural importance
134
+ const importGraph = buildImportGraph(allTags, { edgeWeightMultiplier });
135
+ // Ref graph: weighted edges (ref counts) for density and definition ranking
136
+ const refGraph = buildFileGraph(allTags, { edgeWeightMultiplier });
137
+ // 4. Two-phase PageRank
138
+ // Phase 1: Compute structural importance from import graph
139
+ let personalization;
140
+ if (validFocusFiles.length > 0) {
141
+ personalization = buildFocusPersonalization(validFocusFiles, refGraph);
142
+ }
143
+ else {
144
+ // Auto-detect entrypoints for default personalization
145
+ const entrypointMap = buildEntrypointPersonalization(importGraph.nodes);
146
+ personalization = entrypointMap.size > 0 ? entrypointMap : undefined;
147
+ }
148
+ const structuralRanks = computePageRank(importGraph, { personalization });
149
+ // Phase 2: Combine structural rank with reference density
150
+ const rawFileRanks = combineRanks(structuralRanks, refGraph);
151
+ // 5. Detect duplicate files (copy-pasted code)
152
+ const tagsByFile = new Map();
153
+ for (const tag of allTags) {
154
+ const existing = tagsByFile.get(tag.relPath);
155
+ if (existing) {
156
+ existing.push(tag);
157
+ }
158
+ else {
159
+ tagsByFile.set(tag.relPath, [tag]);
160
+ }
161
+ }
162
+ const duplicates = detectDuplicates(tagsByFile);
163
+ if (options.verbose && duplicates.size > 0) {
164
+ // Count unique duplicate groups
165
+ const uniqueGroups = new Set([...duplicates.values()].map((g) => g.fingerprint));
166
+ console.error(`Detected ${String(uniqueGroups.size)} duplicate file group(s)`);
167
+ }
168
+ // 6. Apply architecture-aware adjustments (utility penalty, entry boost, diversity, duplicates)
169
+ const degrees = computeFileDegrees(refGraph);
170
+ const fileRanks = adjustFileRanks(rawFileRanks, degrees, refGraph.nodes, duplicates);
171
+ // 7. Rank definitions (exclude focus files)
172
+ const rankedDefs = rankDefinitions(refGraph, fileRanks, focusSet);
100
173
  if (options.verbose) {
101
174
  console.error(`Ranked ${String(rankedDefs.length)} definitions`);
102
175
  }
103
- // 6. Fit to token budget and format output
104
- const output = fitToTokenBudget(rankedDefs, graph, fileRanks, options.tokens, focusFiles.length > 0 ? focusFiles : undefined);
105
- // 7. Write output
176
+ // Warn if focus file results are dominated by utility paths
177
+ if (validFocusFiles.length > 0 && rankedDefs.length >= 2) {
178
+ const UTILITY_PATTERNS = ["utils/", "lib/", "common/", "shared/", "helpers/"];
179
+ const topN = rankedDefs.slice(0, Math.min(3, rankedDefs.length));
180
+ const utilityCount = topN.filter((d) => UTILITY_PATTERNS.some((p) => d.file.includes(p))).length;
181
+ if (utilityCount >= 2) {
182
+ console.error(`Tip: Top results are utility files. Try subdirectory scoping instead: ` +
183
+ `repo-mapper ./path/to/module -t ${String(options.tokens)}`);
184
+ }
185
+ }
186
+ // Warn if output is suspiciously sparse with large token budget
187
+ const SPARSE_THRESHOLD = 5;
188
+ const LARGE_BUDGET_THRESHOLD = 1000;
189
+ if (options.tokens >= LARGE_BUDGET_THRESHOLD && rankedDefs.length < SPARSE_THRESHOLD) {
190
+ console.error(`Warning: Output is sparse (${String(rankedDefs.length)} definitions) with ${String(options.tokens)} token budget. ` +
191
+ `This may indicate overly broad ignore patterns or missing language support.`);
192
+ }
193
+ // 8. Fit to token budget and format output
194
+ const output = fitToTokenBudget(rankedDefs, refGraph, fileRanks, options.tokens, validFocusFiles.length > 0 ? validFocusFiles : undefined);
195
+ // 9. Write output
106
196
  if (options.output) {
107
197
  await writeFile(options.output, output);
108
198
  if (options.verbose) {
@@ -113,18 +203,50 @@ async function main() {
113
203
  console.log(output);
114
204
  }
115
205
  }
116
- main().catch((err) => {
206
+ const USAGE = `Usage: repo-mapper [paths...] [options]
207
+
208
+ Options:
209
+ -h, --help Show this help message
210
+ -V, --version Show version number
211
+ -t, --tokens <n> Max tokens (default: 2000)
212
+ -f, --focus <file> Focus file (repeatable)
213
+ -o, --output <file> Output file
214
+ -r, --refresh Ignore cache
215
+ -v, --verbose Verbose output
216
+ --ignore <pattern> Add ignore pattern (repeatable)
217
+ --include <pattern> Only include files matching pattern (repeatable)
218
+ --no-ignore Disable default ignore patterns
219
+ --max-files <n> Max files to process (default: 10000, 0=unlimited)
220
+ --type <lang> Filter by language: ts, js, python, go, rust, solidity (repeatable)
221
+
222
+ Examples:
223
+ repo-mapper . # Map current directory
224
+ repo-mapper ./src -t 3000 # Map src/ with larger budget
225
+ repo-mapper . -f src/api.ts # Bias ranking toward api.ts
226
+ repo-mapper . --type ts # Only TypeScript files
227
+ repo-mapper . -v # Show progress info`;
228
+ async function getVersion() {
229
+ const __dirname = dirname(fileURLToPath(import.meta.url));
230
+ const packagePath = join(__dirname, "..", "package.json");
231
+ const content = await readFile(packagePath, "utf-8");
232
+ const pkg = JSON.parse(content);
233
+ return pkg.version;
234
+ }
235
+ main().catch(async (err) => {
236
+ // --help or -h: print usage and exit success
237
+ if (err instanceof HelpRequestedError) {
238
+ console.log(USAGE);
239
+ process.exit(0);
240
+ }
241
+ // --version or -V: print version and exit success
242
+ if (err instanceof VersionRequestedError) {
243
+ const version = await getVersion();
244
+ console.log(version);
245
+ process.exit(0);
246
+ }
247
+ // Other errors: print error + usage and exit failure
117
248
  const message = err instanceof Error ? err.message : String(err);
118
- console.error(`Error: ${message}`);
119
- console.error("\nUsage: repo-mapper [paths...] [options]");
120
- console.error("Options:");
121
- console.error(" -t, --tokens <n> Max tokens (default: 1024)");
122
- console.error(" -f, --focus <file> Focus file (repeatable)");
123
- console.error(" -o, --output <file> Output file");
124
- console.error(" -r, --refresh Ignore cache");
125
- console.error(" -v, --verbose Verbose output");
126
- console.error(" --ignore <pattern> Add ignore pattern (repeatable)");
127
- console.error(" --no-ignore Disable default ignore patterns");
128
- console.error(" --max-files <n> Max files to process (default: 10000, 0=unlimited)");
249
+ console.error(`Error: ${message}\n`);
250
+ console.error(USAGE);
129
251
  process.exit(1);
130
252
  });
@@ -38,3 +38,8 @@ export declare function isTreeSitterSupported(ext: string): boolean;
38
38
  * If no config provided, uses TS/JS builtins as default.
39
39
  */
40
40
  export declare function isBuiltin(name: string, config?: LanguageConfig): boolean;
41
+ /**
42
+ * Get file extensions for a language ID or alias.
43
+ * Returns undefined for unknown languages.
44
+ */
45
+ export declare function getExtensionsForLanguage(langId: string): ReadonlySet<string> | undefined;
package/dist/languages.js CHANGED
@@ -607,3 +607,24 @@ export function isBuiltin(name, config) {
607
607
  const builtins = config?.builtins ?? TS_JS_BUILTINS;
608
608
  return builtins.has(name);
609
609
  }
610
+ /**
611
+ * Short aliases for common language IDs.
612
+ */
613
+ const LANG_ALIASES = {
614
+ ts: "typescript",
615
+ js: "javascript",
616
+ py: "python",
617
+ rs: "rust",
618
+ sol: "solidity",
619
+ };
620
+ /**
621
+ * Get file extensions for a language ID or alias.
622
+ * Returns undefined for unknown languages.
623
+ */
624
+ export function getExtensionsForLanguage(langId) {
625
+ const normalized = LANG_ALIASES[langId] ?? langId;
626
+ const config = LANGUAGE_REGISTRY[normalized];
627
+ if (!config)
628
+ return undefined;
629
+ return new Set(config.extensions);
630
+ }
package/dist/output.d.ts CHANGED
@@ -1,11 +1,17 @@
1
1
  import type { RankedDefinition, FileGraph } from "./types.js";
2
2
  /**
3
- * Format output per spec §3.5.
4
- * Files sorted by PageRank descending.
3
+ * Get module name (first path segment) from a file path.
4
+ * Returns "(root)" for files in the root directory.
5
+ */
6
+ export declare function getModuleName(path: string): string;
7
+ /**
8
+ * Format output per spec §3.5, grouped by module.
9
+ * Modules sorted by highest-ranked file descending.
10
+ * Files within module sorted by PageRank descending.
5
11
  * Definitions within file sorted by line ascending.
6
12
  * Line numbers right-aligned to max width.
7
13
  */
8
- export declare function formatOutput(defs: readonly RankedDefinition[], graph: FileGraph, fileRanks: ReadonlyMap<string, number>, focusFiles?: readonly string[]): string;
14
+ export declare function formatOutput(defs: readonly RankedDefinition[], graph: FileGraph, fileRanks: ReadonlyMap<string, number>, focusFiles?: readonly string[], maxFilesPerModule?: number): string;
9
15
  /**
10
16
  * Fit ranked definitions to token budget using binary search.
11
17
  * Returns the largest output that fits within the budget.
@@ -16,4 +22,4 @@ export declare function formatOutput(defs: readonly RankedDefinition[], graph: F
16
22
  * 3. If tokens ≤ budget, try more; else try fewer
17
23
  * 4. Return best fit
18
24
  */
19
- export declare function fitToTokenBudget(defs: readonly RankedDefinition[], graph: FileGraph, fileRanks: ReadonlyMap<string, number>, tokenBudget: number, focusFiles?: readonly string[]): string;
25
+ export declare function fitToTokenBudget(defs: readonly RankedDefinition[], graph: FileGraph, fileRanks: ReadonlyMap<string, number>, tokenBudget: number, focusFiles?: readonly string[], maxFilesPerModule?: number): string;
package/dist/output.js CHANGED
@@ -1,11 +1,63 @@
1
1
  import { estimateTokens } from "./tokens.js";
2
+ /**
3
+ * Max definitions per unique name in output to prevent repetition.
4
+ */
5
+ const MAX_DEFS_PER_NAME = 2;
6
+ /**
7
+ * Default max files per module to spread budget across modules.
8
+ */
9
+ const MAX_FILES_PER_MODULE = 5;
10
+ /**
11
+ * Top K files globally guaranteed inclusion regardless of per-module limits.
12
+ */
13
+ const GLOBAL_TOP_K = 50;
14
+ /**
15
+ * Get module name (first path segment) from a file path.
16
+ * Returns "(root)" for files in the root directory.
17
+ */
18
+ export function getModuleName(path) {
19
+ const slash = path.indexOf("/");
20
+ return slash === -1 ? "(root)" : path.slice(0, slash);
21
+ }
22
+ /**
23
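+ * Compute a rank's percentile (0-100): the share of the other ranks strictly below it, rounded; returns 100 when fewer than two ranks are given. Despite the parameter name, the list does not need to be sorted.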
24
+ * Higher rank value = higher percentile.
25
+ */
26
+ function computePercentile(rank, sortedRanks) {
27
+ if (sortedRanks.length === 0)
28
+ return 100;
29
+ if (sortedRanks.length === 1)
30
+ return 100;
31
+ // Count how many ranks are below this one
32
+ let countBelow = 0;
33
+ for (const r of sortedRanks) {
34
+ if (r < rank)
35
+ countBelow++;
36
+ }
37
+ // Percentile = (countBelow / (n-1)) * 100, rounded
38
+ return Math.round((countBelow / (sortedRanks.length - 1)) * 100);
39
+ }
40
+ /**
41
+ * Format a rank's percentile as a one-decimal string in the range "0.0" to "1.0".
42
+ */
43
+ function formatRankDisplay(rank, sortedRanks) {
44
+ const percentile = computePercentile(rank, sortedRanks);
45
+ return (percentile / 100).toFixed(1);
46
+ }
2
47
  /**
3
48
  * Resolve RankedDefinitions to their Tags by looking up in graph.defsByName.
4
49
  * Returns only definitions that can be found.
50
+ * Applies diversity cap to limit repetition of same-named definitions.
5
51
  */
6
52
  function resolveDefs(defs, graph) {
7
53
  const result = [];
54
+ const nameCounts = new Map();
8
55
  for (const def of defs) {
56
+ // Check diversity cap before processing
57
+ const currentCount = nameCounts.get(def.ident) ?? 0;
58
+ if (currentCount >= MAX_DEFS_PER_NAME) {
59
+ continue;
60
+ }
9
61
  const tags = graph.defsByName.get(def.ident);
10
62
  if (!tags)
11
63
  continue;
@@ -13,6 +65,7 @@ function resolveDefs(defs, graph) {
13
65
  if (!tag)
14
66
  continue;
15
67
  result.push({ file: def.file, tag, rank: def.rank });
68
+ nameCounts.set(def.ident, currentCount + 1);
16
69
  }
17
70
  return result;
18
71
  }
@@ -29,52 +82,131 @@ function groupByFile(defs) {
29
82
  return groups;
30
83
  }
31
84
  /**
32
- * Format output per spec §3.5.
33
- * Files sorted by PageRank descending.
85
+ * Group files by module (first path segment).
86
+ */
87
+ function groupByModule(files) {
88
+ const modules = new Map();
89
+ for (const file of files) {
90
+ const mod = getModuleName(file);
91
+ const existing = modules.get(mod) ?? [];
92
+ existing.push(file);
93
+ modules.set(mod, existing);
94
+ }
95
+ return modules;
96
+ }
97
+ /**
98
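+ * Limit files per module, keeping the first N files of each module in the order given (the caller is expected to pass lists already sorted by rank).
99
+ * Global top-K files are always included and do not count toward the per-module limit.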
100
+ * Returns set of allowed files and count of omitted per module.
101
+ */
102
+ function limitFilesPerModule(moduleGroups, fileRanks, maxPerModule, globalTopK = GLOBAL_TOP_K) {
103
+ if (maxPerModule <= 0 && globalTopK <= 0) {
104
+ // No limits - return all files
105
+ const allFiles = new Set();
106
+ for (const files of moduleGroups.values()) {
107
+ for (const f of files)
108
+ allFiles.add(f);
109
+ }
110
+ return { allowedFiles: allFiles, omittedByModule: new Map() };
111
+ }
112
+ // Get all files sorted by global rank
113
+ const allFilesList = [...moduleGroups.values()].flat();
114
+ const sortedGlobally = [...allFilesList].sort((a, b) => (fileRanks.get(b) ?? 0) - (fileRanks.get(a) ?? 0));
115
+ // Guarantee top-K files globally
116
+ const guaranteedFiles = new Set(sortedGlobally.slice(0, globalTopK));
117
+ const allowedFiles = new Set();
118
+ const omittedByModule = new Map();
119
+ for (const [mod, files] of moduleGroups) {
120
+ let keptNonGuaranteed = 0;
121
+ let omitted = 0;
122
+ for (const f of files) {
123
+ const isGuaranteed = guaranteedFiles.has(f);
124
+ const underLimit = maxPerModule <= 0 || keptNonGuaranteed < maxPerModule;
125
+ if (isGuaranteed || underLimit) {
126
+ allowedFiles.add(f);
127
+ if (!isGuaranteed)
128
+ keptNonGuaranteed++;
129
+ }
130
+ else {
131
+ omitted++;
132
+ }
133
+ }
134
+ if (omitted > 0)
135
+ omittedByModule.set(mod, omitted);
136
+ }
137
+ return { allowedFiles, omittedByModule };
138
+ }
139
+ /**
140
+ * Format output per spec §3.5, grouped by module.
141
+ * Modules sorted by highest-ranked file descending.
142
+ * Files within module sorted by PageRank descending.
34
143
  * Definitions within file sorted by line ascending.
35
144
  * Line numbers right-aligned to max width.
36
145
  */
37
- export function formatOutput(defs, graph, fileRanks, focusFiles) {
146
+ export function formatOutput(defs, graph, fileRanks, focusFiles, maxFilesPerModule = MAX_FILES_PER_MODULE) {
38
147
  const resolved = resolveDefs(defs, graph);
39
148
  if (resolved.length === 0 && (!focusFiles || focusFiles.length === 0)) {
40
149
  return "";
41
150
  }
42
- const groups = groupByFile(resolved);
151
+ const fileGroups = groupByFile(resolved);
43
152
  // Sort files by PageRank descending
44
- const sortedFiles = [...groups.keys()].sort((a, b) => {
153
+ const sortedFiles = [...fileGroups.keys()].sort((a, b) => {
45
154
  const rankA = fileRanks.get(a) ?? 0;
46
155
  const rankB = fileRanks.get(b) ?? 0;
47
156
  if (rankB !== rankA)
48
157
  return rankB - rankA;
49
158
  return a.localeCompare(b);
50
159
  });
51
- // Find max line number for padding
52
- let maxLine = 1;
53
- for (const def of resolved) {
54
- if (def.tag.line > maxLine)
55
- maxLine = def.tag.line;
56
- }
57
- const lineWidth = String(maxLine).length;
160
+ // Group files by module
161
+ const moduleGroups = groupByModule(sortedFiles);
162
+ // Apply module file limits
163
+ const { allowedFiles, omittedByModule } = limitFilesPerModule(moduleGroups, fileRanks, maxFilesPerModule);
164
+ // Sort modules by highest-ranked file in each module
165
+ const sortedModules = [...moduleGroups.keys()].sort((a, b) => {
166
+ const filesA = moduleGroups.get(a) ?? [];
167
+ const filesB = moduleGroups.get(b) ?? [];
168
+ const maxRankA = Math.max(...filesA.map((f) => fileRanks.get(f) ?? 0));
169
+ const maxRankB = Math.max(...filesB.map((f) => fileRanks.get(f) ?? 0));
170
+ if (maxRankB !== maxRankA)
171
+ return maxRankB - maxRankA;
172
+ return a.localeCompare(b);
173
+ });
58
174
  const lines = [];
59
175
  // Focus files header
60
176
  if (focusFiles && focusFiles.length > 0) {
61
177
  lines.push(`[Focus: [${focusFiles.join(", ")}]]`);
62
178
  lines.push("");
63
179
  }
64
- // Format each file
65
- for (const file of sortedFiles) {
66
- const fileDefs = groups.get(file);
67
- if (!fileDefs || fileDefs.length === 0)
180
+ // Get all ranks for percentile calculation (only from allowed files)
181
+ const allRanks = [...allowedFiles].map((f) => fileRanks.get(f) ?? 0);
182
+ // Format by module
183
+ for (const mod of sortedModules) {
184
+ const allModFiles = moduleGroups.get(mod) ?? [];
185
+ // Filter to allowed files only
186
+ const modFiles = allModFiles.filter((f) => allowedFiles.has(f));
187
+ if (modFiles.length === 0)
68
188
  continue;
69
- const fileRank = fileRanks.get(file) ?? 0;
70
- lines.push(`${file}:`);
71
- lines.push(`(Rank: ${fileRank.toFixed(4)})`);
72
- lines.push("");
73
- // Sort definitions by line ascending
74
- const sorted = [...fileDefs].sort((a, b) => a.tag.line - b.tag.line);
75
- for (const def of sorted) {
76
- const lineNum = String(def.tag.line).padStart(lineWidth, " ");
77
- lines.push(`${lineNum}: ${def.tag.signature ?? def.tag.name}`);
189
+ // Module header - shows kept count, not total
190
+ const suffix = mod === "(root)" ? "" : "/";
191
+ lines.push(`## ${mod}${suffix} (${String(modFiles.length)} file${modFiles.length > 1 ? "s" : ""})`);
192
+ // Files in this module (already sorted by rank)
193
+ for (const file of modFiles) {
194
+ const fileDefs = fileGroups.get(file);
195
+ if (!fileDefs || fileDefs.length === 0)
196
+ continue;
197
+ const fileRank = fileRanks.get(file) ?? 0;
198
+ lines.push(`${file}: ${formatRankDisplay(fileRank, allRanks)}`);
199
+ // Sort definitions by line ascending
200
+ const sorted = [...fileDefs].sort((a, b) => a.tag.line - b.tag.line);
201
+ for (const def of sorted) {
202
+ lines.push(` ${def.tag.signature ?? def.tag.name}`);
203
+ }
204
+ }
205
+ // Omission annotation if files were limited
206
+ const omitted = omittedByModule.get(mod);
207
+ if (omitted !== undefined && omitted > 0) {
208
+ const fileWord = omitted === 1 ? "file" : "files";
209
+ lines.push(` ... (${String(omitted)} more ${fileWord} in ${mod}${suffix})`);
78
210
  }
79
211
  lines.push("");
80
212
  }
@@ -90,7 +222,7 @@ export function formatOutput(defs, graph, fileRanks, focusFiles) {
90
222
  * 3. If tokens ≤ budget, try more; else try fewer
91
223
  * 4. Return best fit
92
224
  */
93
- export function fitToTokenBudget(defs, graph, fileRanks, tokenBudget, focusFiles) {
225
+ export function fitToTokenBudget(defs, graph, fileRanks, tokenBudget, focusFiles, maxFilesPerModule = MAX_FILES_PER_MODULE) {
94
226
  if (tokenBudget <= 0)
95
227
  return "";
96
228
  if (defs.length === 0) {
@@ -111,7 +243,7 @@ export function fitToTokenBudget(defs, graph, fileRanks, tokenBudget, focusFiles
111
243
  continue;
112
244
  }
113
245
  const subset = defs.slice(0, mid);
114
- const output = formatOutput(subset, graph, fileRanks, focusFiles);
246
+ const output = formatOutput(subset, graph, fileRanks, focusFiles, maxFilesPerModule);
115
247
  const tokens = estimateTokens(output);
116
248
  if (tokens <= tokenBudget) {
117
249
  if (mid > bestCount) {
package/dist/pagerank.js CHANGED
@@ -39,11 +39,10 @@ export function computePageRank(graph, options) {
39
39
  // Power iteration
40
40
  const next = new Float64Array(n);
41
41
  for (let iter = 0; iter < maxIter; iter++) {
42
- // Base teleport term: (1-d)/N per spec §7.2
43
- // Uses uniform distribution (not personalized) for teleport
44
- const base = (1 - d) / n;
42
+ // Personalized teleport: (1-d) * pVec[i]
43
+ // Uses personalization vector for stronger focus biasing
45
44
  for (let i = 0; i < n; i++) {
46
- next[i] = base;
45
+ next[i] = (1 - d) * (pVec[i] ?? 1 / n);
47
46
  }
48
47
  // Compute dangling sum
49
48
  let danglingSum = 0;
package/dist/parser.js CHANGED
@@ -61,8 +61,47 @@ async function loadLanguage(config) {
61
61
  * Derived from LANGUAGE_REGISTRY to ensure single source of truth.
62
62
  */
63
63
  const ALL_DEFINITION_TYPES = new Set(Object.values(LANGUAGE_REGISTRY).flatMap((config) => [...config.definitionTypes]));
64
+ /**
65
+ * Node types that block export inheritance.
66
+ * Local variables inside these should not be considered exported.
67
+ */
68
+ const EXPORT_BLOCKERS = new Set([
69
+ "function_declaration",
70
+ "function",
71
+ "function_expression",
72
+ "arrow_function",
73
+ "method_definition",
74
+ "class_declaration",
75
+ "class",
76
+ "class_expression",
77
+ ]);
78
+ /**
79
+ * Check if a variable_declarator is directly exported (not inside a function/class).
80
+ * Stops at function/class boundaries to prevent local variables from being captured.
81
+ */
82
+ function isDirectlyExportedVariable(node) {
83
+ let current = node;
84
+ while (current && current.type !== "program") {
85
+ // Stop if we hit a function/class boundary - local variables not exported
86
+ if (EXPORT_BLOCKERS.has(current.type))
87
+ return false;
88
+ const parent = current.parent;
89
+ if (parent?.type === "export_statement") {
90
+ const decl = parent.childForFieldName("declaration");
91
+ if (!decl)
92
+ return false;
93
+ // Compare by node ID, not object identity (tree-sitter creates new wrapper objects)
94
+ if (current.id !== decl.id)
95
+ return false;
96
+ return decl.type === "lexical_declaration" || decl.type === "variable_declaration";
97
+ }
98
+ current = parent;
99
+ }
100
+ return false;
101
+ }
64
102
  /**
65
103
  * Check if a node is inside an export statement (JS/TS).
104
+ * For function/class declarations (not variables).
66
105
  */
67
106
  function isExported(node) {
68
107
  let current = node.parent;
@@ -239,6 +278,22 @@ export async function parseFile(absPath, relPath) {
239
278
  if (shouldIncludeDefinition(node, langConfig)) {
240
279
  const name = getDefinitionName(node);
241
280
  if (name && !isBuiltin(name, langConfig)) {
281
+ // Determine export status based on language and node type
282
+ let nodeIsExported;
283
+ if (langConfig.grammarName === "typescript") {
284
+ // For TS/JS: check if it's directly exported or is a method
285
+ // Methods are considered non-exported (internal to class)
286
+ if (node.type === "method_definition") {
287
+ nodeIsExported = false;
288
+ }
289
+ else {
290
+ nodeIsExported = isExported(node);
291
+ }
292
+ }
293
+ else {
294
+ // For Python/Go/Rust/Solidity: all captured defs are "exported" (public API)
295
+ nodeIsExported = true;
296
+ }
242
297
  tags.push({
243
298
  relPath,
244
299
  absPath,
@@ -246,12 +301,13 @@ export async function parseFile(absPath, relPath) {
246
301
  name,
247
302
  kind: "def",
248
303
  signature: getSignature(node, sourceLines),
304
+ isExported: nodeIsExported,
249
305
  });
250
306
  }
251
307
  }
252
308
  }
253
309
  // Check for exported variable declarators (const/let) - TypeScript/JavaScript only
254
- if (node.type === "variable_declarator" && isExported(node)) {
310
+ if (node.type === "variable_declarator" && isDirectlyExportedVariable(node)) {
255
311
  const name = getVariableDeclaratorName(node);
256
312
  if (name && !isBuiltin(name, langConfig)) {
257
313
  tags.push({
@@ -261,6 +317,7 @@ export async function parseFile(absPath, relPath) {
261
317
  name,
262
318
  kind: "def",
263
319
  signature: getSignature(node, sourceLines),
320
+ isExported: true, // isDirectlyExportedVariable already checks this
264
321
  });
265
322
  }
266
323
  }