@snevins/repo-mapper 1.0.3 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +34 -3
- package/dist/cli.d.ts +16 -0
- package/dist/cli.js +35 -1
- package/dist/dedup.d.ts +27 -0
- package/dist/dedup.js +67 -0
- package/dist/files.d.ts +5 -0
- package/dist/files.js +110 -3
- package/dist/graph.d.ts +18 -2
- package/dist/graph.js +183 -8
- package/dist/index.js +156 -34
- package/dist/languages.d.ts +5 -0
- package/dist/languages.js +21 -0
- package/dist/output.d.ts +10 -4
- package/dist/output.js +159 -27
- package/dist/pagerank.js +3 -4
- package/dist/parser.js +58 -1
- package/dist/ranking.d.ts +37 -1
- package/dist/ranking.js +242 -1
- package/dist/types.d.ts +23 -0
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -1,26 +1,61 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import { writeFile, stat } from "node:fs/promises";
|
|
3
|
-
import { resolve, relative } from "node:path";
|
|
4
|
-
import {
|
|
2
|
+
import { writeFile, stat, readFile } from "node:fs/promises";
|
|
3
|
+
import { resolve, relative, dirname, join } from "node:path";
|
|
4
|
+
import { fileURLToPath } from "node:url";
|
|
5
|
+
import { parseCliArgs, HelpRequestedError, VersionRequestedError } from "./cli.js";
|
|
5
6
|
import { loadCache, getCachedTags, setCacheEntry, saveCache } from "./cache.js";
|
|
6
|
-
import { discoverFiles, DEFAULT_IGNORED_PATTERNS } from "./files.js";
|
|
7
|
+
import { discoverFiles, DEFAULT_IGNORED_PATTERNS, isTestOrScriptPath } from "./files.js";
|
|
8
|
+
import { getExtensionsForLanguage } from "./languages.js";
|
|
7
9
|
import { initParser, parseFile, shutdownParser } from "./parser.js";
|
|
8
|
-
import { buildFileGraph } from "./graph.js";
|
|
10
|
+
import { buildFileGraph, buildImportGraph } from "./graph.js";
|
|
9
11
|
import { computePageRank } from "./pagerank.js";
|
|
10
|
-
import {
|
|
12
|
+
import { buildFocusPersonalization, buildEntrypointPersonalization, rankDefinitions, adjustFileRanks, combineRanks } from "./ranking.js";
|
|
13
|
+
import { computeFileDegrees } from "./graph.js";
|
|
11
14
|
import { fitToTokenBudget } from "./output.js";
|
|
15
|
+
import { detectDuplicates } from "./dedup.js";
|
|
12
16
|
async function main() {
|
|
13
17
|
const args = parseCliArgs(process.argv.slice(2));
|
|
14
18
|
const { options } = args;
|
|
15
19
|
// Determine root directory (first path or cwd)
|
|
16
20
|
const firstPath = args.paths[0];
|
|
17
21
|
const rootDir = firstPath !== undefined ? resolve(firstPath) : process.cwd();
|
|
18
|
-
// Resolve focus files to relative paths
|
|
19
|
-
const focusFiles = options.focus.map((f) => relative(rootDir, resolve(f)));
|
|
20
|
-
|
|
22
|
+
// Resolve focus files to relative paths (resolve relative to rootDir, not CWD)
|
|
23
|
+
const focusFiles = options.focus.map((f) => relative(rootDir, resolve(rootDir, f)));
|
|
24
|
+
// Validate focus files exist
|
|
25
|
+
const validFocusFiles = [];
|
|
26
|
+
for (const relPath of focusFiles) {
|
|
27
|
+
const absPath = join(rootDir, relPath);
|
|
28
|
+
try {
|
|
29
|
+
await stat(absPath);
|
|
30
|
+
validFocusFiles.push(relPath);
|
|
31
|
+
}
|
|
32
|
+
catch {
|
|
33
|
+
console.error(`Warning: Focus file "${relPath}" does not exist, ignoring`);
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
const focusSet = new Set(validFocusFiles);
|
|
21
37
|
if (options.verbose) {
|
|
22
38
|
console.error(`Root: ${rootDir}`);
|
|
23
|
-
console.error(`Focus files: ${String(
|
|
39
|
+
console.error(`Focus files: ${String(validFocusFiles.length)}`);
|
|
40
|
+
}
|
|
41
|
+
// Build extensions filter from --type flags
|
|
42
|
+
let extensions;
|
|
43
|
+
if (options.type.length > 0) {
|
|
44
|
+
extensions = new Set();
|
|
45
|
+
for (const langId of options.type) {
|
|
46
|
+
const langExts = getExtensionsForLanguage(langId);
|
|
47
|
+
if (!langExts) {
|
|
48
|
+
console.error(`Warning: Unknown language type "${langId}", ignoring`);
|
|
49
|
+
continue;
|
|
50
|
+
}
|
|
51
|
+
for (const ext of langExts) {
|
|
52
|
+
extensions.add(ext);
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
if (extensions.size === 0) {
|
|
56
|
+
console.error("Error: No valid language types specified");
|
|
57
|
+
process.exit(1);
|
|
58
|
+
}
|
|
24
59
|
}
|
|
25
60
|
// 1. Discover files (apply ignore patterns unless --no-ignore)
|
|
26
61
|
const ignoredPatterns = options.noIgnore
|
|
@@ -28,8 +63,11 @@ async function main() {
|
|
|
28
63
|
: [...DEFAULT_IGNORED_PATTERNS, ...options.ignore]; // defaults + custom
|
|
29
64
|
const discoveryResult = await discoverFiles({
|
|
30
65
|
rootDir,
|
|
66
|
+
extensions,
|
|
31
67
|
ignoredPatterns: ignoredPatterns.length > 0 ? ignoredPatterns : undefined,
|
|
68
|
+
includePatterns: options.include.length > 0 ? options.include : undefined,
|
|
32
69
|
maxFiles: options.maxFiles,
|
|
70
|
+
ignoredDirs: undefined,
|
|
33
71
|
});
|
|
34
72
|
const files = discoveryResult.files;
|
|
35
73
|
if (discoveryResult.wasLimited) {
|
|
@@ -88,21 +126,73 @@ async function main() {
|
|
|
88
126
|
console.error(`Cache: ${String(cacheHits)} hits, ${String(cacheMisses)} misses`);
|
|
89
127
|
console.error(`Parsed ${String(defs.length)} definitions, ${String(refs.length)} references`);
|
|
90
128
|
}
|
|
91
|
-
// 3. Build
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
const
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
129
|
+
// 3. Build graphs (downweight edges from test/script files)
|
|
130
|
+
// Two-stage ranking: import graph for structure, ref graph for density
|
|
131
|
+
const TEST_WEIGHT = 0.2;
|
|
132
|
+
const edgeWeightMultiplier = (path) => isTestOrScriptPath(path) ? TEST_WEIGHT : 1.0;
|
|
133
|
+
// Import graph: binary edges (1 per file→file connection) for structural importance
|
|
134
|
+
const importGraph = buildImportGraph(allTags, { edgeWeightMultiplier });
|
|
135
|
+
// Ref graph: weighted edges (ref counts) for density and definition ranking
|
|
136
|
+
const refGraph = buildFileGraph(allTags, { edgeWeightMultiplier });
|
|
137
|
+
// 4. Two-phase PageRank
|
|
138
|
+
// Phase 1: Compute structural importance from import graph
|
|
139
|
+
let personalization;
|
|
140
|
+
if (validFocusFiles.length > 0) {
|
|
141
|
+
personalization = buildFocusPersonalization(validFocusFiles, refGraph);
|
|
142
|
+
}
|
|
143
|
+
else {
|
|
144
|
+
// Auto-detect entrypoints for default personalization
|
|
145
|
+
const entrypointMap = buildEntrypointPersonalization(importGraph.nodes);
|
|
146
|
+
personalization = entrypointMap.size > 0 ? entrypointMap : undefined;
|
|
147
|
+
}
|
|
148
|
+
const structuralRanks = computePageRank(importGraph, { personalization });
|
|
149
|
+
// Phase 2: Combine structural rank with reference density
|
|
150
|
+
const rawFileRanks = combineRanks(structuralRanks, refGraph);
|
|
151
|
+
// 5. Detect duplicate files (copy-pasted code)
|
|
152
|
+
const tagsByFile = new Map();
|
|
153
|
+
for (const tag of allTags) {
|
|
154
|
+
const existing = tagsByFile.get(tag.relPath);
|
|
155
|
+
if (existing) {
|
|
156
|
+
existing.push(tag);
|
|
157
|
+
}
|
|
158
|
+
else {
|
|
159
|
+
tagsByFile.set(tag.relPath, [tag]);
|
|
160
|
+
}
|
|
161
|
+
}
|
|
162
|
+
const duplicates = detectDuplicates(tagsByFile);
|
|
163
|
+
if (options.verbose && duplicates.size > 0) {
|
|
164
|
+
// Count unique duplicate groups
|
|
165
|
+
const uniqueGroups = new Set([...duplicates.values()].map((g) => g.fingerprint));
|
|
166
|
+
console.error(`Detected ${String(uniqueGroups.size)} duplicate file group(s)`);
|
|
167
|
+
}
|
|
168
|
+
// 6. Apply architecture-aware adjustments (utility penalty, entry boost, diversity, duplicates)
|
|
169
|
+
const degrees = computeFileDegrees(refGraph);
|
|
170
|
+
const fileRanks = adjustFileRanks(rawFileRanks, degrees, refGraph.nodes, duplicates);
|
|
171
|
+
// 7. Rank definitions (exclude focus files)
|
|
172
|
+
const rankedDefs = rankDefinitions(refGraph, fileRanks, focusSet);
|
|
100
173
|
if (options.verbose) {
|
|
101
174
|
console.error(`Ranked ${String(rankedDefs.length)} definitions`);
|
|
102
175
|
}
|
|
103
|
-
//
|
|
104
|
-
|
|
105
|
-
|
|
176
|
+
// Warn if focus file results are dominated by utility paths
|
|
177
|
+
if (validFocusFiles.length > 0 && rankedDefs.length >= 2) {
|
|
178
|
+
const UTILITY_PATTERNS = ["utils/", "lib/", "common/", "shared/", "helpers/"];
|
|
179
|
+
const topN = rankedDefs.slice(0, Math.min(3, rankedDefs.length));
|
|
180
|
+
const utilityCount = topN.filter((d) => UTILITY_PATTERNS.some((p) => d.file.includes(p))).length;
|
|
181
|
+
if (utilityCount >= 2) {
|
|
182
|
+
console.error(`Tip: Top results are utility files. Try subdirectory scoping instead: ` +
|
|
183
|
+
`repo-mapper ./path/to/module -t ${String(options.tokens)}`);
|
|
184
|
+
}
|
|
185
|
+
}
|
|
186
|
+
// Warn if output is suspiciously sparse with large token budget
|
|
187
|
+
const SPARSE_THRESHOLD = 5;
|
|
188
|
+
const LARGE_BUDGET_THRESHOLD = 1000;
|
|
189
|
+
if (options.tokens >= LARGE_BUDGET_THRESHOLD && rankedDefs.length < SPARSE_THRESHOLD) {
|
|
190
|
+
console.error(`Warning: Output is sparse (${String(rankedDefs.length)} definitions) with ${String(options.tokens)} token budget. ` +
|
|
191
|
+
`This may indicate overly broad ignore patterns or missing language support.`);
|
|
192
|
+
}
|
|
193
|
+
// 8. Fit to token budget and format output
|
|
194
|
+
const output = fitToTokenBudget(rankedDefs, refGraph, fileRanks, options.tokens, validFocusFiles.length > 0 ? validFocusFiles : undefined);
|
|
195
|
+
// 9. Write output
|
|
106
196
|
if (options.output) {
|
|
107
197
|
await writeFile(options.output, output);
|
|
108
198
|
if (options.verbose) {
|
|
@@ -113,18 +203,50 @@ async function main() {
|
|
|
113
203
|
console.log(output);
|
|
114
204
|
}
|
|
115
205
|
}
|
|
116
|
-
|
|
206
|
+
const USAGE = `Usage: repo-mapper [paths...] [options]
|
|
207
|
+
|
|
208
|
+
Options:
|
|
209
|
+
-h, --help Show this help message
|
|
210
|
+
-V, --version Show version number
|
|
211
|
+
-t, --tokens <n> Max tokens (default: 2000)
|
|
212
|
+
-f, --focus <file> Focus file (repeatable)
|
|
213
|
+
-o, --output <file> Output file
|
|
214
|
+
-r, --refresh Ignore cache
|
|
215
|
+
-v, --verbose Verbose output
|
|
216
|
+
--ignore <pattern> Add ignore pattern (repeatable)
|
|
217
|
+
--include <pattern> Only include files matching pattern (repeatable)
|
|
218
|
+
--no-ignore Disable default ignore patterns
|
|
219
|
+
--max-files <n> Max files to process (default: 10000, 0=unlimited)
|
|
220
|
+
--type <lang> Filter by language: ts, js, python, go, rust, solidity (repeatable)
|
|
221
|
+
|
|
222
|
+
Examples:
|
|
223
|
+
repo-mapper . # Map current directory
|
|
224
|
+
repo-mapper ./src -t 3000 # Map src/ with larger budget
|
|
225
|
+
repo-mapper . -f src/api.ts # Bias ranking toward api.ts
|
|
226
|
+
repo-mapper . --type ts # Only TypeScript files
|
|
227
|
+
repo-mapper . -v # Show progress info`;
|
|
228
|
+
/**
 * Read the `version` field of the package.json located one directory above
 * this module file.
 *
 * @returns {Promise<*>} Whatever value the manifest stores under `version`.
 * @throws If the manifest cannot be read or is not valid JSON.
 */
async function getVersion() {
    // ES modules have no __dirname; derive the directory from the module URL.
    const moduleDir = dirname(fileURLToPath(import.meta.url));
    const manifestText = await readFile(join(moduleDir, "..", "package.json"), "utf-8");
    const { version } = JSON.parse(manifestText);
    return version;
}
|
|
235
|
+
main().catch(async (err) => {
|
|
236
|
+
// --help or -h: print usage and exit success
|
|
237
|
+
if (err instanceof HelpRequestedError) {
|
|
238
|
+
console.log(USAGE);
|
|
239
|
+
process.exit(0);
|
|
240
|
+
}
|
|
241
|
+
// --version or -V: print version and exit success
|
|
242
|
+
if (err instanceof VersionRequestedError) {
|
|
243
|
+
const version = await getVersion();
|
|
244
|
+
console.log(version);
|
|
245
|
+
process.exit(0);
|
|
246
|
+
}
|
|
247
|
+
// Other errors: print error + usage and exit failure
|
|
117
248
|
const message = err instanceof Error ? err.message : String(err);
|
|
118
|
-
console.error(`Error: ${message}`);
|
|
119
|
-
console.error(
|
|
120
|
-
console.error("Options:");
|
|
121
|
-
console.error(" -t, --tokens <n> Max tokens (default: 1024)");
|
|
122
|
-
console.error(" -f, --focus <file> Focus file (repeatable)");
|
|
123
|
-
console.error(" -o, --output <file> Output file");
|
|
124
|
-
console.error(" -r, --refresh Ignore cache");
|
|
125
|
-
console.error(" -v, --verbose Verbose output");
|
|
126
|
-
console.error(" --ignore <pattern> Add ignore pattern (repeatable)");
|
|
127
|
-
console.error(" --no-ignore Disable default ignore patterns");
|
|
128
|
-
console.error(" --max-files <n> Max files to process (default: 10000, 0=unlimited)");
|
|
249
|
+
console.error(`Error: ${message}\n`);
|
|
250
|
+
console.error(USAGE);
|
|
129
251
|
process.exit(1);
|
|
130
252
|
});
|
package/dist/languages.d.ts
CHANGED
|
@@ -38,3 +38,8 @@ export declare function isTreeSitterSupported(ext: string): boolean;
|
|
|
38
38
|
* If no config provided, uses TS/JS builtins as default.
|
|
39
39
|
*/
|
|
40
40
|
export declare function isBuiltin(name: string, config?: LanguageConfig): boolean;
|
|
41
|
+
/**
|
|
42
|
+
* Get file extensions for a language ID or alias.
|
|
43
|
+
* Returns undefined for unknown languages.
|
|
44
|
+
*/
|
|
45
|
+
export declare function getExtensionsForLanguage(langId: string): ReadonlySet<string> | undefined;
|
package/dist/languages.js
CHANGED
|
@@ -607,3 +607,24 @@ export function isBuiltin(name, config) {
|
|
|
607
607
|
const builtins = config?.builtins ?? TS_JS_BUILTINS;
|
|
608
608
|
return builtins.has(name);
|
|
609
609
|
}
|
|
610
|
+
/**
|
|
611
|
+
* Short aliases for common language IDs.
|
|
612
|
+
*/
|
|
613
|
+
const LANG_ALIASES = {
|
|
614
|
+
ts: "typescript",
|
|
615
|
+
js: "javascript",
|
|
616
|
+
py: "python",
|
|
617
|
+
rs: "rust",
|
|
618
|
+
sol: "solidity",
|
|
619
|
+
};
|
|
620
|
+
/**
 * Look up the file extensions registered for a language.
 *
 * `langId` is first translated through LANG_ALIASES (falling back to the
 * value itself when no alias exists) and then looked up in LANGUAGE_REGISTRY.
 *
 * @param {string} langId - Language ID or short alias.
 * @returns {Set<string> | undefined} A fresh Set built from the registry
 *   entry's `extensions`, or undefined when the language is not registered.
 */
export function getExtensionsForLanguage(langId) {
    const canonicalId = LANG_ALIASES[langId] ?? langId;
    const entry = LANGUAGE_REGISTRY[canonicalId];
    return entry ? new Set(entry.extensions) : undefined;
}
|
package/dist/output.d.ts
CHANGED
|
@@ -1,11 +1,17 @@
|
|
|
1
1
|
import type { RankedDefinition, FileGraph } from "./types.js";
|
|
2
2
|
/**
|
|
3
|
-
*
|
|
4
|
-
*
|
|
3
|
+
* Get module name (first path segment) from a file path.
|
|
4
|
+
* Returns "(root)" for files in the root directory.
|
|
5
|
+
*/
|
|
6
|
+
export declare function getModuleName(path: string): string;
|
|
7
|
+
/**
|
|
8
|
+
* Format output per spec §3.5, grouped by module.
|
|
9
|
+
* Modules sorted by highest-ranked file descending.
|
|
10
|
+
* Files within module sorted by PageRank descending.
|
|
5
11
|
* Definitions within file sorted by line ascending.
|
|
6
12
|
* Line numbers right-aligned to max width.
|
|
7
13
|
*/
|
|
8
|
-
export declare function formatOutput(defs: readonly RankedDefinition[], graph: FileGraph, fileRanks: ReadonlyMap<string, number>, focusFiles?: readonly string[]): string;
|
|
14
|
+
export declare function formatOutput(defs: readonly RankedDefinition[], graph: FileGraph, fileRanks: ReadonlyMap<string, number>, focusFiles?: readonly string[], maxFilesPerModule?: number): string;
|
|
9
15
|
/**
|
|
10
16
|
* Fit ranked definitions to token budget using binary search.
|
|
11
17
|
* Returns the largest output that fits within the budget.
|
|
@@ -16,4 +22,4 @@ export declare function formatOutput(defs: readonly RankedDefinition[], graph: F
|
|
|
16
22
|
* 3. If tokens ≤ budget, try more; else try fewer
|
|
17
23
|
* 4. Return best fit
|
|
18
24
|
*/
|
|
19
|
-
export declare function fitToTokenBudget(defs: readonly RankedDefinition[], graph: FileGraph, fileRanks: ReadonlyMap<string, number>, tokenBudget: number, focusFiles?: readonly string[]): string;
|
|
25
|
+
export declare function fitToTokenBudget(defs: readonly RankedDefinition[], graph: FileGraph, fileRanks: ReadonlyMap<string, number>, tokenBudget: number, focusFiles?: readonly string[], maxFilesPerModule?: number): string;
|
package/dist/output.js
CHANGED
|
@@ -1,11 +1,63 @@
|
|
|
1
1
|
import { estimateTokens } from "./tokens.js";
|
|
2
|
+
/**
|
|
3
|
+
* Max definitions per unique name in output to prevent repetition.
|
|
4
|
+
*/
|
|
5
|
+
const MAX_DEFS_PER_NAME = 2;
|
|
6
|
+
/**
|
|
7
|
+
* Default max files per module to spread budget across modules.
|
|
8
|
+
*/
|
|
9
|
+
const MAX_FILES_PER_MODULE = 5;
|
|
10
|
+
/**
|
|
11
|
+
* Top K files globally guaranteed inclusion regardless of per-module limits.
|
|
12
|
+
*/
|
|
13
|
+
const GLOBAL_TOP_K = 50;
|
|
14
|
+
/**
 * Derive the module name of a file: the text before the first "/" in its path.
 *
 * @param {string} path - File path using "/" separators.
 * @returns {string} The first path segment, or "(root)" when the path
 *   contains no "/" at all.
 */
export function getModuleName(path) {
    const [firstSegment, ...deeper] = path.split("/");
    // No remaining segments means the path had no separator.
    return deeper.length > 0 ? firstSegment : "(root)";
}
|
|
22
|
+
/**
 * Compute an integer percentile (0-100) for `rank` within a list of ranks.
 *
 * The percentile is the share of entries strictly smaller than `rank`,
 * measured against `length - 1` and rounded to the nearest integer. Only
 * strict comparisons are used, so the list does not have to be sorted.
 *
 * @param {number} rank - Rank value to position.
 * @param {number[]} sortedRanks - All rank values under consideration.
 * @returns {number} Percentile in 0-100; always 100 for lists of length 0 or 1.
 */
function computePercentile(rank, sortedRanks) {
    const total = sortedRanks.length;
    // With fewer than two entries there is nothing to compare against.
    if (total <= 1) {
        return 100;
    }
    const below = sortedRanks.filter((value) => value < rank).length;
    return Math.round((below / (total - 1)) * 100);
}
|
|
40
|
+
/**
 * Render a file's rank as a percentile fraction for display.
 *
 * @param {number} rank - Rank value of the file.
 * @param {number[]} sortedRanks - Rank values the percentile is computed against.
 * @returns {string} The percentile divided by 100, formatted with exactly one
 *   decimal place (for example "0.5" or "1.0").
 */
function formatRankDisplay(rank, sortedRanks) {
    const fraction = computePercentile(rank, sortedRanks) / 100;
    return fraction.toFixed(1);
}
|
|
2
47
|
/**
|
|
3
48
|
* Resolve RankedDefinitions to their Tags by looking up in graph.defsByName.
|
|
4
49
|
* Returns only definitions that can be found.
|
|
50
|
+
* Applies diversity cap to limit repetition of same-named definitions.
|
|
5
51
|
*/
|
|
6
52
|
function resolveDefs(defs, graph) {
|
|
7
53
|
const result = [];
|
|
54
|
+
const nameCounts = new Map();
|
|
8
55
|
for (const def of defs) {
|
|
56
|
+
// Check diversity cap before processing
|
|
57
|
+
const currentCount = nameCounts.get(def.ident) ?? 0;
|
|
58
|
+
if (currentCount >= MAX_DEFS_PER_NAME) {
|
|
59
|
+
continue;
|
|
60
|
+
}
|
|
9
61
|
const tags = graph.defsByName.get(def.ident);
|
|
10
62
|
if (!tags)
|
|
11
63
|
continue;
|
|
@@ -13,6 +65,7 @@ function resolveDefs(defs, graph) {
|
|
|
13
65
|
if (!tag)
|
|
14
66
|
continue;
|
|
15
67
|
result.push({ file: def.file, tag, rank: def.rank });
|
|
68
|
+
nameCounts.set(def.ident, currentCount + 1);
|
|
16
69
|
}
|
|
17
70
|
return result;
|
|
18
71
|
}
|
|
@@ -29,52 +82,131 @@ function groupByFile(defs) {
|
|
|
29
82
|
return groups;
|
|
30
83
|
}
|
|
31
84
|
/**
|
|
32
|
-
*
|
|
33
|
-
|
|
85
|
+
* Group files by module (first path segment).
|
|
86
|
+
*/
|
|
87
|
+
function groupByModule(files) {
    // Buckets file paths under the name getModuleName() reports for each one.
    // Map keys appear in first-seen order and each bucket keeps the input
    // order of its files.
    const modules = new Map();
    for (const file of files) {
        const moduleName = getModuleName(file);
        const bucket = modules.get(moduleName);
        if (bucket === undefined) {
            modules.set(moduleName, [file]);
        }
        else {
            bucket.push(file);
        }
    }
    return modules;
}
|
|
97
|
+
/**
 * Decide which files survive the per-module file cap.
 *
 * The `globalTopK` highest-ranked files across all modules are always kept
 * and do not count against their module's quota. Every other file is kept,
 * in the order it appears in its module's list, until that module holds
 * `maxPerModule` non-guaranteed files; the rest are counted as omitted.
 * Because the quota is filled in list order, callers that want "top N by
 * rank" must pass module lists that are already sorted by rank descending.
 *
 * @param {Map<string, string[]>} moduleGroups - Module name -> file paths.
 * @param {ReadonlyMap<string, number>} fileRanks - File path -> rank; files
 *   missing from the map are treated as rank 0.
 * @param {number} maxPerModule - Cap on non-guaranteed files per module;
 *   a value <= 0 disables the cap.
 * @param {number} [globalTopK=GLOBAL_TOP_K] - How many globally top-ranked
 *   files are guaranteed; a value <= 0 guarantees none.
 * @returns {{ allowedFiles: Set<string>, omittedByModule: Map<string, number> }}
 *   The kept files, plus the omitted-file count for each module that lost any.
 */
function limitFilesPerModule(moduleGroups, fileRanks, maxPerModule, globalTopK = GLOBAL_TOP_K) {
    if (maxPerModule <= 0 && globalTopK <= 0) {
        // No limits at all - every file is allowed.
        return {
            allowedFiles: new Set([...moduleGroups.values()].flat()),
            omittedByModule: new Map(),
        };
    }
    const rankOf = (file) => fileRanks.get(file) ?? 0;
    // flat() returns a fresh array, so sorting it in place cannot disturb the
    // caller's per-module lists.
    const byGlobalRank = [...moduleGroups.values()].flat().sort((a, b) => rankOf(b) - rankOf(a));
    // Clamp to zero: a negative end index makes slice() count from the END of
    // the array, which would silently guarantee almost every file.
    const guaranteedFiles = new Set(byGlobalRank.slice(0, Math.max(0, globalTopK)));
    const allowedFiles = new Set();
    const omittedByModule = new Map();
    for (const [mod, files] of moduleGroups) {
        let keptNonGuaranteed = 0;
        let omitted = 0;
        for (const file of files) {
            if (guaranteedFiles.has(file)) {
                // Guaranteed files never consume the module's quota.
                allowedFiles.add(file);
            }
            else if (maxPerModule <= 0 || keptNonGuaranteed < maxPerModule) {
                allowedFiles.add(file);
                keptNonGuaranteed++;
            }
            else {
                omitted++;
            }
        }
        if (omitted > 0) {
            omittedByModule.set(mod, omitted);
        }
    }
    return { allowedFiles, omittedByModule };
}
|
|
139
|
+
/**
|
|
140
|
+
* Format output per spec §3.5, grouped by module.
|
|
141
|
+
* Modules sorted by highest-ranked file descending.
|
|
142
|
+
* Files within module sorted by PageRank descending.
|
|
34
143
|
* Definitions within file sorted by line ascending.
|
|
35
144
|
* Line numbers right-aligned to max width.
|
|
36
145
|
*/
|
|
37
|
-
export function formatOutput(defs, graph, fileRanks, focusFiles) {
|
|
146
|
+
export function formatOutput(defs, graph, fileRanks, focusFiles, maxFilesPerModule = MAX_FILES_PER_MODULE) {
|
|
38
147
|
const resolved = resolveDefs(defs, graph);
|
|
39
148
|
if (resolved.length === 0 && (!focusFiles || focusFiles.length === 0)) {
|
|
40
149
|
return "";
|
|
41
150
|
}
|
|
42
|
-
const
|
|
151
|
+
const fileGroups = groupByFile(resolved);
|
|
43
152
|
// Sort files by PageRank descending
|
|
44
|
-
const sortedFiles = [...
|
|
153
|
+
const sortedFiles = [...fileGroups.keys()].sort((a, b) => {
|
|
45
154
|
const rankA = fileRanks.get(a) ?? 0;
|
|
46
155
|
const rankB = fileRanks.get(b) ?? 0;
|
|
47
156
|
if (rankB !== rankA)
|
|
48
157
|
return rankB - rankA;
|
|
49
158
|
return a.localeCompare(b);
|
|
50
159
|
});
|
|
51
|
-
//
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
160
|
+
// Group files by module
|
|
161
|
+
const moduleGroups = groupByModule(sortedFiles);
|
|
162
|
+
// Apply module file limits
|
|
163
|
+
const { allowedFiles, omittedByModule } = limitFilesPerModule(moduleGroups, fileRanks, maxFilesPerModule);
|
|
164
|
+
// Sort modules by highest-ranked file in each module
|
|
165
|
+
const sortedModules = [...moduleGroups.keys()].sort((a, b) => {
|
|
166
|
+
const filesA = moduleGroups.get(a) ?? [];
|
|
167
|
+
const filesB = moduleGroups.get(b) ?? [];
|
|
168
|
+
const maxRankA = Math.max(...filesA.map((f) => fileRanks.get(f) ?? 0));
|
|
169
|
+
const maxRankB = Math.max(...filesB.map((f) => fileRanks.get(f) ?? 0));
|
|
170
|
+
if (maxRankB !== maxRankA)
|
|
171
|
+
return maxRankB - maxRankA;
|
|
172
|
+
return a.localeCompare(b);
|
|
173
|
+
});
|
|
58
174
|
const lines = [];
|
|
59
175
|
// Focus files header
|
|
60
176
|
if (focusFiles && focusFiles.length > 0) {
|
|
61
177
|
lines.push(`[Focus: [${focusFiles.join(", ")}]]`);
|
|
62
178
|
lines.push("");
|
|
63
179
|
}
|
|
64
|
-
//
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
180
|
+
// Get all ranks for percentile calculation (only from allowed files)
|
|
181
|
+
const allRanks = [...allowedFiles].map((f) => fileRanks.get(f) ?? 0);
|
|
182
|
+
// Format by module
|
|
183
|
+
for (const mod of sortedModules) {
|
|
184
|
+
const allModFiles = moduleGroups.get(mod) ?? [];
|
|
185
|
+
// Filter to allowed files only
|
|
186
|
+
const modFiles = allModFiles.filter((f) => allowedFiles.has(f));
|
|
187
|
+
if (modFiles.length === 0)
|
|
68
188
|
continue;
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
lines.push(
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
189
|
+
// Module header - shows kept count, not total
|
|
190
|
+
const suffix = mod === "(root)" ? "" : "/";
|
|
191
|
+
lines.push(`## ${mod}${suffix} (${String(modFiles.length)} file${modFiles.length > 1 ? "s" : ""})`);
|
|
192
|
+
// Files in this module (already sorted by rank)
|
|
193
|
+
for (const file of modFiles) {
|
|
194
|
+
const fileDefs = fileGroups.get(file);
|
|
195
|
+
if (!fileDefs || fileDefs.length === 0)
|
|
196
|
+
continue;
|
|
197
|
+
const fileRank = fileRanks.get(file) ?? 0;
|
|
198
|
+
lines.push(`${file}: ${formatRankDisplay(fileRank, allRanks)}`);
|
|
199
|
+
// Sort definitions by line ascending
|
|
200
|
+
const sorted = [...fileDefs].sort((a, b) => a.tag.line - b.tag.line);
|
|
201
|
+
for (const def of sorted) {
|
|
202
|
+
lines.push(` ${def.tag.signature ?? def.tag.name}`);
|
|
203
|
+
}
|
|
204
|
+
}
|
|
205
|
+
// Omission annotation if files were limited
|
|
206
|
+
const omitted = omittedByModule.get(mod);
|
|
207
|
+
if (omitted !== undefined && omitted > 0) {
|
|
208
|
+
const fileWord = omitted === 1 ? "file" : "files";
|
|
209
|
+
lines.push(` ... (${String(omitted)} more ${fileWord} in ${mod}${suffix})`);
|
|
78
210
|
}
|
|
79
211
|
lines.push("");
|
|
80
212
|
}
|
|
@@ -90,7 +222,7 @@ export function formatOutput(defs, graph, fileRanks, focusFiles) {
|
|
|
90
222
|
* 3. If tokens ≤ budget, try more; else try fewer
|
|
91
223
|
* 4. Return best fit
|
|
92
224
|
*/
|
|
93
|
-
export function fitToTokenBudget(defs, graph, fileRanks, tokenBudget, focusFiles) {
|
|
225
|
+
export function fitToTokenBudget(defs, graph, fileRanks, tokenBudget, focusFiles, maxFilesPerModule = MAX_FILES_PER_MODULE) {
|
|
94
226
|
if (tokenBudget <= 0)
|
|
95
227
|
return "";
|
|
96
228
|
if (defs.length === 0) {
|
|
@@ -111,7 +243,7 @@ export function fitToTokenBudget(defs, graph, fileRanks, tokenBudget, focusFiles
|
|
|
111
243
|
continue;
|
|
112
244
|
}
|
|
113
245
|
const subset = defs.slice(0, mid);
|
|
114
|
-
const output = formatOutput(subset, graph, fileRanks, focusFiles);
|
|
246
|
+
const output = formatOutput(subset, graph, fileRanks, focusFiles, maxFilesPerModule);
|
|
115
247
|
const tokens = estimateTokens(output);
|
|
116
248
|
if (tokens <= tokenBudget) {
|
|
117
249
|
if (mid > bestCount) {
|
package/dist/pagerank.js
CHANGED
|
@@ -39,11 +39,10 @@ export function computePageRank(graph, options) {
|
|
|
39
39
|
// Power iteration
|
|
40
40
|
const next = new Float64Array(n);
|
|
41
41
|
for (let iter = 0; iter < maxIter; iter++) {
|
|
42
|
-
//
|
|
43
|
-
// Uses
|
|
44
|
-
const base = (1 - d) / n;
|
|
42
|
+
// Personalized teleport: (1-d) * pVec[i]
|
|
43
|
+
// Uses personalization vector for stronger focus biasing
|
|
45
44
|
for (let i = 0; i < n; i++) {
|
|
46
|
-
next[i] =
|
|
45
|
+
next[i] = (1 - d) * (pVec[i] ?? 1 / n);
|
|
47
46
|
}
|
|
48
47
|
// Compute dangling sum
|
|
49
48
|
let danglingSum = 0;
|
package/dist/parser.js
CHANGED
|
@@ -61,8 +61,47 @@ async function loadLanguage(config) {
|
|
|
61
61
|
* Derived from LANGUAGE_REGISTRY to ensure single source of truth.
|
|
62
62
|
*/
|
|
63
63
|
const ALL_DEFINITION_TYPES = new Set(Object.values(LANGUAGE_REGISTRY).flatMap((config) => [...config.definitionTypes]));
|
|
64
|
+
/**
|
|
65
|
+
* Node types that block export inheritance.
|
|
66
|
+
* Local variables inside these should not be considered exported.
|
|
67
|
+
*/
|
|
68
|
+
const EXPORT_BLOCKERS = new Set([
|
|
69
|
+
"function_declaration",
|
|
70
|
+
"function",
|
|
71
|
+
"function_expression",
|
|
72
|
+
"arrow_function",
|
|
73
|
+
"method_definition",
|
|
74
|
+
"class_declaration",
|
|
75
|
+
"class",
|
|
76
|
+
"class_expression",
|
|
77
|
+
]);
|
|
78
|
+
/**
 * Report whether a node sits directly under an `export` statement as part of
 * a variable declaration.
 *
 * Walks from `node` up through its ancestors until a "program" node (or a
 * missing parent) is reached. The walk gives up with `false` as soon as it
 * meets a type listed in EXPORT_BLOCKERS, so variables local to a function
 * or class are never reported as exported. When an ancestor's parent is an
 * `export_statement`, the result is true only if that ancestor is the
 * statement's "declaration" field and is a lexical or variable declaration.
 *
 * @param {object} node - Syntax node exposing `type`, `id`, `parent` and,
 *   on export statements, `childForFieldName()`.
 * @returns {boolean} True only for directly exported variable declarations.
 */
function isDirectlyExportedVariable(node) {
    for (let cursor = node; cursor && cursor.type !== "program"; cursor = cursor.parent) {
        if (EXPORT_BLOCKERS.has(cursor.type)) {
            return false;
        }
        const owner = cursor.parent;
        if (owner?.type !== "export_statement") {
            continue;
        }
        const declaration = owner.childForFieldName("declaration");
        // Nodes are matched by id rather than object identity, since the same
        // syntax node may be handed out as distinct wrapper objects.
        if (!declaration || cursor.id !== declaration.id) {
            return false;
        }
        return declaration.type === "lexical_declaration" || declaration.type === "variable_declaration";
    }
    return false;
}
|
|
64
102
|
/**
|
|
65
103
|
* Check if a node is inside an export statement (JS/TS).
|
|
104
|
+
* For function/class declarations (not variables).
|
|
66
105
|
*/
|
|
67
106
|
function isExported(node) {
|
|
68
107
|
let current = node.parent;
|
|
@@ -239,6 +278,22 @@ export async function parseFile(absPath, relPath) {
|
|
|
239
278
|
if (shouldIncludeDefinition(node, langConfig)) {
|
|
240
279
|
const name = getDefinitionName(node);
|
|
241
280
|
if (name && !isBuiltin(name, langConfig)) {
|
|
281
|
+
// Determine export status based on language and node type
|
|
282
|
+
let nodeIsExported;
|
|
283
|
+
if (langConfig.grammarName === "typescript") {
|
|
284
|
+
// For TS/JS: check if it's directly exported or is a method
|
|
285
|
+
// Methods are considered non-exported (internal to class)
|
|
286
|
+
if (node.type === "method_definition") {
|
|
287
|
+
nodeIsExported = false;
|
|
288
|
+
}
|
|
289
|
+
else {
|
|
290
|
+
nodeIsExported = isExported(node);
|
|
291
|
+
}
|
|
292
|
+
}
|
|
293
|
+
else {
|
|
294
|
+
// For Python/Go/Rust/Solidity: all captured defs are "exported" (public API)
|
|
295
|
+
nodeIsExported = true;
|
|
296
|
+
}
|
|
242
297
|
tags.push({
|
|
243
298
|
relPath,
|
|
244
299
|
absPath,
|
|
@@ -246,12 +301,13 @@ export async function parseFile(absPath, relPath) {
|
|
|
246
301
|
name,
|
|
247
302
|
kind: "def",
|
|
248
303
|
signature: getSignature(node, sourceLines),
|
|
304
|
+
isExported: nodeIsExported,
|
|
249
305
|
});
|
|
250
306
|
}
|
|
251
307
|
}
|
|
252
308
|
}
|
|
253
309
|
// Check for exported variable declarators (const/let) - TypeScript/JavaScript only
|
|
254
|
-
if (node.type === "variable_declarator" &&
|
|
310
|
+
if (node.type === "variable_declarator" && isDirectlyExportedVariable(node)) {
|
|
255
311
|
const name = getVariableDeclaratorName(node);
|
|
256
312
|
if (name && !isBuiltin(name, langConfig)) {
|
|
257
313
|
tags.push({
|
|
@@ -261,6 +317,7 @@ export async function parseFile(absPath, relPath) {
|
|
|
261
317
|
name,
|
|
262
318
|
kind: "def",
|
|
263
319
|
signature: getSignature(node, sourceLines),
|
|
320
|
+
isExported: true, // isDirectlyExportedVariable already checks this
|
|
264
321
|
});
|
|
265
322
|
}
|
|
266
323
|
}
|