@snevins/repo-mapper 1.0.3 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +34 -3
- package/dist/cli.d.ts +16 -0
- package/dist/cli.js +35 -1
- package/dist/dedup.d.ts +27 -0
- package/dist/dedup.js +67 -0
- package/dist/files.d.ts +5 -0
- package/dist/files.js +110 -3
- package/dist/graph.d.ts +18 -2
- package/dist/graph.js +183 -8
- package/dist/index.js +156 -34
- package/dist/languages.d.ts +5 -0
- package/dist/languages.js +21 -0
- package/dist/output.d.ts +10 -4
- package/dist/output.js +159 -27
- package/dist/pagerank.js +3 -4
- package/dist/parser.js +58 -1
- package/dist/ranking.d.ts +37 -1
- package/dist/ranking.js +242 -1
- package/dist/types.d.ts +23 -0
- package/package.json +1 -1
package/dist/ranking.d.ts
CHANGED
|
@@ -1,9 +1,36 @@
|
|
|
1
|
-
import type { FileGraph, RankedDefinition } from "./types.js";
|
|
1
|
+
import type { FileGraph, RankedDefinition, FileDegrees } from "./types.js";
|
|
2
|
+
import type { DuplicateGroup } from "./dedup.js";
|
|
3
|
+
/**
|
|
4
|
+
* Detect entrypoint files from a list of file paths.
|
|
5
|
+
*/
|
|
6
|
+
export declare function detectEntrypoints(nodes: readonly string[]): string[];
|
|
7
|
+
/**
|
|
8
|
+
* Build personalization vector for detected entrypoints.
|
|
9
|
+
* Used when no focus files specified to boost common entrypoint patterns.
|
|
10
|
+
*/
|
|
11
|
+
export declare function buildEntrypointPersonalization(nodes: readonly string[]): Map<string, number>;
|
|
12
|
+
/**
|
|
13
|
+
* Adjust file ranks based on architecture signals.
|
|
14
|
+
* - Hub penalty: penalize files with high in-degree (many importers)
|
|
15
|
+
* - Path penalty: penalize utility/internal paths
|
|
16
|
+
* - Entry point boost: boost files matching entry point patterns
|
|
17
|
+
* - Diversity bonus: boost files that reference many different modules
|
|
18
|
+
* - Duplicate penalty: penalize non-canonical duplicates
|
|
19
|
+
*/
|
|
20
|
+
export declare function adjustFileRanks(fileRanks: ReadonlyMap<string, number>, degrees: ReadonlyMap<string, FileDegrees>, nodes: readonly string[], duplicates?: ReadonlyMap<string, DuplicateGroup>): Map<string, number>;
|
|
2
21
|
/**
|
|
3
22
|
* Build personalization vector for focus files.
|
|
4
23
|
* Each focus file gets weight 1.0.
|
|
5
24
|
*/
|
|
6
25
|
export declare function buildPersonalization(focusFiles: readonly string[]): Map<string, number>;
|
|
26
|
+
/**
|
|
27
|
+
* Build personalization vector with boosted first-order neighbors.
|
|
28
|
+
* This strengthens the focus bias by giving higher weights to:
|
|
29
|
+
* - Focus files (highest)
|
|
30
|
+
* - Files that focus file references (dependencies)
|
|
31
|
+
* - Files that reference focus file (dependents)
|
|
32
|
+
*/
|
|
33
|
+
export declare function buildFocusPersonalization(focusFiles: readonly string[], graph: FileGraph): Map<string, number>;
|
|
7
34
|
/**
|
|
8
35
|
* Rank definitions by propagating PageRank through symbol edges.
|
|
9
36
|
* Focus file definitions are excluded from output.
|
|
@@ -11,3 +38,12 @@ export declare function buildPersonalization(focusFiles: readonly string[]): Map
|
|
|
11
38
|
* Formula: def_rank[definer:ident] += PR(referencer) * edge_weight / out_weight(referencer)
|
|
12
39
|
*/
|
|
13
40
|
export declare function rankDefinitions(graph: FileGraph, fileRanks: Map<string, number>, focusFiles?: ReadonlySet<string>): RankedDefinition[];
|
|
41
|
+
/**
|
|
42
|
+
* Combine structural ranks (import graph) with reference density (ref graph).
|
|
43
|
+
*
|
|
44
|
+
* Structural importance (from binary import graph) is weighted more heavily
|
|
45
|
+
* than raw reference counts to prevent "noisy" files from dominating.
|
|
46
|
+
*
|
|
47
|
+
* Formula: combined = structRank * structWeight + normalizedDensity * (1 - structWeight)
|
|
48
|
+
*/
|
|
49
|
+
export declare function combineRanks(structuralRanks: ReadonlyMap<string, number>, refGraph: FileGraph, structuralWeight?: number): Map<string, number>;
|
package/dist/ranking.js
CHANGED
|
@@ -1,3 +1,154 @@
|
|
|
1
|
+
/**
 * Identifier names that are dropped from the ranked-definition output.
 *
 * The names are grouped by origin for readability only; the groups are
 * flattened, in declaration order, into a single Set used for membership
 * checks. These names are treated as noise rather than meaningful symbols.
 */
const BOILERPLATE_NAMES = new Set(Object.values({
    // Rust trait impls
    rustTraitImpls: [
        "fmt", "default", "new", "from", "into", "clone",
        "eq", "hash", "deref", "drop", "serialize", "deserialize",
    ],
    // JS/TS common overrides
    jsOverrides: ["toString", "valueOf", "toJSON"],
    // Generic method names (inflate rankings, not meaningful)
    genericMethods: [
        "log", "init", "get", "set", "update", "handle",
        "on", "parse", "format", "name", "value", "path",
        "start", "stop", "reset", "run", "execute",
    ],
}).flat());
|
|
16
|
+
/**
 * Regular expressions for paths that hold support code (tooling, scripts,
 * helpers, vendored sources) rather than primary business logic. Files on
 * such paths have their rank multiplied by INTERNAL_PATH_PENALTY.
 *
 * A top-level `internal/` directory is intentionally NOT matched: Go uses it
 * for production code that must not be imported externally.
 *
 * NOTE(review): apart from `tools/` and `scripts/`, the directory patterns
 * require a leading slash, so a top-level `utils/`, `common/` or `vendor/`
 * directory is not matched either — confirm that this is intended.
 */
const INTERNAL_PATH_PATTERNS = [
    // Tool/script directories, top-level or nested
    /^tools\//,
    /^scripts\//,
    /\/tools\//,
    /\/scripts\//,
    // `internal/` only when nested below another directory
    /\/internal\//,
    // Utility/helper directories found across many languages
    /\/utils\//,
    /\/util\//,
    /\/helpers\//,
    /\/helper\//,
    /\/common\//,
    /\/shared\//,
    /\/support\//,
    /\/primitives\//, // Solidity primitive types
    /\/vendor\//, // Vendored code
    // Language-specific layouts
    /^crates\/.*\/src\/util/, // Rust internal utils
    /^pkg\/util/, // Go pkg/util pattern
];
/** Rank multiplier applied to anything located on an internal path. */
const INTERNAL_PATH_PENALTY = 0.5;
/**
 * Tell whether a file path lies on an internal/tool/utility path.
 *
 * @param path File path to test against INTERNAL_PATH_PATTERNS.
 * @returns `true` as soon as one pattern matches, otherwise `false`.
 */
function isInternalPath(path) {
    return INTERNAL_PATH_PATTERNS.some((candidate) => candidate.test(path));
}
|
|
57
|
+
/**
 * Path shapes that are treated as program entrypoints. Every pattern is
 * anchored at the start of the path, so only files at these exact
 * locations (relative to the paths handed in) qualify.
 */
const ENTRYPOINT_PATTERNS = [
    /^src\/main\.(ts|js|py|go|rs)$/,
    /^src\/index\.(ts|js)$/,
    /^src\/lib\.(ts|js|rs)$/,
    /^main\.(ts|js|py|go|rs)$/,
    /^index\.(ts|js)$/,
    /^lib\.rs$/,
    /^app\/.+\.(ts|tsx|js|jsx)$/, // Next.js app router
    /^pages\/.+\.(ts|tsx|js|jsx)$/, // Next.js pages router
    /^cmd\/.+\.go$/, // Go cmd pattern
];
/** Personalization weight given to each detected entrypoint. */
const ENTRYPOINT_WEIGHT = 2.0;
/**
 * Pick out the entrypoint files from a list of file paths.
 *
 * @param nodes File paths to inspect.
 * @returns A new array holding, in input order, every path that matches at
 *          least one ENTRYPOINT_PATTERNS entry.
 */
export function detectEntrypoints(nodes) {
    const looksLikeEntrypoint = (candidate) => ENTRYPOINT_PATTERNS.some((shape) => shape.test(candidate));
    return nodes.filter(looksLikeEntrypoint);
}
|
|
87
|
+
/**
 * Create a personalization vector that favours detected entrypoints.
 * Intended for runs without explicit focus files.
 *
 * @param nodes File paths to scan with detectEntrypoints.
 * @returns Map from each detected entrypoint to ENTRYPOINT_WEIGHT; empty
 *          when no path matches an entrypoint pattern.
 */
export function buildEntrypointPersonalization(nodes) {
    const weights = new Map();
    detectEntrypoints(nodes).forEach((entry) => {
        weights.set(entry, ENTRYPOINT_WEIGHT);
    });
    return weights;
}
|
|
99
|
+
// Multipliers used by adjustFileRanks
const ENTRY_POINT_BOOST = 2.0; // applied to files matching an entrypoint pattern
const DIVERSITY_BONUS_FACTOR = 0.15; // bonus = 1 + log2(modules) * factor
const DUPLICATE_PENALTY = 0.3; // non-canonical duplicates keep 30% of their rank
/**
 * Re-weight file ranks using architecture signals.
 *
 * For every file that has an entry in `degrees`, the rank is multiplied, in
 * this order, by each factor that applies:
 *   1. Hub penalty: `1 / log2(1 + inDegree)` when `inDegree > 0`. This is
 *      exactly 1 for a single importer and shrinks as importers increase.
 *   2. Path penalty: INTERNAL_PATH_PENALTY for internal/utility paths.
 *   3. Entry point boost: ENTRY_POINT_BOOST for detected entrypoints.
 *   4. Diversity bonus: `1 + log2(uniqueModulesReferenced) * DIVERSITY_BONUS_FACTOR`
 *      when more than one module is referenced.
 *   5. Duplicate penalty: DUPLICATE_PENALTY when the file belongs to a
 *      duplicate group whose canonical member is a different file.
 *
 * Files without an entry in `degrees` are copied through unchanged; none of
 * the adjustments above are applied to them.
 *
 * @param fileRanks  Base rank per file.
 * @param degrees    Degree metrics per file.
 * @param nodes      File paths used to detect entrypoints.
 * @param duplicates Optional duplicate-group lookup keyed by file.
 * @returns A new Map with the adjusted rank of every file in `fileRanks`.
 */
export function adjustFileRanks(fileRanks, degrees, nodes, duplicates) {
    const entrypointSet = new Set(detectEntrypoints(nodes));
    const adjusted = new Map();
    for (const [file, baseRank] of fileRanks) {
        const metrics = degrees.get(file);
        if (!metrics) {
            adjusted.set(file, baseRank);
            continue;
        }
        const { inDegree, uniqueModulesReferenced } = metrics;
        // Factors are gathered in application order and folded left-to-right
        // so the floating-point result matches sequential multiplication.
        const factors = [];
        if (inDegree > 0) {
            factors.push(1 / Math.log2(1 + inDegree));
        }
        if (isInternalPath(file)) {
            factors.push(INTERNAL_PATH_PENALTY);
        }
        if (entrypointSet.has(file)) {
            factors.push(ENTRY_POINT_BOOST);
        }
        if (uniqueModulesReferenced > 1) {
            factors.push(1 + Math.log2(uniqueModulesReferenced) * DIVERSITY_BONUS_FACTOR);
        }
        const duplicateGroup = duplicates ? duplicates.get(file) : undefined;
        if (duplicateGroup && duplicateGroup.canonical !== file) {
            factors.push(DUPLICATE_PENALTY);
        }
        adjusted.set(file, factors.reduce((running, factor) => running * factor, baseRank));
    }
    return adjusted;
}
|
|
1
152
|
/**
|
|
2
153
|
* Build personalization vector for focus files.
|
|
3
154
|
* Each focus file gets weight 1.0.
|
|
@@ -9,6 +160,53 @@ export function buildPersonalization(focusFiles) {
|
|
|
9
160
|
}
|
|
10
161
|
return result;
|
|
11
162
|
}
|
|
163
|
+
// Weight constants for focus personalization
|
|
164
|
+
const FOCUS_WEIGHT = 10.0; // Focus file itself
|
|
165
|
+
const DEPENDENCY_WEIGHT = 3.0; // Files that focus file imports
|
|
166
|
+
const DEPENDENT_WEIGHT = 1.0; // Files that import focus file
|
|
167
|
+
/**
|
|
168
|
+
* Build personalization vector with boosted first-order neighbors.
|
|
169
|
+
* This strengthens the focus bias by giving higher weights to:
|
|
170
|
+
* - Focus files (highest)
|
|
171
|
+
* - Files that focus file references (dependencies)
|
|
172
|
+
* - Files that reference focus file (dependents)
|
|
173
|
+
*/
|
|
174
|
+
export function buildFocusPersonalization(focusFiles, graph) {
|
|
175
|
+
if (focusFiles.length === 0) {
|
|
176
|
+
return new Map();
|
|
177
|
+
}
|
|
178
|
+
const result = new Map();
|
|
179
|
+
const focusSet = new Set(focusFiles);
|
|
180
|
+
// Give focus files highest weight
|
|
181
|
+
for (const file of focusFiles) {
|
|
182
|
+
result.set(file, FOCUS_WEIGHT);
|
|
183
|
+
}
|
|
184
|
+
// Find files that focus files reference (outgoing edges = dependencies)
|
|
185
|
+
for (const focusFile of focusFiles) {
|
|
186
|
+
const outgoing = graph.edges.get(focusFile);
|
|
187
|
+
if (outgoing) {
|
|
188
|
+
for (const [target] of outgoing) {
|
|
189
|
+
if (!focusSet.has(target)) {
|
|
190
|
+
const current = result.get(target) ?? 0;
|
|
191
|
+
result.set(target, Math.max(current, DEPENDENCY_WEIGHT));
|
|
192
|
+
}
|
|
193
|
+
}
|
|
194
|
+
}
|
|
195
|
+
}
|
|
196
|
+
// Find files that reference focus files (incoming edges = dependents)
|
|
197
|
+
for (const [from, toMap] of graph.edges) {
|
|
198
|
+
if (focusSet.has(from))
|
|
199
|
+
continue;
|
|
200
|
+
for (const [to] of toMap) {
|
|
201
|
+
if (focusSet.has(to)) {
|
|
202
|
+
const current = result.get(from) ?? 0;
|
|
203
|
+
result.set(from, Math.max(current, DEPENDENT_WEIGHT));
|
|
204
|
+
break; // Only need to count once per file
|
|
205
|
+
}
|
|
206
|
+
}
|
|
207
|
+
}
|
|
208
|
+
return result;
|
|
209
|
+
}
|
|
12
210
|
/**
|
|
13
211
|
* Rank definitions by propagating PageRank through symbol edges.
|
|
14
212
|
* Focus file definitions are excluded from output.
|
|
@@ -30,7 +228,11 @@ export function rankDefinitions(graph, fileRanks, focusFiles) {
|
|
|
30
228
|
}
|
|
31
229
|
for (const [symbol, count] of symbolMap) {
|
|
32
230
|
const key = `${to}\0${symbol}`;
|
|
33
|
-
|
|
231
|
+
let contribution = (pr * count) / outWeight;
|
|
232
|
+
// Apply penalty for internal/tool paths
|
|
233
|
+
if (isInternalPath(to)) {
|
|
234
|
+
contribution *= INTERNAL_PATH_PENALTY;
|
|
235
|
+
}
|
|
34
236
|
accumulator.set(key, (accumulator.get(key) ?? 0) + contribution);
|
|
35
237
|
}
|
|
36
238
|
}
|
|
@@ -40,6 +242,10 @@ export function rankDefinitions(graph, fileRanks, focusFiles) {
|
|
|
40
242
|
const sepIdx = key.indexOf("\0");
|
|
41
243
|
const file = key.slice(0, sepIdx);
|
|
42
244
|
const ident = key.slice(sepIdx + 1);
|
|
245
|
+
// Filter out boilerplate names
|
|
246
|
+
if (BOILERPLATE_NAMES.has(ident)) {
|
|
247
|
+
continue;
|
|
248
|
+
}
|
|
43
249
|
result.push({ file, ident, rank });
|
|
44
250
|
}
|
|
45
251
|
result.sort((a, b) => {
|
|
@@ -53,3 +259,38 @@ export function rankDefinitions(graph, fileRanks, focusFiles) {
|
|
|
53
259
|
});
|
|
54
260
|
return result;
|
|
55
261
|
}
|
|
262
|
+
/**
|
|
263
|
+
* Combine structural ranks (import graph) with reference density (ref graph).
|
|
264
|
+
*
|
|
265
|
+
* Structural importance (from binary import graph) is weighted more heavily
|
|
266
|
+
* than raw reference counts to prevent "noisy" files from dominating.
|
|
267
|
+
*
|
|
268
|
+
* Formula: combined = structRank * structWeight + normalizedDensity * (1 - structWeight)
|
|
269
|
+
*/
|
|
270
|
+
export function combineRanks(structuralRanks, refGraph, structuralWeight = 0.7) {
|
|
271
|
+
const result = new Map();
|
|
272
|
+
if (structuralRanks.size === 0) {
|
|
273
|
+
return result;
|
|
274
|
+
}
|
|
275
|
+
// Find max outWeight for normalization
|
|
276
|
+
let maxOutWeight = 0;
|
|
277
|
+
for (const weight of refGraph.outWeights.values()) {
|
|
278
|
+
if (weight > maxOutWeight) {
|
|
279
|
+
maxOutWeight = weight;
|
|
280
|
+
}
|
|
281
|
+
}
|
|
282
|
+
// If no refs at all, just return structural ranks scaled
|
|
283
|
+
if (maxOutWeight === 0) {
|
|
284
|
+
for (const [file, rank] of structuralRanks) {
|
|
285
|
+
result.set(file, rank * structuralWeight);
|
|
286
|
+
}
|
|
287
|
+
return result;
|
|
288
|
+
}
|
|
289
|
+
for (const [file, structRank] of structuralRanks) {
|
|
290
|
+
const refDensity = refGraph.outWeights.get(file) ?? 0;
|
|
291
|
+
const normalizedDensity = refDensity / maxOutWeight;
|
|
292
|
+
const combined = structRank * structuralWeight + normalizedDensity * (1 - structuralWeight);
|
|
293
|
+
result.set(file, combined);
|
|
294
|
+
}
|
|
295
|
+
return result;
|
|
296
|
+
}
|
package/dist/types.d.ts
CHANGED
|
@@ -8,6 +8,8 @@ export interface Tag {
|
|
|
8
8
|
readonly name: string;
|
|
9
9
|
readonly kind: "def" | "ref";
|
|
10
10
|
readonly signature?: string;
|
|
11
|
+
/** Whether this definition is exported (public API). Only set for "def" kind. */
|
|
12
|
+
readonly isExported?: boolean;
|
|
11
13
|
}
|
|
12
14
|
/**
|
|
13
15
|
* CLI options parsed from command line arguments.
|
|
@@ -19,8 +21,10 @@ export interface CliOptions {
|
|
|
19
21
|
readonly refresh: boolean;
|
|
20
22
|
readonly verbose: boolean;
|
|
21
23
|
readonly ignore: readonly string[];
|
|
24
|
+
readonly include: readonly string[];
|
|
22
25
|
readonly noIgnore: boolean;
|
|
23
26
|
readonly maxFiles: number;
|
|
27
|
+
readonly type: readonly string[];
|
|
24
28
|
}
|
|
25
29
|
/**
|
|
26
30
|
* Result of parsing CLI arguments.
|
|
@@ -38,6 +42,7 @@ export interface FileDiscoveryOptions {
|
|
|
38
42
|
readonly extensions?: ReadonlySet<string>;
|
|
39
43
|
readonly ignoredDirs?: ReadonlySet<string>;
|
|
40
44
|
readonly ignoredPatterns?: readonly string[];
|
|
45
|
+
readonly includePatterns?: readonly string[];
|
|
41
46
|
readonly respectGitignore?: boolean;
|
|
42
47
|
readonly includeHidden?: boolean;
|
|
43
48
|
readonly maxFiles?: number;
|
|
@@ -63,6 +68,13 @@ export interface PageRankOptions {
|
|
|
63
68
|
/** Optional personalization vector to bias scores toward specific files */
|
|
64
69
|
readonly personalization?: ReadonlyMap<string, number>;
|
|
65
70
|
}
|
|
71
|
+
/**
|
|
72
|
+
* Options accepted when building the file reference graph.
|
|
73
|
+
*/
|
|
74
|
+
export interface GraphBuildOptions {
|
|
75
|
+
/** Optional callback: given the path of the file an edge originates FROM, returns the weight multiplier for that file's outgoing edges (default: 1.0) */
|
|
76
|
+
readonly edgeWeightMultiplier?: (fromPath: string) => number;
|
|
77
|
+
}
|
|
66
78
|
/**
|
|
67
79
|
* File reference graph for ranking.
|
|
68
80
|
* Nodes are files (relPath), edges are symbol references.
|
|
@@ -90,6 +102,17 @@ export interface RankedDefinition {
|
|
|
90
102
|
readonly ident: string;
|
|
91
103
|
readonly rank: number;
|
|
92
104
|
}
|
|
105
|
+
/**
|
|
106
|
+
* Degree metrics for a single file in the graph; read by adjustFileRanks when re-weighting ranks.
|
|
107
|
+
*/
|
|
108
|
+
export interface FileDegrees {
|
|
109
|
+
/** Number of unique files that reference this file; adjustFileRanks scales the rank by 1 / log2(1 + inDegree) when this is positive */
|
|
110
|
+
readonly inDegree: number;
|
|
111
|
+
/** Number of unique files this file references */
|
|
112
|
+
readonly outDegree: number;
|
|
113
|
+
/** Number of unique first-level modules this file references; adjustFileRanks applies a diversity bonus when this exceeds 1 */
|
|
114
|
+
readonly uniqueModulesReferenced: number;
|
|
115
|
+
}
|
|
93
116
|
/**
|
|
94
117
|
* A cached entry for a single file.
|
|
95
118
|
*/
|