@snevins/repo-mapper 1.0.3 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/ranking.d.ts CHANGED
@@ -1,9 +1,36 @@
1
- import type { FileGraph, RankedDefinition } from "./types.js";
1
+ import type { FileGraph, RankedDefinition, FileDegrees } from "./types.js";
2
+ import type { DuplicateGroup } from "./dedup.js";
3
+ /**
4
+ * Detect entrypoint files from a list of file paths.
+ *
+ * @param nodes File paths to test against the built-in entrypoint patterns.
+ * @returns The paths that match at least one pattern, in input order
+ *          (an empty array when nothing matches).
5
+ */
6
+ export declare function detectEntrypoints(nodes: readonly string[]): string[];
7
+ /**
8
+ * Build personalization vector for detected entrypoints.
9
+ * Used when no focus files specified to boost common entrypoint patterns.
+ *
+ * @param nodes File paths to scan for entrypoints (see detectEntrypoints).
+ * @returns Map from each detected entrypoint path to its personalization
+ *          weight; every detected entrypoint receives the same weight.
10
+ */
11
+ export declare function buildEntrypointPersonalization(nodes: readonly string[]): Map<string, number>;
12
+ /**
13
+ * Adjust file ranks based on architecture signals.
14
+ * - Hub penalty: penalize files with high in-degree (many importers)
15
+ * - Path penalty: penalize utility/internal paths
16
+ * - Entry point boost: boost files matching entry point patterns
17
+ * - Diversity bonus: boost files that reference many different modules
18
+ * - Duplicate penalty: penalize non-canonical duplicates
+ *
+ * All adjustments are multiplicative. A file with no entry in `degrees`
+ * keeps its input rank unchanged.
+ *
+ * @param fileRanks Base rank per file path; not mutated.
+ * @param degrees Degree metrics per file path.
+ * @param nodes File paths used to detect entry points.
+ * @param duplicates Optional map from file path to its duplicate group; a
+ *        file whose group's `canonical` differs from the file is penalized.
+ * @returns A new map holding one adjusted rank for every key of `fileRanks`.
19
+ */
20
+ export declare function adjustFileRanks(fileRanks: ReadonlyMap<string, number>, degrees: ReadonlyMap<string, FileDegrees>, nodes: readonly string[], duplicates?: ReadonlyMap<string, DuplicateGroup>): Map<string, number>;
2
21
  /**
3
22
  * Build personalization vector for focus files.
4
23
  * Each focus file gets weight 1.0.
  *
  * @param focusFiles Paths of the files to bias the ranking toward.
  * @returns Map from file path to personalization weight.
5
24
  */
6
25
  export declare function buildPersonalization(focusFiles: readonly string[]): Map<string, number>;
26
+ /**
27
+ * Build personalization vector with boosted first-order neighbors.
28
+ * This strengthens the focus bias by giving higher weights to:
29
+ * - Focus files (highest, weight 10)
30
+ * - Files that a focus file references (dependencies, weight 3)
31
+ * - Files that reference a focus file (dependents, weight 1)
+ *
+ * A file that qualifies in more than one way keeps the largest weight.
+ *
+ * @param focusFiles Paths of the focus files; an empty list yields an empty map.
+ * @param graph File graph whose `edges` map is walked to find neighbors.
+ * @returns Map from file path to personalization weight.
32
+ */
33
+ export declare function buildFocusPersonalization(focusFiles: readonly string[], graph: FileGraph): Map<string, number>;
7
34
  /**
8
35
  * Rank definitions by propagating PageRank through symbol edges.
9
36
  * Focus file definitions are excluded from output.
@@ -11,3 +38,12 @@ export declare function buildPersonalization(focusFiles: readonly string[]): Map
11
38
  * Formula: def_rank[definer:ident] += PR(referencer) * edge_weight / out_weight(referencer)
12
39
  */
13
40
  export declare function rankDefinitions(graph: FileGraph, fileRanks: Map<string, number>, focusFiles?: ReadonlySet<string>): RankedDefinition[];
41
+ /**
42
+ * Combine structural ranks (import graph) with reference density (ref graph).
43
+ *
44
+ * Structural importance (from binary import graph) is weighted more heavily
45
+ * than raw reference counts to prevent "noisy" files from dominating.
46
+ *
47
+ * Formula: combined = structRank * structWeight + normalizedDensity * (1 - structWeight)
+ *
+ * normalizedDensity is the file's entry in `refGraph.outWeights` (0 when
+ * absent) divided by the largest out-weight in the graph. When the largest
+ * out-weight is 0 the density term is dropped entirely.
+ *
+ * @param structuralRanks Structural rank per file path; an empty map yields an empty result.
+ * @param refGraph Reference graph supplying `outWeights`.
+ * @param structuralWeight Weight of the structural term (defaults to 0.7 in the implementation).
+ * @returns A new map with one combined score per key of `structuralRanks`.
48
+ */
49
+ export declare function combineRanks(structuralRanks: ReadonlyMap<string, number>, refGraph: FileGraph, structuralWeight?: number): Map<string, number>;
package/dist/ranking.js CHANGED
@@ -1,3 +1,154 @@
1
/**
 * Identifier names excluded from the ranked definition list.
 *
 * They are trait implementations, built-in overrides, and generic method
 * names that appear everywhere and would otherwise crowd out meaningful
 * definitions. Lookup is by exact, case-sensitive name.
 */
const BOILERPLATE_NAMES = new Set([
    // Rust trait implementations
    "fmt",
    "default",
    "new",
    "from",
    "into",
    "clone",
    "eq",
    "hash",
    "deref",
    "drop",
    "serialize",
    "deserialize",
    // Common JS/TS overrides
    "toString",
    "valueOf",
    "toJSON",
    // Generic method names that inflate rankings without carrying meaning
    "log",
    "init",
    "get",
    "set",
    "update",
    "handle",
    "on",
    "parse",
    "format",
    "name",
    "value",
    "path",
    "start",
    "stop",
    "reset",
    "run",
    "execute",
]);
16
/**
 * Path patterns that mark support code (tooling, scripts, utilities, vendored
 * sources) rather than primary business logic. Matching files are penalized.
 *
 * A top-level `internal/` directory is deliberately NOT listed: Go uses it for
 * production code that must not be imported externally. Only nested
 * `.../internal/` directories match.
 */
const INTERNAL_PATH_PATTERNS = [
    // tools/ and scripts/ directories, top-level or nested
    /^tools\//,
    /^scripts\//,
    /\/tools\//,
    /\/scripts\//,
    // internal/ only when nested below another directory
    /\/internal\//,
    // utility / helper directories shared across languages
    /\/utils\//,
    /\/util\//,
    /\/helpers\//,
    /\/helper\//,
    /\/common\//,
    /\/shared\//,
    /\/support\//,
    /\/primitives\//, // Solidity primitive types
    /\/vendor\//, // vendored code
    // language-specific layouts
    /^crates\/.*\/src\/util/, // Rust internal utils
    /^pkg\/util/, // Go pkg/util pattern
];
/** Rank multiplier applied to anything that lives under an internal path. */
const INTERNAL_PATH_PENALTY = 0.5;
/**
 * Report whether a file path matches at least one internal path pattern.
 *
 * @param path File path to classify.
 * @returns `true` if any pattern in INTERNAL_PATH_PATTERNS matches, else `false`.
 */
function isInternalPath(path) {
    return INTERNAL_PATH_PATTERNS.some((pattern) => pattern.test(path));
}
57
/**
 * Path shapes treated as likely program entrypoints. Every pattern is
 * anchored at the start of the path.
 */
const ENTRYPOINT_PATTERNS = [
    /^src\/main\.(ts|js|py|go|rs)$/,
    /^src\/index\.(ts|js)$/,
    /^src\/lib\.(ts|js|rs)$/,
    /^main\.(ts|js|py|go|rs)$/,
    /^index\.(ts|js)$/,
    /^lib\.rs$/,
    /^app\/.+\.(ts|tsx|js|jsx)$/, // Next.js app router
    /^pages\/.+\.(ts|tsx|js|jsx)$/, // Next.js pages router
    /^cmd\/.+\.go$/, // Go cmd pattern
];
/** Personalization weight given to each detected entrypoint. */
const ENTRYPOINT_WEIGHT = 2.0;
/**
 * Pick out the entrypoint files from a list of file paths.
 *
 * @param nodes File paths to examine.
 * @returns The paths matching at least one entrypoint pattern, in input order.
 */
export function detectEntrypoints(nodes) {
    const looksLikeEntrypoint = (candidate) => ENTRYPOINT_PATTERNS.some((pattern) => pattern.test(candidate));
    return nodes.filter(looksLikeEntrypoint);
}
87
/**
 * Build a personalization vector that favors detected entrypoints.
 * Intended for runs without focus files, so common entrypoint patterns
 * still receive a boost.
 *
 * @param nodes File paths to scan for entrypoints.
 * @returns Map from each detected entrypoint to ENTRYPOINT_WEIGHT.
 */
export function buildEntrypointPersonalization(nodes) {
    const weights = new Map();
    detectEntrypoints(nodes).forEach((entry) => {
        weights.set(entry, ENTRYPOINT_WEIGHT);
    });
    return weights;
}
99
// Coefficients for the architecture-aware rank adjustments
const ENTRY_POINT_BOOST = 2.0; // multiplier for files recognised as entry points
const DIVERSITY_BONUS_FACTOR = 0.15; // scales log2(uniqueModulesReferenced)
const DUPLICATE_PENALTY = 0.3; // multiplier for non-canonical duplicates (a 70% cut)
/**
 * Re-weight file ranks using architecture signals. Each signal contributes a
 * multiplier, applied in this order:
 * 1. Hub penalty: 1 / log2(1 + inDegree) when the file has importers
 * 2. Path penalty: INTERNAL_PATH_PENALTY for utility/internal paths
 * 3. Entry point boost: ENTRY_POINT_BOOST for entry point files
 * 4. Diversity bonus: 1 + log2(modules) * DIVERSITY_BONUS_FACTOR when the
 *    file references more than one module
 * 5. Duplicate penalty: DUPLICATE_PENALTY for non-canonical duplicates
 *
 * Files that have no entry in `degrees` are copied through unchanged.
 *
 * @param fileRanks Base rank per file path; not mutated.
 * @param degrees Degree metrics per file path.
 * @param nodes File paths used to detect entry points.
 * @param duplicates Optional map from file path to its duplicate group.
 * @returns A new map with one adjusted rank per key of `fileRanks`.
 */
export function adjustFileRanks(fileRanks, degrees, nodes, duplicates) {
    const entryFiles = new Set(detectEntrypoints(nodes));
    const adjusted = new Map();
    for (const [file, baseRank] of fileRanks) {
        const metrics = degrees.get(file);
        if (!metrics) {
            // No degree information: leave the rank exactly as it came in.
            adjusted.set(file, baseRank);
            continue;
        }
        const { inDegree, uniqueModulesReferenced } = metrics;
        // Gather the applicable multipliers, keeping the documented order so
        // the floating-point result matches a step-by-step multiplication.
        const multipliers = [];
        if (inDegree > 0) {
            multipliers.push(1 / Math.log2(1 + inDegree));
        }
        if (isInternalPath(file)) {
            multipliers.push(INTERNAL_PATH_PENALTY);
        }
        if (entryFiles.has(file)) {
            multipliers.push(ENTRY_POINT_BOOST);
        }
        if (uniqueModulesReferenced > 1) {
            multipliers.push(1 + Math.log2(uniqueModulesReferenced) * DIVERSITY_BONUS_FACTOR);
        }
        const group = duplicates ? duplicates.get(file) : undefined;
        if (group && group.canonical !== file) {
            multipliers.push(DUPLICATE_PENALTY);
        }
        adjusted.set(file, multipliers.reduce((score, factor) => score * factor, baseRank));
    }
    return adjusted;
}
1
152
  /**
2
153
  * Build personalization vector for focus files.
3
154
  * Each focus file gets weight 1.0.
@@ -9,6 +160,53 @@ export function buildPersonalization(focusFiles) {
9
160
  }
10
161
  return result;
11
162
  }
163
+ // Weight constants for focus personalization
164
+ const FOCUS_WEIGHT = 10.0; // Focus file itself
165
+ const DEPENDENCY_WEIGHT = 3.0; // Files that focus file imports
166
+ const DEPENDENT_WEIGHT = 1.0; // Files that import focus file
167
+ /**
168
+ * Build personalization vector with boosted first-order neighbors.
169
+ * This strengthens the focus bias by giving higher weights to:
170
+ * - Focus files (highest)
171
+ * - Files that focus file references (dependencies)
172
+ * - Files that reference focus file (dependents)
173
+ */
174
+ export function buildFocusPersonalization(focusFiles, graph) {
175
+ if (focusFiles.length === 0) {
176
+ return new Map();
177
+ }
178
+ const result = new Map();
179
+ const focusSet = new Set(focusFiles);
180
+ // Give focus files highest weight
181
+ for (const file of focusFiles) {
182
+ result.set(file, FOCUS_WEIGHT);
183
+ }
184
+ // Find files that focus files reference (outgoing edges = dependencies)
185
+ for (const focusFile of focusFiles) {
186
+ const outgoing = graph.edges.get(focusFile);
187
+ if (outgoing) {
188
+ for (const [target] of outgoing) {
189
+ if (!focusSet.has(target)) {
190
+ const current = result.get(target) ?? 0;
191
+ result.set(target, Math.max(current, DEPENDENCY_WEIGHT));
192
+ }
193
+ }
194
+ }
195
+ }
196
+ // Find files that reference focus files (incoming edges = dependents)
197
+ for (const [from, toMap] of graph.edges) {
198
+ if (focusSet.has(from))
199
+ continue;
200
+ for (const [to] of toMap) {
201
+ if (focusSet.has(to)) {
202
+ const current = result.get(from) ?? 0;
203
+ result.set(from, Math.max(current, DEPENDENT_WEIGHT));
204
+ break; // Only need to count once per file
205
+ }
206
+ }
207
+ }
208
+ return result;
209
+ }
12
210
  /**
13
211
  * Rank definitions by propagating PageRank through symbol edges.
14
212
  * Focus file definitions are excluded from output.
@@ -30,7 +228,11 @@ export function rankDefinitions(graph, fileRanks, focusFiles) {
30
228
  }
31
229
  for (const [symbol, count] of symbolMap) {
32
230
  const key = `${to}\0${symbol}`;
33
- const contribution = (pr * count) / outWeight;
231
+ let contribution = (pr * count) / outWeight;
232
+ // Apply penalty for internal/tool paths
233
+ if (isInternalPath(to)) {
234
+ contribution *= INTERNAL_PATH_PENALTY;
235
+ }
34
236
  accumulator.set(key, (accumulator.get(key) ?? 0) + contribution);
35
237
  }
36
238
  }
@@ -40,6 +242,10 @@ export function rankDefinitions(graph, fileRanks, focusFiles) {
40
242
  const sepIdx = key.indexOf("\0");
41
243
  const file = key.slice(0, sepIdx);
42
244
  const ident = key.slice(sepIdx + 1);
245
+ // Filter out boilerplate names
246
+ if (BOILERPLATE_NAMES.has(ident)) {
247
+ continue;
248
+ }
43
249
  result.push({ file, ident, rank });
44
250
  }
45
251
  result.sort((a, b) => {
@@ -53,3 +259,38 @@ export function rankDefinitions(graph, fileRanks, focusFiles) {
53
259
  });
54
260
  return result;
55
261
  }
262
+ /**
263
+ * Combine structural ranks (import graph) with reference density (ref graph).
264
+ *
265
+ * Structural importance (from binary import graph) is weighted more heavily
266
+ * than raw reference counts to prevent "noisy" files from dominating.
267
+ *
268
+ * Formula: combined = structRank * structWeight + normalizedDensity * (1 - structWeight)
269
+ */
270
+ export function combineRanks(structuralRanks, refGraph, structuralWeight = 0.7) {
271
+ const result = new Map();
272
+ if (structuralRanks.size === 0) {
273
+ return result;
274
+ }
275
+ // Find max outWeight for normalization
276
+ let maxOutWeight = 0;
277
+ for (const weight of refGraph.outWeights.values()) {
278
+ if (weight > maxOutWeight) {
279
+ maxOutWeight = weight;
280
+ }
281
+ }
282
+ // If no refs at all, just return structural ranks scaled
283
+ if (maxOutWeight === 0) {
284
+ for (const [file, rank] of structuralRanks) {
285
+ result.set(file, rank * structuralWeight);
286
+ }
287
+ return result;
288
+ }
289
+ for (const [file, structRank] of structuralRanks) {
290
+ const refDensity = refGraph.outWeights.get(file) ?? 0;
291
+ const normalizedDensity = refDensity / maxOutWeight;
292
+ const combined = structRank * structuralWeight + normalizedDensity * (1 - structuralWeight);
293
+ result.set(file, combined);
294
+ }
295
+ return result;
296
+ }
package/dist/types.d.ts CHANGED
@@ -8,6 +8,8 @@ export interface Tag {
8
8
  readonly name: string;
9
9
  readonly kind: "def" | "ref";
10
10
  readonly signature?: string;
11
+ /** Whether this definition is exported (public API). Only set for "def" kind. */
12
+ readonly isExported?: boolean;
11
13
  }
12
14
  /**
13
15
  * CLI options parsed from command line arguments.
@@ -19,8 +21,10 @@ export interface CliOptions {
19
21
  readonly refresh: boolean;
20
22
  readonly verbose: boolean;
21
23
  readonly ignore: readonly string[];
24
+ readonly include: readonly string[];
22
25
  readonly noIgnore: boolean;
23
26
  readonly maxFiles: number;
27
+ readonly type: readonly string[];
24
28
  }
25
29
  /**
26
30
  * Result of parsing CLI arguments.
@@ -38,6 +42,7 @@ export interface FileDiscoveryOptions {
38
42
  readonly extensions?: ReadonlySet<string>;
39
43
  readonly ignoredDirs?: ReadonlySet<string>;
40
44
  readonly ignoredPatterns?: readonly string[];
45
+ readonly includePatterns?: readonly string[];
41
46
  readonly respectGitignore?: boolean;
42
47
  readonly includeHidden?: boolean;
43
48
  readonly maxFiles?: number;
@@ -63,6 +68,13 @@ export interface PageRankOptions {
63
68
  /** Optional personalization vector to bias scores toward specific files */
64
69
  readonly personalization?: ReadonlyMap<string, number>;
65
70
  }
71
+ /**
72
+ * Options for building the file reference graph.
+ * Every field is optional.
73
+ */
74
+ export interface GraphBuildOptions {
75
+ /** Optional callback that receives the path of the referencing file and returns the weight multiplier for edges FROM that file (default: 1.0) */
76
+ readonly edgeWeightMultiplier?: (fromPath: string) => number;
77
+ }
66
78
  /**
67
79
  * File reference graph for ranking.
68
80
  * Nodes are files (relPath), edges are symbol references.
@@ -90,6 +102,17 @@ export interface RankedDefinition {
90
102
  readonly ident: string;
91
103
  readonly rank: number;
92
104
  }
105
+ /**
106
+ * Degree metrics for a file in the graph.
+ * Consumed by adjustFileRanks for the hub penalty and the diversity bonus.
107
+ */
108
+ export interface FileDegrees {
109
+ /** Number of unique files that reference this file; when greater than 0, adjustFileRanks scales the rank by 1 / log2(1 + inDegree) */
110
+ readonly inDegree: number;
111
+ /** Number of unique files this file references */
112
+ readonly outDegree: number;
113
+ /** Number of unique first-level modules this file references; when greater than 1, adjustFileRanks applies a log2-based diversity bonus */
114
+ readonly uniqueModulesReferenced: number;
115
+ }
93
116
  /**
94
117
  * A cached entry for a single file.
95
118
  */
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@snevins/repo-mapper",
3
- "version": "1.0.3",
3
+ "version": "1.2.0",
4
4
  "description": "Generate token-budgeted repo maps for LLM context using tree-sitter and PageRank",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",