@snevins/repo-mapper 1.0.3 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -9,6 +9,7 @@ Generate token-budgeted maps of code repositories for LLM context. Uses tree-sit
 
  - **Token budgeting**: Fit the most important code into your context window
  - **Smart ranking**: PageRank algorithm prioritizes frequently-referenced code
+ - **Module clustering**: Groups files by directory with global top-50 guarantee
  - **Focus mode**: Bias output toward files you're working on
  - **Fast caching**: Only re-parses changed files
 
@@ -44,12 +45,14 @@ repo-mapper [paths...] [options]
 
  | Option | Default | Description |
  |--------|---------|-------------|
- | `-t, --tokens <n>` | 1024 | Maximum tokens for output |
+ | `-t, --tokens <n>` | 4000 | Maximum tokens for output |
  | `-f, --focus <file>` | | Files to prioritize (repeatable) |
  | `-o, --output <file>` | stdout | Write output to file |
  | `-r, --refresh` | false | Ignore cache, re-parse all |
  | `-v, --verbose` | false | Print progress info |
+ | `--type <lang>` | | Filter by language: ts, js, python, go, rust, solidity (repeatable) |
  | `--ignore <pattern>` | | Additional ignore patterns (repeatable) |
+ | `--include <pattern>` | | Only include files matching pattern (repeatable) |
  | `--no-ignore` | false | Disable default ignores |
  | `--max-files <n>` | 10000 | Maximum files to process |
 
@@ -62,6 +65,12 @@ repo-mapper . -t 2000
  # Focus on specific files you're editing
  repo-mapper . -f src/api.ts -f src/types.ts -t 4000
 
+ # Only TypeScript files
+ repo-mapper . --type ts -t 2000
+
+ # Only Rust files in a mixed repo
+ repo-mapper . --type rust -t 2000
+
  # Save to file for LLM prompt
  repo-mapper . -t 8000 -o context.md
 
@@ -86,9 +95,9 @@ src/pagerank.ts:
 
  ## Caching
 
- Cache is stored in `.repomap.cache.v1/` in the repository root. Files are re-parsed only when modified (mtime-based invalidation).
+ Cache is stored in `.repomap.cache.v2/` in the repository root. Files are re-parsed only when modified (mtime-based invalidation).
 
- Clear cache with: `rm -rf .repomap.cache.v1`
+ Clear cache with: `rm -rf .repomap.cache.v2`
 
  ## How It Works
 
@@ -98,6 +107,28 @@ Clear cache with: `rm -rf .repomap.cache.v1`
  4. **Rank** - Run PageRank to score files by importance
  5. **Budget** - Binary search to fit top definitions within token limit
 
+ ## Claude Code Plugin
+
+ repo-mapper includes a Claude Code plugin with commands and skills.
+
+ ### Installation
+
+ ```bash
+ # Add to your Claude Code plugins
+ claude plugins add /path/to/repo-mapper-ts
+ ```
+
+ ### Commands
+
+ | Command | Description |
+ |---------|-------------|
+ | `/status` | Check if repo-mapper is installed and up to date |
+ | `/update-codemap` | Generate or update CODEMAP.md with smart language detection |
+
+ ### Skills
+
+ The `using-repo-mapper` skill provides guidance on effective repo-mapper usage patterns.
+
  ## License
 
  MIT
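
The "Budget" step in the How It Works list above is a binary search over how many top-ranked definitions can be rendered before the token limit is hit. A minimal sketch of that idea, assuming hypothetical `render` and `countTokens` helpers (neither is part of this package's public API):

```ts
// Find the largest prefix of ranked definitions whose rendered map fits the budget.
function fitToBudget(
  rankedDefs: readonly string[],
  budget: number,
  render: (defs: readonly string[]) => string,
  countTokens: (text: string) => number,
): string {
  let lo = 0;                  // empty map always fits
  let hi = rankedDefs.length;  // full map may not
  while (lo < hi) {
    const mid = Math.floor((lo + hi + 1) / 2); // bias upward so the loop terminates
    if (countTokens(render(rankedDefs.slice(0, mid))) <= budget) {
      lo = mid;     // mid definitions fit; try to include more
    } else {
      hi = mid - 1; // over budget; shrink
    }
  }
  return render(rankedDefs.slice(0, lo));
}
```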
package/dist/cli.d.ts CHANGED
@@ -1,6 +1,22 @@
  import type { ParsedArgs } from "./types.js";
+ /**
+  * Error thrown when --help or -h is requested.
+  * Main should catch this, print usage, and exit(0).
+  */
+ export declare class HelpRequestedError extends Error {
+     constructor();
+ }
+ /**
+  * Error thrown when --version or -V is requested.
+  * Main should catch this, print version, and exit(0).
+  */
+ export declare class VersionRequestedError extends Error {
+     constructor();
+ }
  /**
   * Parse CLI arguments into structured format.
+  * Throws HelpRequestedError if --help or -h is present.
+  * Throws VersionRequestedError if --version or -V is present.
   * Throws on invalid input.
   */
  export declare function parseCliArgs(argv: readonly string[]): ParsedArgs;
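
Per the JSDoc above, `HelpRequestedError` and `VersionRequestedError` are meant to be caught by the CLI entry point, which then prints usage or the version and exits 0. A hedged sketch of that pattern; `printUsage` and `VERSION` are illustrative placeholders, not exports of this package:

```ts
import { parseCliArgs, HelpRequestedError, VersionRequestedError } from "./cli.js";

// Illustrative placeholders -- not part of the package's API.
const VERSION = "1.2.0";
function printUsage(): void {
  console.log("Usage: repo-mapper [paths...] [options]");
}

export function main(argv: readonly string[]): void {
  try {
    const args = parseCliArgs(argv);
    void args; // ...hand args to the mapper...
  } catch (err) {
    if (err instanceof HelpRequestedError) {
      printUsage();
      process.exit(0);
    }
    if (err instanceof VersionRequestedError) {
      console.log(VERSION);
      process.exit(0);
    }
    throw err; // genuine parse errors still propagate
  }
}

main(process.argv.slice(2));
```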
package/dist/cli.js CHANGED
@@ -1,11 +1,41 @@
  import { parseArgs } from "node:util";
- const DEFAULT_TOKENS = 1024;
+ const DEFAULT_TOKENS = 4000;
  const DEFAULT_MAX_FILES = 10000;
+ /**
+  * Error thrown when --help or -h is requested.
+  * Main should catch this, print usage, and exit(0).
+  */
+ export class HelpRequestedError extends Error {
+     constructor() {
+         super("Help requested");
+         this.name = "HelpRequestedError";
+     }
+ }
+ /**
+  * Error thrown when --version or -V is requested.
+  * Main should catch this, print version, and exit(0).
+  */
+ export class VersionRequestedError extends Error {
+     constructor() {
+         super("Version requested");
+         this.name = "VersionRequestedError";
+     }
+ }
  /**
   * Parse CLI arguments into structured format.
+  * Throws HelpRequestedError if --help or -h is present.
+  * Throws VersionRequestedError if --version or -V is present.
   * Throws on invalid input.
   */
  export function parseCliArgs(argv) {
+     // Check for --help or -h BEFORE calling parseArgs (which would throw on unknown flags)
+     if (argv.includes("--help") || argv.includes("-h")) {
+         throw new HelpRequestedError();
+     }
+     // Check for --version or -V BEFORE calling parseArgs
+     if (argv.includes("--version") || argv.includes("-V")) {
+         throw new VersionRequestedError();
+     }
      const { values, positionals } = parseArgs({
          args: argv,
          options: {
@@ -15,8 +45,10 @@ export function parseCliArgs(argv) {
              refresh: { type: "boolean", short: "r" },
              verbose: { type: "boolean", short: "v" },
              ignore: { type: "string", multiple: true },
+             include: { type: "string", multiple: true },
              "no-ignore": { type: "boolean" },
              "max-files": { type: "string" },
+             type: { type: "string", multiple: true },
          },
          allowPositionals: true,
          strict: true,
@@ -32,8 +64,10 @@ export function parseCliArgs(argv) {
              refresh: values.refresh ?? false,
              verbose: values.verbose ?? false,
              ignore: values.ignore ?? [],
+             include: values.include ?? [],
              noIgnore: values["no-ignore"] ?? false,
              maxFiles,
+             type: values.type ?? [],
          },
      };
  }
package/dist/dedup.d.ts ADDED
@@ -0,0 +1,27 @@
+ import type { Tag } from "./types.js";
+ /**
+  * A group of files that appear to be duplicates based on their definition names.
+  */
+ export interface DuplicateGroup {
+     readonly fingerprint: string;
+     readonly files: readonly string[];
+     /** First alphabetically - the "canonical" version to keep */
+     readonly canonical: string;
+ }
+ /**
+  * Compute a fingerprint for a file based on its definition names.
+  * Files with the same fingerprint likely contain copy-pasted code.
+  */
+ export declare function computeFingerprint(tags: readonly Tag[]): string;
+ /**
+  * Detect duplicate files based on their definition fingerprints.
+  * Returns a map from file path to its duplicate group info.
+  */
+ export declare function detectDuplicates(tagsByFile: ReadonlyMap<string, readonly Tag[]>): Map<string, DuplicateGroup>;
+ /**
+  * Get the ranking penalty for a file based on duplicate status.
+  * - Canonical file: 1.0 (no penalty)
+  * - Non-canonical duplicate: 0.3 (70% penalty)
+  * - Non-duplicate: 1.0 (no penalty)
+  */
+ export declare function getDuplicatePenalty(file: string, duplicates: ReadonlyMap<string, DuplicateGroup>): number;
package/dist/dedup.js ADDED
@@ -0,0 +1,67 @@
+ import { createHash } from "node:crypto";
+ /**
+  * Compute a fingerprint for a file based on its definition names.
+  * Files with the same fingerprint likely contain copy-pasted code.
+  */
+ export function computeFingerprint(tags) {
+     const names = tags
+         .filter((t) => t.kind === "def")
+         .map((t) => t.name)
+         .sort();
+     return createHash("sha256")
+         .update(names.join("\0"))
+         .digest("hex")
+         .slice(0, 16);
+ }
+ /**
+  * Detect duplicate files based on their definition fingerprints.
+  * Returns a map from file path to its duplicate group info.
+  */
+ export function detectDuplicates(tagsByFile) {
+     // Group files by fingerprint
+     const byFingerprint = new Map();
+     for (const [file, tags] of tagsByFile) {
+         // Skip tiny files - not enough signal to detect duplicates
+         const defs = tags.filter((t) => t.kind === "def");
+         if (defs.length < 3)
+             continue;
+         const fp = computeFingerprint(tags);
+         const group = byFingerprint.get(fp) ?? [];
+         group.push(file);
+         byFingerprint.set(fp, group);
+     }
+     // Build result map - only include actual duplicates (2+ files)
+     const result = new Map();
+     for (const [fp, files] of byFingerprint) {
+         if (files.length < 2)
+             continue;
+         // Sort for determinism, first becomes canonical
+         const sorted = [...files].sort();
+         const firstFile = sorted[0];
+         if (!firstFile)
+             continue; // Defensive - can't happen due to length check above
+         const canonical = firstFile;
+         const group = {
+             fingerprint: fp,
+             files: sorted,
+             canonical,
+         };
+         // Map each file to the group
+         for (const file of sorted) {
+             result.set(file, group);
+         }
+     }
+     return result;
+ }
+ /**
+  * Get the ranking penalty for a file based on duplicate status.
+  * - Canonical file: 1.0 (no penalty)
+  * - Non-canonical duplicate: 0.3 (70% penalty)
+  * - Non-duplicate: 1.0 (no penalty)
+  */
+ export function getDuplicatePenalty(file, duplicates) {
+     const group = duplicates.get(file);
+     if (!group)
+         return 1.0;
+     return group.canonical === file ? 1.0 : 0.3;
+ }
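
The dedup module is evidently meant to feed a per-file multiplier into ranking: files that share a definition-name fingerprint are grouped, the alphabetically first file keeps full weight, and the rest are penalized. A usage sketch, using simplified stand-in tags (the real `Tag` type from types.js carries more fields, hence the cast):

```ts
import { detectDuplicates, getDuplicatePenalty } from "./dedup.js";
import type { Tag } from "./types.js";

// Simplified stand-in tags; real Tag objects have more fields (positions, language, ...).
const def = (name: string, relPath: string) => ({ kind: "def" as const, name, relPath });

const tagsByFile = new Map([
  ["src/parse.ts", [def("parseFile", "src/parse.ts"), def("walkTree", "src/parse.ts"), def("toTag", "src/parse.ts")]],
  // A copy-pasted variant with the same three definition names:
  ["src/parse_old.ts", [def("parseFile", "src/parse_old.ts"), def("walkTree", "src/parse_old.ts"), def("toTag", "src/parse_old.ts")]],
]);

const duplicates = detectDuplicates(tagsByFile as unknown as ReadonlyMap<string, readonly Tag[]>);
console.log(getDuplicatePenalty("src/parse.ts", duplicates));     // 1.0 (canonical: sorts first)
console.log(getDuplicatePenalty("src/parse_old.ts", duplicates)); // 0.3 (non-canonical duplicate)
```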
package/dist/files.d.ts CHANGED
@@ -1,4 +1,9 @@
  import type { FileDiscoveryOptions, FileDiscoveryResult } from "./types.js";
+ /**
+  * Check if a path indicates a test or script file.
+  * Used for soft-ignore (downweight edges from these files).
+  */
+ export declare function isTestOrScriptPath(relPath: string): boolean;
  /**
   * Directories always skipped during discovery.
   */
package/dist/files.js CHANGED
@@ -2,6 +2,39 @@ import { readdir, readFile } from "node:fs/promises";
  import { join, extname, resolve, relative } from "node:path";
  import ignore from "ignore";
  import { LANGUAGE_REGISTRY } from "./languages.js";
+ /**
+  * Patterns that indicate test/script files (for soft-ignore weighting).
+  * Files matching these patterns get downweighted in the graph, not excluded.
+  */
+ const TEST_SCRIPT_PATTERNS = [
+     // Test directories (at start or after /)
+     /(^|[/\\])test[/\\]/,
+     /(^|[/\\])tests[/\\]/,
+     /(^|[/\\])testing[/\\]/,
+     /(^|[/\\])__tests__[/\\]/,
+     /(^|[/\\])spec[/\\]/,
+     // Test utility files (anywhere in path)
+     /testutil/,
+     /test_helper/,
+     /test_utils/,
+     // Test file naming patterns
+     /_test\.(ts|js|py|go|rs)$/,
+     /\.test\.(ts|js)$/,
+     /\.spec\.(ts|js)$/,
+     /(^|[/\\])test_[^/\\]*\.py$/, // test_*.py
+     // Script/tool directories (at start or after /)
+     /(^|[/\\])scripts?[/\\]/,
+     /(^|[/\\])tools?[/\\]/,
+     /(^|[/\\])examples?[/\\]/,
+     /(^|[/\\])benchmarks?[/\\]/,
+ ];
+ /**
+  * Check if a path indicates a test or script file.
+  * Used for soft-ignore (downweight edges from these files).
+  */
+ export function isTestOrScriptPath(relPath) {
+     return TEST_SCRIPT_PATTERNS.some((p) => p.test(relPath));
+ }
  /**
   * Directories always skipped during discovery.
   */
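
`isTestOrScriptPath` implements the "soft ignore" described in its JSDoc: matching files stay in the map, but their outgoing references count for less. A hedged sketch of how it could be wired into `buildFileGraph`'s new `edgeWeightMultiplier` option; the 0.2 factor is illustrative, not necessarily the value the package actually uses:

```ts
import { isTestOrScriptPath } from "./files.js";
import { buildFileGraph } from "./graph.js";
import type { Tag } from "./types.js";

// Downweight outgoing edges from test/script files so they pull less PageRank
// toward whatever code they happen to reference. 0.2 is an illustrative factor.
export function buildWeightedGraph(tags: readonly Tag[]) {
  return buildFileGraph(tags, {
    edgeWeightMultiplier: (relPath) => (isTestOrScriptPath(relPath) ? 0.2 : 1.0),
  });
}
```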
@@ -29,6 +62,15 @@ export const DEFAULT_IGNORED_DIRS = new Set([
      ".cache",
      "vendor",
      "third_party",
+     "compiled",
+     "examples",
+     "test",
+     "tests",
+     "testing",
+     "__tests__",
+     "__testfixtures__",
+     "benches",
+     "benchmarks",
  ]);
  /**
   * Default patterns for files to ignore (generated, tests, mocks).
@@ -58,14 +100,16 @@ export const DEFAULT_IGNORED_PATTERNS = [
      "**/*.spec.tsx",
      "**/*.spec.js",
      "**/*.spec.jsx",
-     "**/__tests__/**",
      // Mocks/fixtures
      "**/__mocks__/**",
-     "**/mocks/**",
-     "**/mock_*/**",
+     "**/mock*/**",
      "**/*_mock.go",
      "**/fixtures/**",
      "**/testdata/**",
+     // Test helpers
+     "**/tests/**/extension/**",
+     "**/test_helpers/**",
+     "**/spec/support/**",
      // Bundled/minified files (Phase 16)
      "**/*.min.js",
      "**/*.min.mjs",
@@ -79,6 +123,50 @@ export const DEFAULT_IGNORED_PATTERNS = [
      "**/vendor*.js",
      "**/runtime*.js",
      "**/edge-runtime/**",
+     // Test utilities
+     "**/*.util.ts",
+     "**/*.util.js",
+     // Test utilities (language-specific)
+     "**/testutil.rs",
+     "**/testutil.go",
+     "**/testutil.ts",
+     "**/testutil.js",
+     "**/testutils.rs",
+     "**/testutils.go",
+     "**/testutils.ts",
+     "**/testutils.js",
+     "**/test_util.py",
+     "**/test_utils.py",
+     "**/*_fake.go",
+     "**/*_fake.rs",
+     // Storybook files
+     "**/*.stories.ts",
+     "**/*.stories.tsx",
+     "**/*.stories.js",
+     "**/*.stories.jsx",
+     "**/*.stories.mdx",
+     "**/*.story.ts",
+     "**/*.story.tsx",
+     "**/*.story.js",
+     "**/*.story.jsx",
+     // Formal verification / test harnesses
+     "**/fv/**",
+     "**/harnesses/**",
+     // Type declaration files (implementation-less)
+     "**/*.d.ts",
+     "**/*.d.mts",
+     "**/*.d.cts",
+     // Template/scaffold directories
+     "**/templates/**",
+     "**/template/**",
+     "**/create-*-app/**",
+     "**/starter/**",
+     "**/boilerplate/**",
+     // Additional generated/internal
+     "**/bundles/**",
+     "**/*.snap",
+     "**/*.map",
+     "**/*compiled*",
  ];
  /**
   * File extensions supported for parsing.
@@ -96,6 +184,7 @@ export async function discoverFiles(options) {
      const extensions = options.extensions ?? SUPPORTED_EXTENSIONS;
      const ignoredDirs = options.ignoredDirs ?? DEFAULT_IGNORED_DIRS;
      const ignoredPatterns = options.ignoredPatterns;
+     const includePatterns = options.includePatterns;
      const respectGitignore = options.respectGitignore ?? true;
      const includeHidden = options.includeHidden ?? false;
      const maxFiles = options.maxFiles;
@@ -112,11 +201,27 @@ export async function discoverFiles(options) {
              throw new Error(`Invalid ignore pattern: ${msg}`);
          }
      }
+     // Compile include pattern matcher (if patterns provided)
+     let includeMatcher;
+     if (includePatterns && includePatterns.length > 0) {
+         try {
+             includeMatcher = ignore().add([...includePatterns]);
+         }
+         catch (err) {
+             const msg = err instanceof Error ? err.message : String(err);
+             throw new Error(`Invalid include pattern: ${msg}`);
+         }
+     }
      function isIgnoredByPatterns(relPath) {
          if (!patternMatcher)
              return false;
          return patternMatcher.ignores(relPath);
      }
+     function isIncludedByPatterns(relPath) {
+         if (!includeMatcher)
+             return true; // no include patterns = include all
+         return includeMatcher.ignores(relPath); // "ignores" means "matches"
+     }
      async function loadGitignore(dir) {
          if (!respectGitignore)
              return undefined;
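
`isIncludedByPatterns` reuses the same gitignore-style matcher as the ignore logic, so a path the matcher "ignores" is a path that matches the include patterns and is therefore kept. A small sketch of that semantics with the `ignore` package (patterns are illustrative):

```ts
import ignore from "ignore";

// Built exactly like an ignore matcher, but interpreted in reverse:
// "ignores" === "matches the include pattern" === keep the file.
const includeMatcher = ignore().add(["src/**", "*.md"]);

console.log(includeMatcher.ignores("src/graph.ts"));  // true  -> included
console.log(includeMatcher.ignores("dist/graph.js")); // false -> filtered out
```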
@@ -183,6 +288,8 @@ export async function discoverFiles(options) {
              if (isIgnoredByGitignore(absPath, false))
                  continue;
              const relPath = relative(rootDir, absPath);
+             if (!isIncludedByPatterns(relPath))
+                 continue;
              if (isIgnoredByPatterns(relPath))
                  continue;
              files.push(absPath);
package/dist/graph.d.ts CHANGED
@@ -1,6 +1,22 @@
- import type { Tag, FileGraph } from "./types.js";
+ import type { Tag, FileGraph, GraphBuildOptions, FileDegrees } from "./types.js";
+ /**
+  * Compute in-degree, out-degree, and module diversity for each file.
+  * - outDegree: number of unique files this file references
+  * - inDegree: number of unique files that reference this file
+  * - uniqueModulesReferenced: number of unique first-level directories in out-edges
+  */
+ export declare function computeFileDegrees(graph: FileGraph): Map<string, FileDegrees>;
  /**
   * Build file reference graph from parsed tags.
   * Nodes are files, edges are symbol references from one file to another.
   */
- export declare function buildFileGraph(tags: readonly Tag[]): FileGraph;
+ export declare function buildFileGraph(tags: readonly Tag[], options?: GraphBuildOptions): FileGraph;
+ /**
+  * Build import graph with binary edges (1 per file→file relationship).
+  * Captures module structure without reference frequency noise.
+  *
+  * Unlike buildFileGraph which uses ref counts, this assigns weight 1
+  * to each unique file-to-file import relationship regardless of how
+  * many times symbols are referenced.
+  */
+ export declare function buildImportGraph(tags: readonly Tag[], options?: GraphBuildOptions): FileGraph;
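
A small worked example of the new `computeFileDegrees`, using simplified stand-in tags (the real `Tag` type carries more fields, hence the cast): `app.ts` references symbols defined in two different first-level directories, so it gets `outDegree` 2 and `uniqueModulesReferenced` 2, while each definition file gets `inDegree` 1.

```ts
import { buildFileGraph, computeFileDegrees } from "./graph.js";
import type { Tag } from "./types.js";

// Simplified stand-in tags; real Tag objects have more fields.
const tags = [
  { kind: "def", name: "loadConfig", relPath: "core/config.ts" },
  { kind: "def", name: "slugify", relPath: "utils/text.ts" },
  { kind: "ref", name: "loadConfig", relPath: "app.ts" },
  { kind: "ref", name: "slugify", relPath: "app.ts" },
] as unknown as readonly Tag[];

const degrees = computeFileDegrees(buildFileGraph(tags));
console.log(degrees.get("app.ts"));
// { inDegree: 0, outDegree: 2, uniqueModulesReferenced: 2 }  ("core" and "utils")
console.log(degrees.get("core/config.ts"));
// { inDegree: 1, outDegree: 0, uniqueModulesReferenced: 0 }
```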
package/dist/graph.js CHANGED
@@ -1,8 +1,55 @@
+ /**
+  * Get first path segment (module) from a file path.
+  * e.g., "utils/helper.ts" -> "utils", "main.ts" -> ""
+  */
+ function getModule(path) {
+     const slash = path.indexOf("/");
+     return slash === -1 ? "" : path.slice(0, slash);
+ }
+ /**
+  * Compute in-degree, out-degree, and module diversity for each file.
+  * - outDegree: number of unique files this file references
+  * - inDegree: number of unique files that reference this file
+  * - uniqueModulesReferenced: number of unique first-level directories in out-edges
+  */
+ export function computeFileDegrees(graph) {
+     const result = new Map();
+     // Initialize all nodes with zero degrees
+     for (const node of graph.nodes) {
+         result.set(node, { inDegree: 0, outDegree: 0, uniqueModulesReferenced: 0 });
+     }
+     // Compute out-degree and in-degree from edges
+     for (const [from, toMap] of graph.edges) {
+         const outDegree = toMap.size;
+         const modules = new Set();
+         for (const to of toMap.keys()) {
+             modules.add(getModule(to));
+             // Increment in-degree for target
+             const targetDegrees = result.get(to);
+             if (targetDegrees) {
+                 result.set(to, {
+                     ...targetDegrees,
+                     inDegree: targetDegrees.inDegree + 1,
+                 });
+             }
+         }
+         const current = result.get(from);
+         if (current) {
+             result.set(from, {
+                 ...current,
+                 outDegree,
+                 uniqueModulesReferenced: modules.size,
+             });
+         }
+     }
+     return result;
+ }
  /**
   * Build file reference graph from parsed tags.
   * Nodes are files, edges are symbol references from one file to another.
   */
- export function buildFileGraph(tags) {
+ export function buildFileGraph(tags, options) {
+     const getMultiplier = options?.edgeWeightMultiplier ?? (() => 1.0);
      // First pass: collect nodes and index defs
      const nodeSet = new Set();
      const defsByName = new Map();
@@ -20,8 +67,10 @@ export function buildFileGraph(tags) {
      }
      // Sort nodes for determinism
      const nodes = [...nodeSet].sort();
-     // Second pass: build edges (need defsByName complete first)
-     // Track both file-level and symbol-level edges
+     // Weight multiplier for non-exported symbols (internal helpers rank lower)
+     const NON_EXPORTED_WEIGHT = 0.5;
+     // Second pass: build edges with semantic weighting
+     // Exported symbols get full weight, non-exported get reduced weight
      const edgesBuilder = new Map();
      const symbolEdgesBuilder = new Map();
      for (const tag of tags) {
@@ -37,14 +86,17 @@ export function buildFileGraph(tags) {
              const from = tag.relPath;
              const to = def.relPath;
              const symbol = tag.name;
-             // File-level edge
+             // Semantic weight: exported symbols get full weight, non-exported get reduced
+             // Treat undefined isExported as exported (backwards compatibility)
+             const semanticWeight = def.isExported === false ? NON_EXPORTED_WEIGHT : 1.0;
+             // File-level edge (with semantic weight)
              let fromEdges = edgesBuilder.get(from);
              if (!fromEdges) {
                  fromEdges = new Map();
                  edgesBuilder.set(from, fromEdges);
              }
-             fromEdges.set(to, (fromEdges.get(to) ?? 0) + 1);
-             // Symbol-level edge
+             fromEdges.set(to, (fromEdges.get(to) ?? 0) + semanticWeight);
+             // Symbol-level edge (with semantic weight)
              let fromSymbolEdges = symbolEdgesBuilder.get(from);
              if (!fromSymbolEdges) {
                  fromSymbolEdges = new Map();
@@ -55,10 +107,133 @@ export function buildFileGraph(tags) {
                  toSymbolEdges = new Map();
                  fromSymbolEdges.set(to, toSymbolEdges);
              }
-             toSymbolEdges.set(symbol, (toSymbolEdges.get(symbol) ?? 0) + 1);
+             toSymbolEdges.set(symbol, (toSymbolEdges.get(symbol) ?? 0) + semanticWeight);
+         }
+     }
+     // Compute raw outWeights (before normalization)
+     const rawOutWeights = new Map();
+     for (const [from, toMap] of edgesBuilder) {
+         let total = 0;
+         for (const weight of toMap.values()) {
+             total += weight;
+         }
+         rawOutWeights.set(from, total);
+     }
+     // Normalize edges: divide each edge weight by total outgoing weight
+     // This makes all files have equal influence regardless of reference count
+     // Then apply multiplier to scale file's total influence (test files get less)
+     for (const [from, toMap] of edgesBuilder) {
+         const totalRefs = rawOutWeights.get(from) ?? 1;
+         const multiplier = getMultiplier(from);
+         if (totalRefs > 0) {
+             for (const [to, weight] of toMap) {
+                 toMap.set(to, (weight / totalRefs) * multiplier);
+             }
+         }
+     }
+     // Normalize symbolEdges similarly
+     for (const [from, toMap] of symbolEdgesBuilder) {
+         const totalRefs = rawOutWeights.get(from) ?? 1;
+         const multiplier = getMultiplier(from);
+         if (totalRefs > 0) {
+             for (const [, symbolMap] of toMap) {
+                 for (const [symbol, weight] of symbolMap) {
+                     symbolMap.set(symbol, (weight / totalRefs) * multiplier);
+                 }
+             }
+         }
+     }
+     // Compute final outWeights (normalized * multiplier)
+     const outWeights = new Map();
+     for (const [from, toMap] of edgesBuilder) {
+         let total = 0;
+         for (const weight of toMap.values()) {
+             total += weight;
+         }
+         outWeights.set(from, total);
+     }
+     return {
+         nodes,
+         edges: edgesBuilder,
+         symbolEdges: symbolEdgesBuilder,
+         outWeights,
+         defsByName,
+     };
+ }
+ /**
+  * Build import graph with binary edges (1 per file→file relationship).
+  * Captures module structure without reference frequency noise.
+  *
+  * Unlike buildFileGraph which uses ref counts, this assigns weight 1
+  * to each unique file-to-file import relationship regardless of how
+  * many times symbols are referenced.
+  */
+ export function buildImportGraph(tags, options) {
+     const getMultiplier = options?.edgeWeightMultiplier ?? (() => 1.0);
+     // First pass: collect nodes and index defs
+     const nodeSet = new Set();
+     const defsByName = new Map();
+     for (const tag of tags) {
+         nodeSet.add(tag.relPath);
+         if (tag.kind === "def") {
+             const existing = defsByName.get(tag.name);
+             if (existing) {
+                 existing.push(tag);
+             }
+             else {
+                 defsByName.set(tag.name, [tag]);
+             }
+         }
+     }
+     const nodes = [...nodeSet].sort();
+     // Second pass: build binary edges (1 per unique file→file connection)
+     // Track unique file→file relationships, not ref counts
+     const edgesBuilder = new Map();
+     const symbolEdgesBuilder = new Map();
+     const seenEdges = new Map(); // from → Set<to>
+     for (const tag of tags) {
+         if (tag.kind !== "ref")
+             continue;
+         const defs = defsByName.get(tag.name);
+         if (!defs)
+             continue;
+         for (const def of defs) {
+             if (def.relPath === tag.relPath)
+                 continue;
+             const from = tag.relPath;
+             const to = def.relPath;
+             // Check if we already have this edge (binary: only count once)
+             let fromSeen = seenEdges.get(from);
+             if (!fromSeen) {
+                 fromSeen = new Set();
+                 seenEdges.set(from, fromSeen);
+             }
+             if (!fromSeen.has(to)) {
+                 fromSeen.add(to);
+                 // Create binary edge: weight = 1 * multiplier
+                 const multiplier = getMultiplier(from);
+                 let fromEdges = edgesBuilder.get(from);
+                 if (!fromEdges) {
+                     fromEdges = new Map();
+                     edgesBuilder.set(from, fromEdges);
+                 }
+                 fromEdges.set(to, multiplier);
+                 // Symbol edges: just track which symbol connected them (weight 1)
+                 let fromSymbolEdges = symbolEdgesBuilder.get(from);
+                 if (!fromSymbolEdges) {
+                     fromSymbolEdges = new Map();
+                     symbolEdgesBuilder.set(from, fromSymbolEdges);
+                 }
+                 let toSymbolEdges = fromSymbolEdges.get(to);
+                 if (!toSymbolEdges) {
+                     toSymbolEdges = new Map();
+                     fromSymbolEdges.set(to, toSymbolEdges);
+                 }
+                 toSymbolEdges.set(tag.name, multiplier);
+             }
          }
      }
-     // Compute outWeights
+     // Compute outWeights (sum of binary edges * multiplier)
      const outWeights = new Map();
      for (const [from, toMap] of edgesBuilder) {
          let total = 0;
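
The normalization in `buildFileGraph` (each file's outgoing weights sum to its multiplier) and the binary edges in `buildImportGraph` can be seen side by side on a tiny example, again with simplified stand-in tags (the real `Tag` type carries more fields):

```ts
import { buildFileGraph, buildImportGraph } from "./graph.js";
import type { Tag } from "./types.js";

// main.ts references helperA three times and helperB once.
const tags = [
  { kind: "def", name: "helperA", relPath: "lib/a.ts" },
  { kind: "def", name: "helperB", relPath: "lib/b.ts" },
  { kind: "ref", name: "helperA", relPath: "main.ts" },
  { kind: "ref", name: "helperA", relPath: "main.ts" },
  { kind: "ref", name: "helperA", relPath: "main.ts" },
  { kind: "ref", name: "helperB", relPath: "main.ts" },
] as unknown as readonly Tag[];

// Reference-count graph: raw weights 3 and 1 are normalized so main.ts
// spends exactly 1.0 of outgoing influence in total.
console.log(buildFileGraph(tags).edges.get("main.ts"));
// Map(2) { 'lib/a.ts' => 0.75, 'lib/b.ts' => 0.25 }

// Import graph: one binary edge per unique file pair, regardless of ref counts.
console.log(buildImportGraph(tags).edges.get("main.ts"));
// Map(2) { 'lib/a.ts' => 1, 'lib/b.ts' => 1 }
```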