@snevins/repo-mapper 1.0.3 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +34 -3
- package/dist/cli.d.ts +16 -0
- package/dist/cli.js +35 -1
- package/dist/dedup.d.ts +27 -0
- package/dist/dedup.js +67 -0
- package/dist/files.d.ts +5 -0
- package/dist/files.js +110 -3
- package/dist/graph.d.ts +18 -2
- package/dist/graph.js +183 -8
- package/dist/index.js +156 -34
- package/dist/languages.d.ts +5 -0
- package/dist/languages.js +21 -0
- package/dist/output.d.ts +10 -4
- package/dist/output.js +159 -27
- package/dist/pagerank.js +3 -4
- package/dist/parser.js +58 -1
- package/dist/ranking.d.ts +37 -1
- package/dist/ranking.js +242 -1
- package/dist/types.d.ts +23 -0
- package/package.json +1 -1
package/README.md
CHANGED
@@ -9,6 +9,7 @@ Generate token-budgeted maps of code repositories for LLM context. Uses tree-sit

 - **Token budgeting**: Fit the most important code into your context window
 - **Smart ranking**: PageRank algorithm prioritizes frequently-referenced code
+- **Module clustering**: Groups files by directory with global top-50 guarantee
 - **Focus mode**: Bias output toward files you're working on
 - **Fast caching**: Only re-parses changed files

@@ -44,12 +45,14 @@ repo-mapper [paths...] [options]

 | Option | Default | Description |
 |--------|---------|-------------|
-| `-t, --tokens <n>` |
+| `-t, --tokens <n>` | 4000 | Maximum tokens for output |
 | `-f, --focus <file>` | | Files to prioritize (repeatable) |
 | `-o, --output <file>` | stdout | Write output to file |
 | `-r, --refresh` | false | Ignore cache, re-parse all |
 | `-v, --verbose` | false | Print progress info |
+| `--type <lang>` | | Filter by language: ts, js, python, go, rust, solidity (repeatable) |
 | `--ignore <pattern>` | | Additional ignore patterns (repeatable) |
+| `--include <pattern>` | | Only include files matching pattern (repeatable) |
 | `--no-ignore` | false | Disable default ignores |
 | `--max-files <n>` | 10000 | Maximum files to process |

@@ -62,6 +65,12 @@ repo-mapper . -t 2000
 # Focus on specific files you're editing
 repo-mapper . -f src/api.ts -f src/types.ts -t 4000

+# Only TypeScript files
+repo-mapper . --type ts -t 2000
+
+# Only Rust files in a mixed repo
+repo-mapper . --type rust -t 2000
+
 # Save to file for LLM prompt
 repo-mapper . -t 8000 -o context.md

@@ -86,9 +95,9 @@ src/pagerank.ts:

 ## Caching

-Cache is stored in `.repomap.cache.
+Cache is stored in `.repomap.cache.v2/` in the repository root. Files are re-parsed only when modified (mtime-based invalidation).

-Clear cache with: `rm -rf .repomap.cache.v1`
+Clear cache with: `rm -rf .repomap.cache.v2`

 ## How It Works

@@ -98,6 +107,28 @@ Clear cache with: `rm -rf .repomap.cache.v1`
 4. **Rank** - Run PageRank to score files by importance
 5. **Budget** - Binary search to fit top definitions within token limit

+## Claude Code Plugin
+
+repo-mapper includes a Claude Code plugin with commands and skills.
+
+### Installation
+
+```bash
+# Add to your Claude Code plugins
+claude plugins add /path/to/repo-mapper-ts
+```
+
+### Commands
+
+| Command | Description |
+|---------|-------------|
+| `/status` | Check if repo-mapper is installed and up to date |
+| `/update-codemap` | Generate or update CODEMAP.md with smart language detection |
+
+### Skills
+
+The `using-repo-mapper` skill provides guidance on effective repo-mapper usage patterns.
+
 ## License

 MIT
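Step 5 of "How It Works" is only described in prose. A minimal sketch of that binary search, assuming hypothetical `renderTopN(n)` and `countTokens(text)` helpers (neither name is part of repo-mapper's actual API):

```js
// Sketch only: binary-search the largest n whose rendered map fits the token budget.
// renderTopN and countTokens are hypothetical stand-ins, not repo-mapper's real functions.
function fitToBudget(renderTopN, countTokens, totalDefs, maxTokens) {
  let lo = 0;
  let hi = totalDefs;
  let best = "";
  while (lo <= hi) {
    const mid = Math.floor((lo + hi) / 2);
    const rendered = renderTopN(mid);
    if (countTokens(rendered) <= maxTokens) {
      best = rendered; // fits: try to include more definitions
      lo = mid + 1;
    } else {
      hi = mid - 1; // too big: include fewer
    }
  }
  return best;
}
```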
package/dist/cli.d.ts
CHANGED
@@ -1,6 +1,22 @@
 import type { ParsedArgs } from "./types.js";
+/**
+ * Error thrown when --help or -h is requested.
+ * Main should catch this, print usage, and exit(0).
+ */
+export declare class HelpRequestedError extends Error {
+    constructor();
+}
+/**
+ * Error thrown when --version or -V is requested.
+ * Main should catch this, print version, and exit(0).
+ */
+export declare class VersionRequestedError extends Error {
+    constructor();
+}
 /**
  * Parse CLI arguments into structured format.
+ * Throws HelpRequestedError if --help or -h is present.
+ * Throws VersionRequestedError if --version or -V is present.
  * Throws on invalid input.
  */
 export declare function parseCliArgs(argv: readonly string[]): ParsedArgs;
package/dist/cli.js
CHANGED
@@ -1,11 +1,41 @@
 import { parseArgs } from "node:util";
-const DEFAULT_TOKENS =
+const DEFAULT_TOKENS = 4000;
 const DEFAULT_MAX_FILES = 10000;
+/**
+ * Error thrown when --help or -h is requested.
+ * Main should catch this, print usage, and exit(0).
+ */
+export class HelpRequestedError extends Error {
+    constructor() {
+        super("Help requested");
+        this.name = "HelpRequestedError";
+    }
+}
+/**
+ * Error thrown when --version or -V is requested.
+ * Main should catch this, print version, and exit(0).
+ */
+export class VersionRequestedError extends Error {
+    constructor() {
+        super("Version requested");
+        this.name = "VersionRequestedError";
+    }
+}
 /**
  * Parse CLI arguments into structured format.
+ * Throws HelpRequestedError if --help or -h is present.
+ * Throws VersionRequestedError if --version or -V is present.
  * Throws on invalid input.
  */
 export function parseCliArgs(argv) {
+    // Check for --help or -h BEFORE calling parseArgs (which would throw on unknown flags)
+    if (argv.includes("--help") || argv.includes("-h")) {
+        throw new HelpRequestedError();
+    }
+    // Check for --version or -V BEFORE calling parseArgs
+    if (argv.includes("--version") || argv.includes("-V")) {
+        throw new VersionRequestedError();
+    }
     const { values, positionals } = parseArgs({
         args: argv,
         options: {
@@ -15,8 +45,10 @@ export function parseCliArgs(argv) {
             refresh: { type: "boolean", short: "r" },
             verbose: { type: "boolean", short: "v" },
             ignore: { type: "string", multiple: true },
+            include: { type: "string", multiple: true },
             "no-ignore": { type: "boolean" },
             "max-files": { type: "string" },
+            type: { type: "string", multiple: true },
         },
         allowPositionals: true,
         strict: true,
@@ -32,8 +64,10 @@ export function parseCliArgs(argv) {
            refresh: values.refresh ?? false,
            verbose: values.verbose ?? false,
            ignore: values.ignore ?? [],
+           include: values.include ?? [],
            noIgnore: values["no-ignore"] ?? false,
            maxFiles,
+           type: values.type ?? [],
        },
    };
 }
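Per the doc comments, main is expected to catch these errors, print usage or the version, and exit(0). A minimal sketch of such a caller (the usage text and `VERSION` constant are stand-ins; the package's real index.js is not shown in this diff):

```js
import { parseCliArgs, HelpRequestedError, VersionRequestedError } from "./cli.js";

// Hypothetical stand-ins; the real usage text and version lookup are not part of this diff.
const printUsage = () => console.log("Usage: repo-mapper [paths...] [options]");
const VERSION = "1.2.0";

export function main(argv) {
  try {
    return parseCliArgs(argv);
  } catch (err) {
    if (err instanceof HelpRequestedError) {
      printUsage();
      process.exit(0);
    }
    if (err instanceof VersionRequestedError) {
      console.log(VERSION);
      process.exit(0);
    }
    throw err;
  }
}
```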
package/dist/dedup.d.ts
ADDED
@@ -0,0 +1,27 @@
+import type { Tag } from "./types.js";
+/**
+ * A group of files that appear to be duplicates based on their definition names.
+ */
+export interface DuplicateGroup {
+    readonly fingerprint: string;
+    readonly files: readonly string[];
+    /** First alphabetically - the "canonical" version to keep */
+    readonly canonical: string;
+}
+/**
+ * Compute a fingerprint for a file based on its definition names.
+ * Files with the same fingerprint likely contain copy-pasted code.
+ */
+export declare function computeFingerprint(tags: readonly Tag[]): string;
+/**
+ * Detect duplicate files based on their definition fingerprints.
+ * Returns a map from file path to its duplicate group info.
+ */
+export declare function detectDuplicates(tagsByFile: ReadonlyMap<string, readonly Tag[]>): Map<string, DuplicateGroup>;
+/**
+ * Get the ranking penalty for a file based on duplicate status.
+ * - Canonical file: 1.0 (no penalty)
+ * - Non-canonical duplicate: 0.3 (70% penalty)
+ * - Non-duplicate: 1.0 (no penalty)
+ */
+export declare function getDuplicatePenalty(file: string, duplicates: ReadonlyMap<string, DuplicateGroup>): number;
package/dist/dedup.js
ADDED
@@ -0,0 +1,67 @@
+import { createHash } from "node:crypto";
+/**
+ * Compute a fingerprint for a file based on its definition names.
+ * Files with the same fingerprint likely contain copy-pasted code.
+ */
+export function computeFingerprint(tags) {
+    const names = tags
+        .filter((t) => t.kind === "def")
+        .map((t) => t.name)
+        .sort();
+    return createHash("sha256")
+        .update(names.join("\0"))
+        .digest("hex")
+        .slice(0, 16);
+}
+/**
+ * Detect duplicate files based on their definition fingerprints.
+ * Returns a map from file path to its duplicate group info.
+ */
+export function detectDuplicates(tagsByFile) {
+    // Group files by fingerprint
+    const byFingerprint = new Map();
+    for (const [file, tags] of tagsByFile) {
+        // Skip tiny files - not enough signal to detect duplicates
+        const defs = tags.filter((t) => t.kind === "def");
+        if (defs.length < 3)
+            continue;
+        const fp = computeFingerprint(tags);
+        const group = byFingerprint.get(fp) ?? [];
+        group.push(file);
+        byFingerprint.set(fp, group);
+    }
+    // Build result map - only include actual duplicates (2+ files)
+    const result = new Map();
+    for (const [fp, files] of byFingerprint) {
+        if (files.length < 2)
+            continue;
+        // Sort for determinism, first becomes canonical
+        const sorted = [...files].sort();
+        const firstFile = sorted[0];
+        if (!firstFile)
+            continue; // Defensive - can't happen due to length check above
+        const canonical = firstFile;
+        const group = {
+            fingerprint: fp,
+            files: sorted,
+            canonical,
+        };
+        // Map each file to the group
+        for (const file of sorted) {
+            result.set(file, group);
+        }
+    }
+    return result;
+}
+/**
+ * Get the ranking penalty for a file based on duplicate status.
+ * - Canonical file: 1.0 (no penalty)
+ * - Non-canonical duplicate: 0.3 (70% penalty)
+ * - Non-duplicate: 1.0 (no penalty)
+ */
+export function getDuplicatePenalty(file, duplicates) {
+    const group = duplicates.get(file);
+    if (!group)
+        return 1.0;
+    return group.canonical === file ? 1.0 : 0.3;
+}
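A small usage sketch of the new dedup pass. The tag objects are hand-written stand-ins carrying only the `kind`/`name` fields the fingerprinting code reads; real Tag objects have more fields (relPath, line numbers, ...):

```js
import { detectDuplicates, getDuplicatePenalty } from "./dedup.js";

// Minimal stand-in tags, illustrative only.
const defs = (names) => names.map((name) => ({ kind: "def", name }));

const tagsByFile = new Map([
  ["src/parser.ts", defs(["parse", "tokenize", "walk"])],
  ["src/parser_copy.ts", defs(["parse", "tokenize", "walk"])], // same definition names
  ["src/main.ts", defs(["main", "run", "report"])],
]);

const dups = detectDuplicates(tagsByFile);
getDuplicatePenalty("src/parser.ts", dups);      // 1.0 - canonical (sorts first)
getDuplicatePenalty("src/parser_copy.ts", dups); // 0.3 - non-canonical duplicate
getDuplicatePenalty("src/main.ts", dups);        // 1.0 - not in any duplicate group
```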
package/dist/files.d.ts
CHANGED
@@ -1,4 +1,9 @@
 import type { FileDiscoveryOptions, FileDiscoveryResult } from "./types.js";
+/**
+ * Check if a path indicates a test or script file.
+ * Used for soft-ignore (downweight edges from these files).
+ */
+export declare function isTestOrScriptPath(relPath: string): boolean;
 /**
  * Directories always skipped during discovery.
  */
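A few illustrative calls against the declared function (paths invented; the matching patterns are defined in files.js below):

```js
import { isTestOrScriptPath } from "./files.js";

isTestOrScriptPath("src/graph.ts");        // false - regular source file
isTestOrScriptPath("tests/graph.test.ts"); // true  - tests/ directory
isTestOrScriptPath("scripts/release.ts");  // true  - scripts/ directory
isTestOrScriptPath("src/parser_test.go");  // true  - _test.go suffix
```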
package/dist/files.js
CHANGED
@@ -2,6 +2,39 @@ import { readdir, readFile } from "node:fs/promises";
 import { join, extname, resolve, relative } from "node:path";
 import ignore from "ignore";
 import { LANGUAGE_REGISTRY } from "./languages.js";
+/**
+ * Patterns that indicate test/script files (for soft-ignore weighting).
+ * Files matching these patterns get downweighted in the graph, not excluded.
+ */
+const TEST_SCRIPT_PATTERNS = [
+    // Test directories (at start or after /)
+    /(^|[/\\])test[/\\]/,
+    /(^|[/\\])tests[/\\]/,
+    /(^|[/\\])testing[/\\]/,
+    /(^|[/\\])__tests__[/\\]/,
+    /(^|[/\\])spec[/\\]/,
+    // Test utility files (anywhere in path)
+    /testutil/,
+    /test_helper/,
+    /test_utils/,
+    // Test file naming patterns
+    /_test\.(ts|js|py|go|rs)$/,
+    /\.test\.(ts|js)$/,
+    /\.spec\.(ts|js)$/,
+    /(^|[/\\])test_[^/\\]*\.py$/, // test_*.py
+    // Script/tool directories (at start or after /)
+    /(^|[/\\])scripts?[/\\]/,
+    /(^|[/\\])tools?[/\\]/,
+    /(^|[/\\])examples?[/\\]/,
+    /(^|[/\\])benchmarks?[/\\]/,
+];
+/**
+ * Check if a path indicates a test or script file.
+ * Used for soft-ignore (downweight edges from these files).
+ */
+export function isTestOrScriptPath(relPath) {
+    return TEST_SCRIPT_PATTERNS.some((p) => p.test(relPath));
+}
 /**
  * Directories always skipped during discovery.
  */
@@ -29,6 +62,15 @@ export const DEFAULT_IGNORED_DIRS = new Set([
     ".cache",
     "vendor",
     "third_party",
+    "compiled",
+    "examples",
+    "test",
+    "tests",
+    "testing",
+    "__tests__",
+    "__testfixtures__",
+    "benches",
+    "benchmarks",
 ]);
 /**
  * Default patterns for files to ignore (generated, tests, mocks).
@@ -58,14 +100,16 @@ export const DEFAULT_IGNORED_PATTERNS = [
     "**/*.spec.tsx",
     "**/*.spec.js",
     "**/*.spec.jsx",
-    "**/__tests__/**",
     // Mocks/fixtures
     "**/__mocks__/**",
-    "**/
-    "**/mock_*/**",
+    "**/mock*/**",
     "**/*_mock.go",
     "**/fixtures/**",
     "**/testdata/**",
+    // Test helpers
+    "**/tests/**/extension/**",
+    "**/test_helpers/**",
+    "**/spec/support/**",
     // Bundled/minified files (Phase 16)
     "**/*.min.js",
     "**/*.min.mjs",
@@ -79,6 +123,50 @@ export const DEFAULT_IGNORED_PATTERNS = [
     "**/vendor*.js",
     "**/runtime*.js",
     "**/edge-runtime/**",
+    // Test utilities
+    "**/*.util.ts",
+    "**/*.util.js",
+    // Test utilities (language-specific)
+    "**/testutil.rs",
+    "**/testutil.go",
+    "**/testutil.ts",
+    "**/testutil.js",
+    "**/testutils.rs",
+    "**/testutils.go",
+    "**/testutils.ts",
+    "**/testutils.js",
+    "**/test_util.py",
+    "**/test_utils.py",
+    "**/*_fake.go",
+    "**/*_fake.rs",
+    // Storybook files
+    "**/*.stories.ts",
+    "**/*.stories.tsx",
+    "**/*.stories.js",
+    "**/*.stories.jsx",
+    "**/*.stories.mdx",
+    "**/*.story.ts",
+    "**/*.story.tsx",
+    "**/*.story.js",
+    "**/*.story.jsx",
+    // Formal verification / test harnesses
+    "**/fv/**",
+    "**/harnesses/**",
+    // Type declaration files (implementation-less)
+    "**/*.d.ts",
+    "**/*.d.mts",
+    "**/*.d.cts",
+    // Template/scaffold directories
+    "**/templates/**",
+    "**/template/**",
+    "**/create-*-app/**",
+    "**/starter/**",
+    "**/boilerplate/**",
+    // Additional generated/internal
+    "**/bundles/**",
+    "**/*.snap",
+    "**/*.map",
+    "**/*compiled*",
 ];
 /**
  * File extensions supported for parsing.
@@ -96,6 +184,7 @@ export async function discoverFiles(options) {
     const extensions = options.extensions ?? SUPPORTED_EXTENSIONS;
     const ignoredDirs = options.ignoredDirs ?? DEFAULT_IGNORED_DIRS;
     const ignoredPatterns = options.ignoredPatterns;
+    const includePatterns = options.includePatterns;
     const respectGitignore = options.respectGitignore ?? true;
     const includeHidden = options.includeHidden ?? false;
     const maxFiles = options.maxFiles;
@@ -112,11 +201,27 @@ export async function discoverFiles(options) {
             throw new Error(`Invalid ignore pattern: ${msg}`);
         }
     }
+    // Compile include pattern matcher (if patterns provided)
+    let includeMatcher;
+    if (includePatterns && includePatterns.length > 0) {
+        try {
+            includeMatcher = ignore().add([...includePatterns]);
+        }
+        catch (err) {
+            const msg = err instanceof Error ? err.message : String(err);
+            throw new Error(`Invalid include pattern: ${msg}`);
+        }
+    }
     function isIgnoredByPatterns(relPath) {
         if (!patternMatcher)
             return false;
         return patternMatcher.ignores(relPath);
     }
+    function isIncludedByPatterns(relPath) {
+        if (!includeMatcher)
+            return true; // no include patterns = include all
+        return includeMatcher.ignores(relPath); // "ignores" means "matches"
+    }
     async function loadGitignore(dir) {
         if (!respectGitignore)
             return undefined;
@@ -183,6 +288,8 @@ export async function discoverFiles(options) {
            if (isIgnoredByGitignore(absPath, false))
                continue;
            const relPath = relative(rootDir, absPath);
+           if (!isIncludedByPatterns(relPath))
+               continue;
            if (isIgnoredByPatterns(relPath))
                continue;
            files.push(absPath);
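The `--include` support reuses the `ignore` package in reverse: `ignores(path)` returning true is read as "path matches an include pattern, keep it". A standalone sketch of that semantics, with made-up patterns and paths:

```js
import ignore from "ignore";

// Include patterns are compiled exactly like ignore patterns...
const includeMatcher = ignore().add(["src/**/*.ts", "*.md"]);

// ...but the boolean is interpreted as "matches an include pattern", not "excluded".
includeMatcher.ignores("src/graph.ts");  // true  -> keep
includeMatcher.ignores("dist/graph.js"); // false -> skip
includeMatcher.ignores("README.md");     // true  -> keep
```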
package/dist/graph.d.ts
CHANGED
@@ -1,6 +1,22 @@
-import type { Tag, FileGraph } from "./types.js";
+import type { Tag, FileGraph, GraphBuildOptions, FileDegrees } from "./types.js";
+/**
+ * Compute in-degree, out-degree, and module diversity for each file.
+ * - outDegree: number of unique files this file references
+ * - inDegree: number of unique files that reference this file
+ * - uniqueModulesReferenced: number of unique first-level directories in out-edges
+ */
+export declare function computeFileDegrees(graph: FileGraph): Map<string, FileDegrees>;
 /**
  * Build file reference graph from parsed tags.
  * Nodes are files, edges are symbol references from one file to another.
  */
-export declare function buildFileGraph(tags: readonly Tag[]): FileGraph;
+export declare function buildFileGraph(tags: readonly Tag[], options?: GraphBuildOptions): FileGraph;
+/**
+ * Build import graph with binary edges (1 per file→file relationship).
+ * Captures module structure without reference frequency noise.
+ *
+ * Unlike buildFileGraph which uses ref counts, this assigns weight 1
+ * to each unique file-to-file import relationship regardless of how
+ * many times symbols are referenced.
+ */
+export declare function buildImportGraph(tags: readonly Tag[], options?: GraphBuildOptions): FileGraph;
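computeFileDegrees reads only `graph.nodes` and `graph.edges`, so a hand-built graph is enough to illustrate it. The object literal below mimics the shape the graph.js implementation uses (nodes array plus a Map of Maps of edge weights); treat it as illustrative, not the full FileGraph type:

```js
import { computeFileDegrees } from "./graph.js";

// Normally the graph comes from buildFileGraph(tags); this hand-built one carries
// only the fields computeFileDegrees actually reads.
const graph = {
  nodes: ["cli/main.ts", "core/graph.ts", "core/rank.ts"],
  edges: new Map([
    ["cli/main.ts", new Map([["core/graph.ts", 1], ["core/rank.ts", 1]])],
    ["core/rank.ts", new Map([["core/graph.ts", 1]])],
  ]),
};

computeFileDegrees(graph).get("core/graph.ts");
// -> { inDegree: 2, outDegree: 0, uniqueModulesReferenced: 0 }
computeFileDegrees(graph).get("cli/main.ts");
// -> { inDegree: 0, outDegree: 2, uniqueModulesReferenced: 1 }  (both targets live under "core")
```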
package/dist/graph.js
CHANGED
@@ -1,8 +1,55 @@
+/**
+ * Get first path segment (module) from a file path.
+ * e.g., "utils/helper.ts" -> "utils", "main.ts" -> ""
+ */
+function getModule(path) {
+    const slash = path.indexOf("/");
+    return slash === -1 ? "" : path.slice(0, slash);
+}
+/**
+ * Compute in-degree, out-degree, and module diversity for each file.
+ * - outDegree: number of unique files this file references
+ * - inDegree: number of unique files that reference this file
+ * - uniqueModulesReferenced: number of unique first-level directories in out-edges
+ */
+export function computeFileDegrees(graph) {
+    const result = new Map();
+    // Initialize all nodes with zero degrees
+    for (const node of graph.nodes) {
+        result.set(node, { inDegree: 0, outDegree: 0, uniqueModulesReferenced: 0 });
+    }
+    // Compute out-degree and in-degree from edges
+    for (const [from, toMap] of graph.edges) {
+        const outDegree = toMap.size;
+        const modules = new Set();
+        for (const to of toMap.keys()) {
+            modules.add(getModule(to));
+            // Increment in-degree for target
+            const targetDegrees = result.get(to);
+            if (targetDegrees) {
+                result.set(to, {
+                    ...targetDegrees,
+                    inDegree: targetDegrees.inDegree + 1,
+                });
+            }
+        }
+        const current = result.get(from);
+        if (current) {
+            result.set(from, {
+                ...current,
+                outDegree,
+                uniqueModulesReferenced: modules.size,
+            });
+        }
+    }
+    return result;
+}
 /**
  * Build file reference graph from parsed tags.
  * Nodes are files, edges are symbol references from one file to another.
  */
-export function buildFileGraph(tags) {
+export function buildFileGraph(tags, options) {
+    const getMultiplier = options?.edgeWeightMultiplier ?? (() => 1.0);
     // First pass: collect nodes and index defs
     const nodeSet = new Set();
     const defsByName = new Map();
@@ -20,8 +67,10 @@ export function buildFileGraph(tags) {
     }
     // Sort nodes for determinism
     const nodes = [...nodeSet].sort();
-    //
-
+    // Weight multiplier for non-exported symbols (internal helpers rank lower)
+    const NON_EXPORTED_WEIGHT = 0.5;
+    // Second pass: build edges with semantic weighting
+    // Exported symbols get full weight, non-exported get reduced weight
     const edgesBuilder = new Map();
     const symbolEdgesBuilder = new Map();
     for (const tag of tags) {
@@ -37,14 +86,17 @@ export function buildFileGraph(tags) {
             const from = tag.relPath;
             const to = def.relPath;
             const symbol = tag.name;
-            //
+            // Semantic weight: exported symbols get full weight, non-exported get reduced
+            // Treat undefined isExported as exported (backwards compatibility)
+            const semanticWeight = def.isExported === false ? NON_EXPORTED_WEIGHT : 1.0;
+            // File-level edge (with semantic weight)
             let fromEdges = edgesBuilder.get(from);
             if (!fromEdges) {
                 fromEdges = new Map();
                 edgesBuilder.set(from, fromEdges);
             }
-            fromEdges.set(to, (fromEdges.get(to) ?? 0) +
-            // Symbol-level edge
+            fromEdges.set(to, (fromEdges.get(to) ?? 0) + semanticWeight);
+            // Symbol-level edge (with semantic weight)
             let fromSymbolEdges = symbolEdgesBuilder.get(from);
             if (!fromSymbolEdges) {
                 fromSymbolEdges = new Map();
@@ -55,10 +107,133 @@ export function buildFileGraph(tags) {
                 toSymbolEdges = new Map();
                 fromSymbolEdges.set(to, toSymbolEdges);
             }
-            toSymbolEdges.set(symbol, (toSymbolEdges.get(symbol) ?? 0) +
+            toSymbolEdges.set(symbol, (toSymbolEdges.get(symbol) ?? 0) + semanticWeight);
+        }
+    }
+    // Compute raw outWeights (before normalization)
+    const rawOutWeights = new Map();
+    for (const [from, toMap] of edgesBuilder) {
+        let total = 0;
+        for (const weight of toMap.values()) {
+            total += weight;
+        }
+        rawOutWeights.set(from, total);
+    }
+    // Normalize edges: divide each edge weight by total outgoing weight
+    // This makes all files have equal influence regardless of reference count
+    // Then apply multiplier to scale file's total influence (test files get less)
+    for (const [from, toMap] of edgesBuilder) {
+        const totalRefs = rawOutWeights.get(from) ?? 1;
+        const multiplier = getMultiplier(from);
+        if (totalRefs > 0) {
+            for (const [to, weight] of toMap) {
+                toMap.set(to, (weight / totalRefs) * multiplier);
+            }
+        }
+    }
+    // Normalize symbolEdges similarly
+    for (const [from, toMap] of symbolEdgesBuilder) {
+        const totalRefs = rawOutWeights.get(from) ?? 1;
+        const multiplier = getMultiplier(from);
+        if (totalRefs > 0) {
+            for (const [, symbolMap] of toMap) {
+                for (const [symbol, weight] of symbolMap) {
+                    symbolMap.set(symbol, (weight / totalRefs) * multiplier);
+                }
+            }
+        }
+    }
+    // Compute final outWeights (normalized * multiplier)
+    const outWeights = new Map();
+    for (const [from, toMap] of edgesBuilder) {
+        let total = 0;
+        for (const weight of toMap.values()) {
+            total += weight;
+        }
+        outWeights.set(from, total);
+    }
+    return {
+        nodes,
+        edges: edgesBuilder,
+        symbolEdges: symbolEdgesBuilder,
+        outWeights,
+        defsByName,
+    };
+}
+/**
+ * Build import graph with binary edges (1 per file→file relationship).
+ * Captures module structure without reference frequency noise.
+ *
+ * Unlike buildFileGraph which uses ref counts, this assigns weight 1
+ * to each unique file-to-file import relationship regardless of how
+ * many times symbols are referenced.
+ */
+export function buildImportGraph(tags, options) {
+    const getMultiplier = options?.edgeWeightMultiplier ?? (() => 1.0);
+    // First pass: collect nodes and index defs
+    const nodeSet = new Set();
+    const defsByName = new Map();
+    for (const tag of tags) {
+        nodeSet.add(tag.relPath);
+        if (tag.kind === "def") {
+            const existing = defsByName.get(tag.name);
+            if (existing) {
+                existing.push(tag);
+            }
+            else {
+                defsByName.set(tag.name, [tag]);
+            }
+        }
+    }
+    const nodes = [...nodeSet].sort();
+    // Second pass: build binary edges (1 per unique file→file connection)
+    // Track unique file→file relationships, not ref counts
+    const edgesBuilder = new Map();
+    const symbolEdgesBuilder = new Map();
+    const seenEdges = new Map(); // from → Set<to>
+    for (const tag of tags) {
+        if (tag.kind !== "ref")
+            continue;
+        const defs = defsByName.get(tag.name);
+        if (!defs)
+            continue;
+        for (const def of defs) {
+            if (def.relPath === tag.relPath)
+                continue;
+            const from = tag.relPath;
+            const to = def.relPath;
+            // Check if we already have this edge (binary: only count once)
+            let fromSeen = seenEdges.get(from);
+            if (!fromSeen) {
+                fromSeen = new Set();
+                seenEdges.set(from, fromSeen);
+            }
+            if (!fromSeen.has(to)) {
+                fromSeen.add(to);
+                // Create binary edge: weight = 1 * multiplier
+                const multiplier = getMultiplier(from);
+                let fromEdges = edgesBuilder.get(from);
+                if (!fromEdges) {
+                    fromEdges = new Map();
+                    edgesBuilder.set(from, fromEdges);
+                }
+                fromEdges.set(to, multiplier);
+                // Symbol edges: just track which symbol connected them (weight 1)
+                let fromSymbolEdges = symbolEdgesBuilder.get(from);
+                if (!fromSymbolEdges) {
+                    fromSymbolEdges = new Map();
+                    symbolEdgesBuilder.set(from, fromSymbolEdges);
+                }
+                let toSymbolEdges = fromSymbolEdges.get(to);
+                if (!toSymbolEdges) {
+                    toSymbolEdges = new Map();
+                    fromSymbolEdges.set(to, toSymbolEdges);
+                }
+                toSymbolEdges.set(tag.name, multiplier);
+            }
         }
     }
-    // Compute outWeights
+    // Compute outWeights (sum of binary edges * multiplier)
     const outWeights = new Map();
     for (const [from, toMap] of edgesBuilder) {
         let total = 0;