codedeep-mcp 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +177 -0
- package/dist/config.js +223 -0
- package/dist/git/analyzer.js +177 -0
- package/dist/git/git-service.js +568 -0
- package/dist/git/head-watcher.js +113 -0
- package/dist/git/runner.js +204 -0
- package/dist/index.js +138 -0
- package/dist/indexer/code-index.js +1801 -0
- package/dist/indexer/complexity.js +633 -0
- package/dist/indexer/extractor.js +354 -0
- package/dist/indexer/languages/cpp.js +934 -0
- package/dist/indexer/languages/csharp.js +854 -0
- package/dist/indexer/languages/dart.js +777 -0
- package/dist/indexer/languages/go.js +665 -0
- package/dist/indexer/languages/java.js +507 -0
- package/dist/indexer/languages/kotlin.js +709 -0
- package/dist/indexer/languages/objc.js +397 -0
- package/dist/indexer/languages/php.js +771 -0
- package/dist/indexer/languages/python.js +455 -0
- package/dist/indexer/languages/ruby.js +697 -0
- package/dist/indexer/languages/rust.js +754 -0
- package/dist/indexer/languages/swift.js +691 -0
- package/dist/indexer/languages/typescript.js +485 -0
- package/dist/indexer/parser.js +175 -0
- package/dist/indexer/pipeline.js +342 -0
- package/dist/indexer/scanner.js +279 -0
- package/dist/indexer/watcher.js +353 -0
- package/dist/logger.js +16 -0
- package/dist/server.js +170 -0
- package/dist/tools/common.js +207 -0
- package/dist/tools/find-references.js +224 -0
- package/dist/tools/find-symbol.js +94 -0
- package/dist/tools/get-context.js +370 -0
- package/dist/tools/impact.js +218 -0
- package/dist/tools/overview.js +482 -0
- package/dist/tools/search-structure.js +303 -0
- package/dist/types.js +61 -0
- package/grammars/tree-sitter-c.wasm +0 -0
- package/grammars/tree-sitter-c_sharp.wasm +0 -0
- package/grammars/tree-sitter-cpp.wasm +0 -0
- package/grammars/tree-sitter-dart.wasm +0 -0
- package/grammars/tree-sitter-go.wasm +0 -0
- package/grammars/tree-sitter-java.wasm +0 -0
- package/grammars/tree-sitter-javascript.wasm +0 -0
- package/grammars/tree-sitter-kotlin.wasm +0 -0
- package/grammars/tree-sitter-objc.wasm +0 -0
- package/grammars/tree-sitter-php.wasm +0 -0
- package/grammars/tree-sitter-python.wasm +0 -0
- package/grammars/tree-sitter-ruby.wasm +0 -0
- package/grammars/tree-sitter-rust.wasm +0 -0
- package/grammars/tree-sitter-swift.wasm +0 -0
- package/grammars/tree-sitter-tsx.wasm +0 -0
- package/grammars/tree-sitter-typescript.wasm +0 -0
- package/package.json +67 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Danh Hung
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
# codedeep-mcp
|
|
2
|
+
|
|
3
|
+
[CI status](https://github.com/planexhq/codedeep-mcp/actions/workflows/ci.yml)
|
|
4
|
+
[npm package](https://www.npmjs.com/package/codedeep-mcp)
|
|
5
|
+
[MCP specification 2025-11-25](https://modelcontextprotocol.io/specification/2025-11-25)
|
|
6
|
+
[License: MIT](./LICENSE)
|
|
7
|
+
[Node.js](https://nodejs.org)
|
|
8
|
+
|
|
9
|
+
An MCP server that gives AI coding agents structural understanding of codebases.
|
|
10
|
+
|
|
11
|
+
**One tool call replaces 5-10 Grep-Read cycles.**
|
|
12
|
+
|
|
13
|
+
codedeep-mcp parses your code with [tree-sitter](https://tree-sitter.github.io/tree-sitter/), builds a symbol index, and exposes 6 tools over the [Model Context Protocol](https://modelcontextprotocol.io/) that answer structural questions directly: find symbols, trace callers, assess blast radius, search by structure.
|
|
14
|
+
|
|
15
|
+
## Why
|
|
16
|
+
|
|
17
|
+
AI coding agents explore codebases with text tools (grep, file reads). This works but is expensive:
|
|
18
|
+
|
|
19
|
+
- "Find all callers of X" requires 5+ grep-read cycles and returns false positives
|
|
20
|
+
- "What breaks if I change this?" requires exhaustive manual search
|
|
21
|
+
- Grep can't tell `user` the variable from `User` the class from `user()` the function
|
|
22
|
+
|
|
23
|
+
codedeep-mcp solves this by parsing code into symbols and relationships, then answering structural questions in a single call.
|
|
24
|
+
|
|
25
|
+
## Tools
|
|
26
|
+
|
|
27
|
+
| Tool | Purpose | Example |
|
|
28
|
+
|------|---------|---------|
|
|
29
|
+
| `overview` | Orient in an unfamiliar codebase | Language breakdown, entry points, structure |
|
|
30
|
+
| `find_symbol` | AST-aware symbol lookup | Find function by name — matches definitions, not text |
|
|
31
|
+
| `get_context` | Full context for a symbol | Body + callers/callees + imports + co-change & complexity |
|
|
32
|
+
| `find_references` | Cross-file usage search | Who calls this function, and from where? |
|
|
33
|
+
| `impact` | Depth-N blast radius | Transitive upstream callers, grouped by hop |
|
|
34
|
+
| `search_structure` | Keyword and structural search | Find by name/signature (all languages), or AST pattern (TS/JS) |
|
|
35
|
+
|
|
36
|
+
## Quick Start
|
|
37
|
+
|
|
38
|
+
### Claude Code
|
|
39
|
+
|
|
40
|
+
Add to `~/.claude/settings.json`:
|
|
41
|
+
|
|
42
|
+
```json
|
|
43
|
+
{
|
|
44
|
+
"mcpServers": {
|
|
45
|
+
"codedeep-mcp": {
|
|
46
|
+
"command": "npx",
|
|
47
|
+
"args": ["codedeep-mcp"]
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
### Cursor / Windsurf / Other MCP Clients
|
|
54
|
+
|
|
55
|
+
Any MCP client that supports stdio transport works. Configure it to run `npx codedeep-mcp`.
|
|
56
|
+
|
|
57
|
+
> **Note:** `npx codedeep-mcp` is a stdio server — it won't produce visible
|
|
58
|
+
> output when run directly. It communicates via JSON-RPC with the MCP client.
|
|
59
|
+
|
|
60
|
+
## How It Works
|
|
61
|
+
|
|
62
|
+
```
|
|
63
|
+
Your Code ──> tree-sitter (parse) ──> In-Memory Index ──> MCP Tools
|
|
64
|
+
│
|
|
65
|
+
Git (optional)
|
|
66
|
+
LSP (planned)
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
**Structural index (always, instant):**
|
|
70
|
+
tree-sitter parses every file into an AST. Symbols, call relationships,
|
|
71
|
+
and imports are extracted and indexed in memory — with per-language call
|
|
72
|
+
resolution tuned for precision (an explicit 0-wrong-kind-edge goal), not
|
|
73
|
+
just text matching. Works on any repo with zero configuration.
|
|
74
|
+
|
|
75
|
+
**Complexity metrics (all 14 languages):**
|
|
76
|
+
Per-symbol cyclomatic and cognitive complexity, computed at index time and
|
|
77
|
+
pinned for behavioral comparability to McCabe / the Cognitive Complexity
|
|
78
|
+
whitepaper / open-source analyzers (SonarJS, sonar-java, gocyclo+gocognit,
|
|
79
|
+
rust-code-analysis, …). Shown on `find_symbol` / `get_context`.
|
|
80
|
+
|
|
81
|
+
**Git enrichment (when in a git repo):**
|
|
82
|
+
Commit frequency identifies hotspot files; co-change analysis reveals
|
|
83
|
+
behavioral coupling (files that change together); and a risk score
|
|
84
|
+
(churn × coupling × complexity) ranks the most change-prone, tangled hubs.
|
|
85
|
+
|
|
86
|
+
**Planned — LSP semantic tier:**
|
|
87
|
+
LSP integration (tsserver, pyright, gopls, …) for compiler-precise
|
|
88
|
+
cross-file references and type info is designed but **not yet shipped** —
|
|
89
|
+
cross-file edges today are AST name-matches.
|
|
90
|
+
|
|
91
|
+
## Example
|
|
92
|
+
|
|
93
|
+
````
|
|
94
|
+
> find_symbol({ name: "authenticate" })
|
|
95
|
+
|
|
96
|
+
src/auth/middleware.ts:42-67 | function | exported
|
|
97
|
+
async function authenticate(req: Request, res: Response, next: NextFunction): Promise<void>
|
|
98
|
+
Validates the JWT token and attaches user to request
|
|
99
|
+
References: ~5
|
|
100
|
+
Fan-out: 2
|
|
101
|
+
Complexity: cyc 3 / cog 1 [structural]
|
|
102
|
+
|
|
103
|
+
> get_context({ file: "src/auth/middleware.ts", symbol: "authenticate" })
|
|
104
|
+
|
|
105
|
+
src/auth/middleware.ts:42-67 | function | exported
|
|
106
|
+
async function authenticate(req: Request, res: Response, next: NextFunction): Promise<void>
|
|
107
|
+
Validates the JWT token and attaches user to request
|
|
108
|
+
|
|
109
|
+
### Body
|
|
110
|
+
```typescript
|
|
111
|
+
async function authenticate(req: Request, res: Response, next: NextFunction): Promise<void> {
|
|
112
|
+
const token = extractToken(req);
|
|
113
|
+
const payload = verify(token);
|
|
114
|
+
req.user = payload as User;
|
|
115
|
+
next();
|
|
116
|
+
}
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
### Callers
|
|
120
|
+
- src/routes/api.ts:67 — handleRequest() [structural]
|
|
121
|
+
- src/routes/webhook.ts:23 — verifyWebhook() [structural]
|
|
122
|
+
|
|
123
|
+
(get_context also emits ### Callees and ### Coupling sections here, omitted for brevity)
|
|
124
|
+
|
|
125
|
+
### Imports
|
|
126
|
+
- jsonwebtoken: verify, decode
|
|
127
|
+
- ./types: User, AuthToken
|
|
128
|
+
|
|
129
|
+
### Co-change Partners (2 behavioral)
|
|
130
|
+
- src/auth/types.ts 78% confidence (9 shared commits)
|
|
131
|
+
- tests/auth.test.ts 64% confidence (7 shared commits)
|
|
132
|
+
````
|
|
133
|
+
|
|
134
|
+
## Supported Languages
|
|
135
|
+
|
|
136
|
+
**14 languages**, each with tree-sitter symbol/reference extraction **and**
|
|
137
|
+
cyclomatic + cognitive complexity:
|
|
138
|
+
|
|
139
|
+
TypeScript / JS · Python · Java · Go · Rust · Swift · Kotlin · Dart · C# ·
|
|
140
|
+
PHP · Ruby · C++ · C · Objective-C
|
|
141
|
+
|
|
142
|
+
A planned LSP tier (see *How It Works*) will add compiler-precise cross-file
|
|
143
|
+
resolution per language.
|
|
144
|
+
|
|
145
|
+
## Configuration
|
|
146
|
+
|
|
147
|
+
Optional `.codedeep/config.json` in your project root:
|
|
148
|
+
|
|
149
|
+
```jsonc
|
|
150
|
+
{
|
|
151
|
+
"exclude": ["vendor/**", "generated/**"],
|
|
152
|
+
"languages": ["typescript", "python"],
|
|
153
|
+
"maxFiles": 100000,
|
|
154
|
+
"maxFileSize": 1048576,
|
|
155
|
+
"watch": true,
|
|
156
|
+
"gitEnabled": true,
|
|
157
|
+
"gitWindow": 180
|
|
158
|
+
}
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
All fields are optional. Works with no config file.
|
|
162
|
+
|
|
163
|
+
Add `.codedeep/` to your `.gitignore` — the index cache is stored there.
|
|
164
|
+
|
|
165
|
+
Environment variables: `CODEDEEP_CACHE_DIR`, `CODEDEEP_EXCLUDE`, `CODEDEEP_GIT`, `CODEDEEP_GIT_WINDOW`, `CODEDEEP_WATCH`, `CODEDEEP_DEBUG`.
|
|
166
|
+
|
|
167
|
+
## Development
|
|
168
|
+
|
|
169
|
+
```bash
|
|
170
|
+
npm install
|
|
171
|
+
npm run build
|
|
172
|
+
npm test
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
## License
|
|
176
|
+
|
|
177
|
+
MIT — see [LICENSE](./LICENSE).
|
package/dist/config.js
ADDED
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
import { createHash } from 'node:crypto';
|
|
2
|
+
import { constants as fsConstants, readFileSync } from 'node:fs';
|
|
3
|
+
import { access, mkdir } from 'node:fs/promises';
|
|
4
|
+
import { homedir } from 'node:os';
|
|
5
|
+
import { isAbsolute, join, relative, resolve, sep } from 'node:path';
|
|
6
|
+
import { toPosix } from './indexer/scanner.js';
|
|
7
|
+
import { errMsg, log } from './logger.js';
|
|
8
|
+
// Baseline exclude entries. loadConfig() lists these ahead of the
// config-file and environment excludes before de-duplicating.
const DEFAULT_EXCLUDES = [
    'node_modules', '.git', '.codedeep', '__pycache__', '.venv',
    'dist', 'build', 'vendor', '.next', '.nuxt', 'target', '__generated__',
    '*.min.js', '*.bundle.js',
];
// Language ids used when the config file carries no valid `languages`
// string array.
const DEFAULT_LANGUAGES = [
    'typescript',
    'tsx',
    'javascript',
    'python',
    'java',
    'go',
    'rust',
    'swift',
    'kotlin',
    'dart',
    'csharp',
    'php',
    'ruby',
    'cpp',
    'c',
    'objc',
];
// Fallback for `maxFiles` when the config file gives no usable number.
const DEFAULT_MAX_FILES = 100000;
// Fallback for `maxFileSize`: 1024 * 1024 (presumably bytes — confirm
// against the scanner that consumes it).
const DEFAULT_MAX_FILE_SIZE = 1024 * 1024;
// Fallback git history window, in days.
const DEFAULT_GIT_WINDOW = 180;
|
|
28
|
+
// Reads `<root>/.codedeep/config.json` and returns the parsed object.
//
// Never throws for a bad file: a missing file yields `{}` silently, while
// an unreadable file, invalid JSON, or a non-object top-level value
// (null, array, primitive) yields `{}` after a warning. Field-level
// validation is left to the caller.
function readFileConfig(root) {
    const configPath = join(root, '.codedeep', 'config.json');
    // Every degraded outcome shares the same "warn, then use defaults" shape.
    const useDefaults = (reason) => {
        log.warn(`config: ${reason}; using defaults`);
        return {};
    };
    let text;
    try {
        text = readFileSync(configPath, 'utf8');
    }
    catch (err) {
        // Having no config file is the normal case, so it is not reported.
        return err.code === 'ENOENT'
            ? {}
            : useDefaults(`failed to read ${configPath}: ${err.message}`);
    }
    let value;
    try {
        value = JSON.parse(text);
    }
    catch (err) {
        return useDefaults(`failed to parse ${configPath}: ${err.message}`);
    }
    const isPlainObject = value !== null && typeof value === 'object' && !Array.isArray(value);
    return isPlainObject ? value : useDefaults(`${configPath} is not a JSON object`);
}
|
|
54
|
+
// Returns `value` itself when it is an array made up solely of strings
// (an empty array qualifies); otherwise returns undefined.
function asStringArray(value) {
    const allStrings = Array.isArray(value) && value.every((item) => typeof item === 'string');
    return allStrings ? value : undefined;
}
|
|
61
|
+
// Accepts a finite number >= 0 and returns it rounded down to an integer.
// Anything else (non-numbers, NaN, ±Infinity, negatives) gives undefined.
function asNonNegativeInt(value) {
    const usable = typeof value === 'number' && Number.isFinite(value) && value >= 0;
    return usable ? Math.floor(value) : undefined;
}
|
|
66
|
+
// Accepts a finite number >= 1 and returns it rounded down to an integer;
// anything else gives undefined. Used for the git window, where a value
// of 0 days would produce an empty analysis that is still marked fresh —
// unlike maxFiles/maxFileSize, for which 0 is a valid setting.
function asPositiveInt(value) {
    const usable = typeof value === 'number' && Number.isFinite(value) && value >= 1;
    return usable ? Math.floor(value) : undefined;
}
|
|
73
|
+
// Returns the trimmed form of `value` when it is a string with at least
// one non-whitespace character; otherwise returns undefined.
function asNonBlankString(value) {
    if (typeof value === 'string') {
        const stripped = value.trim();
        if (stripped !== '')
            return stripped;
    }
    return undefined;
}
|
|
79
|
+
// Passes real booleans through unchanged; every other value (including
// truthy/falsy stand-ins such as 1, 0 or 'true') becomes undefined.
function asBoolean(value) {
    if (value === true || value === false)
        return value;
    return undefined;
}
|
|
82
|
+
// Reads environment variable `name` as a boolean flag.
//
// The value is trimmed and lower-cased first. '0'/'false' give false and
// '1'/'true' give true. An unset or blank variable gives undefined
// silently; any other text gives undefined after a warning, so the caller
// can fall through to the next configuration source.
function parseEnvBool(name) {
    const normalized = process.env[name]?.trim().toLowerCase();
    if (normalized === undefined || normalized === '')
        return undefined;
    switch (normalized) {
        case '0':
        case 'false':
            return false;
        case '1':
        case 'true':
            return true;
        default:
            log.warn(`config: ${name}=${normalized} not recognized; expected 0/1/true/false`);
            return undefined;
    }
}
|
|
93
|
+
// Reads CODEDEEP_GIT_WINDOW as a day count.
//
// An unset or blank variable gives undefined silently. Otherwise the
// trimmed text is converted with Number() and validated by asPositiveInt;
// a value that fails validation is reported with a warning and also gives
// undefined.
function parseEnvGitWindow() {
    const text = process.env.CODEDEEP_GIT_WINDOW?.trim();
    if (!text)
        return undefined;
    const days = asPositiveInt(Number(text));
    if (days !== undefined)
        return days;
    log.warn(`config: CODEDEEP_GIT_WINDOW=${text} not recognized; expected a positive integer (days)`);
    return undefined;
}
|
|
103
|
+
// Splits CODEDEEP_EXCLUDE on commas into a list of exclude patterns.
// Each piece is trimmed and empty pieces are dropped; an unset or empty
// variable yields an empty list.
function parseEnvExclude() {
    const patterns = [];
    for (const piece of (process.env.CODEDEEP_EXCLUDE ?? '').split(',')) {
        const trimmed = piece.trim();
        if (trimmed)
            patterns.push(trimmed);
    }
    return patterns;
}
|
|
112
|
+
// Builds the exclude entries that keep the scanner out of a cache
// directory located inside the project root.
//
// Why: if the cache were scanned, persist() would bump cache/index.json's
// mtime, the next indexChanged() would notice, the cache would be
// re-indexed and written again — a self-feeding loop.
//
// Returns [] when cacheDir is the root itself or lies outside it.
// Otherwise returns two POSIX-style entries: `<rel>`, so walk()'s
// directory pruning triggers, and `<rel>/**`, so file-level matching
// catches children of multi-segment paths that picomatch would not
// auto-expand.
function computeCacheDirExcludes(root, cacheDir) {
    const relPath = relative(root, cacheDir);
    const sameAsRoot = relPath === '';
    // Compare against `..` + separator so a child merely NAMED '..foo'
    // is not mistaken for an escape from the root.
    const escapesRoot = relPath === '..' || relPath.startsWith(`..${sep}`);
    // NOTE(review): an absolute result presumably means no relative route
    // exists (e.g. another drive on Windows) — confirm.
    if (sameAsRoot || escapesRoot || isAbsolute(relPath))
        return [];
    const pattern = toPosix(relPath);
    return [pattern, `${pattern}/**`];
}
|
|
130
|
+
// Builds the frozen runtime configuration for `projectRoot` (defaults to
// the current working directory).
//
// Sources, highest priority first:
//   - cacheDir:           CODEDEEP_CACHE_DIR, config file, `<root>/.codedeep/cache`
//   - watch / gitEnabled: CODEDEEP_WATCH / CODEDEEP_GIT, config file, true
//   - gitWindow:          CODEDEEP_GIT_WINDOW, config file, DEFAULT_GIT_WINDOW
//   - languages / maxFiles / maxFileSize: config file, then the defaults
//   - exclude: union of defaults, config file, CODEDEEP_EXCLUDE and the
//     cache-dir entries, trimmed and de-duplicated in that order
//
// Invalid individual fields quietly fall back to the next source.
// Throws Error when an explicitly configured cacheDir resolves to the
// project root.
export function loadConfig(projectRoot = process.cwd()) {
    const root = resolve(projectRoot);
    const fileCfg = readFileConfig(root);

    const explicitCacheDir = asNonBlankString(process.env.CODEDEEP_CACHE_DIR) ??
        asNonBlankString(fileCfg.cacheDir);
    const cacheDir = resolve(root, explicitCacheDir ?? join(root, '.codedeep', 'cache'));
    // With cacheDir === root no exclude is produced, so <root>/index.json
    // would be admitted as an unknown source and re-indexed on every save
    // (a loop). Other bad inputs merely degrade to defaults; this one
    // corrupts the index, so it fails loudly. The built-in default can
    // never be the root, hence only explicit input is guarded.
    if (explicitCacheDir && relative(root, cacheDir) === '') {
        throw new Error(`cacheDir resolves to the project root (${cacheDir}); ` +
            `set CODEDEEP_CACHE_DIR or .codedeep/config.json "cacheDir" to a subdirectory or external path`);
    }

    // A Set keeps first-insertion order, so the merged list stays in
    // source order while duplicates collapse.
    const patterns = new Set();
    const excludeSources = [
        DEFAULT_EXCLUDES,
        asStringArray(fileCfg.exclude) ?? [],
        parseEnvExclude(),
        computeCacheDirExcludes(root, cacheDir),
    ];
    for (const source of excludeSources) {
        for (const entry of source) {
            const pattern = entry.trim();
            if (pattern)
                patterns.add(pattern);
        }
    }

    const watch = parseEnvBool('CODEDEEP_WATCH') ?? asBoolean(fileCfg.watch) ?? true;
    const gitEnabled = parseEnvBool('CODEDEEP_GIT') ?? asBoolean(fileCfg.gitEnabled) ?? true;
    const gitWindow = parseEnvGitWindow() ?? asPositiveInt(fileCfg.gitWindow) ?? DEFAULT_GIT_WINDOW;

    return Object.freeze({
        projectRoot: root,
        exclude: Object.freeze([...patterns]),
        languages: Object.freeze(asStringArray(fileCfg.languages) ?? [...DEFAULT_LANGUAGES]),
        maxFiles: asNonNegativeInt(fileCfg.maxFiles) ?? DEFAULT_MAX_FILES,
        maxFileSize: asNonNegativeInt(fileCfg.maxFileSize) ?? DEFAULT_MAX_FILE_SIZE,
        cacheDir,
        watch,
        gitEnabled,
        gitWindow,
    });
}
|
|
173
|
+
// Absolute path of the in-project cache directory,
// `<projectRoot>/.codedeep/cache`.
export function defaultCacheDir(projectRoot) {
    const cacheSubPath = join('.codedeep', 'cache');
    return resolve(projectRoot, cacheSubPath);
}
|
|
176
|
+
// Per-project cache location under the user's home directory:
// `~/.cache/codedeep/<id>`, where <id> is the first 16 hex characters of
// the SHA-1 digest of `projectRoot`.
export function fallbackCacheDir(projectRoot) {
    const hasher = createHash('sha1');
    hasher.update(projectRoot);
    const projectId = hasher.digest('hex').substring(0, 16);
    return join(homedir(), '.cache', 'codedeep', projectId);
}
|
|
180
|
+
// Makes sure a usable cache directory exists and resolves to its path.
//
// The configured `config.cacheDir` is created (recursively) and probed
// for write + search permission. If that fails with EACCES, EROFS, EPERM,
// ENOTDIR or EEXIST AND the path is the project default (read-only repo,
// read-only mount, `.codedeep` existing as a regular file), a warning is
// logged and the per-user fallback from fallbackCacheDir() is prepared
// and returned instead.
//
// Rejects with:
//   - the original error, for an explicit CODEDEEP_CACHE_DIR / cacheDir
//     override or any other error code, so a broken override is surfaced
//     rather than ignored;
//   - a wrapping Error (carrying `code` and `cause`) when the fallback
//     directory is not usable either.
export async function resolveCacheDir(config) {
    const { cacheDir, projectRoot } = config;
    // mkdir({recursive:true}) also succeeds on a directory that already
    // exists, so a pre-existing dir with restrictive permissions would
    // slip through. Creating files inside a directory needs both W and X
    // per POSIX; checking W alone would admit modes such as 0o200 / 0o600
    // where open(O_CREAT) still fails.
    const ensureUsableDir = async (dir) => {
        await mkdir(dir, { recursive: true });
        await access(dir, fsConstants.W_OK | fsConstants.X_OK);
    };
    const usesDefaultPath = cacheDir === defaultCacheDir(projectRoot);
    try {
        await ensureUsableDir(cacheDir);
        return cacheDir;
    }
    catch (err) {
        const code = err?.code;
        // ENOTDIR/EEXIST cover filesystem conflicts on the default path
        // (e.g. `.codedeep` is a regular file).
        const recoverable = ['EACCES', 'EROFS', 'EPERM', 'ENOTDIR', 'EEXIST'].includes(code);
        if (!(recoverable && usesDefaultPath))
            throw err;
        const fallback = fallbackCacheDir(projectRoot);
        log.warn(`config: ${cacheDir} not usable (${code}); falling back to ${fallback}`);
        try {
            await ensureUsableDir(fallback);
            return fallback;
        }
        catch (fallbackErr) {
            const message = `Cache fallback ${fallback} is also not writable: ${errMsg(fallbackErr)}. ` +
                `Set CODEDEEP_CACHE_DIR to a writable directory.`;
            throw Object.assign(new Error(message), {
                code: fallbackErr?.code,
                cause: fallbackErr,
            });
        }
    }
}
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
// The bulk git-log pass: one parse of `git log --name-only` output builds
|
|
2
|
+
// BOTH per-file commit counts (hotspots / commitFrequency) and the
|
|
3
|
+
// co-change pair matrix. Pure functions, no I/O — the GitService owns the
|
|
4
|
+
// subprocess; tests feed canned stdout strings.
|
|
5
|
+
//
|
|
6
|
+
// Output format contract (verified against real git):
|
|
7
|
+
// --pretty=format:%x00%ct --name-only
|
|
8
|
+
// emits, per commit, a NUL byte, the committer epoch-seconds, a newline,
|
|
9
|
+
// then one path per line (blank-line separated from the next record):
|
|
10
|
+
// \0<epoch>\n<path>\n<path>\n\n\0<epoch>\n<path>\n
|
|
11
|
+
// NUL can never appear in %ct output or in a path line, so splitting
|
|
12
|
+
// stdout on NUL yields exactly one chunk per commit. We deliberately do
|
|
13
|
+
// NOT print %H or %s: this pass needs only boundaries and timestamps,
|
|
14
|
+
// and omitting the subject removes the entire weird-subject parsing
|
|
15
|
+
// class. core.quotepath=false (prepended by GitRunner) keeps non-ASCII
|
|
16
|
+
// paths literal; a pathological newline-containing filename just becomes
|
|
17
|
+
// a non-matching line that the membership filters discard.
|
|
18
|
+
import { posix } from 'node:path';
|
|
19
|
+
import { log } from '../logger.js';
|
|
20
|
+
// Upper bound on analysed commits. Passed to git as --max-count and
// enforced again while parsing, so a huge repository cannot blow up the
// pair map whatever git returns.
export const GIT_COMMIT_CAP = 10000;
// Commits that touch more files than this (vendored-dependency updates,
// mass renames, formatting sweeps) are ignored completely — for pairs AND
// for counts. Feeding numerators and denominators from the same filtered
// stream keeps confidence <= 1 as an invariant.
export const MAX_FILES_PER_COMMIT = 30;
// Minimum number of shared commits before a pair counts as coupling.
export const MIN_SHARED_COMMITS = 3;
// Each file keeps only this many of its strongest partners, which bounds
// the size of the persisted cache.
export const COCHANGES_PER_FILE_CAP = 20;
// Tools display the top 10; the surplus feeds the search boost and
// survives files leaving the index between analyses.
export const HOTSPOTS_KEPT = 50;
// Limits transient memory during pair accumulation (worst case ~40 MB).
// git log lists newest commits first, so by the time the cap is reached
// the most recent (most relevant) pairs are already present: NEW keys are
// no longer inserted, existing ones keep being incremented.
const PAIR_MAP_CAP = 250000;
|
|
41
|
+
// Builds the `git log` argument list for the bulk history pass.
//
//   windowDays - how many days of history to request, counted back from `now`.
//   now        - reference time in epoch milliseconds (defaults to Date.now()).
//
// Returns the argv array (without the `git` executable itself). Output is
// one record per commit: a NUL byte, the committer epoch-seconds, then the
// touched paths, one per line.
export function buildLogArgs(windowDays, now = Date.now()) {
    const MS_PER_DAY = 24 * 60 * 60 * 1000;
    const windowStart = new Date(now - windowDays * MS_PER_DAY);
    const args = ['log', '--no-merges'];
    // Rename detection is heuristic and varies between git versions. With
    // it switched off a rename is a plain delete + add: the old path stops
    // accruing commits and the new path starts fresh, deterministically.
    args.push('--no-renames');
    args.push('--name-only', `--max-count=${GIT_COMMIT_CAP}`, `--since=${windowStart.toISOString()}`, '--pretty=format:%x00%ct');
    return args;
}
|
|
56
|
+
// Parses the NUL-delimited `git log --name-only` output requested by
// buildLogArgs() into per-file commit counts, co-change records and a
// hotspot ranking. Pure: the only side effect is one debug log line when
// the pair map reaches its cap.
//
// Parameters:
//   stdout     - raw git output, one `\0<epoch-seconds>\n<path>\n…` record
//                per commit, newest commit first.
//   isIndexed  - predicate telling whether a (project-relative) path is a
//                key of the code index. It is also handed to Array#map, so
//                it additionally receives the index and array arguments.
//   pathPrefix - location of the project root relative to the git toplevel
//                when the root is a SUBDIRECTORY of it (monorepo packages);
//                '' (the default) when the project root IS the toplevel.
//                A trailing '/' is optional.
//
// Path rewriting: git emits repo-relative paths ('packages/app/src/x.ts')
// while index keys are project-relative ('src/x.ts'). Paths under the
// prefix are stripped to index-relative form; paths OUTSIDE it are made
// project-relative too ('../'-prefixed via posix.relative). Index keys
// never start with '..', so an outside file such as the toplevel
// package.json can never collide with the package's own package.json key
// (which would silently merge counts and fabricate co-change pairs).
// Outside paths therefore only ever act as confidence denominators and
// partner values.
//
// Returns { counts, cochanges, hotspots, commitCount }:
//   counts      - Map path -> number of accepted commits touching it.
//   cochanges   - Map indexed path -> records { fileA, fileB,
//                 sharedCommits, confidenceAB, confidenceBA, lastSeen },
//                 strongest first, at most COCHANGES_PER_FILE_CAP each.
//   hotspots    - up to HOTSPOTS_KEPT indexed paths, most-committed first.
//   commitCount - number of commits accepted into the analysis.
export function analyzeLog(stdout, isIndexed, pathPrefix = '') {
    // Normalise the prefix to end in '/'. Without the separator a prefix
    // like 'packages/app' would also match the sibling 'packages/app-old/…'
    // and stripping would leave a leading '/' on every path.
    const prefix = pathPrefix.length > 0 && !pathPrefix.endsWith('/')
        ? `${pathPrefix}/`
        : pathPrefix;
    const counts = new Map();
    const pairs = new Map();
    let commitCount = 0;
    let pairCapWarned = false;
    for (const chunk of stdout.split('\u0000')) {
        if (commitCount >= GIT_COMMIT_CAP)
            break;
        if (chunk.length === 0)
            continue; // leading separator before the first record
        const lines = chunk.split('\n');
        const header = lines[0].trim();
        // Number('') is 0, which would pass the finiteness check and turn a
        // record with a missing timestamp into a commit dated at the epoch.
        const timestampSec = header.length > 0 ? Number(header) : Number.NaN;
        if (!Number.isFinite(timestampSec))
            continue; // garbled record — drop, never throw
        const timestampMs = timestampSec * 1000;
        const files = new Set();
        for (let i = 1; i < lines.length; i++) {
            let path = lines[i].replace(/\r$/, '');
            if (path.length === 0)
                continue;
            if (prefix.length > 0) {
                path = path.startsWith(prefix)
                    ? path.slice(prefix.length)
                    : posix.relative(prefix, path);
            }
            if (path.length > 0)
                files.add(path);
        }
        if (files.size === 0)
            continue; // empty commit
        if (files.size > MAX_FILES_PER_COMMIT)
            continue; // bulk commit: excluded from counts AND pairs
        commitCount++;
        for (const path of files) {
            counts.set(path, (counts.get(path) ?? 0) + 1);
        }
        // Sorting makes the pair key canonical: (a, b) and (b, a) share it.
        const sorted = [...files].sort();
        // Hoisted out of the O(k²) pair loop: per-pair isIndexed calls would
        // re-resolve each path up to k-1 times.
        const indexedFlags = sorted.map(isIndexed);
        for (let i = 0; i < sorted.length; i++) {
            for (let j = i + 1; j < sorted.length; j++) {
                // A pair with no indexed member can never be reported.
                if (!indexedFlags[i] && !indexedFlags[j])
                    continue;
                const key = `${sorted[i]}\u0000${sorted[j]}`;
                const existing = pairs.get(key);
                if (existing) {
                    existing.shared++;
                }
                else if (pairs.size < PAIR_MAP_CAP) {
                    // Newest-first log order: first sighting IS the most recent.
                    pairs.set(key, { shared: 1, lastSeen: timestampMs });
                }
                else if (!pairCapWarned) {
                    pairCapWarned = true;
                    log.debug(`git: co-change pair map hit ${PAIR_MAP_CAP} entries; older pairs ignored`);
                }
            }
        }
    }
    const cochanges = new Map();
    for (const [key, accum] of pairs) {
        if (accum.shared < MIN_SHARED_COMMITS)
            continue;
        const sep = key.indexOf('\u0000');
        const fileA = key.slice(0, sep);
        const fileB = key.slice(sep + 1);
        const commitsA = counts.get(fileA);
        const commitsB = counts.get(fileB);
        if (!commitsA || !commitsB)
            continue; // defensive; both sides were counted
        // One record object is shared by both files' partner lists.
        const record = {
            fileA,
            fileB,
            sharedCommits: accum.shared,
            confidenceAB: accum.shared / commitsA,
            confidenceBA: accum.shared / commitsB,
            lastSeen: accum.lastSeen,
        };
        if (isIndexed(fileA))
            pushTo(cochanges, fileA, record);
        if (isIndexed(fileB))
            pushTo(cochanges, fileB, record);
    }
    for (const [path, list] of cochanges) {
        // Strongest coupling first; partner path breaks ties deterministically.
        list.sort((a, b) => b.sharedCommits - a.sharedCommits ||
            comparePaths(partnerOf(a, path), partnerOf(b, path)));
        if (list.length > COCHANGES_PER_FILE_CAP) {
            cochanges.set(path, list.slice(0, COCHANGES_PER_FILE_CAP));
        }
    }
    const hotspots = [...counts.entries()]
        .filter(([path]) => isIndexed(path))
        .sort((a, b) => b[1] - a[1] || comparePaths(a[0], b[0]))
        .slice(0, HOTSPOTS_KEPT)
        .map(([path]) => path);
    return { counts, cochanges, hotspots, commitCount };
}
|
|
165
|
+
// Given a co-change record and one of its two paths, returns the other
// path. When `selfPath` is not `record.fileA`, `record.fileA` is returned.
export function partnerOf(record, selfPath) {
    const { fileA, fileB } = record;
    return selfPath === fileA ? fileB : fileA;
}
|
|
168
|
+
// Sort comparator built on the `<` / `>` operators: returns -1, 1 or 0.
function comparePaths(a, b) {
    if (a < b)
        return -1;
    if (a > b)
        return 1;
    return 0;
}
|
|
171
|
+
// Appends `value` to the array stored under `key` in `map`, creating a
// one-element array when the key has no (truthy) entry yet.
function pushTo(map, key, value) {
    const bucket = map.get(key);
    if (!bucket) {
        map.set(key, [value]);
        return;
    }
    bucket.push(value);
}
|