@optave/codegraph 1.3.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +109 -40
- package/package.json +5 -5
- package/src/builder.js +52 -8
- package/src/cli.js +127 -1
- package/src/config.js +45 -3
- package/src/constants.js +0 -2
- package/src/cycles.js +2 -2
- package/src/db.js +13 -0
- package/src/export.js +44 -9
- package/src/index.js +21 -0
- package/src/mcp.js +308 -8
- package/src/parser.js +13 -14
- package/src/queries.js +30 -0
- package/src/registry.js +145 -0
- package/src/resolve.js +1 -1
- package/src/structure.js +491 -0
- package/src/watcher.js +2 -2
package/README.md
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
<h1 align="center">codegraph</h1>
|
|
6
6
|
|
|
7
7
|
<p align="center">
|
|
8
|
-
<strong>
|
|
8
|
+
<strong>Always-fresh code intelligence for AI agents — sub-second incremental rebuilds, zero-cost by default, optionally enhanced with your LLM.</strong>
|
|
9
9
|
</p>
|
|
10
10
|
|
|
11
11
|
<p align="center">
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
<a href="https://github.com/optave/codegraph/blob/main/LICENSE"><img src="https://img.shields.io/github/license/optave/codegraph?style=flat-square&logo=opensourceinitiative&logoColor=white" alt="Apache-2.0 License" /></a>
|
|
14
14
|
<a href="https://github.com/optave/codegraph/actions"><img src="https://img.shields.io/github/actions/workflow/status/optave/codegraph/codegraph-impact.yml?style=flat-square&logo=githubactions&logoColor=white&label=CI" alt="CI" /></a>
|
|
15
15
|
<img src="https://img.shields.io/badge/node-%3E%3D20-339933?style=flat-square&logo=node.js&logoColor=white" alt="Node >= 20" />
|
|
16
|
-
<img src="https://img.shields.io/badge/
|
|
16
|
+
<img src="https://img.shields.io/badge/graph-always%20fresh-brightgreen?style=flat-square&logo=shield&logoColor=white" alt="Always Fresh" />
|
|
17
17
|
</p>
|
|
18
18
|
|
|
19
19
|
<p align="center">
|
|
@@ -31,9 +31,35 @@
|
|
|
31
31
|
|
|
32
32
|
---
|
|
33
33
|
|
|
34
|
-
> **
|
|
34
|
+
> **The code graph that keeps up with your commits.**
|
|
35
35
|
>
|
|
36
|
-
> Codegraph
|
|
36
|
+
> Codegraph parses your codebase with [tree-sitter](https://tree-sitter.github.io/) (native Rust or WASM), builds a function-level dependency graph in SQLite, and keeps it current with sub-second incremental rebuilds. Every query runs locally — no API keys, no Docker, no setup. When you want deeper intelligence, bring your own LLM provider and codegraph enhances search and analysis through the same API you already use. Your code only goes where you choose to send it.
|
|
37
|
+
|
|
38
|
+
---
|
|
39
|
+
|
|
40
|
+
## 🔄 Why most code graph tools can't keep up with your commits
|
|
41
|
+
|
|
42
|
+
If you use a code graph with an AI agent, the graph needs to be **current**. A stale graph gives the agent wrong answers — deleted functions still show up, new dependencies are invisible, impact analysis misses the code you just wrote. The graph should rebuild on every commit, ideally on every save.
|
|
43
|
+
|
|
44
|
+
Most tools in this space can't do that:
|
|
45
|
+
|
|
46
|
+
| Problem | Who has it | Why it breaks on every commit |
|
|
47
|
+
|---|---|---|
|
|
48
|
+
| **Full re-index on every change** | code-graph-rag, CodeMCP, axon, autodev-codebase | No file-level change tracking. Change one file → re-parse and re-insert the entire codebase. On a 3,000-file project, that's 30+ seconds per commit minimum |
|
|
49
|
+
| **Cloud API calls baked into the pipeline** | code-graph-rag, autodev-codebase, Claude-code-memory, CodeRAG | Embeddings are generated through cloud APIs (OpenAI, Voyage AI, Gemini). Every rebuild = API round-trips for every function. Slow, expensive, and rate-limited. You can't put this in a commit hook |
|
|
50
|
+
| **Heavy infrastructure that's slow to restart** | code-graph-rag (Memgraph), axon (KuzuDB), badger-graph (Dgraph) | External databases add latency to every write. Bulk-inserting a full graph into Memgraph is not a sub-second operation |
|
|
51
|
+
| **No persistence between runs** | glimpse, pyan, cflow | Re-parse from scratch every time. No database, no delta, no incremental anything |
|
|
52
|
+
|
|
53
|
+
**Codegraph solves this with incremental builds:**
|
|
54
|
+
|
|
55
|
+
1. Every file gets an MD5 hash stored in SQLite
|
|
56
|
+
2. On rebuild, only files whose hash changed get re-parsed
|
|
57
|
+
3. Stale nodes and edges for changed files are cleaned, then re-inserted
|
|
58
|
+
4. Everything else is untouched
|
|
59
|
+
|
|
60
|
+
**Result:** change one file in a 3,000-file project → rebuild completes in **under a second**. Put it in a commit hook, a file watcher, or let your AI agent trigger it. The graph is always current.
|
|
61
|
+
|
|
62
|
+
And because the core pipeline is pure local computation (tree-sitter + SQLite), there are no API calls, no network latency, and no cost. LLM-powered features (semantic search, richer embeddings) are a separate optional layer — they enhance the graph but never block it from being current.
|
|
37
63
|
|
|
38
64
|
---
|
|
39
65
|
|
|
@@ -41,52 +67,53 @@
|
|
|
41
67
|
|
|
42
68
|
<sub>Comparison last verified: February 2026</sub>
|
|
43
69
|
|
|
44
|
-
Most
|
|
70
|
+
Most code graph tools make you choose: **fast local analysis with no AI, or powerful AI features that require full re-indexing through cloud APIs on every change.** Codegraph gives you both — a graph that rebuilds in milliseconds on every commit, with optional LLM enhancement through the provider you're already using.
|
|
45
71
|
|
|
46
72
|
### Feature comparison
|
|
47
73
|
|
|
48
|
-
| Capability | codegraph |
|
|
49
|
-
|
|
50
|
-
| Function-level analysis | **Yes** |
|
|
51
|
-
| Multi-language | **
|
|
52
|
-
| Semantic search | **Yes** |
|
|
53
|
-
| MCP / AI agent support | **Yes** |
|
|
54
|
-
| Git diff impact | **Yes** | — | — | — |
|
|
55
|
-
|
|
|
56
|
-
|
|
|
57
|
-
|
|
|
58
|
-
|
|
|
59
|
-
| Zero config | **Yes** | Yes | — |
|
|
60
|
-
|
|
|
61
|
-
|
|
|
74
|
+
| Capability | codegraph | [code-graph-rag](https://github.com/vitali87/code-graph-rag) | [glimpse](https://github.com/seatedro/glimpse) | [CodeMCP](https://github.com/SimplyLiz/CodeMCP) | [axon](https://github.com/harshkedia177/axon) | [autodev-codebase](https://github.com/anrgct/autodev-codebase) | [arbor](https://github.com/Anandb71/arbor) | [Claude-code-memory](https://github.com/Durafen/Claude-code-memory) |
|
|
75
|
+
|---|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|
|
|
76
|
+
| Function-level analysis | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** | — |
|
|
77
|
+
| Multi-language | **11** | Multi | Multi | SCIP langs | Few | **40+** | Multi | — |
|
|
78
|
+
| Semantic search | **Yes** | **Yes** | — | — | — | **Yes** | **Yes** | **Yes** |
|
|
79
|
+
| MCP / AI agent support | **Yes** | **Yes** | — | **Yes** | — | — | **Yes** | **Yes** |
|
|
80
|
+
| Git diff impact | **Yes** | — | — | — | **Yes** | — | — | — |
|
|
81
|
+
| Watch mode | **Yes** | — | — | — | — | — | — | — |
|
|
82
|
+
| CI workflow included | **Yes** | — | — | — | — | — | — | — |
|
|
83
|
+
| Cycle detection | **Yes** | — | — | — | **Yes** | — | — | — |
|
|
84
|
+
| Incremental rebuilds | **Yes** | — | — | — | — | — | — | — |
|
|
85
|
+
| Zero config | **Yes** | — | **Yes** | — | — | — | **Yes** | — |
|
|
86
|
+
| LLM-optional (works without API keys) | **Yes** | — | **Yes** | **Yes** | **Yes** | — | **Yes** | — |
|
|
87
|
+
| Open source | **Yes** | Yes | Yes | Custom | — | — | Yes | — |
|
|
62
88
|
|
|
63
89
|
### What makes codegraph different
|
|
64
90
|
|
|
65
91
|
| | Differentiator | In practice |
|
|
66
92
|
|---|---|---|
|
|
93
|
+
| **⚡** | **Always-fresh graph** | Sub-second incremental rebuilds via file-hash tracking. Run on every commit, every save, in watch mode — the graph is never stale. Competitors re-index everything from scratch |
|
|
94
|
+
| **🔓** | **Zero-cost core, LLM-enhanced when you want** | Full graph analysis with no API keys, no accounts, no cost. Optionally bring your own LLM provider for richer embeddings and AI-powered search — your code only goes to the provider you already chose |
|
|
67
95
|
| **🔬** | **Function-level, not just files** | Traces `handleAuth()` → `validateToken()` → `decryptJWT()` and shows 14 callers across 9 files break if `decryptJWT` changes |
|
|
68
|
-
|
|
|
69
|
-
|
|
|
96
|
+
| **🤖** | **Built for AI agents** | 13-tool [MCP server](https://modelcontextprotocol.io/) — AI assistants query your graph directly. Single-repo by default, so your code doesn't leak to other projects |
|
|
97
|
+
| **🌐** | **Multi-language, one CLI** | JS/TS + Python + Go + Rust + Java + C# + PHP + Ruby + HCL in a single graph — no juggling Madge, pyan, and cflow |
|
|
70
98
|
| **💥** | **Git diff impact** | `codegraph diff-impact` shows changed functions, their callers, and full blast radius — ships with a GitHub Actions workflow |
|
|
71
|
-
|
|
|
72
|
-
| **⚡** | **Build once, query instantly** | SQLite-backed — build in ~30s, every query under 100ms. Native Rust engine with WASM fallback. Most competitors re-parse every run |
|
|
73
|
-
| **🧠** | **Semantic search** | `codegraph search "handle auth"` uses local embeddings — multi-query with RRF ranking via `"auth; token; JWT"` |
|
|
99
|
+
| **🧠** | **Semantic search** | Local embeddings by default, LLM-powered embeddings when opted in — multi-query with RRF ranking via `"auth; token; JWT"` |
|
|
74
100
|
|
|
75
101
|
### How other tools compare
|
|
76
102
|
|
|
77
|
-
|
|
103
|
+
The key question is: **can you rebuild your graph on every commit in a large codebase without it costing money or taking minutes?** Most tools in this space either re-index everything from scratch (slow), require cloud API calls for core features (costly), or both. Codegraph's incremental builds keep the graph current in milliseconds — and the core pipeline needs no API keys at all. LLM-powered features are opt-in, using whichever provider you already work with.
|
|
78
104
|
|
|
79
|
-
| Tool | What it does well |
|
|
105
|
+
| Tool | What it does well | The tradeoff |
|
|
80
106
|
|---|---|---|
|
|
107
|
+
| [code-graph-rag](https://github.com/vitali87/code-graph-rag) | Graph RAG with Memgraph, multi-provider AI, semantic search, code editing via AST | No incremental rebuilds — full re-index + re-embed through cloud APIs on every change. Requires Docker |
|
|
108
|
+
| [glimpse](https://github.com/seatedro/glimpse) | Clipboard-first LLM context tool, call graphs, LSP resolution, token counting | Context-packing tool, not a dependency graph — no persistence, no MCP, no incremental updates |
|
|
109
|
+
| [CodeMCP](https://github.com/SimplyLiz/CodeMCP) | SCIP compiler-grade indexing, compound operations (83% token savings), secret scanning | No incremental builds. Custom license, requires SCIP toolchains per language |
|
|
110
|
+
| [axon](https://github.com/harshkedia177/axon) | 11-phase pipeline, KuzuDB, community detection, dead code, change coupling | Full pipeline re-run on changes. No license, Python-only, no MCP |
|
|
111
|
+
| [autodev-codebase](https://github.com/anrgct/autodev-codebase) | 40+ languages, interactive Cytoscape.js visualization, LLM reranking | Re-embeds through cloud APIs on changes. No license, complex setup |
|
|
112
|
+
| [arbor](https://github.com/Anandb71/arbor) | Native GUI, confidence scoring, architectural role classification, fuzzy search | GUI-focused — no CLI pipeline, no watch mode, no CI integration |
|
|
113
|
+
| [Claude-code-memory](https://github.com/Durafen/Claude-code-memory) | Persistent codebase memory for Claude Code, Memory Guard quality gate | Requires Voyage AI (cloud) + Qdrant (Docker) for core features |
|
|
81
114
|
| [Madge](https://github.com/pahen/madge) | Simple file-level JS/TS dependency graphs | No function-level analysis, no impact tracing, JS/TS only |
|
|
82
115
|
| [dependency-cruiser](https://github.com/sverweij/dependency-cruiser) | Architectural rule validation for JS/TS | Module-level only (function-level explicitly out of scope), requires config |
|
|
83
|
-
| [Skott](https://github.com/antoine-music/skott) | Module graph with unused code detection | File-level only, JS/TS only, no persistent database |
|
|
84
116
|
| [Nx graph](https://nx.dev/) | Monorepo project-level dependency graph | Requires Nx workspace, project-level only (not file or function) |
|
|
85
|
-
| [Sourcetrail](https://github.com/CoatiSoftware/Sourcetrail) | Rich GUI with symbol-level graphs | Archived/discontinued (2021), no JS/TS, no CLI |
|
|
86
|
-
| [Sourcegraph](https://sourcegraph.com/) | Enterprise code search and navigation | Cloud/SaaS — code sent to servers, $19+/user/mo, no longer open source |
|
|
87
|
-
| [CodeSee](https://www.codesee.io/) | Visual codebase maps | Cloud-based — code leaves your machine, acquired by GitKraken |
|
|
88
|
-
| [Understand](https://scitools.com/) | Deep multi-language static analysis | $100+/month per seat, proprietary, GUI-only, no CI or AI integration |
|
|
89
|
-
| [Snyk Code](https://snyk.io/) | AI-powered security scanning | Cloud-based — code sent to Snyk servers for analysis, not a dependency graph tool |
|
|
90
117
|
| [pyan](https://github.com/Technologicat/pyan) / [cflow](https://www.gnu.org/software/cflow/) | Function-level call graphs | Single-language each (Python / C only), no persistence, no queries |
|
|
91
118
|
|
|
92
119
|
---
|
|
@@ -127,8 +154,8 @@ codegraph deps src/index.ts # file-level import/export map
|
|
|
127
154
|
| 📤 | **Export** | DOT (Graphviz), Mermaid, and JSON graph export |
|
|
128
155
|
| 🧠 | **Semantic search** | Embeddings-powered natural language search with multi-query RRF ranking |
|
|
129
156
|
| 👀 | **Watch mode** | Incrementally update the graph as files change |
|
|
130
|
-
| 🤖 | **MCP server** |
|
|
131
|
-
| 🔒 | **
|
|
157
|
+
| 🤖 | **MCP server** | 13-tool MCP server for AI assistants; single-repo by default, opt-in multi-repo |
|
|
158
|
+
| 🔒 | **Your code, your choice** | Zero-cost core with no API keys. Optionally enhance with your LLM provider — your code only goes where you send it |
|
|
132
159
|
|
|
133
160
|
## 📦 Commands
|
|
134
161
|
|
|
@@ -212,12 +239,30 @@ A single trailing semicolon is ignored (falls back to single-query mode). The `-
|
|
|
212
239
|
|
|
213
240
|
The model used during `embed` is stored in the database, so `search` auto-detects it — no need to pass `--model` when searching.
|
|
214
241
|
|
|
242
|
+
### Multi-Repo Registry
|
|
243
|
+
|
|
244
|
+
Manage a global registry of codegraph-enabled projects. The registry stores paths to your built graphs so the MCP server can query them when multi-repo mode is enabled.
|
|
245
|
+
|
|
246
|
+
```bash
|
|
247
|
+
codegraph registry list # List all registered repos
|
|
248
|
+
codegraph registry list --json # JSON output
|
|
249
|
+
codegraph registry add <dir> # Register a project directory
|
|
250
|
+
codegraph registry add <dir> -n my-name # Custom name
|
|
251
|
+
codegraph registry remove <name> # Unregister
|
|
252
|
+
```
|
|
253
|
+
|
|
254
|
+
`codegraph build` auto-registers the project — no manual setup needed.
|
|
255
|
+
|
|
215
256
|
### AI Integration
|
|
216
257
|
|
|
217
258
|
```bash
|
|
218
|
-
codegraph mcp # Start MCP server
|
|
259
|
+
codegraph mcp # Start MCP server (single-repo, current project only)
|
|
260
|
+
codegraph mcp --multi-repo # Enable access to all registered repos
|
|
261
|
+
codegraph mcp --repos a,b # Restrict to specific repos (implies --multi-repo)
|
|
219
262
|
```
|
|
220
263
|
|
|
264
|
+
By default, the MCP server exposes only the local project's graph. AI agents cannot access other repositories unless you explicitly opt in with `--multi-repo` or `--repos`.
|
|
265
|
+
|
|
221
266
|
### Common Flags
|
|
222
267
|
|
|
223
268
|
| Flag | Description |
|
|
@@ -228,7 +273,7 @@ codegraph mcp # Start MCP server for AI assistants
|
|
|
228
273
|
| `-j, --json` | Output as JSON |
|
|
229
274
|
| `-v, --verbose` | Enable debug output |
|
|
230
275
|
| `--engine <engine>` | Parser engine: `native`, `wasm`, or `auto` (default: `auto`) |
|
|
231
|
-
| `-k, --kind <kind>` | Filter by kind: `function`, `method`, `class` (search) |
|
|
276
|
+
| `-k, --kind <kind>` | Filter by kind: `function`, `method`, `class`, `struct`, `enum`, `trait`, `record`, `module` (search) |
|
|
232
277
|
| `--file <pattern>` | Filter by file path pattern (search) |
|
|
233
278
|
| `--rrf-k <n>` | RRF smoothing constant for multi-query search (default 60) |
|
|
234
279
|
|
|
@@ -309,12 +354,18 @@ Benchmarked on a ~3,200-file TypeScript project:
|
|
|
309
354
|
|
|
310
355
|
### MCP Server
|
|
311
356
|
|
|
312
|
-
Codegraph includes a built-in [Model Context Protocol](https://modelcontextprotocol.io/) server, so AI assistants can query your dependency graph directly:
|
|
357
|
+
Codegraph includes a built-in [Model Context Protocol](https://modelcontextprotocol.io/) server with 13 tools, so AI assistants can query your dependency graph directly:
|
|
313
358
|
|
|
314
359
|
```bash
|
|
315
|
-
codegraph mcp
|
|
360
|
+
codegraph mcp # Single-repo mode (default) — only local project
|
|
361
|
+
codegraph mcp --multi-repo # Multi-repo — all registered repos accessible
|
|
362
|
+
codegraph mcp --repos a,b # Multi-repo with allowlist
|
|
316
363
|
```
|
|
317
364
|
|
|
365
|
+
**Single-repo mode (default):** Tools operate only on the local `.codegraph/graph.db`. The `repo` parameter and `list_repos` tool are not exposed to the AI agent.
|
|
366
|
+
|
|
367
|
+
**Multi-repo mode (`--multi-repo`):** All tools gain an optional `repo` parameter to target any registered repository, and `list_repos` becomes available. Use `--repos` to restrict which repos the agent can access.
|
|
368
|
+
|
|
318
369
|
### CLAUDE.md / Agent Instructions
|
|
319
370
|
|
|
320
371
|
Add this to your project's `CLAUDE.md` to help AI agents use codegraph:
|
|
@@ -366,6 +417,7 @@ See **[docs/recommended-practices.md](docs/recommended-practices.md)** for integ
|
|
|
366
417
|
- **CI/CD** — PR impact comments, threshold gates, graph caching
|
|
367
418
|
- **AI agents** — MCP server, CLAUDE.md templates, Claude Code hooks
|
|
368
419
|
- **Developer workflow** — watch mode, explore-before-you-edit, semantic search
|
|
420
|
+
- **Secure credentials** — `apiKeyCommand` with 1Password, Bitwarden, Vault, macOS Keychain, `pass`
|
|
369
421
|
|
|
370
422
|
## 🔁 CI / GitHub Actions
|
|
371
423
|
|
|
@@ -395,6 +447,23 @@ Create a `.codegraphrc.json` in your project root to customize behavior:
|
|
|
395
447
|
}
|
|
396
448
|
```
|
|
397
449
|
|
|
450
|
+
### LLM credentials
|
|
451
|
+
|
|
452
|
+
Codegraph supports an `apiKeyCommand` field for secure credential management. Instead of storing API keys in config files or environment variables, you can have codegraph run a secret manager's CLI at runtime:
|
|
453
|
+
|
|
454
|
+
```json
|
|
455
|
+
{
|
|
456
|
+
"llm": {
|
|
457
|
+
"provider": "openai",
|
|
458
|
+
"apiKeyCommand": "op read op://vault/openai/api-key"
|
|
459
|
+
}
|
|
460
|
+
}
|
|
461
|
+
```
|
|
462
|
+
|
|
463
|
+
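The command is split on whitespace and executed directly with `execFileSync` — no shell is involved, so there is no shell-injection risk, but shell features such as quoting, pipes, and variable expansion are not available. Priority: **command output > `CODEGRAPH_LLM_API_KEY` env var > file config**. On failure, codegraph warns and falls back to the next source.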
|
|
464
|
+
|
|
465
|
+
Works with any secret manager: 1Password CLI (`op`), Bitwarden (`bw`), `pass`, HashiCorp Vault, macOS Keychain (`security`), AWS Secrets Manager, etc.
|
|
466
|
+
|
|
398
467
|
## 📖 Programmatic API
|
|
399
468
|
|
|
400
469
|
Codegraph also exports a full API for use in your own tools:
|
|
@@ -449,7 +518,7 @@ const { results: fused } = await multiSearchData(
|
|
|
449
518
|
See **[ROADMAP.md](ROADMAP.md)** for the full development roadmap. Current plan:
|
|
450
519
|
|
|
451
520
|
1. ~~**Rust Core**~~ — **Complete** (v1.3.0) — native tree-sitter parsing via napi-rs, parallel multi-core parsing, incremental re-parsing, import resolution & cycle detection in Rust
|
|
452
|
-
2.
|
|
521
|
+
2. ~~**Foundation Hardening**~~ — **Complete** (v1.4.0) — parser registry, 12-tool MCP server with multi-repo support, test coverage 62%→75%, `apiKeyCommand` secret resolution, global repo registry
|
|
453
522
|
3. **Intelligent Embeddings** — LLM-generated descriptions, hybrid search
|
|
454
523
|
4. **Natural Language Queries** — `codegraph ask` command, conversational sessions
|
|
455
524
|
5. **Expanded Language Support** — 8 new languages (12 → 20)
|
|
@@ -476,5 +545,5 @@ Looking to add a new language? Check out **[Adding a New Language](docs/adding-a
|
|
|
476
545
|
---
|
|
477
546
|
|
|
478
547
|
<p align="center">
|
|
479
|
-
<sub>Built with <a href="https://tree-sitter.github.io/">tree-sitter</a> and <a href="https://github.com/WiseLibs/better-sqlite3">better-sqlite3</a>.
|
|
548
|
+
<sub>Built with <a href="https://tree-sitter.github.io/">tree-sitter</a> and <a href="https://github.com/WiseLibs/better-sqlite3">better-sqlite3</a>. Your code only goes where you choose to send it.</sub>
|
|
480
549
|
</p>
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@optave/codegraph",
|
|
3
|
-
"version": "
|
|
3
|
+
"version": "2.0.0",
|
|
4
4
|
"description": "Local code graph CLI — parse codebases with tree-sitter, build dependency graphs, query them",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "src/index.js",
|
|
@@ -61,10 +61,10 @@
|
|
|
61
61
|
"optionalDependencies": {
|
|
62
62
|
"@huggingface/transformers": "^3.8.1",
|
|
63
63
|
"@modelcontextprotocol/sdk": "^1.0.0",
|
|
64
|
-
"@optave/codegraph-darwin-arm64": "
|
|
65
|
-
"@optave/codegraph-darwin-x64": "
|
|
66
|
-
"@optave/codegraph-linux-x64-gnu": "
|
|
67
|
-
"@optave/codegraph-win32-x64-msvc": "
|
|
64
|
+
"@optave/codegraph-darwin-arm64": "2.0.0",
|
|
65
|
+
"@optave/codegraph-darwin-x64": "2.0.0",
|
|
66
|
+
"@optave/codegraph-linux-x64-gnu": "2.0.0",
|
|
67
|
+
"@optave/codegraph-win32-x64-msvc": "2.0.0"
|
|
68
68
|
},
|
|
69
69
|
"devDependencies": {
|
|
70
70
|
"@biomejs/biome": "^2.4.4",
|
package/src/builder.js
CHANGED
|
@@ -4,24 +4,26 @@ import path from 'node:path';
|
|
|
4
4
|
import { loadConfig } from './config.js';
|
|
5
5
|
import { EXTENSIONS, IGNORE_DIRS, normalizePath } from './constants.js';
|
|
6
6
|
import { initSchema, openDb } from './db.js';
|
|
7
|
-
import { warn } from './logger.js';
|
|
7
|
+
import { debug, warn } from './logger.js';
|
|
8
8
|
import { getActiveEngine, parseFilesAuto } from './parser.js';
|
|
9
9
|
import { computeConfidence, resolveImportPath, resolveImportsBatch } from './resolve.js';
|
|
10
10
|
|
|
11
11
|
export { resolveImportPath } from './resolve.js';
|
|
12
12
|
|
|
13
|
-
export function collectFiles(dir, files = [], config = {}) {
|
|
13
|
+
export function collectFiles(dir, files = [], config = {}, directories = null) {
|
|
14
|
+
const trackDirs = directories !== null;
|
|
14
15
|
let entries;
|
|
15
16
|
try {
|
|
16
17
|
entries = fs.readdirSync(dir, { withFileTypes: true });
|
|
17
18
|
} catch (err) {
|
|
18
19
|
warn(`Cannot read directory ${dir}: ${err.message}`);
|
|
19
|
-
return files;
|
|
20
|
+
return trackDirs ? { files, directories } : files;
|
|
20
21
|
}
|
|
21
22
|
|
|
22
23
|
// Merge config ignoreDirs with defaults
|
|
23
24
|
const extraIgnore = config.ignoreDirs ? new Set(config.ignoreDirs) : null;
|
|
24
25
|
|
|
26
|
+
let hasFiles = false;
|
|
25
27
|
for (const entry of entries) {
|
|
26
28
|
if (entry.name.startsWith('.') && entry.name !== '.') {
|
|
27
29
|
if (IGNORE_DIRS.has(entry.name)) continue;
|
|
@@ -32,12 +34,16 @@ export function collectFiles(dir, files = [], config = {}) {
|
|
|
32
34
|
|
|
33
35
|
const full = path.join(dir, entry.name);
|
|
34
36
|
if (entry.isDirectory()) {
|
|
35
|
-
collectFiles(full, files, config);
|
|
37
|
+
collectFiles(full, files, config, directories);
|
|
36
38
|
} else if (EXTENSIONS.has(path.extname(entry.name))) {
|
|
37
39
|
files.push(full);
|
|
40
|
+
hasFiles = true;
|
|
38
41
|
}
|
|
39
42
|
}
|
|
40
|
-
|
|
43
|
+
if (trackDirs && hasFiles) {
|
|
44
|
+
directories.add(dir);
|
|
45
|
+
}
|
|
46
|
+
return trackDirs ? { files, directories } : files;
|
|
41
47
|
}
|
|
42
48
|
|
|
43
49
|
export function loadPathAliases(rootDir) {
|
|
@@ -163,7 +169,9 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
163
169
|
);
|
|
164
170
|
}
|
|
165
171
|
|
|
166
|
-
const
|
|
172
|
+
const collected = collectFiles(rootDir, [], config, new Set());
|
|
173
|
+
const files = collected.files;
|
|
174
|
+
const discoveredDirs = collected.directories;
|
|
167
175
|
console.log(`Found ${files.length} files to parse`);
|
|
168
176
|
|
|
169
177
|
// Check for incremental build
|
|
@@ -179,23 +187,28 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
179
187
|
|
|
180
188
|
if (isFullBuild) {
|
|
181
189
|
db.exec(
|
|
182
|
-
'PRAGMA foreign_keys = OFF; DELETE FROM edges; DELETE FROM nodes; PRAGMA foreign_keys = ON;',
|
|
190
|
+
'PRAGMA foreign_keys = OFF; DELETE FROM node_metrics; DELETE FROM edges; DELETE FROM nodes; PRAGMA foreign_keys = ON;',
|
|
183
191
|
);
|
|
184
192
|
} else {
|
|
185
193
|
console.log(`Incremental: ${changed.length} changed, ${removed.length} removed`);
|
|
186
|
-
// Remove
|
|
194
|
+
// Remove metrics/edges/nodes for changed and removed files
|
|
187
195
|
const deleteNodesForFile = db.prepare('DELETE FROM nodes WHERE file = ?');
|
|
188
196
|
const deleteEdgesForFile = db.prepare(`
|
|
189
197
|
DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = @f)
|
|
190
198
|
OR target_id IN (SELECT id FROM nodes WHERE file = @f)
|
|
191
199
|
`);
|
|
200
|
+
const deleteMetricsForFile = db.prepare(
|
|
201
|
+
'DELETE FROM node_metrics WHERE node_id IN (SELECT id FROM nodes WHERE file = ?)',
|
|
202
|
+
);
|
|
192
203
|
for (const relPath of removed) {
|
|
193
204
|
deleteEdgesForFile.run({ f: relPath });
|
|
205
|
+
deleteMetricsForFile.run(relPath);
|
|
194
206
|
deleteNodesForFile.run(relPath);
|
|
195
207
|
}
|
|
196
208
|
for (const item of changed) {
|
|
197
209
|
const relPath = item.relPath || normalizePath(path.relative(rootDir, item.file));
|
|
198
210
|
deleteEdgesForFile.run({ f: relPath });
|
|
211
|
+
deleteMetricsForFile.run(relPath);
|
|
199
212
|
deleteNodesForFile.run(relPath);
|
|
200
213
|
}
|
|
201
214
|
}
|
|
@@ -539,8 +552,39 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
539
552
|
});
|
|
540
553
|
buildEdges();
|
|
541
554
|
|
|
555
|
+
// Build line count map for structure metrics
|
|
556
|
+
const lineCountMap = new Map();
|
|
557
|
+
for (const [relPath] of fileSymbols) {
|
|
558
|
+
const absPath = path.join(rootDir, relPath);
|
|
559
|
+
try {
|
|
560
|
+
const content = fs.readFileSync(absPath, 'utf-8');
|
|
561
|
+
lineCountMap.set(relPath, content.split('\n').length);
|
|
562
|
+
} catch {
|
|
563
|
+
lineCountMap.set(relPath, 0);
|
|
564
|
+
}
|
|
565
|
+
}
|
|
566
|
+
|
|
567
|
+
// Build directory structure, containment edges, and metrics
|
|
568
|
+
const relDirs = new Set();
|
|
569
|
+
for (const absDir of discoveredDirs) {
|
|
570
|
+
relDirs.add(normalizePath(path.relative(rootDir, absDir)));
|
|
571
|
+
}
|
|
572
|
+
try {
|
|
573
|
+
const { buildStructure } = await import('./structure.js');
|
|
574
|
+
buildStructure(db, fileSymbols, rootDir, lineCountMap, relDirs);
|
|
575
|
+
} catch (err) {
|
|
576
|
+
debug(`Structure analysis failed: ${err.message}`);
|
|
577
|
+
}
|
|
578
|
+
|
|
542
579
|
const nodeCount = db.prepare('SELECT COUNT(*) as c FROM nodes').get().c;
|
|
543
580
|
console.log(`Graph built: ${nodeCount} nodes, ${edgeCount} edges`);
|
|
544
581
|
console.log(`Stored in ${dbPath}`);
|
|
545
582
|
db.close();
|
|
583
|
+
|
|
584
|
+
try {
|
|
585
|
+
const { registerRepo } = await import('./registry.js');
|
|
586
|
+
registerRepo(rootDir);
|
|
587
|
+
} catch (err) {
|
|
588
|
+
debug(`Auto-registration failed: ${err.message}`);
|
|
589
|
+
}
|
|
546
590
|
}
|
package/src/cli.js
CHANGED
|
@@ -19,6 +19,13 @@ import {
|
|
|
19
19
|
moduleMap,
|
|
20
20
|
queryName,
|
|
21
21
|
} from './queries.js';
|
|
22
|
+
import {
|
|
23
|
+
listRepos,
|
|
24
|
+
pruneRegistry,
|
|
25
|
+
REGISTRY_PATH,
|
|
26
|
+
registerRepo,
|
|
27
|
+
unregisterRepo,
|
|
28
|
+
} from './registry.js';
|
|
22
29
|
import { watchProject } from './watcher.js';
|
|
23
30
|
|
|
24
31
|
const program = new Command();
|
|
@@ -186,9 +193,81 @@ program
|
|
|
186
193
|
.command('mcp')
|
|
187
194
|
.description('Start MCP (Model Context Protocol) server for AI assistant integration')
|
|
188
195
|
.option('-d, --db <path>', 'Path to graph.db')
|
|
196
|
+
.option('--multi-repo', 'Enable access to all registered repositories')
|
|
197
|
+
.option('--repos <names>', 'Comma-separated list of allowed repo names (restricts access)')
|
|
189
198
|
.action(async (opts) => {
|
|
190
199
|
const { startMCPServer } = await import('./mcp.js');
|
|
191
|
-
|
|
200
|
+
const mcpOpts = {};
|
|
201
|
+
mcpOpts.multiRepo = opts.multiRepo || !!opts.repos;
|
|
202
|
+
if (opts.repos) {
|
|
203
|
+
mcpOpts.allowedRepos = opts.repos.split(',').map((s) => s.trim());
|
|
204
|
+
}
|
|
205
|
+
await startMCPServer(opts.db, mcpOpts);
|
|
206
|
+
});
|
|
207
|
+
|
|
208
|
+
// ─── Registry commands ──────────────────────────────────────────────────
|
|
209
|
+
|
|
210
|
+
const registry = program.command('registry').description('Manage the multi-repo project registry');
|
|
211
|
+
|
|
212
|
+
registry
|
|
213
|
+
.command('list')
|
|
214
|
+
.description('List all registered repositories')
|
|
215
|
+
.option('-j, --json', 'Output as JSON')
|
|
216
|
+
.action((opts) => {
|
|
217
|
+
const repos = listRepos();
|
|
218
|
+
if (opts.json) {
|
|
219
|
+
console.log(JSON.stringify(repos, null, 2));
|
|
220
|
+
} else if (repos.length === 0) {
|
|
221
|
+
console.log(`No repositories registered.\nRegistry: ${REGISTRY_PATH}`);
|
|
222
|
+
} else {
|
|
223
|
+
console.log(`Registered repositories (${REGISTRY_PATH}):\n`);
|
|
224
|
+
for (const r of repos) {
|
|
225
|
+
const dbExists = fs.existsSync(r.dbPath);
|
|
226
|
+
const status = dbExists ? '' : ' [DB missing]';
|
|
227
|
+
console.log(` ${r.name}${status}`);
|
|
228
|
+
console.log(` Path: ${r.path}`);
|
|
229
|
+
console.log(` DB: ${r.dbPath}`);
|
|
230
|
+
console.log();
|
|
231
|
+
}
|
|
232
|
+
}
|
|
233
|
+
});
|
|
234
|
+
|
|
235
|
+
registry
|
|
236
|
+
.command('add <dir>')
|
|
237
|
+
.description('Register a project directory')
|
|
238
|
+
.option('-n, --name <name>', 'Custom name (defaults to directory basename)')
|
|
239
|
+
.action((dir, opts) => {
|
|
240
|
+
const absDir = path.resolve(dir);
|
|
241
|
+
const { name, entry } = registerRepo(absDir, opts.name);
|
|
242
|
+
console.log(`Registered "${name}" → ${entry.path}`);
|
|
243
|
+
});
|
|
244
|
+
|
|
245
|
+
registry
|
|
246
|
+
.command('remove <name>')
|
|
247
|
+
.description('Unregister a repository by name')
|
|
248
|
+
.action((name) => {
|
|
249
|
+
const removed = unregisterRepo(name);
|
|
250
|
+
if (removed) {
|
|
251
|
+
console.log(`Removed "${name}" from registry.`);
|
|
252
|
+
} else {
|
|
253
|
+
console.error(`Repository "${name}" not found in registry.`);
|
|
254
|
+
process.exit(1);
|
|
255
|
+
}
|
|
256
|
+
});
|
|
257
|
+
|
|
258
|
+
registry
|
|
259
|
+
.command('prune')
|
|
260
|
+
.description('Remove registry entries whose directories no longer exist')
|
|
261
|
+
.action(() => {
|
|
262
|
+
const pruned = pruneRegistry();
|
|
263
|
+
if (pruned.length === 0) {
|
|
264
|
+
console.log('No stale entries found.');
|
|
265
|
+
} else {
|
|
266
|
+
for (const entry of pruned) {
|
|
267
|
+
console.log(`Pruned "${entry.name}" (${entry.path})`);
|
|
268
|
+
}
|
|
269
|
+
console.log(`\nRemoved ${pruned.length} stale ${pruned.length === 1 ? 'entry' : 'entries'}.`);
|
|
270
|
+
}
|
|
192
271
|
});
|
|
193
272
|
|
|
194
273
|
// ─── Embedding commands ─────────────────────────────────────────────────
|
|
@@ -244,6 +323,53 @@ program
|
|
|
244
323
|
});
|
|
245
324
|
});
|
|
246
325
|
|
|
326
|
+
// `structure [dir]`: loads ./structure.js on demand, gathers the structure
// report for the optional directory, and prints it either as pretty-printed
// JSON (--json) or through formatStructure().
program
  .command('structure [dir]')
  .description(
    'Show project directory structure with hierarchy, cohesion scores, and per-file metrics',
  )
  .option('-d, --db <path>', 'Path to graph.db')
  .option('--depth <n>', 'Max directory depth')
  .option('--sort <metric>', 'Sort by: cohesion | fan-in | fan-out | density | files', 'files')
  .option('-j, --json', 'Output as JSON')
  .action(async (dir, opts) => {
    const { structureData: collect, formatStructure: render } = await import('./structure.js');

    // --depth arrives as a string; it stays undefined when the flag is absent.
    let depth;
    if (opts.depth) {
      depth = Number.parseInt(opts.depth, 10);
    }

    const report = collect(opts.db, { directory: dir, depth, sort: opts.sort });
    const output = opts.json ? JSON.stringify(report, null, 2) : render(report);
    console.log(output);
  });
|
|
348
|
+
|
|
349
|
+
// `hotspots`: loads ./structure.js on demand, queries hotspots for the chosen
// metric and level, and prints the result either as pretty-printed JSON
// (--json) or through formatHotspots().
program
  .command('hotspots')
  .description(
    'Find structural hotspots: files or directories with extreme fan-in, fan-out, or symbol density',
  )
  .option('-d, --db <path>', 'Path to graph.db')
  .option('-n, --limit <number>', 'Number of results', '10')
  .option('--metric <metric>', 'fan-in | fan-out | density | coupling', 'fan-in')
  .option('--level <level>', 'file | directory', 'file')
  .option('-j, --json', 'Output as JSON')
  .action(async (opts) => {
    const { hotspotsData: collect, formatHotspots: render } = await import('./structure.js');

    // --limit is a string (default '10'); convert it before querying.
    const { metric, level } = opts;
    const limit = Number.parseInt(opts.limit, 10);

    const report = collect(opts.db, { metric, level, limit });
    const output = opts.json ? JSON.stringify(report, null, 2) : render(report);
    console.log(output);
  });
|
|
372
|
+
|
|
247
373
|
program
|
|
248
374
|
.command('watch [dir]')
|
|
249
375
|
.description('Watch project for file changes and incrementally update the graph')
|
package/src/config.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
|
+
import { execFileSync } from 'node:child_process';
|
|
1
2
|
import fs from 'node:fs';
|
|
2
3
|
import path from 'node:path';
|
|
3
|
-
import { debug } from './logger.js';
|
|
4
|
+
import { debug, warn } from './logger.js';
|
|
4
5
|
|
|
5
6
|
export const CONFIG_FILES = ['.codegraphrc.json', '.codegraphrc', 'codegraph.config.json'];
|
|
6
7
|
|
|
@@ -18,6 +19,10 @@ export const DEFAULTS = {
|
|
|
18
19
|
defaultDepth: 3,
|
|
19
20
|
defaultLimit: 20,
|
|
20
21
|
},
|
|
22
|
+
embeddings: { model: 'minilm', llmProvider: null },
|
|
23
|
+
llm: { provider: null, model: null, baseUrl: null, apiKey: null, apiKeyCommand: null },
|
|
24
|
+
search: { defaultMinScore: 0.2, rrfK: 60, topK: 15 },
|
|
25
|
+
ci: { failOnCycles: false, impactThreshold: null },
|
|
21
26
|
};
|
|
22
27
|
|
|
23
28
|
/**
|
|
@@ -33,13 +38,50 @@ export function loadConfig(cwd) {
|
|
|
33
38
|
const raw = fs.readFileSync(filePath, 'utf-8');
|
|
34
39
|
const config = JSON.parse(raw);
|
|
35
40
|
debug(`Loaded config from ${filePath}`);
|
|
36
|
-
return mergeConfig(DEFAULTS, config);
|
|
41
|
+
return resolveSecrets(applyEnvOverrides(mergeConfig(DEFAULTS, config)));
|
|
37
42
|
} catch (err) {
|
|
38
43
|
debug(`Failed to parse config ${filePath}: ${err.message}`);
|
|
39
44
|
}
|
|
40
45
|
}
|
|
41
46
|
}
|
|
42
|
-
return { ...DEFAULTS };
|
|
47
|
+
return resolveSecrets(applyEnvOverrides({ ...DEFAULTS }));
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
// Maps environment variable names to the `config.llm` field they override.
const ENV_LLM_MAP = {
  CODEGRAPH_LLM_PROVIDER: 'provider',
  CODEGRAPH_LLM_API_KEY: 'apiKey',
  CODEGRAPH_LLM_MODEL: 'model',
};

/**
 * Apply CODEGRAPH_LLM_* environment variables on top of `config.llm`.
 *
 * A variable takes effect whenever it is defined, including as an empty
 * string. The `llm` section is replaced with an updated copy instead of being
 * written to in place: callers may pass a shallow copy such as
 * `{ ...DEFAULTS }`, whose `llm` object is still the shared default, and
 * in-place writes would leak environment values into it.
 *
 * @param {object} config - Configuration object; its `llm` property is reassigned when an override applies.
 * @returns {object} The same `config` object.
 */
export function applyEnvOverrides(config) {
  const overrides = {};
  for (const [envKey, field] of Object.entries(ENV_LLM_MAP)) {
    const value = process.env[envKey];
    if (value !== undefined) {
      overrides[field] = value;
    }
  }

  if (Object.keys(overrides).length > 0) {
    config.llm = { ...config.llm, ...overrides };
  }
  return config;
}
|
|
64
|
+
|
|
65
|
+
/**
 * Resolve the LLM API key by running `config.llm.apiKeyCommand`, if set.
 *
 * The command string is split on whitespace and executed with execFileSync,
 * i.e. without a shell, so quoting, pipes and arguments containing spaces are
 * not supported. The command is limited to 10 seconds and 64 KiB of output.
 * Non-empty trimmed stdout becomes `llm.apiKey`; a failing command only logs
 * a warning and leaves the existing key in place.
 *
 * The `llm` section is replaced with an updated copy rather than written to
 * in place, so a nested object shared with the caller's defaults (as in a
 * shallow `{ ...DEFAULTS }` copy) never receives the secret.
 *
 * @param {object} config - Configuration object with an `llm` section.
 * @returns {object} The same `config` object.
 */
export function resolveSecrets(config) {
  const cmd = config.llm.apiKeyCommand;
  if (typeof cmd !== 'string' || cmd.trim() === '') return config;

  const [executable, ...args] = cmd.trim().split(/\s+/);
  try {
    const apiKey = execFileSync(executable, args, {
      encoding: 'utf-8',
      timeout: 10_000,
      maxBuffer: 64 * 1024,
      stdio: ['ignore', 'pipe', 'pipe'],
    }).trim();
    if (apiKey) {
      config.llm = { ...config.llm, apiKey };
    }
  } catch (err) {
    // Best effort: keep whatever key was already configured.
    warn(`apiKeyCommand failed: ${err.message}`);
  }
  return config;
}
|
|
44
86
|
|
|
45
87
|
function mergeConfig(defaults, overrides) {
|
package/src/constants.js
CHANGED
|
@@ -20,8 +20,6 @@ export const IGNORE_DIRS = new Set([
|
|
|
20
20
|
'.env',
|
|
21
21
|
]);
|
|
22
22
|
|
|
23
|
-
// Re-export as an indirect binding to avoid TDZ in the circular
|
|
24
|
-
// parser.js ↔ constants.js import (no value read at evaluation time).
|
|
25
23
|
export { SUPPORTED_EXTENSIONS as EXTENSIONS };
|
|
26
24
|
|
|
27
25
|
export function shouldIgnore(dirName) {
|