@optave/codegraph 1.4.1 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +127 -40
- package/package.json +10 -10
- package/src/builder.js +61 -8
- package/src/cli.js +147 -5
- package/src/config.js +1 -1
- package/src/constants.js +0 -2
- package/src/cycles.js +2 -2
- package/src/db.js +13 -0
- package/src/embedder.js +3 -3
- package/src/export.js +44 -9
- package/src/extractors/csharp.js +243 -0
- package/src/extractors/go.js +167 -0
- package/src/extractors/hcl.js +73 -0
- package/src/extractors/helpers.js +10 -0
- package/src/extractors/index.js +9 -0
- package/src/extractors/java.js +227 -0
- package/src/extractors/javascript.js +396 -0
- package/src/extractors/php.js +237 -0
- package/src/extractors/python.js +143 -0
- package/src/extractors/ruby.js +185 -0
- package/src/extractors/rust.js +215 -0
- package/src/index.js +22 -0
- package/src/mcp.js +141 -6
- package/src/parser.js +29 -1893
- package/src/queries.js +190 -4
- package/src/registry.js +162 -0
- package/src/resolve.js +4 -3
- package/src/structure.js +491 -0
- package/src/watcher.js +2 -2
package/README.md
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
<h1 align="center">codegraph</h1>
|
|
6
6
|
|
|
7
7
|
<p align="center">
|
|
8
|
-
<strong>
|
|
8
|
+
<strong>Always-fresh code intelligence for AI agents — sub-second incremental rebuilds, zero-cost by default, optionally enhanced with your LLM.</strong>
|
|
9
9
|
</p>
|
|
10
10
|
|
|
11
11
|
<p align="center">
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
<a href="https://github.com/optave/codegraph/blob/main/LICENSE"><img src="https://img.shields.io/github/license/optave/codegraph?style=flat-square&logo=opensourceinitiative&logoColor=white" alt="Apache-2.0 License" /></a>
|
|
14
14
|
<a href="https://github.com/optave/codegraph/actions"><img src="https://img.shields.io/github/actions/workflow/status/optave/codegraph/codegraph-impact.yml?style=flat-square&logo=githubactions&logoColor=white&label=CI" alt="CI" /></a>
|
|
15
15
|
<img src="https://img.shields.io/badge/node-%3E%3D20-339933?style=flat-square&logo=node.js&logoColor=white" alt="Node >= 20" />
|
|
16
|
-
<img src="https://img.shields.io/badge/
|
|
16
|
+
<img src="https://img.shields.io/badge/graph-always%20fresh-brightgreen?style=flat-square&logo=shield&logoColor=white" alt="Always Fresh" />
|
|
17
17
|
</p>
|
|
18
18
|
|
|
19
19
|
<p align="center">
|
|
@@ -31,9 +31,35 @@
|
|
|
31
31
|
|
|
32
32
|
---
|
|
33
33
|
|
|
34
|
-
> **
|
|
34
|
+
> **The code graph that keeps up with your commits.**
|
|
35
35
|
>
|
|
36
|
-
> Codegraph
|
|
36
|
+
> Codegraph parses your codebase with [tree-sitter](https://tree-sitter.github.io/) (native Rust or WASM), builds a function-level dependency graph in SQLite, and keeps it current with sub-second incremental rebuilds. Every query runs locally — no API keys, no Docker, no setup. When you want deeper intelligence, bring your own LLM provider and codegraph enhances search and analysis through the same API you already use. Your code only goes where you choose to send it.
|
|
37
|
+
|
|
38
|
+
---
|
|
39
|
+
|
|
40
|
+
## 🔄 Why most code graph tools can't keep up with your commits
|
|
41
|
+
|
|
42
|
+
If you use a code graph with an AI agent, the graph needs to be **current**. A stale graph gives the agent wrong answers — deleted functions still show up, new dependencies are invisible, impact analysis misses the code you just wrote. The graph should rebuild on every commit, ideally on every save.
|
|
43
|
+
|
|
44
|
+
Most tools in this space can't do that:
|
|
45
|
+
|
|
46
|
+
| Problem | Who has it | Why it breaks on every commit |
|
|
47
|
+
|---|---|---|
|
|
48
|
+
| **Full re-index on every change** | code-graph-rag, CodeMCP, axon, joern, cpg, GitNexus | No file-level change tracking. Change one file → re-parse and re-insert the entire codebase. On a 3,000-file project, that's 30+ seconds per commit minimum |
|
|
49
|
+
| **Cloud API calls baked into the pipeline** | code-graph-rag, CodeRAG | Embeddings are generated through cloud APIs (OpenAI, Voyage AI, Gemini). Every rebuild = API round-trips for every function. Slow, expensive, and rate-limited. You can't put this in a commit hook |
|
|
50
|
+
| **Heavy infrastructure that's slow to restart** | code-graph-rag (Memgraph), axon (KuzuDB), badger-graph (Dgraph) | External databases add latency to every write. Bulk-inserting a full graph into Memgraph is not a sub-second operation |
|
|
51
|
+
| **No persistence between runs** | pyan, cflow | Re-parse from scratch every time. No database, no delta, no incremental anything |
|
|
52
|
+
|
|
53
|
+
**Codegraph solves this with incremental builds:**
|
|
54
|
+
|
|
55
|
+
1. Every file gets an MD5 hash stored in SQLite
|
|
56
|
+
2. On rebuild, only files whose hash changed get re-parsed
|
|
57
|
+
3. Stale nodes and edges for changed files are cleaned, then re-inserted
|
|
58
|
+
4. Everything else is untouched
|
|
59
|
+
|
|
60
|
+
**Result:** change one file in a 3,000-file project → rebuild completes in **under a second**. Put it in a commit hook, a file watcher, or let your AI agent trigger it. The graph is always current.
|
|
61
|
+
|
|
62
|
+
And because the core pipeline is pure local computation (tree-sitter + SQLite), there are no API calls, no network latency, and no cost. LLM-powered features (semantic search, richer embeddings) are a separate optional layer — they enhance the graph but never block it from being current.
|
|
37
63
|
|
|
38
64
|
---
|
|
39
65
|
|
|
@@ -41,54 +67,91 @@
|
|
|
41
67
|
|
|
42
68
|
<sub>Comparison last verified: February 2026</sub>
|
|
43
69
|
|
|
44
|
-
Most
|
|
70
|
+
Most code graph tools make you choose: **fast local analysis with no AI, or powerful AI features that require full re-indexing through cloud APIs on every change.** Codegraph gives you both — a graph that rebuilds in milliseconds on every commit, with optional LLM enhancement through the provider you're already using.
|
|
45
71
|
|
|
46
72
|
### Feature comparison
|
|
47
73
|
|
|
48
|
-
| Capability | codegraph |
|
|
49
|
-
|
|
50
|
-
| Function-level analysis | **Yes** |
|
|
51
|
-
| Multi-language | **
|
|
52
|
-
| Semantic search | **Yes** | — |
|
|
53
|
-
| MCP / AI agent support | **Yes** | — |
|
|
54
|
-
| Git diff impact | **Yes** | — | — | — |
|
|
55
|
-
|
|
|
56
|
-
|
|
|
57
|
-
|
|
|
58
|
-
|
|
|
59
|
-
|
|
|
60
|
-
|
|
|
61
|
-
|
|
|
74
|
+
| Capability | codegraph | [joern](https://github.com/joernio/joern) | [narsil-mcp](https://github.com/postrv/narsil-mcp) | [code-graph-rag](https://github.com/vitali87/code-graph-rag) | [cpg](https://github.com/Fraunhofer-AISEC/cpg) | [GitNexus](https://github.com/abhigyanpatwari/GitNexus) | [CodeMCP](https://github.com/SimplyLiz/CodeMCP) | [axon](https://github.com/harshkedia177/axon) |
|
|
75
|
+
|---|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|
|
|
76
|
+
| Function-level analysis | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** |
|
|
77
|
+
| Multi-language | **11** | **14** | **32** | Multi | **~10** | **9** | SCIP langs | Few |
|
|
78
|
+
| Semantic search | **Yes** | — | **Yes** | **Yes** | — | **Yes** | — | — |
|
|
79
|
+
| MCP / AI agent support | **Yes** | — | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** | — |
|
|
80
|
+
| Git diff impact | **Yes** | — | — | — | — | **Yes** | — | **Yes** |
|
|
81
|
+
| Watch mode | **Yes** | — | **Yes** | — | — | — | — | — |
|
|
82
|
+
| Cycle detection | **Yes** | — | **Yes** | — | — | — | — | **Yes** |
|
|
83
|
+
| Incremental rebuilds | **Yes** | — | **Yes** | — | — | — | — | — |
|
|
84
|
+
| Zero config | **Yes** | — | **Yes** | — | — | — | — | — |
|
|
85
|
+
| Embeddable JS library (`npm install`) | **Yes** | — | — | — | — | — | — | — |
|
|
86
|
+
| LLM-optional (works without API keys) | **Yes** | **Yes** | **Yes** | — | **Yes** | **Yes** | **Yes** | **Yes** |
|
|
87
|
+
| Commercial use allowed | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** | — | — | — |
|
|
88
|
+
| Open source | **Yes** | Yes | Yes | Yes | Yes | Yes | Custom | — |
|
|
62
89
|
|
|
63
90
|
### What makes codegraph different
|
|
64
91
|
|
|
65
92
|
| | Differentiator | In practice |
|
|
66
93
|
|---|---|---|
|
|
94
|
+
| **⚡** | **Always-fresh graph** | Sub-second incremental rebuilds via file-hash tracking. Run on every commit, every save, in watch mode — the graph is never stale. Most competitors re-index everything from scratch |
|
|
95
|
+
| **🔓** | **Zero-cost core, LLM-enhanced when you want** | Full graph analysis with no API keys, no accounts, no cost. Optionally bring your own LLM provider for richer embeddings and AI-powered search — your code only goes to the provider you already chose |
|
|
67
96
|
| **🔬** | **Function-level, not just files** | Traces `handleAuth()` → `validateToken()` → `decryptJWT()` and shows 14 callers across 9 files break if `decryptJWT` changes |
|
|
68
|
-
|
|
|
69
|
-
|
|
|
97
|
+
| **🤖** | **Built for AI agents** | 13-tool [MCP server](https://modelcontextprotocol.io/) — AI assistants query your graph directly. Single-repo by default, your code doesn't leak to other projects |
|
|
98
|
+
| **🌐** | **Multi-language, one CLI** | JS/TS + Python + Go + Rust + Java + C# + PHP + Ruby + HCL in a single graph — no juggling Madge, pyan, and cflow |
|
|
70
99
|
| **💥** | **Git diff impact** | `codegraph diff-impact` shows changed functions, their callers, and full blast radius — ships with a GitHub Actions workflow |
|
|
71
|
-
|
|
|
72
|
-
| **⚡** | **Build once, query instantly** | SQLite-backed — build in ~30s, every query under 100ms. Native Rust engine with WASM fallback. Most competitors re-parse every run |
|
|
73
|
-
| **🧠** | **Semantic search** | `codegraph search "handle auth"` uses local embeddings — multi-query with RRF ranking via `"auth; token; JWT"` |
|
|
100
|
+
| **🧠** | **Semantic search** | Local embeddings by default, LLM-powered embeddings when opted in — multi-query with RRF ranking via `"auth; token; JWT"` |
|
|
74
101
|
|
|
75
102
|
### How other tools compare
|
|
76
103
|
|
|
77
|
-
|
|
104
|
+
The key question is: **can you rebuild your graph on every commit in a large codebase without it costing money or taking minutes?** Most tools in this space either re-index everything from scratch (slow), require cloud API calls for core features (costly), or both. Codegraph's incremental builds keep the graph current in milliseconds — and the core pipeline needs no API keys at all. LLM-powered features are opt-in, using whichever provider you already work with.
|
|
78
105
|
|
|
79
|
-
| Tool | What it does well |
|
|
106
|
+
| Tool | What it does well | The tradeoff |
|
|
80
107
|
|---|---|---|
|
|
108
|
+
| [joern](https://github.com/joernio/joern) | Full CPG (AST + CFG + PDG) for vulnerability discovery, Scala query DSL, 14 languages, daily releases | No incremental builds — full re-parse on every change. Requires JDK 21, no built-in MCP, no watch mode |
|
|
109
|
+
| [narsil-mcp](https://github.com/postrv/narsil-mcp) | 90 MCP tools, 32 languages, taint analysis, SBOM, dead code, neural search, Merkle-tree incremental indexing, single ~30MB binary | Primarily MCP-only — no standalone CLI query interface. Neural search requires API key or ONNX source build |
|
|
110
|
+
| [code-graph-rag](https://github.com/vitali87/code-graph-rag) | Graph RAG with Memgraph, multi-provider AI, semantic search, code editing via AST | No incremental rebuilds — full re-index + re-embed through cloud APIs on every change. Requires Docker |
|
|
111
|
+
| [cpg](https://github.com/Fraunhofer-AISEC/cpg) | Formal Code Property Graph (AST + CFG + PDG + DFG), ~10 languages, MCP module, LLVM IR support, academic specifications | No incremental builds. Requires JVM + Gradle, no zero config, no watch mode |
|
|
112
|
+
| [GitNexus](https://github.com/abhigyanpatwari/GitNexus) | Knowledge graph with precomputed structural intelligence, 7 MCP tools, hybrid search (BM25 + semantic + RRF), clustering, process tracing | Full 6-phase pipeline re-run on changes. KuzuDB graph DB, browser mode limited to ~5,000 files. **PolyForm NC — no commercial use** |
|
|
113
|
+
| [CodeMCP](https://github.com/SimplyLiz/CodeMCP) | SCIP compiler-grade indexing, compound operations (83% token savings), secret scanning | No incremental builds. Custom license, requires SCIP toolchains per language |
|
|
114
|
+
| [axon](https://github.com/harshkedia177/axon) | 11-phase pipeline, KuzuDB, community detection, dead code, change coupling | Full pipeline re-run on changes. No license, Python-only, no MCP |
|
|
81
115
|
| [Madge](https://github.com/pahen/madge) | Simple file-level JS/TS dependency graphs | No function-level analysis, no impact tracing, JS/TS only |
|
|
82
116
|
| [dependency-cruiser](https://github.com/sverweij/dependency-cruiser) | Architectural rule validation for JS/TS | Module-level only (function-level explicitly out of scope), requires config |
|
|
83
|
-
| [Skott](https://github.com/antoine-music/skott) | Module graph with unused code detection | File-level only, JS/TS only, no persistent database |
|
|
84
117
|
| [Nx graph](https://nx.dev/) | Monorepo project-level dependency graph | Requires Nx workspace, project-level only (not file or function) |
|
|
85
|
-
| [Sourcetrail](https://github.com/CoatiSoftware/Sourcetrail) | Rich GUI with symbol-level graphs | Archived/discontinued (2021), no JS/TS, no CLI |
|
|
86
|
-
| [Sourcegraph](https://sourcegraph.com/) | Enterprise code search and navigation | Cloud/SaaS — code sent to servers, $19+/user/mo, no longer open source |
|
|
87
|
-
| [CodeSee](https://www.codesee.io/) | Visual codebase maps | Cloud-based — code leaves your machine, acquired by GitKraken |
|
|
88
|
-
| [Understand](https://scitools.com/) | Deep multi-language static analysis | $100+/month per seat, proprietary, GUI-only, no CI or AI integration |
|
|
89
|
-
| [Snyk Code](https://snyk.io/) | AI-powered security scanning | Cloud-based — code sent to Snyk servers for analysis, not a dependency graph tool |
|
|
90
118
|
| [pyan](https://github.com/Technologicat/pyan) / [cflow](https://www.gnu.org/software/cflow/) | Function-level call graphs | Single-language each (Python / C only), no persistence, no queries |
|
|
91
119
|
|
|
120
|
+
### Codegraph vs. Narsil-MCP: How to Decide
|
|
121
|
+
|
|
122
|
+
If you are looking for local code intelligence over MCP, the closest alternative to `codegraph` is [postrv/narsil-mcp](https://github.com/postrv/narsil-mcp). Both projects aim to give AI agents deep context about your codebase, but they approach the problem with fundamentally different philosophies.
|
|
123
|
+
|
|
124
|
+
Here is a cold, analytical breakdown to help you decide which tool fits your workflow.
|
|
125
|
+
|
|
126
|
+
#### The Core Difference
|
|
127
|
+
|
|
128
|
+
* **Codegraph is a surgical scalpel.** It does one thing exceptionally well: building an always-fresh, function-level dependency graph in SQLite and exposing it to AI agents with zero fluff.
|
|
129
|
+
* **Narsil-MCP is a Swiss Army knife.** It is a sprawling, "batteries-included" intelligence server that includes everything from taint analysis and SBOM generation to SPARQL knowledge graphs.
|
|
130
|
+
|
|
131
|
+
#### Feature Comparison
|
|
132
|
+
|
|
133
|
+
| Aspect | Optave Codegraph | Narsil-MCP |
|
|
134
|
+
| :--- | :--- | :--- |
|
|
135
|
+
| **Philosophy** | Lean, deterministic, AI-optimized | Comprehensive, feature-dense |
|
|
136
|
+
| **AI Tool Count** | 13 focused tools | 90 distinct tools |
|
|
137
|
+
| **Language Support** | 11 languages | 32 languages |
|
|
138
|
+
| **Primary Interface** | CLI-first with MCP integration | MCP-first (CLI is secondary) |
|
|
139
|
+
| **Supply Chain Risk** | Low (minimal dependency tree) | Higher (requires a massive dependency graph for embedded ML/scanners) |
|
|
140
|
+
| **Graph Updates** | Sub-second incremental (file-hash) | Parallel re-indexing / Merkle trees |
|
|
141
|
+
|
|
142
|
+
#### Choose Codegraph if:
|
|
143
|
+
|
|
144
|
+
* **You want to optimize AI agent reasoning.** Large Language Models degrade in performance and hallucinate when overwhelmed with choices. Codegraph’s tight 13-tool surface area ensures agents quickly understand their capabilities without wasting context window tokens.
|
|
145
|
+
* **You are concerned about supply chain attacks.** To support 90 tools, SBOMs, and neural embeddings, a tool must pull in a massive dependency tree. Codegraph keeps its dependencies minimal, dramatically reducing the risk of malicious code sneaking onto your machine.
|
|
146
|
+
* **You want deterministic blast-radius checks.** Features like `diff-impact` are built specifically to tell you exactly how a changed function cascades through your codebase before you merge a PR.
|
|
147
|
+
* **You value a strong standalone CLI.** You want to query your code graph locally without necessarily spinning up an AI agent.
|
|
148
|
+
|
|
149
|
+
#### Choose Narsil-MCP if:
|
|
150
|
+
|
|
151
|
+
* **You want security and code intelligence together.** You don't want a separate MCP server for security and prefer an all-in-one solution.
|
|
152
|
+
* **You use niche languages.** Your codebase relies heavily on languages outside of Codegraph's core 11 (e.g., Fortran, Erlang, Zig, Swift).
|
|
153
|
+
* **You are willing to manage tool presets.** Because 90 tools will overload an AI's context window, you don't mind manually configuring preset files (like "Minimal" or "Balanced") to restrict what the AI can see depending on your editor.
|
|
154
|
+
|
|
92
155
|
---
|
|
93
156
|
|
|
94
157
|
## 🚀 Quick Start
|
|
@@ -127,8 +190,8 @@ codegraph deps src/index.ts # file-level import/export map
|
|
|
127
190
|
| 📤 | **Export** | DOT (Graphviz), Mermaid, and JSON graph export |
|
|
128
191
|
| 🧠 | **Semantic search** | Embeddings-powered natural language search with multi-query RRF ranking |
|
|
129
192
|
| 👀 | **Watch mode** | Incrementally update the graph as files change |
|
|
130
|
-
| 🤖 | **MCP server** |
|
|
131
|
-
| 🔒 | **
|
|
193
|
+
| 🤖 | **MCP server** | 13-tool MCP server for AI assistants; single-repo by default, opt-in multi-repo |
|
|
194
|
+
| 🔒 | **Your code, your choice** | Zero-cost core with no API keys. Optionally enhance with your LLM provider — your code only goes where you send it |
|
|
132
195
|
|
|
133
196
|
## 📦 Commands
|
|
134
197
|
|
|
@@ -212,12 +275,30 @@ A single trailing semicolon is ignored (falls back to single-query mode). The `-
|
|
|
212
275
|
|
|
213
276
|
The model used during `embed` is stored in the database, so `search` auto-detects it — no need to pass `--model` when searching.
|
|
214
277
|
|
|
278
|
+
### Multi-Repo Registry
|
|
279
|
+
|
|
280
|
+
Manage a global registry of codegraph-enabled projects. The registry stores paths to your built graphs so the MCP server can query them when multi-repo mode is enabled.
|
|
281
|
+
|
|
282
|
+
```bash
|
|
283
|
+
codegraph registry list # List all registered repos
|
|
284
|
+
codegraph registry list --json # JSON output
|
|
285
|
+
codegraph registry add <dir> # Register a project directory
|
|
286
|
+
codegraph registry add <dir> -n my-name # Custom name
|
|
287
|
+
codegraph registry remove <name> # Unregister
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
`codegraph build` auto-registers the project — no manual setup needed.
|
|
291
|
+
|
|
215
292
|
### AI Integration
|
|
216
293
|
|
|
217
294
|
```bash
|
|
218
|
-
codegraph mcp # Start MCP server
|
|
295
|
+
codegraph mcp # Start MCP server (single-repo, current project only)
|
|
296
|
+
codegraph mcp --multi-repo # Enable access to all registered repos
|
|
297
|
+
codegraph mcp --repos a,b # Restrict to specific repos (implies --multi-repo)
|
|
219
298
|
```
|
|
220
299
|
|
|
300
|
+
By default, the MCP server only exposes the local project's graph. AI agents cannot access other repositories unless you explicitly opt in with `--multi-repo` or `--repos`.
|
|
301
|
+
|
|
221
302
|
### Common Flags
|
|
222
303
|
|
|
223
304
|
| Flag | Description |
|
|
@@ -228,7 +309,7 @@ codegraph mcp # Start MCP server for AI assistants
|
|
|
228
309
|
| `-j, --json` | Output as JSON |
|
|
229
310
|
| `-v, --verbose` | Enable debug output |
|
|
230
311
|
| `--engine <engine>` | Parser engine: `native`, `wasm`, or `auto` (default: `auto`) |
|
|
231
|
-
| `-k, --kind <kind>` | Filter by kind: `function`, `method`, `class` (search) |
|
|
312
|
+
| `-k, --kind <kind>` | Filter by kind: `function`, `method`, `class`, `struct`, `enum`, `trait`, `record`, `module` (search) |
|
|
232
313
|
| `--file <pattern>` | Filter by file path pattern (search) |
|
|
233
314
|
| `--rrf-k <n>` | RRF smoothing constant for multi-query search (default 60) |
|
|
234
315
|
|
|
@@ -309,12 +390,18 @@ Benchmarked on a ~3,200-file TypeScript project:
|
|
|
309
390
|
|
|
310
391
|
### MCP Server
|
|
311
392
|
|
|
312
|
-
Codegraph includes a built-in [Model Context Protocol](https://modelcontextprotocol.io/) server, so AI assistants can query your dependency graph directly:
|
|
393
|
+
Codegraph includes a built-in [Model Context Protocol](https://modelcontextprotocol.io/) server with 13 tools, so AI assistants can query your dependency graph directly:
|
|
313
394
|
|
|
314
395
|
```bash
|
|
315
|
-
codegraph mcp
|
|
396
|
+
codegraph mcp # Single-repo mode (default) — only local project
|
|
397
|
+
codegraph mcp --multi-repo # Multi-repo — all registered repos accessible
|
|
398
|
+
codegraph mcp --repos a,b # Multi-repo with allowlist
|
|
316
399
|
```
|
|
317
400
|
|
|
401
|
+
**Single-repo mode (default):** Tools operate only on the local `.codegraph/graph.db`. The `repo` parameter and `list_repos` tool are not exposed to the AI agent.
|
|
402
|
+
|
|
403
|
+
**Multi-repo mode (`--multi-repo`):** All tools gain an optional `repo` parameter to target any registered repository, and `list_repos` becomes available. Use `--repos` to restrict which repos the agent can access.
|
|
404
|
+
|
|
318
405
|
### CLAUDE.md / Agent Instructions
|
|
319
406
|
|
|
320
407
|
Add this to your project's `CLAUDE.md` to help AI agents use codegraph:
|
|
@@ -467,7 +554,7 @@ const { results: fused } = await multiSearchData(
|
|
|
467
554
|
See **[ROADMAP.md](ROADMAP.md)** for the full development roadmap. Current plan:
|
|
468
555
|
|
|
469
556
|
1. ~~**Rust Core**~~ — **Complete** (v1.3.0) — native tree-sitter parsing via napi-rs, parallel multi-core parsing, incremental re-parsing, import resolution & cycle detection in Rust
|
|
470
|
-
2. ~~**Foundation Hardening**~~ — **Complete** (v1.4.0) — parser registry,
|
|
557
|
+
2. ~~**Foundation Hardening**~~ — **Complete** (v1.4.0) — parser registry, 12-tool MCP server with multi-repo support, test coverage 62%→75%, `apiKeyCommand` secret resolution, global repo registry
|
|
471
558
|
3. **Intelligent Embeddings** — LLM-generated descriptions, hybrid search
|
|
472
559
|
4. **Natural Language Queries** — `codegraph ask` command, conversational sessions
|
|
473
560
|
5. **Expanded Language Support** — 8 new languages (12 → 20)
|
|
@@ -494,5 +581,5 @@ Looking to add a new language? Check out **[Adding a New Language](docs/adding-a
|
|
|
494
581
|
---
|
|
495
582
|
|
|
496
583
|
<p align="center">
|
|
497
|
-
<sub>Built with <a href="https://tree-sitter.github.io/">tree-sitter</a> and <a href="https://github.com/WiseLibs/better-sqlite3">better-sqlite3</a>.
|
|
584
|
+
<sub>Built with <a href="https://tree-sitter.github.io/">tree-sitter</a> and <a href="https://github.com/WiseLibs/better-sqlite3">better-sqlite3</a>. Your code only goes where you choose to send it.</sub>
|
|
498
585
|
</p>
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@optave/codegraph",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "2.1.0",
|
|
4
4
|
"description": "Local code graph CLI — parse codebases with tree-sitter, build dependency graphs, query them",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "src/index.js",
|
|
@@ -29,11 +29,11 @@
|
|
|
29
29
|
"lint": "biome check src/ tests/",
|
|
30
30
|
"lint:fix": "biome check --write src/ tests/",
|
|
31
31
|
"format": "biome format --write src/ tests/",
|
|
32
|
-
"prepare": "npm run build:wasm && husky",
|
|
32
|
+
"prepare": "npm run build:wasm && husky && npm run deps:tree",
|
|
33
|
+
"deps:tree": "node scripts/gen-deps.cjs",
|
|
33
34
|
"release": "commit-and-tag-version",
|
|
34
35
|
"release:dry-run": "commit-and-tag-version --dry-run",
|
|
35
|
-
"version": "node scripts/sync-native-versions.js && git add package.json"
|
|
36
|
-
"prepublishOnly": "npm test"
|
|
36
|
+
"version": "node scripts/sync-native-versions.js && git add package.json"
|
|
37
37
|
},
|
|
38
38
|
"keywords": [
|
|
39
39
|
"codegraph",
|
|
@@ -61,19 +61,19 @@
|
|
|
61
61
|
"optionalDependencies": {
|
|
62
62
|
"@huggingface/transformers": "^3.8.1",
|
|
63
63
|
"@modelcontextprotocol/sdk": "^1.0.0",
|
|
64
|
-
"@optave/codegraph-darwin-arm64": "1.
|
|
65
|
-
"@optave/codegraph-darwin-x64": "1.
|
|
66
|
-
"@optave/codegraph-linux-x64-gnu": "1.
|
|
67
|
-
"@optave/codegraph-win32-x64-msvc": "1.
|
|
64
|
+
"@optave/codegraph-darwin-arm64": "2.1.0",
|
|
65
|
+
"@optave/codegraph-darwin-x64": "2.1.0",
|
|
66
|
+
"@optave/codegraph-linux-x64-gnu": "2.1.0",
|
|
67
|
+
"@optave/codegraph-win32-x64-msvc": "2.1.0"
|
|
68
68
|
},
|
|
69
69
|
"devDependencies": {
|
|
70
70
|
"@biomejs/biome": "^2.4.4",
|
|
71
71
|
"@commitlint/cli": "^19.8",
|
|
72
72
|
"@commitlint/config-conventional": "^19.8",
|
|
73
|
-
"commit-and-tag-version": "^12.5",
|
|
74
|
-
"husky": "^9.1",
|
|
75
73
|
"@tree-sitter-grammars/tree-sitter-hcl": "^1.2.0",
|
|
76
74
|
"@vitest/coverage-v8": "^4.0.18",
|
|
75
|
+
"commit-and-tag-version": "^12.5",
|
|
76
|
+
"husky": "^9.1",
|
|
77
77
|
"tree-sitter-c-sharp": "^0.23.1",
|
|
78
78
|
"tree-sitter-cli": "^0.26.5",
|
|
79
79
|
"tree-sitter-go": "^0.23.4",
|
package/src/builder.js
CHANGED
|
@@ -1,27 +1,30 @@
|
|
|
1
1
|
import { createHash } from 'node:crypto';
|
|
2
2
|
import fs from 'node:fs';
|
|
3
|
+
import os from 'node:os';
|
|
3
4
|
import path from 'node:path';
|
|
4
5
|
import { loadConfig } from './config.js';
|
|
5
6
|
import { EXTENSIONS, IGNORE_DIRS, normalizePath } from './constants.js';
|
|
6
7
|
import { initSchema, openDb } from './db.js';
|
|
7
|
-
import { warn } from './logger.js';
|
|
8
|
+
import { debug, warn } from './logger.js';
|
|
8
9
|
import { getActiveEngine, parseFilesAuto } from './parser.js';
|
|
9
10
|
import { computeConfidence, resolveImportPath, resolveImportsBatch } from './resolve.js';
|
|
10
11
|
|
|
11
12
|
export { resolveImportPath } from './resolve.js';
|
|
12
13
|
|
|
13
|
-
export function collectFiles(dir, files = [], config = {}) {
|
|
14
|
+
export function collectFiles(dir, files = [], config = {}, directories = null) {
|
|
15
|
+
const trackDirs = directories !== null;
|
|
14
16
|
let entries;
|
|
15
17
|
try {
|
|
16
18
|
entries = fs.readdirSync(dir, { withFileTypes: true });
|
|
17
19
|
} catch (err) {
|
|
18
20
|
warn(`Cannot read directory ${dir}: ${err.message}`);
|
|
19
|
-
return files;
|
|
21
|
+
return trackDirs ? { files, directories } : files;
|
|
20
22
|
}
|
|
21
23
|
|
|
22
24
|
// Merge config ignoreDirs with defaults
|
|
23
25
|
const extraIgnore = config.ignoreDirs ? new Set(config.ignoreDirs) : null;
|
|
24
26
|
|
|
27
|
+
let hasFiles = false;
|
|
25
28
|
for (const entry of entries) {
|
|
26
29
|
if (entry.name.startsWith('.') && entry.name !== '.') {
|
|
27
30
|
if (IGNORE_DIRS.has(entry.name)) continue;
|
|
@@ -32,12 +35,16 @@ export function collectFiles(dir, files = [], config = {}) {
|
|
|
32
35
|
|
|
33
36
|
const full = path.join(dir, entry.name);
|
|
34
37
|
if (entry.isDirectory()) {
|
|
35
|
-
collectFiles(full, files, config);
|
|
38
|
+
collectFiles(full, files, config, directories);
|
|
36
39
|
} else if (EXTENSIONS.has(path.extname(entry.name))) {
|
|
37
40
|
files.push(full);
|
|
41
|
+
hasFiles = true;
|
|
38
42
|
}
|
|
39
43
|
}
|
|
40
|
-
|
|
44
|
+
if (trackDirs && hasFiles) {
|
|
45
|
+
directories.add(dir);
|
|
46
|
+
}
|
|
47
|
+
return trackDirs ? { files, directories } : files;
|
|
41
48
|
}
|
|
42
49
|
|
|
43
50
|
export function loadPathAliases(rootDir) {
|
|
@@ -163,7 +170,9 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
163
170
|
);
|
|
164
171
|
}
|
|
165
172
|
|
|
166
|
-
const
|
|
173
|
+
const collected = collectFiles(rootDir, [], config, new Set());
|
|
174
|
+
const files = collected.files;
|
|
175
|
+
const discoveredDirs = collected.directories;
|
|
167
176
|
console.log(`Found ${files.length} files to parse`);
|
|
168
177
|
|
|
169
178
|
// Check for incremental build
|
|
@@ -179,23 +188,28 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
179
188
|
|
|
180
189
|
if (isFullBuild) {
|
|
181
190
|
db.exec(
|
|
182
|
-
'PRAGMA foreign_keys = OFF; DELETE FROM edges; DELETE FROM nodes; PRAGMA foreign_keys = ON;',
|
|
191
|
+
'PRAGMA foreign_keys = OFF; DELETE FROM node_metrics; DELETE FROM edges; DELETE FROM nodes; PRAGMA foreign_keys = ON;',
|
|
183
192
|
);
|
|
184
193
|
} else {
|
|
185
194
|
console.log(`Incremental: ${changed.length} changed, ${removed.length} removed`);
|
|
186
|
-
// Remove
|
|
195
|
+
// Remove metrics/edges/nodes for changed and removed files
|
|
187
196
|
const deleteNodesForFile = db.prepare('DELETE FROM nodes WHERE file = ?');
|
|
188
197
|
const deleteEdgesForFile = db.prepare(`
|
|
189
198
|
DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = @f)
|
|
190
199
|
OR target_id IN (SELECT id FROM nodes WHERE file = @f)
|
|
191
200
|
`);
|
|
201
|
+
const deleteMetricsForFile = db.prepare(
|
|
202
|
+
'DELETE FROM node_metrics WHERE node_id IN (SELECT id FROM nodes WHERE file = ?)',
|
|
203
|
+
);
|
|
192
204
|
for (const relPath of removed) {
|
|
193
205
|
deleteEdgesForFile.run({ f: relPath });
|
|
206
|
+
deleteMetricsForFile.run(relPath);
|
|
194
207
|
deleteNodesForFile.run(relPath);
|
|
195
208
|
}
|
|
196
209
|
for (const item of changed) {
|
|
197
210
|
const relPath = item.relPath || normalizePath(path.relative(rootDir, item.file));
|
|
198
211
|
deleteEdgesForFile.run({ f: relPath });
|
|
212
|
+
deleteMetricsForFile.run(relPath);
|
|
199
213
|
deleteNodesForFile.run(relPath);
|
|
200
214
|
}
|
|
201
215
|
}
|
|
@@ -539,8 +553,47 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
539
553
|
});
|
|
540
554
|
buildEdges();
|
|
541
555
|
|
|
556
|
+
// Build line count map for structure metrics
|
|
557
|
+
const lineCountMap = new Map();
|
|
558
|
+
for (const [relPath] of fileSymbols) {
|
|
559
|
+
const absPath = path.join(rootDir, relPath);
|
|
560
|
+
try {
|
|
561
|
+
const content = fs.readFileSync(absPath, 'utf-8');
|
|
562
|
+
lineCountMap.set(relPath, content.split('\n').length);
|
|
563
|
+
} catch {
|
|
564
|
+
lineCountMap.set(relPath, 0);
|
|
565
|
+
}
|
|
566
|
+
}
|
|
567
|
+
|
|
568
|
+
// Build directory structure, containment edges, and metrics
|
|
569
|
+
const relDirs = new Set();
|
|
570
|
+
for (const absDir of discoveredDirs) {
|
|
571
|
+
relDirs.add(normalizePath(path.relative(rootDir, absDir)));
|
|
572
|
+
}
|
|
573
|
+
try {
|
|
574
|
+
const { buildStructure } = await import('./structure.js');
|
|
575
|
+
buildStructure(db, fileSymbols, rootDir, lineCountMap, relDirs);
|
|
576
|
+
} catch (err) {
|
|
577
|
+
debug(`Structure analysis failed: ${err.message}`);
|
|
578
|
+
}
|
|
579
|
+
|
|
542
580
|
const nodeCount = db.prepare('SELECT COUNT(*) as c FROM nodes').get().c;
|
|
543
581
|
console.log(`Graph built: ${nodeCount} nodes, ${edgeCount} edges`);
|
|
544
582
|
console.log(`Stored in ${dbPath}`);
|
|
545
583
|
db.close();
|
|
584
|
+
|
|
585
|
+
if (!opts.skipRegistry) {
|
|
586
|
+
const tmpDir = path.resolve(os.tmpdir());
|
|
587
|
+
const resolvedRoot = path.resolve(rootDir);
|
|
588
|
+
if (resolvedRoot.startsWith(tmpDir)) {
|
|
589
|
+
debug(`Skipping auto-registration for temp directory: ${resolvedRoot}`);
|
|
590
|
+
} else {
|
|
591
|
+
try {
|
|
592
|
+
const { registerRepo } = await import('./registry.js');
|
|
593
|
+
registerRepo(rootDir);
|
|
594
|
+
} catch (err) {
|
|
595
|
+
debug(`Auto-registration failed: ${err.message}`);
|
|
596
|
+
}
|
|
597
|
+
}
|
|
598
|
+
}
|
|
546
599
|
}
|