@optave/codegraph 2.0.0 → 2.1.1-dev.3c12b64
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +58 -22
- package/package.json +10 -10
- package/src/builder.js +14 -5
- package/src/cli.js +24 -8
- package/src/config.js +1 -1
- package/src/embedder.js +3 -3
- package/src/extractors/csharp.js +243 -0
- package/src/extractors/go.js +167 -0
- package/src/extractors/hcl.js +73 -0
- package/src/extractors/helpers.js +10 -0
- package/src/extractors/index.js +9 -0
- package/src/extractors/java.js +227 -0
- package/src/extractors/javascript.js +396 -0
- package/src/extractors/php.js +237 -0
- package/src/extractors/python.js +143 -0
- package/src/extractors/ruby.js +185 -0
- package/src/extractors/rust.js +215 -0
- package/src/index.js +1 -0
- package/src/mcp.js +2 -1
- package/src/parser.js +27 -1890
- package/src/queries.js +190 -4
- package/src/registry.js +24 -7
- package/src/resolve.js +4 -3
package/README.md
CHANGED
|
@@ -45,10 +45,10 @@ Most tools in this space can't do that:
|
|
|
45
45
|
|
|
46
46
|
| Problem | Who has it | Why it breaks on every commit |
|
|
47
47
|
|---|---|---|
|
|
48
|
-
| **Full re-index on every change** | code-graph-rag, CodeMCP, axon,
|
|
49
|
-
| **Cloud API calls baked into the pipeline** | code-graph-rag,
|
|
48
|
+
| **Full re-index on every change** | code-graph-rag, CodeMCP, axon, joern, cpg, GitNexus | No file-level change tracking. Change one file → re-parse and re-insert the entire codebase. On a 3,000-file project, that's 30+ seconds per commit minimum |
|
|
49
|
+
| **Cloud API calls baked into the pipeline** | code-graph-rag, CodeRAG | Embeddings are generated through cloud APIs (OpenAI, Voyage AI, Gemini). Every rebuild = API round-trips for every function. Slow, expensive, and rate-limited. You can't put this in a commit hook |
|
|
50
50
|
| **Heavy infrastructure that's slow to restart** | code-graph-rag (Memgraph), axon (KuzuDB), badger-graph (Dgraph) | External databases add latency to every write. Bulk-inserting a full graph into Memgraph is not a sub-second operation |
|
|
51
|
-
| **No persistence between runs** |
|
|
51
|
+
| **No persistence between runs** | pyan, cflow | Re-parse from scratch every time. No database, no delta, no incremental anything |
|
|
52
52
|
|
|
53
53
|
**Codegraph solves this with incremental builds:**
|
|
54
54
|
|
|
@@ -71,20 +71,21 @@ Most code graph tools make you choose: **fast local analysis with no AI, or powe
|
|
|
71
71
|
|
|
72
72
|
### Feature comparison
|
|
73
73
|
|
|
74
|
-
| Capability | codegraph | [
|
|
74
|
+
| Capability | codegraph | [joern](https://github.com/joernio/joern) | [narsil-mcp](https://github.com/postrv/narsil-mcp) | [code-graph-rag](https://github.com/vitali87/code-graph-rag) | [cpg](https://github.com/Fraunhofer-AISEC/cpg) | [GitNexus](https://github.com/abhigyanpatwari/GitNexus) | [CodeMCP](https://github.com/SimplyLiz/CodeMCP) | [axon](https://github.com/harshkedia177/axon) |
|
|
75
75
|
|---|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|
|
|
76
|
-
| Function-level analysis | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** |
|
|
77
|
-
| Multi-language | **11** |
|
|
78
|
-
| Semantic search | **Yes** | **Yes** |
|
|
79
|
-
| MCP / AI agent support | **Yes** |
|
|
80
|
-
| Git diff impact | **Yes** | — | — | — | **Yes** | — |
|
|
81
|
-
| Watch mode | **Yes** | — |
|
|
82
|
-
|
|
|
83
|
-
|
|
|
84
|
-
|
|
|
85
|
-
|
|
|
86
|
-
| LLM-optional (works without API keys) | **Yes** |
|
|
87
|
-
|
|
|
76
|
+
| Function-level analysis | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** |
|
|
77
|
+
| Multi-language | **11** | **14** | **32** | Multi | **~10** | **9** | SCIP langs | Few |
|
|
78
|
+
| Semantic search | **Yes** | — | **Yes** | **Yes** | — | **Yes** | — | — |
|
|
79
|
+
| MCP / AI agent support | **Yes** | — | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** | — |
|
|
80
|
+
| Git diff impact | **Yes** | — | — | — | — | **Yes** | — | **Yes** |
|
|
81
|
+
| Watch mode | **Yes** | — | **Yes** | — | — | — | — | — |
|
|
82
|
+
| Cycle detection | **Yes** | — | **Yes** | — | — | — | — | **Yes** |
|
|
83
|
+
| Incremental rebuilds | **Yes** | — | **Yes** | — | — | — | — | — |
|
|
84
|
+
| Zero config | **Yes** | — | **Yes** | — | — | — | — | — |
|
|
85
|
+
| Embeddable JS library (`npm install`) | **Yes** | — | — | — | — | — | — | — |
|
|
86
|
+
| LLM-optional (works without API keys) | **Yes** | **Yes** | **Yes** | — | **Yes** | **Yes** | **Yes** | **Yes** |
|
|
87
|
+
| Commercial use allowed | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** | — | — | — |
|
|
88
|
+
| Open source | **Yes** | Yes | Yes | Yes | Yes | Yes | Custom | — |
|
|
88
89
|
|
|
89
90
|
### What makes codegraph different
|
|
90
91
|
|
|
@@ -104,18 +105,53 @@ The key question is: **can you rebuild your graph on every commit in a large cod
|
|
|
104
105
|
|
|
105
106
|
| Tool | What it does well | The tradeoff |
|
|
106
107
|
|---|---|---|
|
|
108
|
+
| [joern](https://github.com/joernio/joern) | Full CPG (AST + CFG + PDG) for vulnerability discovery, Scala query DSL, 14 languages, daily releases | No incremental builds — full re-parse on every change. Requires JDK 21, no built-in MCP, no watch mode |
|
|
109
|
+
| [narsil-mcp](https://github.com/postrv/narsil-mcp) | 90 MCP tools, 32 languages, taint analysis, SBOM, dead code, neural search, Merkle-tree incremental indexing, single ~30MB binary | Primarily MCP-only — no standalone CLI query interface. Neural search requires API key or ONNX source build |
|
|
107
110
|
| [code-graph-rag](https://github.com/vitali87/code-graph-rag) | Graph RAG with Memgraph, multi-provider AI, semantic search, code editing via AST | No incremental rebuilds — full re-index + re-embed through cloud APIs on every change. Requires Docker |
|
|
108
|
-
| [
|
|
111
|
+
| [cpg](https://github.com/Fraunhofer-AISEC/cpg) | Formal Code Property Graph (AST + CFG + PDG + DFG), ~10 languages, MCP module, LLVM IR support, academic specifications | No incremental builds. Requires JVM + Gradle, no zero config, no watch mode |
|
|
112
|
+
| [GitNexus](https://github.com/abhigyanpatwari/GitNexus) | Knowledge graph with precomputed structural intelligence, 7 MCP tools, hybrid search (BM25 + semantic + RRF), clustering, process tracing | Full 6-phase pipeline re-run on changes. KuzuDB graph DB, browser mode limited to ~5,000 files. **PolyForm NC — no commercial use** |
|
|
109
113
|
| [CodeMCP](https://github.com/SimplyLiz/CodeMCP) | SCIP compiler-grade indexing, compound operations (83% token savings), secret scanning | No incremental builds. Custom license, requires SCIP toolchains per language |
|
|
110
114
|
| [axon](https://github.com/harshkedia177/axon) | 11-phase pipeline, KuzuDB, community detection, dead code, change coupling | Full pipeline re-run on changes. No license, Python-only, no MCP |
|
|
111
|
-
| [autodev-codebase](https://github.com/anrgct/autodev-codebase) | 40+ languages, interactive Cytoscape.js visualization, LLM reranking | Re-embeds through cloud APIs on changes. No license, complex setup |
|
|
112
|
-
| [arbor](https://github.com/Anandb71/arbor) | Native GUI, confidence scoring, architectural role classification, fuzzy search | GUI-focused — no CLI pipeline, no watch mode, no CI integration |
|
|
113
|
-
| [Claude-code-memory](https://github.com/Durafen/Claude-code-memory) | Persistent codebase memory for Claude Code, Memory Guard quality gate | Requires Voyage AI (cloud) + Qdrant (Docker) for core features |
|
|
114
115
|
| [Madge](https://github.com/pahen/madge) | Simple file-level JS/TS dependency graphs | No function-level analysis, no impact tracing, JS/TS only |
|
|
115
116
|
| [dependency-cruiser](https://github.com/sverweij/dependency-cruiser) | Architectural rule validation for JS/TS | Module-level only (function-level explicitly out of scope), requires config |
|
|
116
117
|
| [Nx graph](https://nx.dev/) | Monorepo project-level dependency graph | Requires Nx workspace, project-level only (not file or function) |
|
|
117
118
|
| [pyan](https://github.com/Technologicat/pyan) / [cflow](https://www.gnu.org/software/cflow/) | Function-level call graphs | Single-language each (Python / C only), no persistence, no queries |
|
|
118
119
|
|
|
120
|
+
### Codegraph vs. Narsil-MCP: How to Decide
|
|
121
|
+
|
|
122
|
+
If you are looking for local code intelligence over MCP, the closest alternative to `codegraph` is [postrv/narsil-mcp](https://github.com/postrv/narsil-mcp). Both projects aim to give AI agents deep context about your codebase, but they approach the problem with fundamentally different philosophies.
|
|
123
|
+
|
|
124
|
+
Here is a cold, analytical breakdown to help you decide which tool fits your workflow.
|
|
125
|
+
|
|
126
|
+
#### The Core Difference
|
|
127
|
+
|
|
128
|
+
* **Codegraph is a surgical scalpel.** It does one thing exceptionally well: building an always-fresh, function-level dependency graph in SQLite and exposing it to AI agents with zero fluff.
|
|
129
|
+
* **Narsil-MCP is a Swiss Army knife.** It is a sprawling, "batteries-included" intelligence server that includes everything from taint analysis and SBOM generation to SPARQL knowledge graphs.
|
|
130
|
+
|
|
131
|
+
#### Feature Comparison
|
|
132
|
+
|
|
133
|
+
| Aspect | Optave Codegraph | Narsil-MCP |
|
|
134
|
+
| :--- | :--- | :--- |
|
|
135
|
+
| **Philosophy** | Lean, deterministic, AI-optimized | Comprehensive, feature-dense |
|
|
136
|
+
| **AI Tool Count** | 13 focused tools | 90 distinct tools |
|
|
137
|
+
| **Language Support** | 11 languages | 32 languages |
|
|
138
|
+
| **Primary Interface** | CLI-first with MCP integration | MCP-first (CLI is secondary) |
|
|
139
|
+
| **Supply Chain Risk** | Low (minimal dependency tree) | Higher (requires massive dependency graph for embedded ML/scanners) |
|
|
140
|
+
| **Graph Updates** | Sub-second incremental (file-hash) | Parallel re-indexing / Merkle trees |
|
|
141
|
+
|
|
142
|
+
#### Choose Codegraph if:
|
|
143
|
+
|
|
144
|
+
* **You want to optimize AI agent reasoning.** Large Language Models degrade in performance and hallucinate when overwhelmed with choices. Codegraph’s tight 13-tool surface area ensures agents quickly understand their capabilities without wasting context window tokens.
|
|
145
|
+
* **You are concerned about supply chain attacks.** To support 90 tools, SBOMs, and neural embeddings, a tool must pull in a massive dependency tree. Codegraph keeps its dependencies minimal, dramatically reducing the risk of malicious code sneaking onto your machine.
|
|
146
|
+
* **You want deterministic blast-radius checks.** Features like `diff-impact` are built specifically to tell you exactly how a changed function cascades through your codebase before you merge a PR.
|
|
147
|
+
* **You value a strong standalone CLI.** You want to query your code graph locally without necessarily spinning up an AI agent.
|
|
148
|
+
|
|
149
|
+
#### Choose Narsil-MCP if:
|
|
150
|
+
|
|
151
|
+
* **You want security and code intelligence together.** You don't want a separate MCP server for security and prefer an all-in-one solution.
|
|
152
|
+
* **You use niche languages.** Your codebase relies heavily on languages outside of Codegraph's core 11 (e.g., Fortran, Erlang, Zig, Swift).
|
|
153
|
+
* **You are willing to manage tool presets.** Because 90 tools will overload an AI's context window, you don't mind manually configuring preset files (like "Minimal" or "Balanced") to restrict what the AI can see depending on your editor.
|
|
154
|
+
|
|
119
155
|
---
|
|
120
156
|
|
|
121
157
|
## 🚀 Quick Start
|
|
@@ -229,10 +265,10 @@ A single trailing semicolon is ignored (falls back to single-query mode). The `-
|
|
|
229
265
|
|
|
230
266
|
| Flag | Model | Dimensions | Size | License | Notes |
|
|
231
267
|
|---|---|---|---|---|---|
|
|
232
|
-
| `minilm`
|
|
268
|
+
| `minilm` | all-MiniLM-L6-v2 | 384 | ~23 MB | Apache-2.0 | Fastest, good for quick iteration |
|
|
233
269
|
| `jina-small` | jina-embeddings-v2-small-en | 512 | ~33 MB | Apache-2.0 | Better quality, still small |
|
|
234
270
|
| `jina-base` | jina-embeddings-v2-base-en | 768 | ~137 MB | Apache-2.0 | High quality, 8192 token context |
|
|
235
|
-
| `jina-code` | jina-embeddings-v2-base-code | 768 | ~137 MB | Apache-2.0 | **Best for code search**, trained on code+text |
|
|
271
|
+
| `jina-code` (default) | jina-embeddings-v2-base-code | 768 | ~137 MB | Apache-2.0 | **Best for code search**, trained on code+text |
|
|
236
272
|
| `nomic` | nomic-embed-text-v1 | 768 | ~137 MB | Apache-2.0 | Good quality, 8192 context |
|
|
237
273
|
| `nomic-v1.5` | nomic-embed-text-v1.5 | 768 | ~137 MB | Apache-2.0 | Improved nomic, Matryoshka dimensions |
|
|
238
274
|
| `bge-large` | bge-large-en-v1.5 | 1024 | ~335 MB | MIT | Best general retrieval, top MTEB scores |
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@optave/codegraph",
|
|
3
|
-
"version": "2.
|
|
3
|
+
"version": "2.1.1-dev.3c12b64",
|
|
4
4
|
"description": "Local code graph CLI — parse codebases with tree-sitter, build dependency graphs, query them",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "src/index.js",
|
|
@@ -29,11 +29,11 @@
|
|
|
29
29
|
"lint": "biome check src/ tests/",
|
|
30
30
|
"lint:fix": "biome check --write src/ tests/",
|
|
31
31
|
"format": "biome format --write src/ tests/",
|
|
32
|
-
"prepare": "npm run build:wasm && husky",
|
|
32
|
+
"prepare": "npm run build:wasm && husky && npm run deps:tree",
|
|
33
|
+
"deps:tree": "node scripts/gen-deps.cjs",
|
|
33
34
|
"release": "commit-and-tag-version",
|
|
34
35
|
"release:dry-run": "commit-and-tag-version --dry-run",
|
|
35
|
-
"version": "node scripts/sync-native-versions.js && git add package.json"
|
|
36
|
-
"prepublishOnly": "npm test"
|
|
36
|
+
"version": "node scripts/sync-native-versions.js && git add package.json"
|
|
37
37
|
},
|
|
38
38
|
"keywords": [
|
|
39
39
|
"codegraph",
|
|
@@ -61,19 +61,19 @@
|
|
|
61
61
|
"optionalDependencies": {
|
|
62
62
|
"@huggingface/transformers": "^3.8.1",
|
|
63
63
|
"@modelcontextprotocol/sdk": "^1.0.0",
|
|
64
|
-
"@optave/codegraph-darwin-arm64": "2.
|
|
65
|
-
"@optave/codegraph-darwin-x64": "2.
|
|
66
|
-
"@optave/codegraph-linux-x64-gnu": "2.
|
|
67
|
-
"@optave/codegraph-win32-x64-msvc": "2.
|
|
64
|
+
"@optave/codegraph-darwin-arm64": "2.1.1-dev.3c12b64",
|
|
65
|
+
"@optave/codegraph-darwin-x64": "2.1.1-dev.3c12b64",
|
|
66
|
+
"@optave/codegraph-linux-x64-gnu": "2.1.1-dev.3c12b64",
|
|
67
|
+
"@optave/codegraph-win32-x64-msvc": "2.1.1-dev.3c12b64"
|
|
68
68
|
},
|
|
69
69
|
"devDependencies": {
|
|
70
70
|
"@biomejs/biome": "^2.4.4",
|
|
71
71
|
"@commitlint/cli": "^19.8",
|
|
72
72
|
"@commitlint/config-conventional": "^19.8",
|
|
73
|
-
"commit-and-tag-version": "^12.5",
|
|
74
|
-
"husky": "^9.1",
|
|
75
73
|
"@tree-sitter-grammars/tree-sitter-hcl": "^1.2.0",
|
|
76
74
|
"@vitest/coverage-v8": "^4.0.18",
|
|
75
|
+
"commit-and-tag-version": "^12.5",
|
|
76
|
+
"husky": "^9.1",
|
|
77
77
|
"tree-sitter-c-sharp": "^0.23.1",
|
|
78
78
|
"tree-sitter-cli": "^0.26.5",
|
|
79
79
|
"tree-sitter-go": "^0.23.4",
|
package/src/builder.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { createHash } from 'node:crypto';
|
|
2
2
|
import fs from 'node:fs';
|
|
3
|
+
import os from 'node:os';
|
|
3
4
|
import path from 'node:path';
|
|
4
5
|
import { loadConfig } from './config.js';
|
|
5
6
|
import { EXTENSIONS, IGNORE_DIRS, normalizePath } from './constants.js';
|
|
@@ -581,10 +582,18 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
581
582
|
console.log(`Stored in ${dbPath}`);
|
|
582
583
|
db.close();
|
|
583
584
|
|
|
584
|
-
|
|
585
|
-
const
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
585
|
+
if (!opts.skipRegistry) {
|
|
586
|
+
const tmpDir = path.resolve(os.tmpdir());
|
|
587
|
+
const resolvedRoot = path.resolve(rootDir);
|
|
588
|
+
if (resolvedRoot.startsWith(tmpDir)) {
|
|
589
|
+
debug(`Skipping auto-registration for temp directory: ${resolvedRoot}`);
|
|
590
|
+
} else {
|
|
591
|
+
try {
|
|
592
|
+
const { registerRepo } = await import('./registry.js');
|
|
593
|
+
registerRepo(rootDir);
|
|
594
|
+
} catch (err) {
|
|
595
|
+
debug(`Auto-registration failed: ${err.message}`);
|
|
596
|
+
}
|
|
597
|
+
}
|
|
589
598
|
}
|
|
590
599
|
}
|
package/src/cli.js
CHANGED
|
@@ -18,6 +18,7 @@ import {
|
|
|
18
18
|
impactAnalysis,
|
|
19
19
|
moduleMap,
|
|
20
20
|
queryName,
|
|
21
|
+
stats,
|
|
21
22
|
} from './queries.js';
|
|
22
23
|
import {
|
|
23
24
|
listRepos,
|
|
@@ -28,11 +29,14 @@ import {
|
|
|
28
29
|
} from './registry.js';
|
|
29
30
|
import { watchProject } from './watcher.js';
|
|
30
31
|
|
|
32
|
+
const __cliDir = path.dirname(new URL(import.meta.url).pathname.replace(/^\/([A-Z]:)/i, '$1'));
|
|
33
|
+
const pkg = JSON.parse(fs.readFileSync(path.join(__cliDir, '..', 'package.json'), 'utf-8'));
|
|
34
|
+
|
|
31
35
|
const program = new Command();
|
|
32
36
|
program
|
|
33
37
|
.name('codegraph')
|
|
34
38
|
.description('Local code dependency graph tool')
|
|
35
|
-
.version(
|
|
39
|
+
.version(pkg.version)
|
|
36
40
|
.option('-v, --verbose', 'Enable verbose/debug output')
|
|
37
41
|
.option('--engine <engine>', 'Parser engine: native, wasm, or auto (default: auto)', 'auto')
|
|
38
42
|
.hook('preAction', (thisCommand) => {
|
|
@@ -78,6 +82,15 @@ program
|
|
|
78
82
|
moduleMap(opts.db, parseInt(opts.limit, 10), { json: opts.json });
|
|
79
83
|
});
|
|
80
84
|
|
|
85
|
+
program
|
|
86
|
+
.command('stats')
|
|
87
|
+
.description('Show graph health overview: nodes, edges, languages, cycles, hotspots, embeddings')
|
|
88
|
+
.option('-d, --db <path>', 'Path to graph.db')
|
|
89
|
+
.option('-j, --json', 'Output as JSON')
|
|
90
|
+
.action((opts) => {
|
|
91
|
+
stats(opts.db, { json: opts.json });
|
|
92
|
+
});
|
|
93
|
+
|
|
81
94
|
program
|
|
82
95
|
.command('deps <file>')
|
|
83
96
|
.description('Show what this file imports and what imports it')
|
|
@@ -214,6 +227,7 @@ registry
|
|
|
214
227
|
.description('List all registered repositories')
|
|
215
228
|
.option('-j, --json', 'Output as JSON')
|
|
216
229
|
.action((opts) => {
|
|
230
|
+
pruneRegistry();
|
|
217
231
|
const repos = listRepos();
|
|
218
232
|
if (opts.json) {
|
|
219
233
|
console.log(JSON.stringify(repos, null, 2));
|
|
@@ -257,14 +271,16 @@ registry
|
|
|
257
271
|
|
|
258
272
|
registry
|
|
259
273
|
.command('prune')
|
|
260
|
-
.description('Remove registry entries
|
|
261
|
-
.
|
|
262
|
-
|
|
274
|
+
.description('Remove stale registry entries (missing directories or idle beyond TTL)')
|
|
275
|
+
.option('--ttl <days>', 'Days of inactivity before pruning (default: 30)', '30')
|
|
276
|
+
.action((opts) => {
|
|
277
|
+
const pruned = pruneRegistry(undefined, parseInt(opts.ttl, 10));
|
|
263
278
|
if (pruned.length === 0) {
|
|
264
279
|
console.log('No stale entries found.');
|
|
265
280
|
} else {
|
|
266
281
|
for (const entry of pruned) {
|
|
267
|
-
|
|
282
|
+
const tag = entry.reason === 'expired' ? 'expired' : 'missing';
|
|
283
|
+
console.log(`Pruned "${entry.name}" (${entry.path}) [${tag}]`);
|
|
268
284
|
}
|
|
269
285
|
console.log(`\nRemoved ${pruned.length} stale ${pruned.length === 1 ? 'entry' : 'entries'}.`);
|
|
270
286
|
}
|
|
@@ -278,7 +294,7 @@ program
|
|
|
278
294
|
.action(() => {
|
|
279
295
|
console.log('\nAvailable embedding models:\n');
|
|
280
296
|
for (const [key, config] of Object.entries(MODELS)) {
|
|
281
|
-
const def = key === '
|
|
297
|
+
const def = key === 'jina-code' ? ' (default)' : '';
|
|
282
298
|
console.log(` ${key.padEnd(12)} ${String(config.dim).padStart(4)}d ${config.desc}${def}`);
|
|
283
299
|
}
|
|
284
300
|
console.log('\nUsage: codegraph embed --model <name>');
|
|
@@ -292,8 +308,8 @@ program
|
|
|
292
308
|
)
|
|
293
309
|
.option(
|
|
294
310
|
'-m, --model <name>',
|
|
295
|
-
'Embedding model: minilm
|
|
296
|
-
'
|
|
311
|
+
'Embedding model: minilm, jina-small, jina-base, jina-code (default), nomic, nomic-v1.5, bge-large. Run `codegraph models` for details',
|
|
312
|
+
'jina-code',
|
|
297
313
|
)
|
|
298
314
|
.action(async (dir, opts) => {
|
|
299
315
|
const root = path.resolve(dir || '.');
|
package/src/config.js
CHANGED
|
@@ -19,7 +19,7 @@ export const DEFAULTS = {
|
|
|
19
19
|
defaultDepth: 3,
|
|
20
20
|
defaultLimit: 20,
|
|
21
21
|
},
|
|
22
|
-
embeddings: { model: '
|
|
22
|
+
embeddings: { model: 'jina-code', llmProvider: null },
|
|
23
23
|
llm: { provider: null, model: null, baseUrl: null, apiKey: null, apiKeyCommand: null },
|
|
24
24
|
search: { defaultMinScore: 0.2, rrfK: 60, topK: 15 },
|
|
25
25
|
ci: { failOnCycles: false, impactThreshold: null },
|
package/src/embedder.js
CHANGED
|
@@ -55,7 +55,7 @@ export const MODELS = {
|
|
|
55
55
|
},
|
|
56
56
|
};
|
|
57
57
|
|
|
58
|
-
export const DEFAULT_MODEL = '
|
|
58
|
+
export const DEFAULT_MODEL = 'jina-code';
|
|
59
59
|
const BATCH_SIZE_MAP = {
|
|
60
60
|
minilm: 32,
|
|
61
61
|
'jina-small': 16,
|
|
@@ -173,10 +173,10 @@ function initEmbeddingsSchema(db) {
|
|
|
173
173
|
/**
|
|
174
174
|
* Build embeddings for all functions/methods/classes in the graph.
|
|
175
175
|
*/
|
|
176
|
-
export async function buildEmbeddings(rootDir, modelKey) {
|
|
176
|
+
export async function buildEmbeddings(rootDir, modelKey, customDbPath) {
|
|
177
177
|
// path already imported at top
|
|
178
178
|
// fs already imported at top
|
|
179
|
-
const dbPath = findDbPath(null);
|
|
179
|
+
const dbPath = customDbPath || findDbPath(null);
|
|
180
180
|
|
|
181
181
|
const db = new Database(dbPath);
|
|
182
182
|
initEmbeddingsSchema(db);
|
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
import { findChild, nodeEndLine } from './helpers.js';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Extract symbols from C# files.
|
|
5
|
+
*/
|
|
6
|
+
/**
 * Walk a parsed C# syntax tree and collect the symbols found in it.
 *
 * Every node of the tree is visited once, depth first. Type declarations,
 * members, `using` directives, invocations and object creations are recorded
 * as they are encountered, so each output array is in document order.
 *
 * Interface methods are recorded by the same member handling as class methods.
 * A dedicated pass over the interface body would record each of them a second
 * time, because the generic walk already reaches those nodes and qualifies
 * them with the interface name.
 *
 * @param {object} tree - Parsed tree; only `tree.rootNode` is read.
 * @param {string} [_filePath] - Unused; kept for signature compatibility.
 * @returns {{definitions: object[], calls: object[], imports: object[], classes: object[], exports: object[]}}
 *   `definitions` hold `{ name, kind, line, endLine }`, `calls` hold
 *   `{ name, line }`, `imports` hold `{ source, names, line, csharpUsing }`,
 *   `classes` is filled by `extractCSharpBaseTypes`, and `exports` is returned
 *   empty by this function.
 */
export function extractCSharpSymbols(tree, _filePath) {
  const definitions = [];
  const calls = [];
  const imports = [];
  const classes = [];
  const exports = [];

  // Declarations whose name qualifies the members nested inside them.
  const containerTypes = new Set([
    'class_declaration',
    'struct_declaration',
    'interface_declaration',
    'enum_declaration',
    'record_declaration',
  ]);

  // Type declaration node type -> definition kind.
  const typeKinds = new Map([
    ['class_declaration', 'class'],
    ['struct_declaration', 'struct'],
    ['record_declaration', 'record'],
    ['interface_declaration', 'interface'],
    ['enum_declaration', 'enum'],
  ]);

  // Only these type declarations have their base list inspected.
  const typesWithBases = new Set(['class_declaration', 'struct_declaration', 'record_declaration']);

  // Members are all recorded with kind 'method', qualified by the enclosing type.
  const memberTypes = new Set([
    'method_declaration',
    'constructor_declaration',
    'property_declaration',
  ]);

  const lineOf = (node) => node.startPosition.row + 1;

  // Name of the nearest enclosing type declaration, or null when there is none
  // (or when that declaration has no name node).
  function enclosingTypeName(node) {
    for (let current = node.parent; current; current = current.parent) {
      if (containerTypes.has(current.type)) {
        const nameNode = current.childForFieldName('name');
        return nameNode ? nameNode.text : null;
      }
    }
    return null;
  }

  // Record a class/struct/record/interface/enum definition.
  function recordType(node, kind) {
    const nameNode = node.childForFieldName('name');
    if (!nameNode) return;
    definitions.push({
      name: nameNode.text,
      kind,
      line: lineOf(node),
      endLine: nodeEndLine(node),
    });
    if (typesWithBases.has(node.type)) {
      extractCSharpBaseTypes(node, nameNode.text, classes);
    }
  }

  // Record a method, constructor or property as `Type.Member`.
  function recordMember(node) {
    const nameNode = node.childForFieldName('name');
    if (!nameNode) return;
    const owner = enclosingTypeName(node);
    definitions.push({
      name: owner ? `${owner}.${nameNode.text}` : nameNode.text,
      kind: 'method',
      line: lineOf(node),
      endLine: nodeEndLine(node),
    });
  }

  // Record `using System.Collections.Generic;` as an import whose single
  // imported name is the last dotted segment.
  function recordUsing(node) {
    const nameNode =
      node.childForFieldName('name') ||
      findChild(node, 'qualified_name') ||
      findChild(node, 'identifier');
    if (!nameNode) return;
    const source = nameNode.text;
    imports.push({
      source,
      names: [source.split('.').pop()],
      line: lineOf(node),
      csharpUsing: true,
    });
  }

  // Record the callee name of an invocation; receivers are dropped.
  function recordInvocation(node) {
    const callee = node.childForFieldName('function') || node.child(0);
    if (!callee) return;
    let nameNode = null;
    if (callee.type === 'identifier') {
      nameNode = callee;
    } else if (callee.type === 'member_access_expression') {
      nameNode = callee.childForFieldName('name');
    } else if (callee.type === 'generic_name' || callee.type === 'member_binding_expression') {
      nameNode = callee.childForFieldName('name') || callee.child(0);
    }
    if (nameNode) calls.push({ name: nameNode.text, line: lineOf(node) });
  }

  // Record `new T(...)` as a call to `T`, dropping generic arguments.
  function recordCreation(node) {
    const typeNode = node.childForFieldName('type');
    if (!typeNode) return;
    const typeName =
      typeNode.type === 'generic_name'
        ? typeNode.childForFieldName('name')?.text || typeNode.child(0)?.text
        : typeNode.text;
    if (typeName) calls.push({ name: typeName, line: lineOf(node) });
  }

  function visit(node) {
    const type = node.type;
    if (typeKinds.has(type)) {
      recordType(node, typeKinds.get(type));
    } else if (memberTypes.has(type)) {
      recordMember(node);
    } else if (type === 'using_directive') {
      recordUsing(node);
    } else if (type === 'invocation_expression') {
      recordInvocation(node);
    } else if (type === 'object_creation_expression') {
      recordCreation(node);
    }

    // Always descend: nested types, members and calls live below these nodes.
    for (let i = 0; i < node.childCount; i++) visit(node.child(i));
  }

  visit(tree.rootNode);
  return { definitions, calls, imports, classes, exports };
}
|
|
217
|
+
|
|
218
|
+
/**
 * Append one `{ name, extends, line }` entry to `classes` for every base type
 * listed on a type declaration.
 *
 * The node's `bases` field is scanned child by child. Plain and qualified
 * names are recorded as written; generic names are recorded without their type
 * arguments. A child that is itself a `base_list` is scanned one level deeper
 * with the same rules. All other children are ignored.
 *
 * @param {object} node - Type declaration node; `line` is its 1-based start row.
 * @param {string} className - Name of the declaring type, stored as `name`.
 * @param {object[]} classes - Output array, mutated in place.
 * @returns {void}
 */
function extractCSharpBaseTypes(node, className, classes) {
  const bases = node.childForFieldName('bases');
  if (!bases) return;

  const addBase = (baseName) => {
    classes.push({ name: className, extends: baseName, line: node.startPosition.row + 1 });
  };

  // Record a single base-type node; anything unrecognised is skipped.
  const recordBase = (entry) => {
    if (!entry) return;
    switch (entry.type) {
      case 'identifier':
      case 'qualified_name':
        addBase(entry.text);
        break;
      case 'generic_name': {
        // Prefer the `name` field, fall back to the first child.
        const bareName = entry.childForFieldName('name') || entry.child(0);
        if (bareName) addBase(bareName.text);
        break;
      }
      default:
        break;
    }
  };

  for (let i = 0; i < bases.childCount; i++) {
    const entry = bases.child(i);
    if (entry && entry.type === 'base_list') {
      for (let j = 0; j < entry.childCount; j++) recordBase(entry.child(j));
    } else {
      recordBase(entry);
    }
  }
}
|