@optave/codegraph 2.1.0 → 2.1.1-dev.00f091c
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -20
- package/package.json +5 -5
- package/src/builder.js +238 -33
- package/src/cli.js +20 -0
- package/src/db.js +4 -0
- package/src/extractors/csharp.js +6 -1
- package/src/extractors/go.js +6 -1
- package/src/extractors/java.js +4 -1
- package/src/extractors/javascript.js +23 -5
- package/src/extractors/php.js +8 -2
- package/src/extractors/python.js +8 -1
- package/src/extractors/ruby.js +4 -1
- package/src/extractors/rust.js +12 -2
- package/src/index.js +1 -0
- package/src/journal.js +109 -0
- package/src/mcp.js +45 -3
- package/src/parser.js +1 -0
- package/src/queries.js +396 -0
- package/src/watcher.js +25 -0
package/README.md
CHANGED
|
@@ -50,14 +50,13 @@ Most tools in this space can't do that:
|
|
|
50
50
|
| **Heavy infrastructure that's slow to restart** | code-graph-rag (Memgraph), axon (KuzuDB), badger-graph (Dgraph) | External databases add latency to every write. Bulk-inserting a full graph into Memgraph is not a sub-second operation |
|
|
51
51
|
| **No persistence between runs** | pyan, cflow | Re-parse from scratch every time. No database, no delta, no incremental anything |
|
|
52
52
|
|
|
53
|
-
**Codegraph solves this with incremental
|
|
53
|
+
**Codegraph solves this with three-tier incremental change detection:**
|
|
54
54
|
|
|
55
|
-
1.
|
|
56
|
-
2.
|
|
57
|
-
3.
|
|
58
|
-
4. Everything else is untouched
|
|
55
|
+
1. **Tier 0 — Journal (O(changed)):** If `codegraph watch` was running, a change journal records exactly which files were touched. The next build reads the journal and processes only those files — zero filesystem scanning
|
|
56
|
+
2. **Tier 1 — mtime+size (O(n) stats, O(changed) reads):** No journal? Codegraph stats every file and compares mtime+size against the stored values. Matching files are skipped without reading a single byte — 10–100× cheaper than hashing
|
|
57
|
+
3. **Tier 2 — Hash (O(changed) reads):** Files that fail the mtime/size check are read and MD5-hashed. Only files whose hash actually changed get re-parsed and re-inserted
|
|
59
58
|
|
|
60
|
-
**Result:** change one file in a 3,000-file project → rebuild completes in **under a second**. Put it in a commit hook, a file watcher, or let your AI agent trigger it. The graph is always current.
|
|
59
|
+
**Result:** change one file in a 3,000-file project → rebuild completes in **under a second**. With watch mode active, rebuilds are near-instant — the journal makes the build proportional to the number of changed files, not the size of the codebase. Put it in a commit hook, a file watcher, or let your AI agent trigger it. The graph is always current.
|
|
61
60
|
|
|
62
61
|
And because the core pipeline is pure local computation (tree-sitter + SQLite), there are no API calls, no network latency, and no cost. LLM-powered features (semantic search, richer embeddings) are a separate optional layer — they enhance the graph but never block it from being current.
|
|
63
62
|
|
|
@@ -80,7 +79,7 @@ Most code graph tools make you choose: **fast local analysis with no AI, or powe
|
|
|
80
79
|
| Git diff impact | **Yes** | — | — | — | — | **Yes** | — | **Yes** |
|
|
81
80
|
| Watch mode | **Yes** | — | **Yes** | — | — | — | — | — |
|
|
82
81
|
| Cycle detection | **Yes** | — | **Yes** | — | — | — | — | **Yes** |
|
|
83
|
-
| Incremental rebuilds | **
|
|
82
|
+
| Incremental rebuilds | **O(changed)** | — | O(n) Merkle | — | — | — | — | — |
|
|
84
83
|
| Zero config | **Yes** | — | **Yes** | — | — | — | — | — |
|
|
85
84
|
| Embeddable JS library (`npm install`) | **Yes** | — | — | — | — | — | — | — |
|
|
86
85
|
| LLM-optional (works without API keys) | **Yes** | **Yes** | **Yes** | — | **Yes** | **Yes** | **Yes** | **Yes** |
|
|
@@ -91,7 +90,7 @@ Most code graph tools make you choose: **fast local analysis with no AI, or powe
|
|
|
91
90
|
|
|
92
91
|
| | Differentiator | In practice |
|
|
93
92
|
|---|---|---|
|
|
94
|
-
| **⚡** | **Always-fresh graph** |
|
|
93
|
+
| **⚡** | **Always-fresh graph** | Three-tier change detection: journal (O(changed)) → mtime+size (O(n) stats) → hash (O(changed) reads). Sub-second rebuilds even on large codebases. Competitors re-index everything from scratch; Merkle-tree approaches still require O(n) filesystem scanning |
|
|
95
94
|
| **🔓** | **Zero-cost core, LLM-enhanced when you want** | Full graph analysis with no API keys, no accounts, no cost. Optionally bring your own LLM provider for richer embeddings and AI-powered search — your code only goes to the provider you already chose |
|
|
96
95
|
| **🔬** | **Function-level, not just files** | Traces `handleAuth()` → `validateToken()` → `decryptJWT()` and shows 14 callers across 9 files break if `decryptJWT` changes |
|
|
97
96
|
| **🤖** | **Built for AI agents** | 13-tool [MCP server](https://modelcontextprotocol.io/) — AI assistants query your graph directly. Single-repo by default, your code doesn't leak to other projects |
|
|
@@ -101,12 +100,12 @@ Most code graph tools make you choose: **fast local analysis with no AI, or powe
|
|
|
101
100
|
|
|
102
101
|
### How other tools compare
|
|
103
102
|
|
|
104
|
-
The key question is: **can you rebuild your graph on every commit in a large codebase without it costing money or taking minutes?** Most tools in this space either re-index everything from scratch (slow), require cloud API calls for core features (costly), or both. Codegraph's incremental
|
|
103
|
+
The key question is: **can you rebuild your graph on every commit in a large codebase without it costing money or taking minutes?** Most tools in this space either re-index everything from scratch (slow), require cloud API calls for core features (costly), or both. Codegraph's three-tier incremental detection achieves true O(changed) in the best case — when the watcher is running, rebuilds are proportional only to the number of files that changed, not the size of the codebase. The core pipeline needs no API keys at all. LLM-powered features are opt-in, using whichever provider you already work with.
|
|
105
104
|
|
|
106
105
|
| Tool | What it does well | The tradeoff |
|
|
107
106
|
|---|---|---|
|
|
108
107
|
| [joern](https://github.com/joernio/joern) | Full CPG (AST + CFG + PDG) for vulnerability discovery, Scala query DSL, 14 languages, daily releases | No incremental builds — full re-parse on every change. Requires JDK 21, no built-in MCP, no watch mode |
|
|
109
|
-
| [narsil-mcp](https://github.com/postrv/narsil-mcp) | 90 MCP tools, 32 languages, taint analysis, SBOM, dead code, neural search, Merkle-tree incremental indexing, single ~30MB binary | Primarily MCP-only — no standalone CLI query interface. Neural search requires API key or ONNX source build |
|
|
108
|
+
| [narsil-mcp](https://github.com/postrv/narsil-mcp) | 90 MCP tools, 32 languages, taint analysis, SBOM, dead code, neural search, Merkle-tree incremental indexing, single ~30MB binary | Merkle trees still require O(n) filesystem scanning on every rebuild. Primarily MCP-only — no standalone CLI query interface. Neural search requires API key or ONNX source build |
|
|
110
109
|
| [code-graph-rag](https://github.com/vitali87/code-graph-rag) | Graph RAG with Memgraph, multi-provider AI, semantic search, code editing via AST | No incremental rebuilds — full re-index + re-embed through cloud APIs on every change. Requires Docker |
|
|
111
110
|
| [cpg](https://github.com/Fraunhofer-AISEC/cpg) | Formal Code Property Graph (AST + CFG + PDG + DFG), ~10 languages, MCP module, LLVM IR support, academic specifications | No incremental builds. Requires JVM + Gradle, no zero config, no watch mode |
|
|
112
111
|
| [GitNexus](https://github.com/abhigyanpatwari/GitNexus) | Knowledge graph with precomputed structural intelligence, 7 MCP tools, hybrid search (BM25 + semantic + RRF), clustering, process tracing | Full 6-phase pipeline re-run on changes. KuzuDB graph DB, browser mode limited to ~5,000 files. **PolyForm NC — no commercial use** |
|
|
@@ -137,10 +136,11 @@ Here is a cold, analytical breakdown to help you decide which tool fits your wor
|
|
|
137
136
|
| **Language Support** | 11 languages | 32 languages |
|
|
138
137
|
| **Primary Interface** | CLI-first with MCP integration | MCP-first (CLI is secondary) |
|
|
139
138
|
| **Supply Chain Risk** | Low (minimal dependency tree) | Higher (requires massive dependency graph for embedded ML/scanners) |
|
|
140
|
-
| **Graph Updates** |
|
|
139
|
+
| **Graph Updates** | **Three-tier O(changed)** — journal → mtime+size → hash. With watch mode, only changed files are touched | Merkle trees — O(n) filesystem scan on every rebuild to recompute tree hashes |
|
|
141
140
|
|
|
142
141
|
#### Choose Codegraph if:
|
|
143
142
|
|
|
143
|
+
* **You need the fastest possible incremental rebuilds.** Codegraph’s three-tier change detection (journal → mtime+size → hash) achieves true O(changed) when the watcher is running — only touched files are processed. Narsil’s Merkle trees still require O(n) filesystem scanning to recompute hashes on every rebuild, even when nothing changed. On a 3,000-file project, this is the difference between near-instant and noticeable.
|
|
144
144
|
* **You want to optimize AI agent reasoning.** Large Language Models degrade in performance and hallucinate when overwhelmed with choices. Codegraph’s tight 13-tool surface area ensures agents quickly understand their capabilities without wasting context window tokens.
|
|
145
145
|
* **You are concerned about supply chain attacks.** To support 90 tools, SBOMs, and neural embeddings, a tool must pull in a massive dependency tree. Codegraph keeps its dependencies minimal, dramatically reducing the risk of malicious code sneaking onto your machine.
|
|
146
146
|
* **You want deterministic blast-radius checks.** Features like `diff-impact` are built specifically to tell you exactly how a changed function cascades through your codebase before you merge a PR.
|
|
@@ -265,10 +265,10 @@ A single trailing semicolon is ignored (falls back to single-query mode). The `-
|
|
|
265
265
|
|
|
266
266
|
| Flag | Model | Dimensions | Size | License | Notes |
|
|
267
267
|
|---|---|---|---|---|---|
|
|
268
|
-
| `minilm`
|
|
268
|
+
| `minilm` | all-MiniLM-L6-v2 | 384 | ~23 MB | Apache-2.0 | Fastest, good for quick iteration |
|
|
269
269
|
| `jina-small` | jina-embeddings-v2-small-en | 512 | ~33 MB | Apache-2.0 | Better quality, still small |
|
|
270
270
|
| `jina-base` | jina-embeddings-v2-base-en | 768 | ~137 MB | Apache-2.0 | High quality, 8192 token context |
|
|
271
|
-
| `jina-code` | jina-embeddings-v2-base-code | 768 | ~137 MB | Apache-2.0 | **Best for code search**, trained on code+text |
|
|
271
|
+
| `jina-code` (default) | jina-embeddings-v2-base-code | 768 | ~137 MB | Apache-2.0 | **Best for code search**, trained on code+text |
|
|
272
272
|
| `nomic` | nomic-embed-text-v1 | 768 | ~137 MB | Apache-2.0 | Good quality, 8192 context |
|
|
273
273
|
| `nomic-v1.5` | nomic-embed-text-v1.5 | 768 | ~137 MB | Apache-2.0 | Improved nomic, Matryoshka dimensions |
|
|
274
274
|
| `bge-large` | bge-large-en-v1.5 | 1024 | ~335 MB | MIT | Best general retrieval, top MTEB scores |
|
|
@@ -376,15 +376,16 @@ Dynamic patterns like `fn.call()`, `fn.apply()`, `fn.bind()`, and `obj["method"]
|
|
|
376
376
|
|
|
377
377
|
## 📊 Performance
|
|
378
378
|
|
|
379
|
-
|
|
379
|
+
Self-measured on every release via CI ([full history](generated/BENCHMARKS.md)):
|
|
380
380
|
|
|
381
|
-
| Metric |
|
|
381
|
+
| Metric | Latest |
|
|
382
382
|
|---|---|
|
|
383
|
-
| Build
|
|
384
|
-
|
|
|
385
|
-
|
|
|
386
|
-
|
|
|
387
|
-
|
|
383
|
+
| Build speed (native) | **2.5 ms/file** |
|
|
384
|
+
| Build speed (WASM) | **5 ms/file** |
|
|
385
|
+
| Query time | **1 ms** |
|
|
386
|
+
| ~50,000 files (est.) | **~125.0s build** |
|
|
387
|
+
|
|
388
|
+
Metrics are normalized per file for cross-version comparability. Times above are for a full initial build — incremental rebuilds only re-parse changed files.
|
|
388
389
|
|
|
389
390
|
## 🤖 AI Agent Integration
|
|
390
391
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@optave/codegraph",
|
|
3
|
-
"version": "2.1.
|
|
3
|
+
"version": "2.1.1-dev.00f091c",
|
|
4
4
|
"description": "Local code graph CLI — parse codebases with tree-sitter, build dependency graphs, query them",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "src/index.js",
|
|
@@ -61,10 +61,10 @@
|
|
|
61
61
|
"optionalDependencies": {
|
|
62
62
|
"@huggingface/transformers": "^3.8.1",
|
|
63
63
|
"@modelcontextprotocol/sdk": "^1.0.0",
|
|
64
|
-
"@optave/codegraph-darwin-arm64": "2.1.
|
|
65
|
-
"@optave/codegraph-darwin-x64": "2.1.
|
|
66
|
-
"@optave/codegraph-linux-x64-gnu": "2.1.
|
|
67
|
-
"@optave/codegraph-win32-x64-msvc": "2.1.
|
|
64
|
+
"@optave/codegraph-darwin-arm64": "2.1.1-dev.00f091c",
|
|
65
|
+
"@optave/codegraph-darwin-x64": "2.1.1-dev.00f091c",
|
|
66
|
+
"@optave/codegraph-linux-x64-gnu": "2.1.1-dev.00f091c",
|
|
67
|
+
"@optave/codegraph-win32-x64-msvc": "2.1.1-dev.00f091c"
|
|
68
68
|
},
|
|
69
69
|
"devDependencies": {
|
|
70
70
|
"@biomejs/biome": "^2.4.4",
|
package/src/builder.js
CHANGED
|
@@ -5,12 +5,44 @@ import path from 'node:path';
|
|
|
5
5
|
import { loadConfig } from './config.js';
|
|
6
6
|
import { EXTENSIONS, IGNORE_DIRS, normalizePath } from './constants.js';
|
|
7
7
|
import { initSchema, openDb } from './db.js';
|
|
8
|
+
import { readJournal, writeJournalHeader } from './journal.js';
|
|
8
9
|
import { debug, warn } from './logger.js';
|
|
9
10
|
import { getActiveEngine, parseFilesAuto } from './parser.js';
|
|
10
11
|
import { computeConfidence, resolveImportPath, resolveImportsBatch } from './resolve.js';
|
|
11
12
|
|
|
12
13
|
export { resolveImportPath } from './resolve.js';
|
|
13
14
|
|
|
15
|
+
// Receiver identifiers treated as language/runtime built-ins. The call-edge
// pass in buildGraph skips any call whose `receiver` is one of these names,
// so e.g. `console.log(...)` never produces a "calls" edge.
// Insertion order is kept stable; entries are grouped by theme for readability.
const BUILTIN_RECEIVERS = new Set([
  // Core namespaces
  'console', 'Math', 'JSON',
  // Primitive wrappers and fundamental constructors
  'Object', 'Array', 'String', 'Number', 'Boolean',
  'Date', 'RegExp',
  // Keyed collections
  'Map', 'Set', 'WeakMap', 'WeakSet',
  'Promise', 'Symbol',
  // Error constructors
  'Error', 'TypeError', 'RangeError',
  // Metaprogramming and internationalization
  'Proxy', 'Reflect', 'Intl',
  // Host / environment globals
  'globalThis', 'window', 'document', 'process', 'Buffer', 'require',
]);
|
|
45
|
+
|
|
14
46
|
export function collectFiles(dir, files = [], config = {}, directories = null) {
|
|
15
47
|
const trackDirs = directories !== null;
|
|
16
48
|
let entries;
|
|
@@ -81,8 +113,24 @@ function fileHash(content) {
|
|
|
81
113
|
return createHash('md5').update(content).digest('hex');
|
|
82
114
|
}
|
|
83
115
|
|
|
116
|
+
/**
 * Look up the modification time and byte size of a file.
 *
 * @param {string} filePath - Path handed to `fs.statSync`.
 * @returns {{ mtimeMs: number, size: number } | null} The two stat fields, or
 *   `null` when the stat call throws for any reason (callers treat `null` as
 *   "no usable metadata" and skip or fall back accordingly).
 */
function fileStat(filePath) {
  let info;
  try {
    info = fs.statSync(filePath);
  } catch {
    // Best-effort by design: any stat failure is reported as null, not thrown.
    return null;
  }
  const { mtimeMs, size } = info;
  return { mtimeMs, size };
}
|
|
127
|
+
|
|
84
128
|
/**
|
|
85
129
|
* Determine which files have changed since last build.
|
|
130
|
+
* Three-tier cascade:
|
|
131
|
+
* Tier 0 — Journal: O(changed) when watcher was running
|
|
132
|
+
* Tier 1 — mtime+size: O(n) stats, O(changed) reads
|
|
133
|
+
* Tier 2 — Hash comparison: O(changed) reads (fallback from Tier 1)
|
|
86
134
|
*/
|
|
87
135
|
function getChangedFiles(db, allFiles, rootDir) {
|
|
88
136
|
// Check if file_hashes table exists
|
|
@@ -95,7 +143,6 @@ function getChangedFiles(db, allFiles, rootDir) {
|
|
|
95
143
|
}
|
|
96
144
|
|
|
97
145
|
if (!hasTable) {
|
|
98
|
-
// No hash table = first build, everything is new
|
|
99
146
|
return {
|
|
100
147
|
changed: allFiles.map((f) => ({ file: f })),
|
|
101
148
|
removed: [],
|
|
@@ -105,36 +152,140 @@ function getChangedFiles(db, allFiles, rootDir) {
|
|
|
105
152
|
|
|
106
153
|
const existing = new Map(
|
|
107
154
|
db
|
|
108
|
-
.prepare('SELECT file, hash FROM file_hashes')
|
|
155
|
+
.prepare('SELECT file, hash, mtime, size FROM file_hashes')
|
|
109
156
|
.all()
|
|
110
|
-
.map((r) => [r.file, r
|
|
157
|
+
.map((r) => [r.file, r]),
|
|
111
158
|
);
|
|
112
159
|
|
|
113
|
-
|
|
160
|
+
// Build set of current files for removal detection
|
|
114
161
|
const currentFiles = new Set();
|
|
162
|
+
for (const file of allFiles) {
|
|
163
|
+
currentFiles.add(normalizePath(path.relative(rootDir, file)));
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
const removed = [];
|
|
167
|
+
for (const existingFile of existing.keys()) {
|
|
168
|
+
if (!currentFiles.has(existingFile)) {
|
|
169
|
+
removed.push(existingFile);
|
|
170
|
+
}
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
// ── Tier 0: Journal ──────────────────────────────────────────────
|
|
174
|
+
const journal = readJournal(rootDir);
|
|
175
|
+
if (journal.valid) {
|
|
176
|
+
// Validate journal timestamp against DB — journal should be from after the last build
|
|
177
|
+
const dbMtimes = db.prepare('SELECT MAX(mtime) as latest FROM file_hashes').get();
|
|
178
|
+
const latestDbMtime = dbMtimes?.latest || 0;
|
|
179
|
+
|
|
180
|
+
// Empty journal = no watcher was running, fall to Tier 1 for safety
|
|
181
|
+
const hasJournalEntries = journal.changed.length > 0 || journal.removed.length > 0;
|
|
182
|
+
|
|
183
|
+
if (hasJournalEntries && journal.timestamp >= latestDbMtime) {
|
|
184
|
+
debug(
|
|
185
|
+
`Tier 0: journal valid, ${journal.changed.length} changed, ${journal.removed.length} removed`,
|
|
186
|
+
);
|
|
187
|
+
const changed = [];
|
|
188
|
+
|
|
189
|
+
for (const relPath of journal.changed) {
|
|
190
|
+
const absPath = path.join(rootDir, relPath);
|
|
191
|
+
const stat = fileStat(absPath);
|
|
192
|
+
if (!stat) continue;
|
|
193
|
+
|
|
194
|
+
let content;
|
|
195
|
+
try {
|
|
196
|
+
content = fs.readFileSync(absPath, 'utf-8');
|
|
197
|
+
} catch {
|
|
198
|
+
continue;
|
|
199
|
+
}
|
|
200
|
+
const hash = fileHash(content);
|
|
201
|
+
const record = existing.get(relPath);
|
|
202
|
+
if (!record || record.hash !== hash) {
|
|
203
|
+
changed.push({ file: absPath, content, hash, relPath, stat });
|
|
204
|
+
}
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
// Merge journal removals with filesystem removals (dedup)
|
|
208
|
+
const removedSet = new Set(removed);
|
|
209
|
+
for (const relPath of journal.removed) {
|
|
210
|
+
if (existing.has(relPath)) removedSet.add(relPath);
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
return { changed, removed: [...removedSet], isFullBuild: false };
|
|
214
|
+
}
|
|
215
|
+
debug(
|
|
216
|
+
`Tier 0: skipped (${hasJournalEntries ? 'timestamp stale' : 'no entries'}), falling to Tier 1`,
|
|
217
|
+
);
|
|
218
|
+
}
|
|
219
|
+
|
|
220
|
+
// ── Tier 1: mtime+size fast-path ─────────────────────────────────
|
|
221
|
+
const needsHash = []; // Files that failed mtime+size check
|
|
222
|
+
const skipped = []; // Files that passed mtime+size check
|
|
115
223
|
|
|
116
224
|
for (const file of allFiles) {
|
|
117
225
|
const relPath = normalizePath(path.relative(rootDir, file));
|
|
118
|
-
|
|
226
|
+
const record = existing.get(relPath);
|
|
227
|
+
|
|
228
|
+
if (!record) {
|
|
229
|
+
// New file — needs full read+hash
|
|
230
|
+
needsHash.push({ file, relPath });
|
|
231
|
+
continue;
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
const stat = fileStat(file);
|
|
235
|
+
if (!stat) continue;
|
|
236
|
+
|
|
237
|
+
const storedMtime = record.mtime || 0;
|
|
238
|
+
const storedSize = record.size || 0;
|
|
239
|
+
|
|
240
|
+
// size > 0 guard: pre-v4 rows have size=0, always fall through to hash
|
|
241
|
+
if (storedSize > 0 && Math.floor(stat.mtimeMs) === storedMtime && stat.size === storedSize) {
|
|
242
|
+
skipped.push(relPath);
|
|
243
|
+
continue;
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
needsHash.push({ file, relPath, stat });
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
if (needsHash.length > 0) {
|
|
250
|
+
debug(`Tier 1: ${skipped.length} skipped by mtime+size, ${needsHash.length} need hash check`);
|
|
251
|
+
}
|
|
252
|
+
|
|
253
|
+
// ── Tier 2: Hash comparison ──────────────────────────────────────
|
|
254
|
+
const changed = [];
|
|
119
255
|
|
|
256
|
+
for (const item of needsHash) {
|
|
120
257
|
let content;
|
|
121
258
|
try {
|
|
122
|
-
content = fs.readFileSync(file, 'utf-8');
|
|
259
|
+
content = fs.readFileSync(item.file, 'utf-8');
|
|
123
260
|
} catch {
|
|
124
261
|
continue;
|
|
125
262
|
}
|
|
126
263
|
const hash = fileHash(content);
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
264
|
+
const stat = item.stat || fileStat(item.file);
|
|
265
|
+
const record = existing.get(item.relPath);
|
|
266
|
+
|
|
267
|
+
if (!record || record.hash !== hash) {
|
|
268
|
+
changed.push({ file: item.file, content, hash, relPath: item.relPath, stat });
|
|
269
|
+
} else if (stat) {
|
|
270
|
+
// Hash matches but mtime/size was stale — self-heal by updating stored metadata
|
|
271
|
+
changed.push({
|
|
272
|
+
file: item.file,
|
|
273
|
+
content,
|
|
274
|
+
hash,
|
|
275
|
+
relPath: item.relPath,
|
|
276
|
+
stat,
|
|
277
|
+
metadataOnly: true,
|
|
278
|
+
});
|
|
130
279
|
}
|
|
131
280
|
}
|
|
132
281
|
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
282
|
+
// Filter out metadata-only updates from the "changed" list for parsing,
|
|
283
|
+
// but keep them so the caller can update file_hashes
|
|
284
|
+
const parseChanged = changed.filter((c) => !c.metadataOnly);
|
|
285
|
+
if (needsHash.length > 0) {
|
|
286
|
+
debug(
|
|
287
|
+
`Tier 2: ${parseChanged.length} actually changed, ${changed.length - parseChanged.length} metadata-only`,
|
|
288
|
+
);
|
|
138
289
|
}
|
|
139
290
|
|
|
140
291
|
return { changed, removed, isFullBuild: false };
|
|
@@ -180,9 +331,33 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
180
331
|
? getChangedFiles(db, files, rootDir)
|
|
181
332
|
: { changed: files.map((f) => ({ file: f })), removed: [], isFullBuild: true };
|
|
182
333
|
|
|
183
|
-
|
|
334
|
+
// Separate metadata-only updates (mtime/size self-heal) from real changes
|
|
335
|
+
const parseChanges = changed.filter((c) => !c.metadataOnly);
|
|
336
|
+
const metadataUpdates = changed.filter((c) => c.metadataOnly);
|
|
337
|
+
|
|
338
|
+
if (!isFullBuild && parseChanges.length === 0 && removed.length === 0) {
|
|
339
|
+
// Still update metadata for self-healing even when no real changes
|
|
340
|
+
if (metadataUpdates.length > 0) {
|
|
341
|
+
try {
|
|
342
|
+
const healHash = db.prepare(
|
|
343
|
+
'INSERT OR REPLACE INTO file_hashes (file, hash, mtime, size) VALUES (?, ?, ?, ?)',
|
|
344
|
+
);
|
|
345
|
+
const healTx = db.transaction(() => {
|
|
346
|
+
for (const item of metadataUpdates) {
|
|
347
|
+
const mtime = item.stat ? Math.floor(item.stat.mtimeMs) : 0;
|
|
348
|
+
const size = item.stat ? item.stat.size : 0;
|
|
349
|
+
healHash.run(item.relPath, item.hash, mtime, size);
|
|
350
|
+
}
|
|
351
|
+
});
|
|
352
|
+
healTx();
|
|
353
|
+
debug(`Self-healed mtime/size for ${metadataUpdates.length} files`);
|
|
354
|
+
} catch {
|
|
355
|
+
/* ignore heal errors */
|
|
356
|
+
}
|
|
357
|
+
}
|
|
184
358
|
console.log('No changes detected. Graph is up to date.');
|
|
185
359
|
db.close();
|
|
360
|
+
writeJournalHeader(rootDir, Date.now());
|
|
186
361
|
return;
|
|
187
362
|
}
|
|
188
363
|
|
|
@@ -191,7 +366,7 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
191
366
|
'PRAGMA foreign_keys = OFF; DELETE FROM node_metrics; DELETE FROM edges; DELETE FROM nodes; PRAGMA foreign_keys = ON;',
|
|
192
367
|
);
|
|
193
368
|
} else {
|
|
194
|
-
console.log(`Incremental: ${
|
|
369
|
+
console.log(`Incremental: ${parseChanges.length} changed, ${removed.length} removed`);
|
|
195
370
|
// Remove metrics/edges/nodes for changed and removed files
|
|
196
371
|
const deleteNodesForFile = db.prepare('DELETE FROM nodes WHERE file = ?');
|
|
197
372
|
const deleteEdgesForFile = db.prepare(`
|
|
@@ -206,7 +381,7 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
206
381
|
deleteMetricsForFile.run(relPath);
|
|
207
382
|
deleteNodesForFile.run(relPath);
|
|
208
383
|
}
|
|
209
|
-
for (const item of
|
|
384
|
+
for (const item of parseChanges) {
|
|
210
385
|
const relPath = item.relPath || normalizePath(path.relative(rootDir, item.file));
|
|
211
386
|
deleteEdgesForFile.run({ f: relPath });
|
|
212
387
|
deleteMetricsForFile.run(relPath);
|
|
@@ -224,11 +399,11 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
224
399
|
'INSERT INTO edges (source_id, target_id, kind, confidence, dynamic) VALUES (?, ?, ?, ?, ?)',
|
|
225
400
|
);
|
|
226
401
|
|
|
227
|
-
// Prepare hash upsert
|
|
402
|
+
// Prepare hash upsert (with size column from migration v4)
|
|
228
403
|
let upsertHash;
|
|
229
404
|
try {
|
|
230
405
|
upsertHash = db.prepare(
|
|
231
|
-
'INSERT OR REPLACE INTO file_hashes (file, hash, mtime) VALUES (?, ?, ?)',
|
|
406
|
+
'INSERT OR REPLACE INTO file_hashes (file, hash, mtime, size) VALUES (?, ?, ?, ?)',
|
|
232
407
|
);
|
|
233
408
|
} catch {
|
|
234
409
|
upsertHash = null;
|
|
@@ -246,17 +421,17 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
246
421
|
// We'll fill these in during the parse pass + edge pass
|
|
247
422
|
}
|
|
248
423
|
|
|
249
|
-
const filesToParse = isFullBuild ? files.map((f) => ({ file: f })) :
|
|
424
|
+
const filesToParse = isFullBuild ? files.map((f) => ({ file: f })) : parseChanges;
|
|
250
425
|
|
|
251
426
|
// ── Unified parse via parseFilesAuto ───────────────────────────────
|
|
252
427
|
const filePaths = filesToParse.map((item) => item.file);
|
|
253
428
|
const allSymbols = await parseFilesAuto(filePaths, rootDir, engineOpts);
|
|
254
429
|
|
|
255
|
-
// Build a
|
|
256
|
-
const
|
|
430
|
+
// Build a lookup from incremental data (changed items may carry pre-computed hashes + stats)
|
|
431
|
+
const precomputedData = new Map();
|
|
257
432
|
for (const item of filesToParse) {
|
|
258
|
-
if (item.
|
|
259
|
-
|
|
433
|
+
if (item.relPath) {
|
|
434
|
+
precomputedData.set(item.relPath, item);
|
|
260
435
|
}
|
|
261
436
|
}
|
|
262
437
|
|
|
@@ -272,11 +447,14 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
272
447
|
insertNode.run(exp.name, exp.kind, relPath, exp.line, null);
|
|
273
448
|
}
|
|
274
449
|
|
|
275
|
-
// Update file hash for incremental builds
|
|
450
|
+
// Update file hash with real mtime+size for incremental builds
|
|
276
451
|
if (upsertHash) {
|
|
277
|
-
const
|
|
278
|
-
if (
|
|
279
|
-
|
|
452
|
+
const precomputed = precomputedData.get(relPath);
|
|
453
|
+
if (precomputed?.hash) {
|
|
454
|
+
const stat = precomputed.stat || fileStat(path.join(rootDir, relPath));
|
|
455
|
+
const mtime = stat ? Math.floor(stat.mtimeMs) : 0;
|
|
456
|
+
const size = stat ? stat.size : 0;
|
|
457
|
+
upsertHash.run(relPath, precomputed.hash, mtime, size);
|
|
280
458
|
} else {
|
|
281
459
|
const absPath = path.join(rootDir, relPath);
|
|
282
460
|
let code;
|
|
@@ -286,11 +464,23 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
286
464
|
code = null;
|
|
287
465
|
}
|
|
288
466
|
if (code !== null) {
|
|
289
|
-
|
|
467
|
+
const stat = fileStat(absPath);
|
|
468
|
+
const mtime = stat ? Math.floor(stat.mtimeMs) : 0;
|
|
469
|
+
const size = stat ? stat.size : 0;
|
|
470
|
+
upsertHash.run(relPath, fileHash(code), mtime, size);
|
|
290
471
|
}
|
|
291
472
|
}
|
|
292
473
|
}
|
|
293
474
|
}
|
|
475
|
+
|
|
476
|
+
// Also update metadata-only entries (self-heal mtime/size without re-parse)
|
|
477
|
+
if (upsertHash) {
|
|
478
|
+
for (const item of metadataUpdates) {
|
|
479
|
+
const mtime = item.stat ? Math.floor(item.stat.mtimeMs) : 0;
|
|
480
|
+
const size = item.stat ? item.stat.size : 0;
|
|
481
|
+
upsertHash.run(item.relPath, item.hash, mtime, size);
|
|
482
|
+
}
|
|
483
|
+
}
|
|
294
484
|
});
|
|
295
485
|
insertAll();
|
|
296
486
|
|
|
@@ -458,7 +648,9 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
458
648
|
}
|
|
459
649
|
|
|
460
650
|
// Call edges with confidence scoring — using pre-loaded lookup maps (N+1 fix)
|
|
651
|
+
const seenCallEdges = new Set();
|
|
461
652
|
for (const call of symbols.calls) {
|
|
653
|
+
if (call.receiver && BUILTIN_RECEIVERS.has(call.receiver)) continue;
|
|
462
654
|
let caller = null;
|
|
463
655
|
for (const def of symbols.definitions) {
|
|
464
656
|
if (def.line <= call.line) {
|
|
@@ -493,10 +685,18 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
493
685
|
);
|
|
494
686
|
if (methodCandidates.length > 0) {
|
|
495
687
|
targets = methodCandidates;
|
|
496
|
-
} else
|
|
497
|
-
|
|
498
|
-
|
|
688
|
+
} else if (
|
|
689
|
+
!call.receiver ||
|
|
690
|
+
call.receiver === 'this' ||
|
|
691
|
+
call.receiver === 'self' ||
|
|
692
|
+
call.receiver === 'super'
|
|
693
|
+
) {
|
|
694
|
+
// Scoped fallback — same-dir or parent-dir only, not global
|
|
695
|
+
targets = (nodesByName.get(call.name) || []).filter(
|
|
696
|
+
(n) => computeConfidence(relPath, n.file, null) >= 0.5,
|
|
697
|
+
);
|
|
499
698
|
}
|
|
699
|
+
// else: method call on a receiver — skip global fallback entirely
|
|
500
700
|
}
|
|
501
701
|
}
|
|
502
702
|
|
|
@@ -509,7 +709,9 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
509
709
|
}
|
|
510
710
|
|
|
511
711
|
for (const t of targets) {
|
|
512
|
-
|
|
712
|
+
const edgeKey = `${caller.id}|${t.id}`;
|
|
713
|
+
if (t.id !== caller.id && !seenCallEdges.has(edgeKey)) {
|
|
714
|
+
seenCallEdges.add(edgeKey);
|
|
513
715
|
const confidence = computeConfidence(relPath, t.file, importedFrom);
|
|
514
716
|
insertEdge.run(caller.id, t.id, 'calls', confidence, isDynamic);
|
|
515
717
|
edgeCount++;
|
|
@@ -582,6 +784,9 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
582
784
|
console.log(`Stored in ${dbPath}`);
|
|
583
785
|
db.close();
|
|
584
786
|
|
|
787
|
+
// Write journal header after successful build
|
|
788
|
+
writeJournalHeader(rootDir, Date.now());
|
|
789
|
+
|
|
585
790
|
if (!opts.skipRegistry) {
|
|
586
791
|
const tmpDir = path.resolve(os.tmpdir());
|
|
587
792
|
const resolvedRoot = path.resolve(rootDir);
|
package/src/cli.js
CHANGED
|
@@ -11,6 +11,7 @@ import { buildEmbeddings, MODELS, search } from './embedder.js';
|
|
|
11
11
|
import { exportDOT, exportJSON, exportMermaid } from './export.js';
|
|
12
12
|
import { setVerbose } from './logger.js';
|
|
13
13
|
import {
|
|
14
|
+
context,
|
|
14
15
|
diffImpact,
|
|
15
16
|
fileDeps,
|
|
16
17
|
fnDeps,
|
|
@@ -130,6 +131,25 @@ program
|
|
|
130
131
|
});
|
|
131
132
|
});
|
|
132
133
|
|
|
134
|
+
// `context <name>` subcommand: forwards the parsed flags to context(), which
// per the description reports source, deps, callers, tests and signature.
program
  .command('context <name>')
  .description('Full context for a function: source, deps, callers, tests, signature')
  .option('-d, --db <path>', 'Path to graph.db')
  .option('--depth <n>', 'Include callee source up to N levels deep', '0')
  .option('--no-source', 'Metadata only (skip source extraction)')
  .option('--include-tests', 'Include test source code')
  .option('-T, --no-tests', 'Exclude test files from callers')
  .option('-j, --json', 'Output as JSON')
  .action((name, opts) => {
    const { db, depth, source, tests, includeTests, json } = opts;
    // NOTE(review): assumes the option parser exposes --no-source / --no-tests
    // as positive `source` / `tests` booleans, hence the inversion — confirm.
    context(name, db, {
      depth: parseInt(depth, 10), // --depth arrives as a string (default '0')
      noSource: !source,
      noTests: !tests,
      includeTests,
      json,
    });
  });
|
|
152
|
+
|
|
133
153
|
program
|
|
134
154
|
.command('diff-impact [ref]')
|
|
135
155
|
.description('Show impact of git changes (unstaged, staged, or vs a ref)')
|
package/src/db.js
CHANGED
package/src/extractors/csharp.js
CHANGED
|
@@ -186,7 +186,12 @@ export function extractCSharpSymbols(tree, _filePath) {
|
|
|
186
186
|
calls.push({ name: fn.text, line: node.startPosition.row + 1 });
|
|
187
187
|
} else if (fn.type === 'member_access_expression') {
|
|
188
188
|
const name = fn.childForFieldName('name');
|
|
189
|
-
if (name)
|
|
189
|
+
if (name) {
|
|
190
|
+
const expr = fn.childForFieldName('expression');
|
|
191
|
+
const call = { name: name.text, line: node.startPosition.row + 1 };
|
|
192
|
+
if (expr) call.receiver = expr.text;
|
|
193
|
+
calls.push(call);
|
|
194
|
+
}
|
|
190
195
|
} else if (fn.type === 'generic_name' || fn.type === 'member_binding_expression') {
|
|
191
196
|
const name = fn.childForFieldName('name') || fn.child(0);
|
|
192
197
|
if (name) calls.push({ name: name.text, line: node.startPosition.row + 1 });
|
package/src/extractors/go.js
CHANGED
|
@@ -152,7 +152,12 @@ export function extractGoSymbols(tree, _filePath) {
|
|
|
152
152
|
calls.push({ name: fn.text, line: node.startPosition.row + 1 });
|
|
153
153
|
} else if (fn.type === 'selector_expression') {
|
|
154
154
|
const field = fn.childForFieldName('field');
|
|
155
|
-
if (field)
|
|
155
|
+
if (field) {
|
|
156
|
+
const operand = fn.childForFieldName('operand');
|
|
157
|
+
const call = { name: field.text, line: node.startPosition.row + 1 };
|
|
158
|
+
if (operand) call.receiver = operand.text;
|
|
159
|
+
calls.push(call);
|
|
160
|
+
}
|
|
156
161
|
}
|
|
157
162
|
}
|
|
158
163
|
break;
|
package/src/extractors/java.js
CHANGED
|
@@ -203,7 +203,10 @@ export function extractJavaSymbols(tree, _filePath) {
|
|
|
203
203
|
case 'method_invocation': {
|
|
204
204
|
const nameNode = node.childForFieldName('name');
|
|
205
205
|
if (nameNode) {
|
|
206
|
-
|
|
206
|
+
const obj = node.childForFieldName('object');
|
|
207
|
+
const call = { name: nameNode.text, line: node.startPosition.row + 1 };
|
|
208
|
+
if (obj) call.receiver = obj.text;
|
|
209
|
+
calls.push(call);
|
|
207
210
|
}
|
|
208
211
|
break;
|
|
209
212
|
}
|