@optave/codegraph 2.1.1-dev.3c12b64 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +49 -31
- package/package.json +5 -5
- package/src/builder.js +238 -33
- package/src/cli.js +93 -9
- package/src/cycles.js +13 -1
- package/src/db.js +4 -0
- package/src/export.js +20 -7
- package/src/extractors/csharp.js +6 -1
- package/src/extractors/go.js +6 -1
- package/src/extractors/java.js +4 -1
- package/src/extractors/javascript.js +145 -5
- package/src/extractors/php.js +8 -2
- package/src/extractors/python.js +8 -1
- package/src/extractors/ruby.js +4 -1
- package/src/extractors/rust.js +12 -2
- package/src/index.js +6 -0
- package/src/journal.js +109 -0
- package/src/mcp.js +131 -7
- package/src/parser.js +1 -0
- package/src/queries.js +1143 -38
- package/src/structure.js +21 -7
- package/src/watcher.js +25 -0
package/README.md
CHANGED
|
@@ -50,14 +50,13 @@ Most tools in this space can't do that:
|
|
|
50
50
|
| **Heavy infrastructure that's slow to restart** | code-graph-rag (Memgraph), axon (KuzuDB), badger-graph (Dgraph) | External databases add latency to every write. Bulk-inserting a full graph into Memgraph is not a sub-second operation |
|
|
51
51
|
| **No persistence between runs** | pyan, cflow | Re-parse from scratch every time. No database, no delta, no incremental anything |
|
|
52
52
|
|
|
53
|
-
**Codegraph solves this with incremental
|
|
53
|
+
**Codegraph solves this with three-tier incremental change detection:**
|
|
54
54
|
|
|
55
|
-
1.
|
|
56
|
-
2.
|
|
57
|
-
3.
|
|
58
|
-
4. Everything else is untouched
|
|
55
|
+
1. **Tier 0 — Journal (O(changed)):** If `codegraph watch` was running, a change journal records exactly which files were touched. The next build reads the journal and only processes those files — zero filesystem scanning
|
|
56
|
+
2. **Tier 1 — mtime+size (O(n) stats, O(changed) reads):** No journal? Codegraph stats every file and compares mtime + size against stored values. Matching files are skipped without reading a single byte — 10-100x cheaper than hashing
|
|
57
|
+
3. **Tier 2 — Hash (O(changed) reads):** Files that fail the mtime/size check are read and MD5-hashed. Only files whose hash actually changed get re-parsed and re-inserted
|
|
59
58
|
|
|
60
|
-
**Result:** change one file in a 3,000-file project → rebuild completes in **under a second**. Put it in a commit hook, a file watcher, or let your AI agent trigger it. The graph is always current.
|
|
59
|
+
**Result:** change one file in a 3,000-file project → rebuild completes in **under a second**. With watch mode active, rebuilds are near-instant — the journal makes the build proportional to the number of changed files, not the size of the codebase. Put it in a commit hook, a file watcher, or let your AI agent trigger it. The graph is always current.
|
|
61
60
|
|
|
62
61
|
And because the core pipeline is pure local computation (tree-sitter + SQLite), there are no API calls, no network latency, and no cost. LLM-powered features (semantic search, richer embeddings) are a separate optional layer — they enhance the graph but never block it from being current.
|
|
63
62
|
|
|
@@ -80,7 +79,7 @@ Most code graph tools make you choose: **fast local analysis with no AI, or powe
|
|
|
80
79
|
| Git diff impact | **Yes** | — | — | — | — | **Yes** | — | **Yes** |
|
|
81
80
|
| Watch mode | **Yes** | — | **Yes** | — | — | — | — | — |
|
|
82
81
|
| Cycle detection | **Yes** | — | **Yes** | — | — | — | — | **Yes** |
|
|
83
|
-
| Incremental rebuilds | **
|
|
82
|
+
| Incremental rebuilds | **O(changed)** | — | O(n) Merkle | — | — | — | — | — |
|
|
84
83
|
| Zero config | **Yes** | — | **Yes** | — | — | — | — | — |
|
|
85
84
|
| Embeddable JS library (`npm install`) | **Yes** | — | — | — | — | — | — | — |
|
|
86
85
|
| LLM-optional (works without API keys) | **Yes** | **Yes** | **Yes** | — | **Yes** | **Yes** | **Yes** | **Yes** |
|
|
@@ -91,22 +90,22 @@ Most code graph tools make you choose: **fast local analysis with no AI, or powe
|
|
|
91
90
|
|
|
92
91
|
| | Differentiator | In practice |
|
|
93
92
|
|---|---|---|
|
|
94
|
-
| **⚡** | **Always-fresh graph** |
|
|
93
|
+
| **⚡** | **Always-fresh graph** | Three-tier change detection: journal (O(changed)) → mtime+size (O(n) stats) → hash (O(changed) reads). Sub-second rebuilds even on large codebases. Competitors re-index everything from scratch; Merkle-tree approaches still require O(n) filesystem scanning |
|
|
95
94
|
| **🔓** | **Zero-cost core, LLM-enhanced when you want** | Full graph analysis with no API keys, no accounts, no cost. Optionally bring your own LLM provider for richer embeddings and AI-powered search — your code only goes to the provider you already chose |
|
|
96
95
|
| **🔬** | **Function-level, not just files** | Traces `handleAuth()` → `validateToken()` → `decryptJWT()` and shows 14 callers across 9 files break if `decryptJWT` changes |
|
|
97
|
-
| **🤖** | **Built for AI agents** |
|
|
96
|
+
| **🤖** | **Built for AI agents** | 17-tool [MCP server](https://modelcontextprotocol.io/) — AI assistants query your graph directly. Single-repo by default, your code doesn't leak to other projects |
|
|
98
97
|
| **🌐** | **Multi-language, one CLI** | JS/TS + Python + Go + Rust + Java + C# + PHP + Ruby + HCL in a single graph — no juggling Madge, pyan, and cflow |
|
|
99
98
|
| **💥** | **Git diff impact** | `codegraph diff-impact` shows changed functions, their callers, and full blast radius — ships with a GitHub Actions workflow |
|
|
100
99
|
| **🧠** | **Semantic search** | Local embeddings by default, LLM-powered embeddings when opted in — multi-query with RRF ranking via `"auth; token; JWT"` |
|
|
101
100
|
|
|
102
101
|
### How other tools compare
|
|
103
102
|
|
|
104
|
-
The key question is: **can you rebuild your graph on every commit in a large codebase without it costing money or taking minutes?** Most tools in this space either re-index everything from scratch (slow), require cloud API calls for core features (costly), or both. Codegraph's incremental
|
|
103
|
+
The key question is: **can you rebuild your graph on every commit in a large codebase without it costing money or taking minutes?** Most tools in this space either re-index everything from scratch (slow), require cloud API calls for core features (costly), or both. Codegraph's three-tier incremental detection achieves true O(changed) in the best case — when the watcher is running, rebuilds are proportional only to the number of files that changed, not the size of the codebase. The core pipeline needs no API keys at all. LLM-powered features are opt-in, using whichever provider you already work with.
|
|
105
104
|
|
|
106
105
|
| Tool | What it does well | The tradeoff |
|
|
107
106
|
|---|---|---|
|
|
108
107
|
| [joern](https://github.com/joernio/joern) | Full CPG (AST + CFG + PDG) for vulnerability discovery, Scala query DSL, 14 languages, daily releases | No incremental builds — full re-parse on every change. Requires JDK 21, no built-in MCP, no watch mode |
|
|
109
|
-
| [narsil-mcp](https://github.com/postrv/narsil-mcp) | 90 MCP tools, 32 languages, taint analysis, SBOM, dead code, neural search, Merkle-tree incremental indexing, single ~30MB binary | Primarily MCP-only — no standalone CLI query interface. Neural search requires API key or ONNX source build |
|
|
108
|
+
| [narsil-mcp](https://github.com/postrv/narsil-mcp) | 90 MCP tools, 32 languages, taint analysis, SBOM, dead code, neural search, Merkle-tree incremental indexing, single ~30MB binary | Merkle trees still require O(n) filesystem scanning on every rebuild. Primarily MCP-only — no standalone CLI query interface. Neural search requires API key or ONNX source build |
|
|
110
109
|
| [code-graph-rag](https://github.com/vitali87/code-graph-rag) | Graph RAG with Memgraph, multi-provider AI, semantic search, code editing via AST | No incremental rebuilds — full re-index + re-embed through cloud APIs on every change. Requires Docker |
|
|
111
110
|
| [cpg](https://github.com/Fraunhofer-AISEC/cpg) | Formal Code Property Graph (AST + CFG + PDG + DFG), ~10 languages, MCP module, LLVM IR support, academic specifications | No incremental builds. Requires JVM + Gradle, no zero config, no watch mode |
|
|
112
111
|
| [GitNexus](https://github.com/abhigyanpatwari/GitNexus) | Knowledge graph with precomputed structural intelligence, 7 MCP tools, hybrid search (BM25 + semantic + RRF), clustering, process tracing | Full 6-phase pipeline re-run on changes. KuzuDB graph DB, browser mode limited to ~5,000 files. **PolyForm NC — no commercial use** |
|
|
@@ -133,15 +132,16 @@ Here is a cold, analytical breakdown to help you decide which tool fits your wor
|
|
|
133
132
|
| Aspect | Optave Codegraph | Narsil-MCP |
|
|
134
133
|
| :--- | :--- | :--- |
|
|
135
134
|
| **Philosophy** | Lean, deterministic, AI-optimized | Comprehensive, feature-dense |
|
|
136
|
-
| **AI Tool Count** |
|
|
135
|
+
| **AI Tool Count** | 17 focused tools | 90 distinct tools |
|
|
137
136
|
| **Language Support** | 11 languages | 32 languages |
|
|
138
137
|
| **Primary Interface** | CLI-first with MCP integration | MCP-first (CLI is secondary) |
|
|
139
138
|
| **Supply Chain Risk** | Low (minimal dependency tree) | Higher (requires massive dependency graph for embedded ML/scanners) |
|
|
140
|
-
| **Graph Updates** |
|
|
139
|
+
| **Graph Updates** | **Three-tier O(changed)** — journal → mtime+size → hash. With watch mode, only changed files are touched | Merkle trees — O(n) filesystem scan on every rebuild to recompute tree hashes |
|
|
141
140
|
|
|
142
141
|
#### Choose Codegraph if:
|
|
143
142
|
|
|
144
|
-
* **You
|
|
143
|
+
* **You need the fastest possible incremental rebuilds.** Codegraph’s three-tier change detection (journal → mtime+size → hash) achieves true O(changed) when the watcher is running — only touched files are processed. Narsil’s Merkle trees still require O(n) filesystem scanning to recompute hashes on every rebuild, even when nothing changed. On a 3,000-file project, this is the difference between near-instant and noticeable.
|
|
144
|
+
* **You want to optimize AI agent reasoning.** Large Language Models degrade in performance and hallucinate when overwhelmed with choices. Codegraph’s tight 17-tool surface area ensures agents quickly understand their capabilities without wasting context window tokens.
|
|
145
145
|
* **You are concerned about supply chain attacks.** To support 90 tools, SBOMs, and neural embeddings, a tool must pull in a massive dependency tree. Codegraph keeps its dependencies minimal, dramatically reducing the risk of malicious code sneaking onto your machine.
|
|
146
146
|
* **You want deterministic blast-radius checks.** Features like `diff-impact` are built specifically to tell you exactly how a changed function cascades through your codebase before you merge a PR.
|
|
147
147
|
* **You value a strong standalone CLI.** You want to query your code graph locally without necessarily spinning up an AI agent.
|
|
@@ -190,7 +190,7 @@ codegraph deps src/index.ts # file-level import/export map
|
|
|
190
190
|
| 📤 | **Export** | DOT (Graphviz), Mermaid, and JSON graph export |
|
|
191
191
|
| 🧠 | **Semantic search** | Embeddings-powered natural language search with multi-query RRF ranking |
|
|
192
192
|
| 👀 | **Watch mode** | Incrementally update the graph as files change |
|
|
193
|
-
| 🤖 | **MCP server** |
|
|
193
|
+
| 🤖 | **MCP server** | 17-tool MCP server for AI assistants; single-repo by default, opt-in multi-repo |
|
|
194
194
|
| 🔒 | **Your code, your choice** | Zero-cost core with no API keys. Optionally enhance with your LLM provider — your code only goes where you send it |
|
|
195
195
|
|
|
196
196
|
## 📦 Commands
|
|
@@ -376,21 +376,22 @@ Dynamic patterns like `fn.call()`, `fn.apply()`, `fn.bind()`, and `obj["method"]
|
|
|
376
376
|
|
|
377
377
|
## 📊 Performance
|
|
378
378
|
|
|
379
|
-
|
|
379
|
+
Self-measured on every release via CI ([full history](generated/BENCHMARKS.md)):
|
|
380
380
|
|
|
381
|
-
| Metric |
|
|
381
|
+
| Metric | Latest |
|
|
382
382
|
|---|---|
|
|
383
|
-
| Build
|
|
384
|
-
|
|
|
385
|
-
|
|
|
386
|
-
|
|
|
387
|
-
|
|
383
|
+
| Build speed (native) | **2.5 ms/file** |
|
|
384
|
+
| Build speed (WASM) | **5 ms/file** |
|
|
385
|
+
| Query time | **1ms** |
|
|
386
|
+
| ~50,000 files (est.) | **~125.0s build** |
|
|
387
|
+
|
|
388
|
+
Metrics are normalized per file for cross-version comparability. Times above are for a full initial build — incremental rebuilds only re-parse changed files.
|
|
388
389
|
|
|
389
390
|
## 🤖 AI Agent Integration
|
|
390
391
|
|
|
391
392
|
### MCP Server
|
|
392
393
|
|
|
393
|
-
Codegraph includes a built-in [Model Context Protocol](https://modelcontextprotocol.io/) server with
|
|
394
|
+
Codegraph includes a built-in [Model Context Protocol](https://modelcontextprotocol.io/) server with 17 tools, so AI assistants can query your dependency graph directly:
|
|
394
395
|
|
|
395
396
|
```bash
|
|
396
397
|
codegraph mcp # Single-repo mode (default) — only local project
|
|
@@ -404,20 +405,35 @@ codegraph mcp --repos a,b # Multi-repo with allowlist
|
|
|
404
405
|
|
|
405
406
|
### CLAUDE.md / Agent Instructions
|
|
406
407
|
|
|
407
|
-
Add this to your project's `CLAUDE.md` to help AI agents use codegraph:
|
|
408
|
+
Add this to your project's `CLAUDE.md` to help AI agents use codegraph (full template in the [AI Agent Guide](docs/ai-agent-guide.md#claudemd-template)):
|
|
408
409
|
|
|
409
410
|
```markdown
|
|
410
411
|
## Code Navigation
|
|
411
412
|
|
|
412
413
|
This project uses codegraph. The database is at `.codegraph/graph.db`.
|
|
413
414
|
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
415
|
+
### Before modifying code, always:
|
|
416
|
+
1. `codegraph where <name>` — find where the symbol lives
|
|
417
|
+
2. `codegraph explain <file-or-function>` — understand the structure
|
|
418
|
+
3. `codegraph context <name> -T` — get full context (source, deps, callers)
|
|
419
|
+
4. `codegraph fn-impact <name> -T` — check blast radius before editing
|
|
420
|
+
|
|
421
|
+
### After modifying code:
|
|
422
|
+
5. `codegraph diff-impact --staged -T` — verify impact before committing
|
|
423
|
+
|
|
424
|
+
### Other useful commands
|
|
425
|
+
- `codegraph build .` — rebuild the graph (incremental by default)
|
|
426
|
+
- `codegraph map` — module overview
|
|
427
|
+
- `codegraph fn <name> -T` — function call chain
|
|
428
|
+
- `codegraph deps <file>` — file-level dependencies
|
|
429
|
+
- `codegraph search "<query>"` — semantic search (requires `codegraph embed`)
|
|
430
|
+
- `codegraph cycles` — check for circular dependencies
|
|
431
|
+
|
|
432
|
+
### Flags
|
|
433
|
+
- `-T` / `--no-tests` — exclude test files (use by default)
|
|
434
|
+
- `-j` / `--json` — JSON output for programmatic use
|
|
435
|
+
- `-f, --file <path>` — scope to a specific file
|
|
436
|
+
- `-k, --kind <kind>` — filter by symbol kind
|
|
421
437
|
|
|
422
438
|
### Semantic search
|
|
423
439
|
|
|
@@ -455,6 +471,8 @@ See **[docs/recommended-practices.md](docs/recommended-practices.md)** for integ
|
|
|
455
471
|
- **Developer workflow** — watch mode, explore-before-you-edit, semantic search
|
|
456
472
|
- **Secure credentials** — `apiKeyCommand` with 1Password, Bitwarden, Vault, macOS Keychain, `pass`
|
|
457
473
|
|
|
474
|
+
For AI-specific integration, see the **[AI Agent Guide](docs/ai-agent-guide.md)** — a comprehensive reference covering the 6-step agent workflow, complete command-to-MCP mapping, Claude Code hooks, and token-saving patterns.
|
|
475
|
+
|
|
458
476
|
## 🔁 CI / GitHub Actions
|
|
459
477
|
|
|
460
478
|
Codegraph ships with a ready-to-use GitHub Actions workflow that comments impact analysis on every pull request.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@optave/codegraph",
|
|
3
|
-
"version": "2.
|
|
3
|
+
"version": "2.2.0",
|
|
4
4
|
"description": "Local code graph CLI — parse codebases with tree-sitter, build dependency graphs, query them",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "src/index.js",
|
|
@@ -61,10 +61,10 @@
|
|
|
61
61
|
"optionalDependencies": {
|
|
62
62
|
"@huggingface/transformers": "^3.8.1",
|
|
63
63
|
"@modelcontextprotocol/sdk": "^1.0.0",
|
|
64
|
-
"@optave/codegraph-darwin-arm64": "2.
|
|
65
|
-
"@optave/codegraph-darwin-x64": "2.
|
|
66
|
-
"@optave/codegraph-linux-x64-gnu": "2.
|
|
67
|
-
"@optave/codegraph-win32-x64-msvc": "2.
|
|
64
|
+
"@optave/codegraph-darwin-arm64": "2.2.0",
|
|
65
|
+
"@optave/codegraph-darwin-x64": "2.2.0",
|
|
66
|
+
"@optave/codegraph-linux-x64-gnu": "2.2.0",
|
|
67
|
+
"@optave/codegraph-win32-x64-msvc": "2.2.0"
|
|
68
68
|
},
|
|
69
69
|
"devDependencies": {
|
|
70
70
|
"@biomejs/biome": "^2.4.4",
|
package/src/builder.js
CHANGED
|
@@ -5,12 +5,44 @@ import path from 'node:path';
|
|
|
5
5
|
import { loadConfig } from './config.js';
|
|
6
6
|
import { EXTENSIONS, IGNORE_DIRS, normalizePath } from './constants.js';
|
|
7
7
|
import { initSchema, openDb } from './db.js';
|
|
8
|
+
import { readJournal, writeJournalHeader } from './journal.js';
|
|
8
9
|
import { debug, warn } from './logger.js';
|
|
9
10
|
import { getActiveEngine, parseFilesAuto } from './parser.js';
|
|
10
11
|
import { computeConfidence, resolveImportPath, resolveImportsBatch } from './resolve.js';
|
|
11
12
|
|
|
12
13
|
export { resolveImportPath } from './resolve.js';
|
|
13
14
|
|
|
15
|
+
const BUILTIN_RECEIVERS = new Set([
|
|
16
|
+
'console',
|
|
17
|
+
'Math',
|
|
18
|
+
'JSON',
|
|
19
|
+
'Object',
|
|
20
|
+
'Array',
|
|
21
|
+
'String',
|
|
22
|
+
'Number',
|
|
23
|
+
'Boolean',
|
|
24
|
+
'Date',
|
|
25
|
+
'RegExp',
|
|
26
|
+
'Map',
|
|
27
|
+
'Set',
|
|
28
|
+
'WeakMap',
|
|
29
|
+
'WeakSet',
|
|
30
|
+
'Promise',
|
|
31
|
+
'Symbol',
|
|
32
|
+
'Error',
|
|
33
|
+
'TypeError',
|
|
34
|
+
'RangeError',
|
|
35
|
+
'Proxy',
|
|
36
|
+
'Reflect',
|
|
37
|
+
'Intl',
|
|
38
|
+
'globalThis',
|
|
39
|
+
'window',
|
|
40
|
+
'document',
|
|
41
|
+
'process',
|
|
42
|
+
'Buffer',
|
|
43
|
+
'require',
|
|
44
|
+
]);
|
|
45
|
+
|
|
14
46
|
export function collectFiles(dir, files = [], config = {}, directories = null) {
|
|
15
47
|
const trackDirs = directories !== null;
|
|
16
48
|
let entries;
|
|
@@ -81,8 +113,24 @@ function fileHash(content) {
|
|
|
81
113
|
return createHash('md5').update(content).digest('hex');
|
|
82
114
|
}
|
|
83
115
|
|
|
116
|
+
/**
|
|
117
|
+
* Stat a file, returning { mtimeMs, size } or null on error.
|
|
118
|
+
*/
|
|
119
|
+
function fileStat(filePath) {
|
|
120
|
+
try {
|
|
121
|
+
const s = fs.statSync(filePath);
|
|
122
|
+
return { mtimeMs: s.mtimeMs, size: s.size };
|
|
123
|
+
} catch {
|
|
124
|
+
return null;
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
|
|
84
128
|
/**
|
|
85
129
|
* Determine which files have changed since last build.
|
|
130
|
+
* Three-tier cascade:
|
|
131
|
+
* Tier 0 — Journal: O(changed) when watcher was running
|
|
132
|
+
* Tier 1 — mtime+size: O(n) stats, O(changed) reads
|
|
133
|
+
* Tier 2 — Hash comparison: O(changed) reads (fallback from Tier 1)
|
|
86
134
|
*/
|
|
87
135
|
function getChangedFiles(db, allFiles, rootDir) {
|
|
88
136
|
// Check if file_hashes table exists
|
|
@@ -95,7 +143,6 @@ function getChangedFiles(db, allFiles, rootDir) {
|
|
|
95
143
|
}
|
|
96
144
|
|
|
97
145
|
if (!hasTable) {
|
|
98
|
-
// No hash table = first build, everything is new
|
|
99
146
|
return {
|
|
100
147
|
changed: allFiles.map((f) => ({ file: f })),
|
|
101
148
|
removed: [],
|
|
@@ -105,36 +152,140 @@ function getChangedFiles(db, allFiles, rootDir) {
|
|
|
105
152
|
|
|
106
153
|
const existing = new Map(
|
|
107
154
|
db
|
|
108
|
-
.prepare('SELECT file, hash FROM file_hashes')
|
|
155
|
+
.prepare('SELECT file, hash, mtime, size FROM file_hashes')
|
|
109
156
|
.all()
|
|
110
|
-
.map((r) => [r.file, r.hash]),
|
|
157
|
+
.map((r) => [r.file, r]),
|
|
111
158
|
);
|
|
112
159
|
|
|
113
|
-
|
|
160
|
+
// Build set of current files for removal detection
|
|
114
161
|
const currentFiles = new Set();
|
|
162
|
+
for (const file of allFiles) {
|
|
163
|
+
currentFiles.add(normalizePath(path.relative(rootDir, file)));
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
const removed = [];
|
|
167
|
+
for (const existingFile of existing.keys()) {
|
|
168
|
+
if (!currentFiles.has(existingFile)) {
|
|
169
|
+
removed.push(existingFile);
|
|
170
|
+
}
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
// ── Tier 0: Journal ──────────────────────────────────────────────
|
|
174
|
+
const journal = readJournal(rootDir);
|
|
175
|
+
if (journal.valid) {
|
|
176
|
+
// Validate journal timestamp against DB — journal should be from after the last build
|
|
177
|
+
const dbMtimes = db.prepare('SELECT MAX(mtime) as latest FROM file_hashes').get();
|
|
178
|
+
const latestDbMtime = dbMtimes?.latest || 0;
|
|
179
|
+
|
|
180
|
+
// Empty journal = no watcher was running, fall to Tier 1 for safety
|
|
181
|
+
const hasJournalEntries = journal.changed.length > 0 || journal.removed.length > 0;
|
|
182
|
+
|
|
183
|
+
if (hasJournalEntries && journal.timestamp >= latestDbMtime) {
|
|
184
|
+
debug(
|
|
185
|
+
`Tier 0: journal valid, ${journal.changed.length} changed, ${journal.removed.length} removed`,
|
|
186
|
+
);
|
|
187
|
+
const changed = [];
|
|
188
|
+
|
|
189
|
+
for (const relPath of journal.changed) {
|
|
190
|
+
const absPath = path.join(rootDir, relPath);
|
|
191
|
+
const stat = fileStat(absPath);
|
|
192
|
+
if (!stat) continue;
|
|
193
|
+
|
|
194
|
+
let content;
|
|
195
|
+
try {
|
|
196
|
+
content = fs.readFileSync(absPath, 'utf-8');
|
|
197
|
+
} catch {
|
|
198
|
+
continue;
|
|
199
|
+
}
|
|
200
|
+
const hash = fileHash(content);
|
|
201
|
+
const record = existing.get(relPath);
|
|
202
|
+
if (!record || record.hash !== hash) {
|
|
203
|
+
changed.push({ file: absPath, content, hash, relPath, stat });
|
|
204
|
+
}
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
// Merge journal removals with filesystem removals (dedup)
|
|
208
|
+
const removedSet = new Set(removed);
|
|
209
|
+
for (const relPath of journal.removed) {
|
|
210
|
+
if (existing.has(relPath)) removedSet.add(relPath);
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
return { changed, removed: [...removedSet], isFullBuild: false };
|
|
214
|
+
}
|
|
215
|
+
debug(
|
|
216
|
+
`Tier 0: skipped (${hasJournalEntries ? 'timestamp stale' : 'no entries'}), falling to Tier 1`,
|
|
217
|
+
);
|
|
218
|
+
}
|
|
219
|
+
|
|
220
|
+
// ── Tier 1: mtime+size fast-path ─────────────────────────────────
|
|
221
|
+
const needsHash = []; // Files that failed mtime+size check
|
|
222
|
+
const skipped = []; // Files that passed mtime+size check
|
|
115
223
|
|
|
116
224
|
for (const file of allFiles) {
|
|
117
225
|
const relPath = normalizePath(path.relative(rootDir, file));
|
|
118
|
-
|
|
226
|
+
const record = existing.get(relPath);
|
|
227
|
+
|
|
228
|
+
if (!record) {
|
|
229
|
+
// New file — needs full read+hash
|
|
230
|
+
needsHash.push({ file, relPath });
|
|
231
|
+
continue;
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
const stat = fileStat(file);
|
|
235
|
+
if (!stat) continue;
|
|
236
|
+
|
|
237
|
+
const storedMtime = record.mtime || 0;
|
|
238
|
+
const storedSize = record.size || 0;
|
|
239
|
+
|
|
240
|
+
// size > 0 guard: pre-v4 rows have size=0, always fall through to hash
|
|
241
|
+
if (storedSize > 0 && Math.floor(stat.mtimeMs) === storedMtime && stat.size === storedSize) {
|
|
242
|
+
skipped.push(relPath);
|
|
243
|
+
continue;
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
needsHash.push({ file, relPath, stat });
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
if (needsHash.length > 0) {
|
|
250
|
+
debug(`Tier 1: ${skipped.length} skipped by mtime+size, ${needsHash.length} need hash check`);
|
|
251
|
+
}
|
|
252
|
+
|
|
253
|
+
// ── Tier 2: Hash comparison ──────────────────────────────────────
|
|
254
|
+
const changed = [];
|
|
119
255
|
|
|
256
|
+
for (const item of needsHash) {
|
|
120
257
|
let content;
|
|
121
258
|
try {
|
|
122
|
-
content = fs.readFileSync(file, 'utf-8');
|
|
259
|
+
content = fs.readFileSync(item.file, 'utf-8');
|
|
123
260
|
} catch {
|
|
124
261
|
continue;
|
|
125
262
|
}
|
|
126
263
|
const hash = fileHash(content);
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
264
|
+
const stat = item.stat || fileStat(item.file);
|
|
265
|
+
const record = existing.get(item.relPath);
|
|
266
|
+
|
|
267
|
+
if (!record || record.hash !== hash) {
|
|
268
|
+
changed.push({ file: item.file, content, hash, relPath: item.relPath, stat });
|
|
269
|
+
} else if (stat) {
|
|
270
|
+
// Hash matches but mtime/size was stale — self-heal by updating stored metadata
|
|
271
|
+
changed.push({
|
|
272
|
+
file: item.file,
|
|
273
|
+
content,
|
|
274
|
+
hash,
|
|
275
|
+
relPath: item.relPath,
|
|
276
|
+
stat,
|
|
277
|
+
metadataOnly: true,
|
|
278
|
+
});
|
|
130
279
|
}
|
|
131
280
|
}
|
|
132
281
|
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
282
|
+
// Filter out metadata-only updates from the "changed" list for parsing,
|
|
283
|
+
// but keep them so the caller can update file_hashes
|
|
284
|
+
const parseChanged = changed.filter((c) => !c.metadataOnly);
|
|
285
|
+
if (needsHash.length > 0) {
|
|
286
|
+
debug(
|
|
287
|
+
`Tier 2: ${parseChanged.length} actually changed, ${changed.length - parseChanged.length} metadata-only`,
|
|
288
|
+
);
|
|
138
289
|
}
|
|
139
290
|
|
|
140
291
|
return { changed, removed, isFullBuild: false };
|
|
@@ -180,9 +331,33 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
180
331
|
? getChangedFiles(db, files, rootDir)
|
|
181
332
|
: { changed: files.map((f) => ({ file: f })), removed: [], isFullBuild: true };
|
|
182
333
|
|
|
183
|
-
|
|
334
|
+
// Separate metadata-only updates (mtime/size self-heal) from real changes
|
|
335
|
+
const parseChanges = changed.filter((c) => !c.metadataOnly);
|
|
336
|
+
const metadataUpdates = changed.filter((c) => c.metadataOnly);
|
|
337
|
+
|
|
338
|
+
if (!isFullBuild && parseChanges.length === 0 && removed.length === 0) {
|
|
339
|
+
// Still update metadata for self-healing even when no real changes
|
|
340
|
+
if (metadataUpdates.length > 0) {
|
|
341
|
+
try {
|
|
342
|
+
const healHash = db.prepare(
|
|
343
|
+
'INSERT OR REPLACE INTO file_hashes (file, hash, mtime, size) VALUES (?, ?, ?, ?)',
|
|
344
|
+
);
|
|
345
|
+
const healTx = db.transaction(() => {
|
|
346
|
+
for (const item of metadataUpdates) {
|
|
347
|
+
const mtime = item.stat ? Math.floor(item.stat.mtimeMs) : 0;
|
|
348
|
+
const size = item.stat ? item.stat.size : 0;
|
|
349
|
+
healHash.run(item.relPath, item.hash, mtime, size);
|
|
350
|
+
}
|
|
351
|
+
});
|
|
352
|
+
healTx();
|
|
353
|
+
debug(`Self-healed mtime/size for ${metadataUpdates.length} files`);
|
|
354
|
+
} catch {
|
|
355
|
+
/* ignore heal errors */
|
|
356
|
+
}
|
|
357
|
+
}
|
|
184
358
|
console.log('No changes detected. Graph is up to date.');
|
|
185
359
|
db.close();
|
|
360
|
+
writeJournalHeader(rootDir, Date.now());
|
|
186
361
|
return;
|
|
187
362
|
}
|
|
188
363
|
|
|
@@ -191,7 +366,7 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
191
366
|
'PRAGMA foreign_keys = OFF; DELETE FROM node_metrics; DELETE FROM edges; DELETE FROM nodes; PRAGMA foreign_keys = ON;',
|
|
192
367
|
);
|
|
193
368
|
} else {
|
|
194
|
-
console.log(`Incremental: ${changed.length} changed, ${removed.length} removed`);
|
|
369
|
+
console.log(`Incremental: ${parseChanges.length} changed, ${removed.length} removed`);
|
|
195
370
|
// Remove metrics/edges/nodes for changed and removed files
|
|
196
371
|
const deleteNodesForFile = db.prepare('DELETE FROM nodes WHERE file = ?');
|
|
197
372
|
const deleteEdgesForFile = db.prepare(`
|
|
@@ -206,7 +381,7 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
206
381
|
deleteMetricsForFile.run(relPath);
|
|
207
382
|
deleteNodesForFile.run(relPath);
|
|
208
383
|
}
|
|
209
|
-
for (const item of changed) {
|
|
384
|
+
for (const item of parseChanges) {
|
|
210
385
|
const relPath = item.relPath || normalizePath(path.relative(rootDir, item.file));
|
|
211
386
|
deleteEdgesForFile.run({ f: relPath });
|
|
212
387
|
deleteMetricsForFile.run(relPath);
|
|
@@ -224,11 +399,11 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
224
399
|
'INSERT INTO edges (source_id, target_id, kind, confidence, dynamic) VALUES (?, ?, ?, ?, ?)',
|
|
225
400
|
);
|
|
226
401
|
|
|
227
|
-
// Prepare hash upsert
|
|
402
|
+
// Prepare hash upsert (with size column from migration v4)
|
|
228
403
|
let upsertHash;
|
|
229
404
|
try {
|
|
230
405
|
upsertHash = db.prepare(
|
|
231
|
-
'INSERT OR REPLACE INTO file_hashes (file, hash, mtime) VALUES (?, ?, ?)',
|
|
406
|
+
'INSERT OR REPLACE INTO file_hashes (file, hash, mtime, size) VALUES (?, ?, ?, ?)',
|
|
232
407
|
);
|
|
233
408
|
} catch {
|
|
234
409
|
upsertHash = null;
|
|
@@ -246,17 +421,17 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
246
421
|
// We'll fill these in during the parse pass + edge pass
|
|
247
422
|
}
|
|
248
423
|
|
|
249
|
-
const filesToParse = isFullBuild ? files.map((f) => ({ file: f })) : changed;
|
|
424
|
+
const filesToParse = isFullBuild ? files.map((f) => ({ file: f })) : parseChanges;
|
|
250
425
|
|
|
251
426
|
// ── Unified parse via parseFilesAuto ───────────────────────────────
|
|
252
427
|
const filePaths = filesToParse.map((item) => item.file);
|
|
253
428
|
const allSymbols = await parseFilesAuto(filePaths, rootDir, engineOpts);
|
|
254
429
|
|
|
255
|
-
// Build a
|
|
256
|
-
const
|
|
430
|
+
// Build a lookup from incremental data (changed items may carry pre-computed hashes + stats)
|
|
431
|
+
const precomputedData = new Map();
|
|
257
432
|
for (const item of filesToParse) {
|
|
258
|
-
if (item.
|
|
259
|
-
|
|
433
|
+
if (item.relPath) {
|
|
434
|
+
precomputedData.set(item.relPath, item);
|
|
260
435
|
}
|
|
261
436
|
}
|
|
262
437
|
|
|
@@ -272,11 +447,14 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
272
447
|
insertNode.run(exp.name, exp.kind, relPath, exp.line, null);
|
|
273
448
|
}
|
|
274
449
|
|
|
275
|
-
// Update file hash for incremental builds
|
|
450
|
+
// Update file hash with real mtime+size for incremental builds
|
|
276
451
|
if (upsertHash) {
|
|
277
|
-
const
|
|
278
|
-
if (
|
|
279
|
-
|
|
452
|
+
const precomputed = precomputedData.get(relPath);
|
|
453
|
+
if (precomputed?.hash) {
|
|
454
|
+
const stat = precomputed.stat || fileStat(path.join(rootDir, relPath));
|
|
455
|
+
const mtime = stat ? Math.floor(stat.mtimeMs) : 0;
|
|
456
|
+
const size = stat ? stat.size : 0;
|
|
457
|
+
upsertHash.run(relPath, precomputed.hash, mtime, size);
|
|
280
458
|
} else {
|
|
281
459
|
const absPath = path.join(rootDir, relPath);
|
|
282
460
|
let code;
|
|
@@ -286,11 +464,23 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
286
464
|
code = null;
|
|
287
465
|
}
|
|
288
466
|
if (code !== null) {
|
|
289
|
-
|
|
467
|
+
const stat = fileStat(absPath);
|
|
468
|
+
const mtime = stat ? Math.floor(stat.mtimeMs) : 0;
|
|
469
|
+
const size = stat ? stat.size : 0;
|
|
470
|
+
upsertHash.run(relPath, fileHash(code), mtime, size);
|
|
290
471
|
}
|
|
291
472
|
}
|
|
292
473
|
}
|
|
293
474
|
}
|
|
475
|
+
|
|
476
|
+
// Also update metadata-only entries (self-heal mtime/size without re-parse)
|
|
477
|
+
if (upsertHash) {
|
|
478
|
+
for (const item of metadataUpdates) {
|
|
479
|
+
const mtime = item.stat ? Math.floor(item.stat.mtimeMs) : 0;
|
|
480
|
+
const size = item.stat ? item.stat.size : 0;
|
|
481
|
+
upsertHash.run(item.relPath, item.hash, mtime, size);
|
|
482
|
+
}
|
|
483
|
+
}
|
|
294
484
|
});
|
|
295
485
|
insertAll();
|
|
296
486
|
|
|
@@ -458,7 +648,9 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
458
648
|
}
|
|
459
649
|
|
|
460
650
|
// Call edges with confidence scoring — using pre-loaded lookup maps (N+1 fix)
|
|
651
|
+
const seenCallEdges = new Set();
|
|
461
652
|
for (const call of symbols.calls) {
|
|
653
|
+
if (call.receiver && BUILTIN_RECEIVERS.has(call.receiver)) continue;
|
|
462
654
|
let caller = null;
|
|
463
655
|
for (const def of symbols.definitions) {
|
|
464
656
|
if (def.line <= call.line) {
|
|
@@ -493,10 +685,18 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
493
685
|
);
|
|
494
686
|
if (methodCandidates.length > 0) {
|
|
495
687
|
targets = methodCandidates;
|
|
496
|
-
} else
|
|
497
|
-
|
|
498
|
-
|
|
688
|
+
} else if (
|
|
689
|
+
!call.receiver ||
|
|
690
|
+
call.receiver === 'this' ||
|
|
691
|
+
call.receiver === 'self' ||
|
|
692
|
+
call.receiver === 'super'
|
|
693
|
+
) {
|
|
694
|
+
// Scoped fallback — same-dir or parent-dir only, not global
|
|
695
|
+
targets = (nodesByName.get(call.name) || []).filter(
|
|
696
|
+
(n) => computeConfidence(relPath, n.file, null) >= 0.5,
|
|
697
|
+
);
|
|
499
698
|
}
|
|
699
|
+
// else: method call on a receiver — skip global fallback entirely
|
|
500
700
|
}
|
|
501
701
|
}
|
|
502
702
|
|
|
@@ -509,7 +709,9 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
509
709
|
}
|
|
510
710
|
|
|
511
711
|
for (const t of targets) {
|
|
512
|
-
|
|
712
|
+
const edgeKey = `${caller.id}|${t.id}`;
|
|
713
|
+
if (t.id !== caller.id && !seenCallEdges.has(edgeKey)) {
|
|
714
|
+
seenCallEdges.add(edgeKey);
|
|
513
715
|
const confidence = computeConfidence(relPath, t.file, importedFrom);
|
|
514
716
|
insertEdge.run(caller.id, t.id, 'calls', confidence, isDynamic);
|
|
515
717
|
edgeCount++;
|
|
@@ -582,6 +784,9 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
582
784
|
console.log(`Stored in ${dbPath}`);
|
|
583
785
|
db.close();
|
|
584
786
|
|
|
787
|
+
// Write journal header after successful build
|
|
788
|
+
writeJournalHeader(rootDir, Date.now());
|
|
789
|
+
|
|
585
790
|
if (!opts.skipRegistry) {
|
|
586
791
|
const tmpDir = path.resolve(os.tmpdir());
|
|
587
792
|
const resolvedRoot = path.resolve(rootDir);
|