@optave/codegraph 3.0.0 → 3.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -195,6 +195,7 @@ Full agent setup: [AI Agent Guide](docs/guides/ai-agent-guide.md) · [CLAU
195
195
  | 👀 | **Watch mode** | Incrementally update the graph as files change |
196
196
  | 🤖 | **MCP server** | 30-tool MCP server for AI assistants; single-repo by default, opt-in multi-repo |
197
197
  | ⚡ | **Always fresh** | Three-tier incremental detection — sub-second rebuilds even on large codebases |
198
+ | 🔬 | **Data flow analysis** | Intraprocedural parameter tracking, return consumers, argument flows, and mutation detection — all 11 languages |
198
199
  | 🧮 | **Complexity metrics** | Cognitive, cyclomatic, nesting depth, Halstead, and Maintainability Index per function |
199
200
  | 🏘️ | **Community detection** | Louvain clustering to discover natural module boundaries and architectural drift |
200
201
  | 📜 | **Manifesto rule engine** | Configurable pass/fail rules with warn/fail thresholds for CI gates via `check` (exit code 1 on fail) |
@@ -208,8 +209,8 @@ Full agent setup: [AI Agent Guide](docs/guides/ai-agent-guide.md) · [CLAU
208
209
  | 📋 | **Composite audit** | Single `audit` command combining explain + impact + health metrics per function — one call instead of 3-4 |
209
210
  | 🚦 | **Triage queue** | `triage` merges connectivity, hotspots, roles, and complexity into a ranked audit priority queue |
210
211
  | 📦 | **Batch querying** | Accept a list of targets and return all results in one JSON payload — enables multi-agent parallel dispatch |
211
- | 🔬 | **Dataflow analysis** | Track how data moves through functions with `flows_to`, `returns`, and `mutates` edges — opt-in via `build --dataflow` (JS/TS) |
212
- | 🧩 | **Control flow graph** | Intraprocedural CFG construction for all 11 languages — `cfg` command with text/DOT/Mermaid output, opt-in via `build --cfg` |
212
+ | 🔬 | **Dataflow analysis** | Track how data moves through functions with `flows_to`, `returns`, and `mutates` edges — all 11 languages, included by default, skip with `--no-dataflow` |
213
+ | 🧩 | **Control flow graph** | Intraprocedural CFG construction for all 11 languages — `cfg` command with text/DOT/Mermaid output, included by default, skip with `--no-cfg` |
213
214
  | 🔎 | **AST node querying** | Stored queryable AST nodes (calls, `new`, string, regex, throw, await) — `ast` command with SQL GLOB pattern matching |
214
215
  | 🧬 | **Expanded node/edge types** | `parameter`, `property`, `constant` node kinds with `parent_id` for sub-declaration queries; `contains`, `parameter_of`, `receiver` edge kinds |
215
216
  | 📊 | **Exports analysis** | `exports <file>` shows all exported symbols with per-symbol consumers, re-export detection, and counts |
@@ -225,6 +226,7 @@ See [docs/examples](docs/examples) for real-world CLI and MCP usage examples.
225
226
  ```bash
226
227
  codegraph build [dir] # Parse and build the dependency graph
227
228
  codegraph build --no-incremental # Force full rebuild
229
+ codegraph build --no-dataflow # Skip data flow edge extraction (flows_to, returns, mutates are extracted by default)
228
230
  codegraph build --engine wasm # Force WASM engine (skip native)
229
231
  codegraph watch [dir] # Watch for changes, update graph incrementally
230
232
  ```
@@ -327,7 +329,8 @@ codegraph ast -k call # Filter by kind: call, new, string, regex
327
329
  codegraph ast -k throw --file src/ # Combine kind and file filters
328
330
  ```
329
331
 
330
- > **Note:** Dataflow requires `codegraph build --dataflow` (JS/TS only). CFG requires `codegraph build --cfg`. Both are opt-in to keep default builds fast.
332
+ > **Note:** Dataflow and CFG are included by default for all 11 languages. Use `--no-dataflow` / `--no-cfg` for faster builds.
333
+
331
334
 
332
335
  ### Audit, Triage & Batch
333
336
 
@@ -477,15 +480,15 @@ codegraph registry remove <name> # Unregister
477
480
 
478
481
  | Language | Extensions | Coverage |
479
482
  |---|---|---|
480
- | ![JavaScript](https://img.shields.io/badge/-JavaScript-F7DF1E?style=flat-square&logo=javascript&logoColor=black) | `.js`, `.jsx`, `.mjs`, `.cjs` | Full — functions, classes, imports, call sites |
481
- | ![TypeScript](https://img.shields.io/badge/-TypeScript-3178C6?style=flat-square&logo=typescript&logoColor=white) | `.ts`, `.tsx` | Full — interfaces, type aliases, `.d.ts` |
482
- | ![Python](https://img.shields.io/badge/-Python-3776AB?style=flat-square&logo=python&logoColor=white) | `.py` | Functions, classes, methods, imports, decorators |
483
- | ![Go](https://img.shields.io/badge/-Go-00ADD8?style=flat-square&logo=go&logoColor=white) | `.go` | Functions, methods, structs, interfaces, imports, call sites |
484
- | ![Rust](https://img.shields.io/badge/-Rust-000000?style=flat-square&logo=rust&logoColor=white) | `.rs` | Functions, methods, structs, traits, `use` imports, call sites |
485
- | ![Java](https://img.shields.io/badge/-Java-ED8B00?style=flat-square&logo=openjdk&logoColor=white) | `.java` | Classes, methods, constructors, interfaces, imports, call sites |
486
- | ![C#](https://img.shields.io/badge/-C%23-512BD4?style=flat-square&logo=dotnet&logoColor=white) | `.cs` | Classes, structs, records, interfaces, enums, methods, constructors, using directives, invocations |
487
- | ![PHP](https://img.shields.io/badge/-PHP-777BB4?style=flat-square&logo=php&logoColor=white) | `.php` | Functions, classes, interfaces, traits, enums, methods, namespace use, calls |
488
- | ![Ruby](https://img.shields.io/badge/-Ruby-CC342D?style=flat-square&logo=ruby&logoColor=white) | `.rb` | Classes, modules, methods, singleton methods, require/require_relative, include/extend |
483
+ | ![JavaScript](https://img.shields.io/badge/-JavaScript-F7DF1E?style=flat-square&logo=javascript&logoColor=black) | `.js`, `.jsx`, `.mjs`, `.cjs` | Full — functions, classes, imports, call sites, dataflow |
484
+ | ![TypeScript](https://img.shields.io/badge/-TypeScript-3178C6?style=flat-square&logo=typescript&logoColor=white) | `.ts`, `.tsx` | Full — interfaces, type aliases, `.d.ts`, dataflow |
485
+ | ![Python](https://img.shields.io/badge/-Python-3776AB?style=flat-square&logo=python&logoColor=white) | `.py` | Functions, classes, methods, imports, decorators, dataflow |
486
+ | ![Go](https://img.shields.io/badge/-Go-00ADD8?style=flat-square&logo=go&logoColor=white) | `.go` | Functions, methods, structs, interfaces, imports, call sites, dataflow |
487
+ | ![Rust](https://img.shields.io/badge/-Rust-000000?style=flat-square&logo=rust&logoColor=white) | `.rs` | Functions, methods, structs, traits, `use` imports, call sites, dataflow |
488
+ | ![Java](https://img.shields.io/badge/-Java-ED8B00?style=flat-square&logo=openjdk&logoColor=white) | `.java` | Classes, methods, constructors, interfaces, imports, call sites, dataflow |
489
+ | ![C#](https://img.shields.io/badge/-C%23-512BD4?style=flat-square&logo=dotnet&logoColor=white) | `.cs` | Classes, structs, records, interfaces, enums, methods, constructors, using directives, invocations, dataflow |
490
+ | ![PHP](https://img.shields.io/badge/-PHP-777BB4?style=flat-square&logo=php&logoColor=white) | `.php` | Functions, classes, interfaces, traits, enums, methods, namespace use, calls, dataflow |
491
+ | ![Ruby](https://img.shields.io/badge/-Ruby-CC342D?style=flat-square&logo=ruby&logoColor=white) | `.rb` | Classes, modules, methods, singleton methods, require/require_relative, include/extend, dataflow |
489
492
  | ![Terraform](https://img.shields.io/badge/-Terraform-844FBA?style=flat-square&logo=terraform&logoColor=white) | `.tf`, `.hcl` | Resource, data, variable, module, output blocks |
490
493
 
491
494
  ## ⚙️ How It Works
@@ -552,14 +555,14 @@ Self-measured on every release via CI ([build benchmarks](generated/benchmarks/B
552
555
 
553
556
  | Metric | Latest |
554
557
  |---|---|
555
- | Build speed (native) | **1.9 ms/file** |
556
- | Build speed (WASM) | **8.3 ms/file** |
558
+ | Build speed (native) | **14.1 ms/file** |
559
+ | Build speed (WASM) | **24.4 ms/file** |
557
560
  | Query time | **3ms** |
558
- | No-op rebuild (native) | **4ms** |
559
- | 1-file rebuild (native) | **124ms** |
560
- | Query: fn-deps | **1.4ms** |
561
- | Query: path | **1.4ms** |
562
- | ~50,000 files (est.) | **~95.0s build** |
561
+ | No-op rebuild (native) | **5ms** |
562
+ | 1-file rebuild (native) | **915ms** |
563
+ | Query: fn-deps | **0.9ms** |
564
+ | Query: path | **0.8ms** |
565
+ | ~50,000 files (est.) | **~705.0s build** |
563
566
 
564
567
  Metrics are normalized per file for cross-version comparability. Times above are for a full initial build — incremental rebuilds only re-parse changed files.
565
568
 
@@ -804,7 +807,7 @@ const { results: fused } = await multiSearchData(
804
807
  - **No full type inference** — parses `.d.ts` interfaces but doesn't use TypeScript's type checker for overload resolution
805
808
  - **Dynamic calls are best-effort** — complex computed property access and `eval` patterns are not resolved
806
809
  - **Python imports** — resolves relative imports but doesn't follow `sys.path` or virtual environment packages
807
- - **Dataflow analysis** — currently JS/TS only; intraprocedural (single-function scope), not interprocedural
810
+ - **Dataflow analysis** — intraprocedural (single-function scope), not interprocedural
808
811
 
809
812
  ## 🗺️ Roadmap
810
813
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@optave/codegraph",
3
- "version": "3.0.0",
3
+ "version": "3.0.2",
4
4
  "description": "Local code graph CLI — parse codebases with tree-sitter, build dependency graphs, query them",
5
5
  "type": "module",
6
6
  "main": "src/index.js",
@@ -71,10 +71,10 @@
71
71
  },
72
72
  "optionalDependencies": {
73
73
  "@modelcontextprotocol/sdk": "^1.0.0",
74
- "@optave/codegraph-darwin-arm64": "3.0.0",
75
- "@optave/codegraph-darwin-x64": "3.0.0",
76
- "@optave/codegraph-linux-x64-gnu": "3.0.0",
77
- "@optave/codegraph-win32-x64-msvc": "3.0.0"
74
+ "@optave/codegraph-darwin-arm64": "3.0.2",
75
+ "@optave/codegraph-darwin-x64": "3.0.2",
76
+ "@optave/codegraph-linux-x64-gnu": "3.0.2",
77
+ "@optave/codegraph-win32-x64-msvc": "3.0.2"
78
78
  },
79
79
  "devDependencies": {
80
80
  "@biomejs/biome": "^2.4.4",
package/src/ast.js CHANGED
@@ -156,9 +156,8 @@ export async function buildAstNodes(db, fileSymbols, _rootDir, _engineOpts) {
156
156
  return;
157
157
  }
158
158
 
159
- const getNodeId = db.prepare(
160
- 'SELECT id FROM nodes WHERE name = ? AND kind = ? AND file = ? AND line = ?',
161
- );
159
+ // Bulk-fetch all node IDs per file (replaces per-def getNodeId calls)
160
+ const bulkGetNodeIds = db.prepare('SELECT id, name, kind, line FROM nodes WHERE file = ?');
162
161
 
163
162
  const tx = db.transaction((rows) => {
164
163
  for (const r of rows) {
@@ -172,14 +171,20 @@ export async function buildAstNodes(db, fileSymbols, _rootDir, _engineOpts) {
172
171
  const rows = [];
173
172
  const defs = symbols.definitions || [];
174
173
 
174
+ // Pre-load all node IDs for this file into a map
175
+ const nodeIdMap = new Map();
176
+ for (const row of bulkGetNodeIds.all(relPath)) {
177
+ nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id);
178
+ }
179
+
175
180
  // 1. Call nodes from symbols.calls (all languages)
176
181
  if (symbols.calls) {
177
182
  for (const call of symbols.calls) {
178
183
  const parentDef = findParentDef(defs, call.line);
179
184
  let parentNodeId = null;
180
185
  if (parentDef) {
181
- const row = getNodeId.get(parentDef.name, parentDef.kind, relPath, parentDef.line);
182
- if (row) parentNodeId = row.id;
186
+ parentNodeId =
187
+ nodeIdMap.get(`${parentDef.name}|${parentDef.kind}|${parentDef.line}`) || null;
183
188
  }
184
189
  rows.push({
185
190
  file: relPath,
@@ -195,10 +200,32 @@ export async function buildAstNodes(db, fileSymbols, _rootDir, _engineOpts) {
195
200
 
196
201
  // 2. AST walk for JS/TS/TSX — extract new, throw, await, string, regex
197
202
  const ext = path.extname(relPath).toLowerCase();
198
- if (WALK_EXTENSIONS.has(ext) && symbols._tree) {
199
- const astRows = [];
200
- walkAst(symbols._tree.rootNode, defs, relPath, astRows, getNodeId);
201
- rows.push(...astRows);
203
+ if (WALK_EXTENSIONS.has(ext)) {
204
+ if (symbols._tree) {
205
+ // WASM path: walk the tree-sitter AST
206
+ const astRows = [];
207
+ walkAst(symbols._tree.rootNode, defs, relPath, astRows, nodeIdMap);
208
+ rows.push(...astRows);
209
+ } else if (symbols.astNodes?.length) {
210
+ // Native path: use pre-extracted AST nodes from Rust
211
+ for (const n of symbols.astNodes) {
212
+ const parentDef = findParentDef(defs, n.line);
213
+ let parentNodeId = null;
214
+ if (parentDef) {
215
+ parentNodeId =
216
+ nodeIdMap.get(`${parentDef.name}|${parentDef.kind}|${parentDef.line}`) || null;
217
+ }
218
+ rows.push({
219
+ file: relPath,
220
+ line: n.line,
221
+ kind: n.kind,
222
+ name: n.name,
223
+ text: n.text || null,
224
+ receiver: n.receiver || null,
225
+ parentNodeId,
226
+ });
227
+ }
228
+ }
202
229
  }
203
230
 
204
231
  if (rows.length > 0) {
@@ -213,7 +240,7 @@ export async function buildAstNodes(db, fileSymbols, _rootDir, _engineOpts) {
213
240
  /**
214
241
  * Walk a tree-sitter AST and collect new/throw/await/string/regex nodes.
215
242
  */
216
- function walkAst(node, defs, relPath, rows, getNodeId) {
243
+ function walkAst(node, defs, relPath, rows, nodeIdMap) {
217
244
  const kind = JS_TS_AST_TYPES[node.type];
218
245
  if (kind) {
219
246
  // tree-sitter lines are 0-indexed, our DB uses 1-indexed
@@ -237,7 +264,7 @@ function walkAst(node, defs, relPath, rows, getNodeId) {
237
264
  if (content.length < 2) {
238
265
  // Still recurse children
239
266
  for (let i = 0; i < node.childCount; i++) {
240
- walkAst(node.child(i), defs, relPath, rows, getNodeId);
267
+ walkAst(node.child(i), defs, relPath, rows, nodeIdMap);
241
268
  }
242
269
  return;
243
270
  }
@@ -251,8 +278,7 @@ function walkAst(node, defs, relPath, rows, getNodeId) {
251
278
  const parentDef = findParentDef(defs, line);
252
279
  let parentNodeId = null;
253
280
  if (parentDef) {
254
- const row = getNodeId.get(parentDef.name, parentDef.kind, relPath, parentDef.line);
255
- if (row) parentNodeId = row.id;
281
+ parentNodeId = nodeIdMap.get(`${parentDef.name}|${parentDef.kind}|${parentDef.line}`) || null;
256
282
  }
257
283
 
258
284
  rows.push({
@@ -271,7 +297,7 @@ function walkAst(node, defs, relPath, rows, getNodeId) {
271
297
  }
272
298
 
273
299
  for (let i = 0; i < node.childCount; i++) {
274
- walkAst(node.child(i), defs, relPath, rows, getNodeId);
300
+ walkAst(node.child(i), defs, relPath, rows, nodeIdMap);
275
301
  }
276
302
  }
277
303
 
package/src/builder.js CHANGED
@@ -4,7 +4,7 @@ import path from 'node:path';
4
4
  import { performance } from 'node:perf_hooks';
5
5
  import { loadConfig } from './config.js';
6
6
  import { EXTENSIONS, IGNORE_DIRS, normalizePath } from './constants.js';
7
- import { closeDb, getBuildMeta, initSchema, openDb, setBuildMeta } from './db.js';
7
+ import { closeDb, getBuildMeta, initSchema, MIGRATIONS, openDb, setBuildMeta } from './db.js';
8
8
  import { readJournal, writeJournalHeader } from './journal.js';
9
9
  import { debug, info, warn } from './logger.js';
10
10
  import { getActiveEngine, parseFilesAuto } from './parser.js';
@@ -448,17 +448,21 @@ export async function buildGraph(rootDir, opts = {}) {
448
448
  const { name: engineName, version: engineVersion } = getActiveEngine(engineOpts);
449
449
  info(`Using ${engineName} engine${engineVersion ? ` (v${engineVersion})` : ''}`);
450
450
 
451
- // Check for engine/version mismatch — auto-promote to full rebuild
451
+ // Check for engine/schema mismatch — auto-promote to full rebuild
452
+ // Only trigger on engine change or schema version change (not every patch/minor bump)
453
+ const CURRENT_SCHEMA_VERSION = MIGRATIONS[MIGRATIONS.length - 1].version;
452
454
  let forceFullRebuild = false;
453
455
  if (incremental) {
454
456
  const prevEngine = getBuildMeta(db, 'engine');
455
- const prevVersion = getBuildMeta(db, 'codegraph_version');
456
457
  if (prevEngine && prevEngine !== engineName) {
457
458
  info(`Engine changed (${prevEngine} → ${engineName}), promoting to full rebuild.`);
458
459
  forceFullRebuild = true;
459
460
  }
460
- if (prevVersion && prevVersion !== CODEGRAPH_VERSION) {
461
- info(`Version changed (${prevVersion} ${CODEGRAPH_VERSION}), promoting to full rebuild.`);
461
+ const prevSchema = getBuildMeta(db, 'schema_version');
462
+ if (prevSchema && Number(prevSchema) !== CURRENT_SCHEMA_VERSION) {
463
+ info(
464
+ `Schema version changed (${prevSchema} → ${CURRENT_SCHEMA_VERSION}), promoting to full rebuild.`,
465
+ );
462
466
  forceFullRebuild = true;
463
467
  }
464
468
  }
@@ -522,9 +526,9 @@ export async function buildGraph(rootDir, opts = {}) {
522
526
  }
523
527
 
524
528
  if (!isFullBuild && parseChanges.length === 0 && removed.length === 0) {
525
- // Check if optional analysis was requested but never computed
529
+ // Check if default analyses were never computed (e.g. legacy DB)
526
530
  const needsCfg =
527
- opts.cfg &&
531
+ opts.cfg !== false &&
528
532
  (() => {
529
533
  try {
530
534
  return db.prepare('SELECT COUNT(*) as c FROM cfg_blocks').get().c === 0;
@@ -533,16 +537,10 @@ export async function buildGraph(rootDir, opts = {}) {
533
537
  }
534
538
  })();
535
539
  const needsDataflow =
536
- opts.dataflow &&
540
+ opts.dataflow !== false &&
537
541
  (() => {
538
542
  try {
539
- return (
540
- db
541
- .prepare(
542
- "SELECT COUNT(*) as c FROM edges WHERE kind IN ('flows_to','returns','mutates')",
543
- )
544
- .get().c === 0
545
- );
543
+ return db.prepare('SELECT COUNT(*) as c FROM dataflow').get().c === 0;
546
544
  } catch {
547
545
  return true;
548
546
  }
@@ -721,44 +719,66 @@ export async function buildGraph(rootDir, opts = {}) {
721
719
  }
722
720
  }
723
721
 
722
+ // Bulk-fetch all node IDs for a file in one query (replaces per-node getNodeId calls)
723
+ const bulkGetNodeIds = db.prepare('SELECT id, name, kind, line FROM nodes WHERE file = ?');
724
+
724
725
  const insertAll = db.transaction(() => {
725
726
  for (const [relPath, symbols] of allSymbols) {
726
727
  fileSymbols.set(relPath, symbols);
727
728
 
729
+ // Phase 1: Insert file node + definitions + exports (no children yet)
728
730
  insertNode.run(relPath, 'file', relPath, 0, null, null);
729
- const fileRow = getNodeId.get(relPath, 'file', relPath, 0);
730
731
  for (const def of symbols.definitions) {
731
732
  insertNode.run(def.name, def.kind, relPath, def.line, def.endLine || null, null);
732
- const defRow = getNodeId.get(def.name, def.kind, relPath, def.line);
733
+ }
734
+ for (const exp of symbols.exports) {
735
+ insertNode.run(exp.name, exp.kind, relPath, exp.line, null, null);
736
+ }
737
+
738
+ // Phase 2: Bulk-fetch IDs for file + definitions
739
+ const nodeIdMap = new Map();
740
+ for (const row of bulkGetNodeIds.all(relPath)) {
741
+ nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id);
742
+ }
743
+
744
+ // Phase 3: Insert children with parent_id from the map
745
+ for (const def of symbols.definitions) {
746
+ if (!def.children?.length) continue;
747
+ const defId = nodeIdMap.get(`${def.name}|${def.kind}|${def.line}`);
748
+ if (!defId) continue;
749
+ for (const child of def.children) {
750
+ insertNode.run(child.name, child.kind, relPath, child.line, child.endLine || null, defId);
751
+ }
752
+ }
753
+
754
+ // Phase 4: Re-fetch to include children IDs
755
+ nodeIdMap.clear();
756
+ for (const row of bulkGetNodeIds.all(relPath)) {
757
+ nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id);
758
+ }
759
+
760
+ // Phase 5: Insert edges using the cached ID map
761
+ const fileId = nodeIdMap.get(`${relPath}|file|0`);
762
+ for (const def of symbols.definitions) {
763
+ const defId = nodeIdMap.get(`${def.name}|${def.kind}|${def.line}`);
733
764
  // File → top-level definition contains edge
734
- if (fileRow && defRow) {
735
- insertEdge.run(fileRow.id, defRow.id, 'contains', 1.0, 0);
765
+ if (fileId && defId) {
766
+ insertEdge.run(fileId, defId, 'contains', 1.0, 0);
736
767
  }
737
- if (def.children?.length && defRow) {
768
+ if (def.children?.length && defId) {
738
769
  for (const child of def.children) {
739
- insertNode.run(
740
- child.name,
741
- child.kind,
742
- relPath,
743
- child.line,
744
- child.endLine || null,
745
- defRow.id,
746
- );
747
- // Parent → child contains edge
748
- const childRow = getNodeId.get(child.name, child.kind, relPath, child.line);
749
- if (childRow) {
750
- insertEdge.run(defRow.id, childRow.id, 'contains', 1.0, 0);
770
+ const childId = nodeIdMap.get(`${child.name}|${child.kind}|${child.line}`);
771
+ if (childId) {
772
+ // Parent → child contains edge
773
+ insertEdge.run(defId, childId, 'contains', 1.0, 0);
751
774
  // Parameter → parent parameter_of edge (inverse direction)
752
775
  if (child.kind === 'parameter') {
753
- insertEdge.run(childRow.id, defRow.id, 'parameter_of', 1.0, 0);
776
+ insertEdge.run(childId, defId, 'parameter_of', 1.0, 0);
754
777
  }
755
778
  }
756
779
  }
757
780
  }
758
781
  }
759
- for (const exp of symbols.exports) {
760
- insertNode.run(exp.name, exp.kind, relPath, exp.line, null, null);
761
- }
762
782
 
763
783
  // Update file hash with real mtime+size for incremental builds
764
784
  // Skip for reverse-dep files — they didn't actually change
@@ -1229,7 +1249,9 @@ export async function buildGraph(rootDir, opts = {}) {
1229
1249
  }
1230
1250
  try {
1231
1251
  const { buildStructure } = await import('./structure.js');
1232
- buildStructure(db, fileSymbols, rootDir, lineCountMap, relDirs);
1252
+ // Pass changed file paths so incremental builds can scope the rebuild
1253
+ const changedFilePaths = isFullBuild ? null : [...allSymbols.keys()];
1254
+ buildStructure(db, fileSymbols, rootDir, lineCountMap, relDirs, changedFilePaths);
1233
1255
  } catch (err) {
1234
1256
  debug(`Structure analysis failed: ${err.message}`);
1235
1257
  }
@@ -1250,45 +1272,81 @@ export async function buildGraph(rootDir, opts = {}) {
1250
1272
  }
1251
1273
  _t.rolesMs = performance.now() - _t.roles0;
1252
1274
 
1253
- // Always-on AST node extraction (calls, new, string, regex, throw, await)
1254
- // Must run before complexity which releases _tree references
1275
+ // For incremental builds, filter out reverse-dep-only files from AST/complexity/CFG/dataflow —
1276
+ // their content didn't change, so existing ast_nodes/function_complexity rows are valid.
1277
+ let astComplexitySymbols = allSymbols;
1278
+ if (!isFullBuild) {
1279
+ const reverseDepFiles = new Set(
1280
+ filesToParse.filter((item) => item._reverseDepOnly).map((item) => item.relPath),
1281
+ );
1282
+ if (reverseDepFiles.size > 0) {
1283
+ astComplexitySymbols = new Map();
1284
+ for (const [relPath, symbols] of allSymbols) {
1285
+ if (!reverseDepFiles.has(relPath)) {
1286
+ astComplexitySymbols.set(relPath, symbols);
1287
+ }
1288
+ }
1289
+ debug(
1290
+ `AST/complexity/CFG/dataflow: processing ${astComplexitySymbols.size} changed files (skipping ${reverseDepFiles.size} reverse-deps)`,
1291
+ );
1292
+ }
1293
+ }
1294
+
1295
+ // AST node extraction (calls, new, string, regex, throw, await)
1255
1296
  _t.ast0 = performance.now();
1256
- try {
1257
- const { buildAstNodes } = await import('./ast.js');
1258
- await buildAstNodes(db, allSymbols, rootDir, engineOpts);
1259
- } catch (err) {
1260
- debug(`AST node extraction failed: ${err.message}`);
1297
+ if (opts.ast !== false) {
1298
+ try {
1299
+ const { buildAstNodes } = await import('./ast.js');
1300
+ await buildAstNodes(db, astComplexitySymbols, rootDir, engineOpts);
1301
+ } catch (err) {
1302
+ debug(`AST node extraction failed: ${err.message}`);
1303
+ }
1261
1304
  }
1262
1305
  _t.astMs = performance.now() - _t.ast0;
1263
1306
 
1264
1307
  // Compute per-function complexity metrics (cognitive, cyclomatic, nesting)
1265
1308
  _t.complexity0 = performance.now();
1266
- try {
1267
- const { buildComplexityMetrics } = await import('./complexity.js');
1268
- await buildComplexityMetrics(db, allSymbols, rootDir, engineOpts);
1269
- } catch (err) {
1270
- debug(`Complexity analysis failed: ${err.message}`);
1309
+ if (opts.complexity !== false) {
1310
+ try {
1311
+ const { buildComplexityMetrics } = await import('./complexity.js');
1312
+ await buildComplexityMetrics(db, astComplexitySymbols, rootDir, engineOpts);
1313
+ } catch (err) {
1314
+ debug(`Complexity analysis failed: ${err.message}`);
1315
+ }
1271
1316
  }
1272
1317
  _t.complexityMs = performance.now() - _t.complexity0;
1273
1318
 
1274
- // Opt-in CFG analysis (--cfg)
1275
- if (opts.cfg) {
1319
+ // Pre-parse files missing WASM trees (native builds) so CFG + dataflow
1320
+ // share a single parse pass instead of each creating parsers independently
1321
+ if (opts.cfg !== false || opts.dataflow !== false) {
1322
+ _t.wasmPre0 = performance.now();
1323
+ try {
1324
+ const { ensureWasmTrees } = await import('./parser.js');
1325
+ await ensureWasmTrees(astComplexitySymbols, rootDir);
1326
+ } catch (err) {
1327
+ debug(`WASM pre-parse failed: ${err.message}`);
1328
+ }
1329
+ _t.wasmPreMs = performance.now() - _t.wasmPre0;
1330
+ }
1331
+
1332
+ // CFG analysis (skip with --no-cfg)
1333
+ if (opts.cfg !== false) {
1276
1334
  _t.cfg0 = performance.now();
1277
1335
  try {
1278
1336
  const { buildCFGData } = await import('./cfg.js');
1279
- await buildCFGData(db, allSymbols, rootDir, engineOpts);
1337
+ await buildCFGData(db, astComplexitySymbols, rootDir, engineOpts);
1280
1338
  } catch (err) {
1281
1339
  debug(`CFG analysis failed: ${err.message}`);
1282
1340
  }
1283
1341
  _t.cfgMs = performance.now() - _t.cfg0;
1284
1342
  }
1285
1343
 
1286
- // Opt-in dataflow analysis (--dataflow)
1287
- if (opts.dataflow) {
1344
+ // Dataflow analysis (skip with --no-dataflow)
1345
+ if (opts.dataflow !== false) {
1288
1346
  _t.dataflow0 = performance.now();
1289
1347
  try {
1290
1348
  const { buildDataflowEdges } = await import('./dataflow.js');
1291
- await buildDataflowEdges(db, allSymbols, rootDir, engineOpts);
1349
+ await buildDataflowEdges(db, astComplexitySymbols, rootDir, engineOpts);
1292
1350
  } catch (err) {
1293
1351
  debug(`Dataflow analysis failed: ${err.message}`);
1294
1352
  }
@@ -1348,6 +1406,7 @@ export async function buildGraph(rootDir, opts = {}) {
1348
1406
  engine: engineName,
1349
1407
  engine_version: engineVersion || '',
1350
1408
  codegraph_version: CODEGRAPH_VERSION,
1409
+ schema_version: String(CURRENT_SCHEMA_VERSION),
1351
1410
  built_at: new Date().toISOString(),
1352
1411
  node_count: nodeCount,
1353
1412
  edge_count: actualEdgeCount,
@@ -1385,8 +1444,11 @@ export async function buildGraph(rootDir, opts = {}) {
1385
1444
  edgesMs: +_t.edgesMs.toFixed(1),
1386
1445
  structureMs: +_t.structureMs.toFixed(1),
1387
1446
  rolesMs: +_t.rolesMs.toFixed(1),
1447
+ astMs: +_t.astMs.toFixed(1),
1388
1448
  complexityMs: +_t.complexityMs.toFixed(1),
1449
+ ...(_t.wasmPreMs != null && { wasmPreMs: +_t.wasmPreMs.toFixed(1) }),
1389
1450
  ...(_t.cfgMs != null && { cfgMs: +_t.cfgMs.toFixed(1) }),
1451
+ ...(_t.dataflowMs != null && { dataflowMs: +_t.dataflowMs.toFixed(1) }),
1390
1452
  },
1391
1453
  };
1392
1454
  }
package/src/cfg.js CHANGED
@@ -1241,7 +1241,8 @@ export function cfgData(name, customDbPath, opts = {}) {
1241
1241
  return {
1242
1242
  name,
1243
1243
  results: [],
1244
- warning: 'No CFG data found. Run `codegraph build --cfg` first.',
1244
+ warning:
1245
+ 'No CFG data found. Rebuild with `codegraph build` (CFG is now included by default).',
1245
1246
  };
1246
1247
  }
1247
1248
 
package/src/cli.js CHANGED
@@ -105,13 +105,17 @@ program
105
105
  .command('build [dir]')
106
106
  .description('Parse repo and build graph in .codegraph/graph.db')
107
107
  .option('--no-incremental', 'Force full rebuild (ignore file hashes)')
108
- .option('--dataflow', 'Extract data flow edges (flows_to, returns, mutates)')
109
- .option('--cfg', 'Build intraprocedural control flow graphs')
108
+ .option('--no-ast', 'Skip AST node extraction (calls, new, string, regex, throw, await)')
109
+ .option('--no-complexity', 'Skip complexity metrics computation')
110
+ .option('--no-dataflow', 'Skip data flow edge extraction')
111
+ .option('--no-cfg', 'Skip control flow graph building')
110
112
  .action(async (dir, opts) => {
111
113
  const root = path.resolve(dir || '.');
112
114
  const engine = program.opts().engine;
113
115
  await buildGraph(root, {
114
116
  incremental: opts.incremental,
117
+ ast: opts.ast,
118
+ complexity: opts.complexity,
115
119
  engine,
116
120
  dataflow: opts.dataflow,
117
121
  cfg: opts.cfg,
package/src/complexity.js CHANGED
@@ -1769,9 +1769,6 @@ export async function buildComplexityMetrics(db, fileSymbols, rootDir, _engineOp
1769
1769
  );
1770
1770
  analyzed++;
1771
1771
  }
1772
-
1773
- // Release cached tree for GC
1774
- symbols._tree = null;
1775
1772
  }
1776
1773
  });
1777
1774