@optave/codegraph 3.0.0 → 3.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +23 -20
- package/package.json +5 -5
- package/src/ast.js +40 -14
- package/src/builder.js +117 -55
- package/src/cfg.js +2 -1
- package/src/cli.js +6 -2
- package/src/complexity.js +0 -3
- package/src/dataflow.js +766 -275
- package/src/extractors/javascript.js +51 -0
- package/src/flow.js +5 -2
- package/src/index.js +1 -1
- package/src/mcp.js +2 -2
- package/src/parser.js +70 -0
- package/src/structure.js +64 -11
package/README.md
CHANGED
|
@@ -195,6 +195,7 @@ Full agent setup: [AI Agent Guide](docs/guides/ai-agent-guide.md) · [CLAU
|
|
|
195
195
|
| 👀 | **Watch mode** | Incrementally update the graph as files change |
|
|
196
196
|
| 🤖 | **MCP server** | 30-tool MCP server for AI assistants; single-repo by default, opt-in multi-repo |
|
|
197
197
|
| ⚡ | **Always fresh** | Three-tier incremental detection — sub-second rebuilds even on large codebases |
|
|
198
|
+
| 🔬 | **Data flow analysis** | Intraprocedural parameter tracking, return consumers, argument flows, and mutation detection — all 11 languages |
|
|
198
199
|
| 🧮 | **Complexity metrics** | Cognitive, cyclomatic, nesting depth, Halstead, and Maintainability Index per function |
|
|
199
200
|
| 🏘️ | **Community detection** | Louvain clustering to discover natural module boundaries and architectural drift |
|
|
200
201
|
| 📜 | **Manifesto rule engine** | Configurable pass/fail rules with warn/fail thresholds for CI gates via `check` (exit code 1 on fail) |
|
|
@@ -208,8 +209,8 @@ Full agent setup: [AI Agent Guide](docs/guides/ai-agent-guide.md) · [CLAU
|
|
|
208
209
|
| 📋 | **Composite audit** | Single `audit` command combining explain + impact + health metrics per function — one call instead of 3-4 |
|
|
209
210
|
| 🚦 | **Triage queue** | `triage` merges connectivity, hotspots, roles, and complexity into a ranked audit priority queue |
|
|
210
211
|
| 📦 | **Batch querying** | Accept a list of targets and return all results in one JSON payload — enables multi-agent parallel dispatch |
|
|
211
|
-
| 🔬 | **Dataflow analysis** | Track how data moves through functions with `flows_to`, `returns`, and `mutates` edges —
|
|
212
|
-
| 🧩 | **Control flow graph** | Intraprocedural CFG construction for all 11 languages — `cfg` command with text/DOT/Mermaid output,
|
|
212
|
+
| 🔬 | **Dataflow analysis** | Track how data moves through functions with `flows_to`, `returns`, and `mutates` edges — all 11 languages, included by default, skip with `--no-dataflow` |
|
|
213
|
+
| 🧩 | **Control flow graph** | Intraprocedural CFG construction for all 11 languages — `cfg` command with text/DOT/Mermaid output, included by default, skip with `--no-cfg` |
|
|
213
214
|
| 🔎 | **AST node querying** | Stored queryable AST nodes (calls, `new`, string, regex, throw, await) — `ast` command with SQL GLOB pattern matching |
|
|
214
215
|
| 🧬 | **Expanded node/edge types** | `parameter`, `property`, `constant` node kinds with `parent_id` for sub-declaration queries; `contains`, `parameter_of`, `receiver` edge kinds |
|
|
215
216
|
| 📊 | **Exports analysis** | `exports <file>` shows all exported symbols with per-symbol consumers, re-export detection, and counts |
|
|
@@ -225,6 +226,7 @@ See [docs/examples](docs/examples) for real-world CLI and MCP usage examples.
|
|
|
225
226
|
```bash
|
|
226
227
|
codegraph build [dir] # Parse and build the dependency graph
|
|
227
228
|
codegraph build --no-incremental # Force full rebuild
|
|
229
|
+
codegraph build --dataflow # Extract data flow edges (flows_to, returns, mutates)
|
|
228
230
|
codegraph build --engine wasm # Force WASM engine (skip native)
|
|
229
231
|
codegraph watch [dir] # Watch for changes, update graph incrementally
|
|
230
232
|
```
|
|
@@ -327,7 +329,8 @@ codegraph ast -k call # Filter by kind: call, new, string, regex
|
|
|
327
329
|
codegraph ast -k throw --file src/ # Combine kind and file filters
|
|
328
330
|
```
|
|
329
331
|
|
|
330
|
-
> **Note:** Dataflow
|
|
332
|
+
> **Note:** Dataflow and CFG are included by default for all 11 languages. Use `--no-dataflow` / `--no-cfg` for faster builds.
|
|
333
|
+
|
|
331
334
|
|
|
332
335
|
### Audit, Triage & Batch
|
|
333
336
|
|
|
@@ -477,15 +480,15 @@ codegraph registry remove <name> # Unregister
|
|
|
477
480
|
|
|
478
481
|
| Language | Extensions | Coverage |
|
|
479
482
|
|---|---|---|
|
|
480
|
-
|  | `.js`, `.jsx`, `.mjs`, `.cjs` | Full — functions, classes, imports, call sites |
|
|
481
|
-
|  | `.ts`, `.tsx` | Full — interfaces, type aliases, `.d.ts` |
|
|
482
|
-
|  | `.py` | Functions, classes, methods, imports, decorators |
|
|
483
|
-
|  | `.go` | Functions, methods, structs, interfaces, imports, call sites |
|
|
484
|
-
|  | `.rs` | Functions, methods, structs, traits, `use` imports, call sites |
|
|
485
|
-
|  | `.java` | Classes, methods, constructors, interfaces, imports, call sites |
|
|
486
|
-
|  | `.cs` | Classes, structs, records, interfaces, enums, methods, constructors, using directives, invocations |
|
|
487
|
-
|  | `.php` | Functions, classes, interfaces, traits, enums, methods, namespace use, calls |
|
|
488
|
-
|  | `.rb` | Classes, modules, methods, singleton methods, require/require_relative, include/extend |
|
|
483
|
+
|  | `.js`, `.jsx`, `.mjs`, `.cjs` | Full — functions, classes, imports, call sites, dataflow |
|
|
484
|
+
|  | `.ts`, `.tsx` | Full — interfaces, type aliases, `.d.ts`, dataflow |
|
|
485
|
+
|  | `.py` | Functions, classes, methods, imports, decorators, dataflow |
|
|
486
|
+
|  | `.go` | Functions, methods, structs, interfaces, imports, call sites, dataflow |
|
|
487
|
+
|  | `.rs` | Functions, methods, structs, traits, `use` imports, call sites, dataflow |
|
|
488
|
+
|  | `.java` | Classes, methods, constructors, interfaces, imports, call sites, dataflow |
|
|
489
|
+
|  | `.cs` | Classes, structs, records, interfaces, enums, methods, constructors, using directives, invocations, dataflow |
|
|
490
|
+
|  | `.php` | Functions, classes, interfaces, traits, enums, methods, namespace use, calls, dataflow |
|
|
491
|
+
|  | `.rb` | Classes, modules, methods, singleton methods, require/require_relative, include/extend, dataflow |
|
|
489
492
|
|  | `.tf`, `.hcl` | Resource, data, variable, module, output blocks |
|
|
490
493
|
|
|
491
494
|
## ⚙️ How It Works
|
|
@@ -552,14 +555,14 @@ Self-measured on every release via CI ([build benchmarks](generated/benchmarks/B
|
|
|
552
555
|
|
|
553
556
|
| Metric | Latest |
|
|
554
557
|
|---|---|
|
|
555
|
-
| Build speed (native) | **1
|
|
556
|
-
| Build speed (WASM) | **
|
|
558
|
+
| Build speed (native) | **14.1 ms/file** |
|
|
559
|
+
| Build speed (WASM) | **24.4 ms/file** |
|
|
557
560
|
| Query time | **3ms** |
|
|
558
|
-
| No-op rebuild (native) | **
|
|
559
|
-
| 1-file rebuild (native) | **
|
|
560
|
-
| Query: fn-deps | **
|
|
561
|
-
| Query: path | **
|
|
562
|
-
| ~50,000 files (est.) | **~
|
|
561
|
+
| No-op rebuild (native) | **5ms** |
|
|
562
|
+
| 1-file rebuild (native) | **915ms** |
|
|
563
|
+
| Query: fn-deps | **0.9ms** |
|
|
564
|
+
| Query: path | **0.8ms** |
|
|
565
|
+
| ~50,000 files (est.) | **~705.0s build** |
|
|
563
566
|
|
|
564
567
|
Metrics are normalized per file for cross-version comparability. Times above are for a full initial build — incremental rebuilds only re-parse changed files.
|
|
565
568
|
|
|
@@ -804,7 +807,7 @@ const { results: fused } = await multiSearchData(
|
|
|
804
807
|
- **No full type inference** — parses `.d.ts` interfaces but doesn't use TypeScript's type checker for overload resolution
|
|
805
808
|
- **Dynamic calls are best-effort** — complex computed property access and `eval` patterns are not resolved
|
|
806
809
|
- **Python imports** — resolves relative imports but doesn't follow `sys.path` or virtual environment packages
|
|
807
|
-
- **Dataflow analysis** —
|
|
810
|
+
- **Dataflow analysis** — intraprocedural (single-function scope), not interprocedural
|
|
808
811
|
|
|
809
812
|
## 🗺️ Roadmap
|
|
810
813
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@optave/codegraph",
|
|
3
|
-
"version": "3.0.
|
|
3
|
+
"version": "3.0.2",
|
|
4
4
|
"description": "Local code graph CLI — parse codebases with tree-sitter, build dependency graphs, query them",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "src/index.js",
|
|
@@ -71,10 +71,10 @@
|
|
|
71
71
|
},
|
|
72
72
|
"optionalDependencies": {
|
|
73
73
|
"@modelcontextprotocol/sdk": "^1.0.0",
|
|
74
|
-
"@optave/codegraph-darwin-arm64": "3.0.
|
|
75
|
-
"@optave/codegraph-darwin-x64": "3.0.
|
|
76
|
-
"@optave/codegraph-linux-x64-gnu": "3.0.
|
|
77
|
-
"@optave/codegraph-win32-x64-msvc": "3.0.
|
|
74
|
+
"@optave/codegraph-darwin-arm64": "3.0.2",
|
|
75
|
+
"@optave/codegraph-darwin-x64": "3.0.2",
|
|
76
|
+
"@optave/codegraph-linux-x64-gnu": "3.0.2",
|
|
77
|
+
"@optave/codegraph-win32-x64-msvc": "3.0.2"
|
|
78
78
|
},
|
|
79
79
|
"devDependencies": {
|
|
80
80
|
"@biomejs/biome": "^2.4.4",
|
package/src/ast.js
CHANGED
|
@@ -156,9 +156,8 @@ export async function buildAstNodes(db, fileSymbols, _rootDir, _engineOpts) {
|
|
|
156
156
|
return;
|
|
157
157
|
}
|
|
158
158
|
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
);
|
|
159
|
+
// Bulk-fetch all node IDs per file (replaces per-def getNodeId calls)
|
|
160
|
+
const bulkGetNodeIds = db.prepare('SELECT id, name, kind, line FROM nodes WHERE file = ?');
|
|
162
161
|
|
|
163
162
|
const tx = db.transaction((rows) => {
|
|
164
163
|
for (const r of rows) {
|
|
@@ -172,14 +171,20 @@ export async function buildAstNodes(db, fileSymbols, _rootDir, _engineOpts) {
|
|
|
172
171
|
const rows = [];
|
|
173
172
|
const defs = symbols.definitions || [];
|
|
174
173
|
|
|
174
|
+
// Pre-load all node IDs for this file into a map
|
|
175
|
+
const nodeIdMap = new Map();
|
|
176
|
+
for (const row of bulkGetNodeIds.all(relPath)) {
|
|
177
|
+
nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id);
|
|
178
|
+
}
|
|
179
|
+
|
|
175
180
|
// 1. Call nodes from symbols.calls (all languages)
|
|
176
181
|
if (symbols.calls) {
|
|
177
182
|
for (const call of symbols.calls) {
|
|
178
183
|
const parentDef = findParentDef(defs, call.line);
|
|
179
184
|
let parentNodeId = null;
|
|
180
185
|
if (parentDef) {
|
|
181
|
-
|
|
182
|
-
|
|
186
|
+
parentNodeId =
|
|
187
|
+
nodeIdMap.get(`${parentDef.name}|${parentDef.kind}|${parentDef.line}`) || null;
|
|
183
188
|
}
|
|
184
189
|
rows.push({
|
|
185
190
|
file: relPath,
|
|
@@ -195,10 +200,32 @@ export async function buildAstNodes(db, fileSymbols, _rootDir, _engineOpts) {
|
|
|
195
200
|
|
|
196
201
|
// 2. AST walk for JS/TS/TSX — extract new, throw, await, string, regex
|
|
197
202
|
const ext = path.extname(relPath).toLowerCase();
|
|
198
|
-
if (WALK_EXTENSIONS.has(ext)
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
203
|
+
if (WALK_EXTENSIONS.has(ext)) {
|
|
204
|
+
if (symbols._tree) {
|
|
205
|
+
// WASM path: walk the tree-sitter AST
|
|
206
|
+
const astRows = [];
|
|
207
|
+
walkAst(symbols._tree.rootNode, defs, relPath, astRows, nodeIdMap);
|
|
208
|
+
rows.push(...astRows);
|
|
209
|
+
} else if (symbols.astNodes?.length) {
|
|
210
|
+
// Native path: use pre-extracted AST nodes from Rust
|
|
211
|
+
for (const n of symbols.astNodes) {
|
|
212
|
+
const parentDef = findParentDef(defs, n.line);
|
|
213
|
+
let parentNodeId = null;
|
|
214
|
+
if (parentDef) {
|
|
215
|
+
parentNodeId =
|
|
216
|
+
nodeIdMap.get(`${parentDef.name}|${parentDef.kind}|${parentDef.line}`) || null;
|
|
217
|
+
}
|
|
218
|
+
rows.push({
|
|
219
|
+
file: relPath,
|
|
220
|
+
line: n.line,
|
|
221
|
+
kind: n.kind,
|
|
222
|
+
name: n.name,
|
|
223
|
+
text: n.text || null,
|
|
224
|
+
receiver: n.receiver || null,
|
|
225
|
+
parentNodeId,
|
|
226
|
+
});
|
|
227
|
+
}
|
|
228
|
+
}
|
|
202
229
|
}
|
|
203
230
|
|
|
204
231
|
if (rows.length > 0) {
|
|
@@ -213,7 +240,7 @@ export async function buildAstNodes(db, fileSymbols, _rootDir, _engineOpts) {
|
|
|
213
240
|
/**
|
|
214
241
|
* Walk a tree-sitter AST and collect new/throw/await/string/regex nodes.
|
|
215
242
|
*/
|
|
216
|
-
function walkAst(node, defs, relPath, rows, getNodeId) {
|
|
243
|
+
function walkAst(node, defs, relPath, rows, nodeIdMap) {
|
|
217
244
|
const kind = JS_TS_AST_TYPES[node.type];
|
|
218
245
|
if (kind) {
|
|
219
246
|
// tree-sitter lines are 0-indexed, our DB uses 1-indexed
|
|
@@ -237,7 +264,7 @@ function walkAst(node, defs, relPath, rows, getNodeId) {
|
|
|
237
264
|
if (content.length < 2) {
|
|
238
265
|
// Still recurse children
|
|
239
266
|
for (let i = 0; i < node.childCount; i++) {
|
|
240
|
-
walkAst(node.child(i), defs, relPath, rows, getNodeId);
|
|
267
|
+
walkAst(node.child(i), defs, relPath, rows, nodeIdMap);
|
|
241
268
|
}
|
|
242
269
|
return;
|
|
243
270
|
}
|
|
@@ -251,8 +278,7 @@ function walkAst(node, defs, relPath, rows, getNodeId) {
|
|
|
251
278
|
const parentDef = findParentDef(defs, line);
|
|
252
279
|
let parentNodeId = null;
|
|
253
280
|
if (parentDef) {
|
|
254
|
-
|
|
255
|
-
if (row) parentNodeId = row.id;
|
|
281
|
+
parentNodeId = nodeIdMap.get(`${parentDef.name}|${parentDef.kind}|${parentDef.line}`) || null;
|
|
256
282
|
}
|
|
257
283
|
|
|
258
284
|
rows.push({
|
|
@@ -271,7 +297,7 @@ function walkAst(node, defs, relPath, rows, getNodeId) {
|
|
|
271
297
|
}
|
|
272
298
|
|
|
273
299
|
for (let i = 0; i < node.childCount; i++) {
|
|
274
|
-
walkAst(node.child(i), defs, relPath, rows, getNodeId);
|
|
300
|
+
walkAst(node.child(i), defs, relPath, rows, nodeIdMap);
|
|
275
301
|
}
|
|
276
302
|
}
|
|
277
303
|
|
package/src/builder.js
CHANGED
|
@@ -4,7 +4,7 @@ import path from 'node:path';
|
|
|
4
4
|
import { performance } from 'node:perf_hooks';
|
|
5
5
|
import { loadConfig } from './config.js';
|
|
6
6
|
import { EXTENSIONS, IGNORE_DIRS, normalizePath } from './constants.js';
|
|
7
|
-
import { closeDb, getBuildMeta, initSchema, openDb, setBuildMeta } from './db.js';
|
|
7
|
+
import { closeDb, getBuildMeta, initSchema, MIGRATIONS, openDb, setBuildMeta } from './db.js';
|
|
8
8
|
import { readJournal, writeJournalHeader } from './journal.js';
|
|
9
9
|
import { debug, info, warn } from './logger.js';
|
|
10
10
|
import { getActiveEngine, parseFilesAuto } from './parser.js';
|
|
@@ -448,17 +448,21 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
448
448
|
const { name: engineName, version: engineVersion } = getActiveEngine(engineOpts);
|
|
449
449
|
info(`Using ${engineName} engine${engineVersion ? ` (v${engineVersion})` : ''}`);
|
|
450
450
|
|
|
451
|
-
// Check for engine/
|
|
451
|
+
// Check for engine/schema mismatch — auto-promote to full rebuild
|
|
452
|
+
// Only trigger on engine change or schema version change (not every patch/minor bump)
|
|
453
|
+
const CURRENT_SCHEMA_VERSION = MIGRATIONS[MIGRATIONS.length - 1].version;
|
|
452
454
|
let forceFullRebuild = false;
|
|
453
455
|
if (incremental) {
|
|
454
456
|
const prevEngine = getBuildMeta(db, 'engine');
|
|
455
|
-
const prevVersion = getBuildMeta(db, 'codegraph_version');
|
|
456
457
|
if (prevEngine && prevEngine !== engineName) {
|
|
457
458
|
info(`Engine changed (${prevEngine} → ${engineName}), promoting to full rebuild.`);
|
|
458
459
|
forceFullRebuild = true;
|
|
459
460
|
}
|
|
460
|
-
|
|
461
|
-
|
|
461
|
+
const prevSchema = getBuildMeta(db, 'schema_version');
|
|
462
|
+
if (prevSchema && Number(prevSchema) !== CURRENT_SCHEMA_VERSION) {
|
|
463
|
+
info(
|
|
464
|
+
`Schema version changed (${prevSchema} → ${CURRENT_SCHEMA_VERSION}), promoting to full rebuild.`,
|
|
465
|
+
);
|
|
462
466
|
forceFullRebuild = true;
|
|
463
467
|
}
|
|
464
468
|
}
|
|
@@ -522,9 +526,9 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
522
526
|
}
|
|
523
527
|
|
|
524
528
|
if (!isFullBuild && parseChanges.length === 0 && removed.length === 0) {
|
|
525
|
-
// Check if
|
|
529
|
+
// Check if default analyses were never computed (e.g. legacy DB)
|
|
526
530
|
const needsCfg =
|
|
527
|
-
opts.cfg &&
|
|
531
|
+
opts.cfg !== false &&
|
|
528
532
|
(() => {
|
|
529
533
|
try {
|
|
530
534
|
return db.prepare('SELECT COUNT(*) as c FROM cfg_blocks').get().c === 0;
|
|
@@ -533,16 +537,10 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
533
537
|
}
|
|
534
538
|
})();
|
|
535
539
|
const needsDataflow =
|
|
536
|
-
opts.dataflow &&
|
|
540
|
+
opts.dataflow !== false &&
|
|
537
541
|
(() => {
|
|
538
542
|
try {
|
|
539
|
-
return (
|
|
540
|
-
db
|
|
541
|
-
.prepare(
|
|
542
|
-
"SELECT COUNT(*) as c FROM edges WHERE kind IN ('flows_to','returns','mutates')",
|
|
543
|
-
)
|
|
544
|
-
.get().c === 0
|
|
545
|
-
);
|
|
543
|
+
return db.prepare('SELECT COUNT(*) as c FROM dataflow').get().c === 0;
|
|
546
544
|
} catch {
|
|
547
545
|
return true;
|
|
548
546
|
}
|
|
@@ -721,44 +719,66 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
721
719
|
}
|
|
722
720
|
}
|
|
723
721
|
|
|
722
|
+
// Bulk-fetch all node IDs for a file in one query (replaces per-node getNodeId calls)
|
|
723
|
+
const bulkGetNodeIds = db.prepare('SELECT id, name, kind, line FROM nodes WHERE file = ?');
|
|
724
|
+
|
|
724
725
|
const insertAll = db.transaction(() => {
|
|
725
726
|
for (const [relPath, symbols] of allSymbols) {
|
|
726
727
|
fileSymbols.set(relPath, symbols);
|
|
727
728
|
|
|
729
|
+
// Phase 1: Insert file node + definitions + exports (no children yet)
|
|
728
730
|
insertNode.run(relPath, 'file', relPath, 0, null, null);
|
|
729
|
-
const fileRow = getNodeId.get(relPath, 'file', relPath, 0);
|
|
730
731
|
for (const def of symbols.definitions) {
|
|
731
732
|
insertNode.run(def.name, def.kind, relPath, def.line, def.endLine || null, null);
|
|
732
|
-
|
|
733
|
+
}
|
|
734
|
+
for (const exp of symbols.exports) {
|
|
735
|
+
insertNode.run(exp.name, exp.kind, relPath, exp.line, null, null);
|
|
736
|
+
}
|
|
737
|
+
|
|
738
|
+
// Phase 2: Bulk-fetch IDs for file + definitions
|
|
739
|
+
const nodeIdMap = new Map();
|
|
740
|
+
for (const row of bulkGetNodeIds.all(relPath)) {
|
|
741
|
+
nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id);
|
|
742
|
+
}
|
|
743
|
+
|
|
744
|
+
// Phase 3: Insert children with parent_id from the map
|
|
745
|
+
for (const def of symbols.definitions) {
|
|
746
|
+
if (!def.children?.length) continue;
|
|
747
|
+
const defId = nodeIdMap.get(`${def.name}|${def.kind}|${def.line}`);
|
|
748
|
+
if (!defId) continue;
|
|
749
|
+
for (const child of def.children) {
|
|
750
|
+
insertNode.run(child.name, child.kind, relPath, child.line, child.endLine || null, defId);
|
|
751
|
+
}
|
|
752
|
+
}
|
|
753
|
+
|
|
754
|
+
// Phase 4: Re-fetch to include children IDs
|
|
755
|
+
nodeIdMap.clear();
|
|
756
|
+
for (const row of bulkGetNodeIds.all(relPath)) {
|
|
757
|
+
nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id);
|
|
758
|
+
}
|
|
759
|
+
|
|
760
|
+
// Phase 5: Insert edges using the cached ID map
|
|
761
|
+
const fileId = nodeIdMap.get(`${relPath}|file|0`);
|
|
762
|
+
for (const def of symbols.definitions) {
|
|
763
|
+
const defId = nodeIdMap.get(`${def.name}|${def.kind}|${def.line}`);
|
|
733
764
|
// File → top-level definition contains edge
|
|
734
|
-
if (fileRow && defRow) {
|
|
735
|
-
insertEdge.run(fileRow.id, defRow.id, 'contains', 1.0, 0);
|
|
765
|
+
if (fileId && defId) {
|
|
766
|
+
insertEdge.run(fileId, defId, 'contains', 1.0, 0);
|
|
736
767
|
}
|
|
737
|
-
if (def.children?.length && defRow) {
|
|
768
|
+
if (def.children?.length && defId) {
|
|
738
769
|
for (const child of def.children) {
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
child
|
|
742
|
-
|
|
743
|
-
child.line,
|
|
744
|
-
child.endLine || null,
|
|
745
|
-
defRow.id,
|
|
746
|
-
);
|
|
747
|
-
// Parent → child contains edge
|
|
748
|
-
const childRow = getNodeId.get(child.name, child.kind, relPath, child.line);
|
|
749
|
-
if (childRow) {
|
|
750
|
-
insertEdge.run(defRow.id, childRow.id, 'contains', 1.0, 0);
|
|
770
|
+
const childId = nodeIdMap.get(`${child.name}|${child.kind}|${child.line}`);
|
|
771
|
+
if (childId) {
|
|
772
|
+
// Parent → child contains edge
|
|
773
|
+
insertEdge.run(defId, childId, 'contains', 1.0, 0);
|
|
751
774
|
// Parameter → parent parameter_of edge (inverse direction)
|
|
752
775
|
if (child.kind === 'parameter') {
|
|
753
|
-
insertEdge.run(childRow.id, defRow.id, 'parameter_of', 1.0, 0);
|
|
776
|
+
insertEdge.run(childId, defId, 'parameter_of', 1.0, 0);
|
|
754
777
|
}
|
|
755
778
|
}
|
|
756
779
|
}
|
|
757
780
|
}
|
|
758
781
|
}
|
|
759
|
-
for (const exp of symbols.exports) {
|
|
760
|
-
insertNode.run(exp.name, exp.kind, relPath, exp.line, null, null);
|
|
761
|
-
}
|
|
762
782
|
|
|
763
783
|
// Update file hash with real mtime+size for incremental builds
|
|
764
784
|
// Skip for reverse-dep files — they didn't actually change
|
|
@@ -1229,7 +1249,9 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
1229
1249
|
}
|
|
1230
1250
|
try {
|
|
1231
1251
|
const { buildStructure } = await import('./structure.js');
|
|
1232
|
-
|
|
1252
|
+
// Pass changed file paths so incremental builds can scope the rebuild
|
|
1253
|
+
const changedFilePaths = isFullBuild ? null : [...allSymbols.keys()];
|
|
1254
|
+
buildStructure(db, fileSymbols, rootDir, lineCountMap, relDirs, changedFilePaths);
|
|
1233
1255
|
} catch (err) {
|
|
1234
1256
|
debug(`Structure analysis failed: ${err.message}`);
|
|
1235
1257
|
}
|
|
@@ -1250,45 +1272,81 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
1250
1272
|
}
|
|
1251
1273
|
_t.rolesMs = performance.now() - _t.roles0;
|
|
1252
1274
|
|
|
1253
|
-
//
|
|
1254
|
-
//
|
|
1275
|
+
// For incremental builds, filter out reverse-dep-only files from AST/complexity/CFG/dataflow
|
|
1276
|
+
// — their content didn't change, so existing ast_nodes/function_complexity rows are valid.
|
|
1277
|
+
let astComplexitySymbols = allSymbols;
|
|
1278
|
+
if (!isFullBuild) {
|
|
1279
|
+
const reverseDepFiles = new Set(
|
|
1280
|
+
filesToParse.filter((item) => item._reverseDepOnly).map((item) => item.relPath),
|
|
1281
|
+
);
|
|
1282
|
+
if (reverseDepFiles.size > 0) {
|
|
1283
|
+
astComplexitySymbols = new Map();
|
|
1284
|
+
for (const [relPath, symbols] of allSymbols) {
|
|
1285
|
+
if (!reverseDepFiles.has(relPath)) {
|
|
1286
|
+
astComplexitySymbols.set(relPath, symbols);
|
|
1287
|
+
}
|
|
1288
|
+
}
|
|
1289
|
+
debug(
|
|
1290
|
+
`AST/complexity/CFG/dataflow: processing ${astComplexitySymbols.size} changed files (skipping ${reverseDepFiles.size} reverse-deps)`,
|
|
1291
|
+
);
|
|
1292
|
+
}
|
|
1293
|
+
}
|
|
1294
|
+
|
|
1295
|
+
// AST node extraction (calls, new, string, regex, throw, await)
|
|
1255
1296
|
_t.ast0 = performance.now();
|
|
1256
|
-
|
|
1257
|
-
|
|
1258
|
-
|
|
1259
|
-
|
|
1260
|
-
|
|
1297
|
+
if (opts.ast !== false) {
|
|
1298
|
+
try {
|
|
1299
|
+
const { buildAstNodes } = await import('./ast.js');
|
|
1300
|
+
await buildAstNodes(db, astComplexitySymbols, rootDir, engineOpts);
|
|
1301
|
+
} catch (err) {
|
|
1302
|
+
debug(`AST node extraction failed: ${err.message}`);
|
|
1303
|
+
}
|
|
1261
1304
|
}
|
|
1262
1305
|
_t.astMs = performance.now() - _t.ast0;
|
|
1263
1306
|
|
|
1264
1307
|
// Compute per-function complexity metrics (cognitive, cyclomatic, nesting)
|
|
1265
1308
|
_t.complexity0 = performance.now();
|
|
1266
|
-
|
|
1267
|
-
|
|
1268
|
-
|
|
1269
|
-
|
|
1270
|
-
|
|
1309
|
+
if (opts.complexity !== false) {
|
|
1310
|
+
try {
|
|
1311
|
+
const { buildComplexityMetrics } = await import('./complexity.js');
|
|
1312
|
+
await buildComplexityMetrics(db, astComplexitySymbols, rootDir, engineOpts);
|
|
1313
|
+
} catch (err) {
|
|
1314
|
+
debug(`Complexity analysis failed: ${err.message}`);
|
|
1315
|
+
}
|
|
1271
1316
|
}
|
|
1272
1317
|
_t.complexityMs = performance.now() - _t.complexity0;
|
|
1273
1318
|
|
|
1274
|
-
//
|
|
1275
|
-
|
|
1319
|
+
// Pre-parse files missing WASM trees (native builds) so CFG + dataflow
|
|
1320
|
+
// share a single parse pass instead of each creating parsers independently
|
|
1321
|
+
if (opts.cfg !== false || opts.dataflow !== false) {
|
|
1322
|
+
_t.wasmPre0 = performance.now();
|
|
1323
|
+
try {
|
|
1324
|
+
const { ensureWasmTrees } = await import('./parser.js');
|
|
1325
|
+
await ensureWasmTrees(astComplexitySymbols, rootDir);
|
|
1326
|
+
} catch (err) {
|
|
1327
|
+
debug(`WASM pre-parse failed: ${err.message}`);
|
|
1328
|
+
}
|
|
1329
|
+
_t.wasmPreMs = performance.now() - _t.wasmPre0;
|
|
1330
|
+
}
|
|
1331
|
+
|
|
1332
|
+
// CFG analysis (skip with --no-cfg)
|
|
1333
|
+
if (opts.cfg !== false) {
|
|
1276
1334
|
_t.cfg0 = performance.now();
|
|
1277
1335
|
try {
|
|
1278
1336
|
const { buildCFGData } = await import('./cfg.js');
|
|
1279
|
-
await buildCFGData(db,
|
|
1337
|
+
await buildCFGData(db, astComplexitySymbols, rootDir, engineOpts);
|
|
1280
1338
|
} catch (err) {
|
|
1281
1339
|
debug(`CFG analysis failed: ${err.message}`);
|
|
1282
1340
|
}
|
|
1283
1341
|
_t.cfgMs = performance.now() - _t.cfg0;
|
|
1284
1342
|
}
|
|
1285
1343
|
|
|
1286
|
-
//
|
|
1287
|
-
if (opts.dataflow) {
|
|
1344
|
+
// Dataflow analysis (skip with --no-dataflow)
|
|
1345
|
+
if (opts.dataflow !== false) {
|
|
1288
1346
|
_t.dataflow0 = performance.now();
|
|
1289
1347
|
try {
|
|
1290
1348
|
const { buildDataflowEdges } = await import('./dataflow.js');
|
|
1291
|
-
await buildDataflowEdges(db,
|
|
1349
|
+
await buildDataflowEdges(db, astComplexitySymbols, rootDir, engineOpts);
|
|
1292
1350
|
} catch (err) {
|
|
1293
1351
|
debug(`Dataflow analysis failed: ${err.message}`);
|
|
1294
1352
|
}
|
|
@@ -1348,6 +1406,7 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
1348
1406
|
engine: engineName,
|
|
1349
1407
|
engine_version: engineVersion || '',
|
|
1350
1408
|
codegraph_version: CODEGRAPH_VERSION,
|
|
1409
|
+
schema_version: String(CURRENT_SCHEMA_VERSION),
|
|
1351
1410
|
built_at: new Date().toISOString(),
|
|
1352
1411
|
node_count: nodeCount,
|
|
1353
1412
|
edge_count: actualEdgeCount,
|
|
@@ -1385,8 +1444,11 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
1385
1444
|
edgesMs: +_t.edgesMs.toFixed(1),
|
|
1386
1445
|
structureMs: +_t.structureMs.toFixed(1),
|
|
1387
1446
|
rolesMs: +_t.rolesMs.toFixed(1),
|
|
1447
|
+
astMs: +_t.astMs.toFixed(1),
|
|
1388
1448
|
complexityMs: +_t.complexityMs.toFixed(1),
|
|
1449
|
+
...(_t.wasmPreMs != null && { wasmPreMs: +_t.wasmPreMs.toFixed(1) }),
|
|
1389
1450
|
...(_t.cfgMs != null && { cfgMs: +_t.cfgMs.toFixed(1) }),
|
|
1451
|
+
...(_t.dataflowMs != null && { dataflowMs: +_t.dataflowMs.toFixed(1) }),
|
|
1390
1452
|
},
|
|
1391
1453
|
};
|
|
1392
1454
|
}
|
package/src/cfg.js
CHANGED
|
@@ -1241,7 +1241,8 @@ export function cfgData(name, customDbPath, opts = {}) {
|
|
|
1241
1241
|
return {
|
|
1242
1242
|
name,
|
|
1243
1243
|
results: [],
|
|
1244
|
-
warning:
|
|
1244
|
+
warning:
|
|
1245
|
+
'No CFG data found. Rebuild with `codegraph build` (CFG is now included by default).',
|
|
1245
1246
|
};
|
|
1246
1247
|
}
|
|
1247
1248
|
|
package/src/cli.js
CHANGED
|
@@ -105,13 +105,17 @@ program
|
|
|
105
105
|
.command('build [dir]')
|
|
106
106
|
.description('Parse repo and build graph in .codegraph/graph.db')
|
|
107
107
|
.option('--no-incremental', 'Force full rebuild (ignore file hashes)')
|
|
108
|
-
.option('--
|
|
109
|
-
.option('--
|
|
108
|
+
.option('--no-ast', 'Skip AST node extraction (calls, new, string, regex, throw, await)')
|
|
109
|
+
.option('--no-complexity', 'Skip complexity metrics computation')
|
|
110
|
+
.option('--no-dataflow', 'Skip data flow edge extraction')
|
|
111
|
+
.option('--no-cfg', 'Skip control flow graph building')
|
|
110
112
|
.action(async (dir, opts) => {
|
|
111
113
|
const root = path.resolve(dir || '.');
|
|
112
114
|
const engine = program.opts().engine;
|
|
113
115
|
await buildGraph(root, {
|
|
114
116
|
incremental: opts.incremental,
|
|
117
|
+
ast: opts.ast,
|
|
118
|
+
complexity: opts.complexity,
|
|
115
119
|
engine,
|
|
116
120
|
dataflow: opts.dataflow,
|
|
117
121
|
cfg: opts.cfg,
|