@optave/codegraph 2.2.2-dev.c252ef9 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +22 -7
- package/package.json +7 -6
- package/src/builder.js +49 -13
- package/src/cli.js +86 -29
- package/src/config.js +1 -0
- package/src/embedder.js +204 -15
- package/src/export.js +16 -7
- package/src/index.js +2 -0
- package/src/queries.js +65 -15
- package/src/structure.js +2 -1
package/README.md
CHANGED
|
@@ -373,7 +373,7 @@ Codegraph also extracts symbols from common callback patterns: Commander `.comma
|
|
|
373
373
|
|
|
374
374
|
## 📊 Performance
|
|
375
375
|
|
|
376
|
-
Self-measured on every release via CI ([
|
|
376
|
+
Self-measured on every release via CI ([build benchmarks](generated/BUILD-BENCHMARKS.md) | [embedding benchmarks](generated/EMBEDDING-BENCHMARKS.md)):
|
|
377
377
|
|
|
378
378
|
| Metric | Latest |
|
|
379
379
|
|---|---|
|
|
@@ -384,6 +384,20 @@ Self-measured on every release via CI ([full history](generated/BENCHMARKS.md)):
|
|
|
384
384
|
|
|
385
385
|
Metrics are normalized per file for cross-version comparability. Times above are for a full initial build — incremental rebuilds only re-parse changed files.
|
|
386
386
|
|
|
387
|
+
### Lightweight Footprint
|
|
388
|
+
|
|
389
|
+
<a href="https://www.npmjs.com/package/@optave/codegraph"><img src="https://img.shields.io/npm/unpacked-size/@optave/codegraph?style=flat-square&label=unpacked%20size" alt="npm unpacked size" /></a>
|
|
390
|
+
|
|
391
|
+
Only **3 runtime dependencies** — everything else is optional or a devDependency:
|
|
392
|
+
|
|
393
|
+
| Dependency | What it does | | |
|
|
394
|
+
|---|---|---|---|
|
|
395
|
+
| [better-sqlite3](https://github.com/WiseLibs/better-sqlite3) | Fast, synchronous SQLite driver |  |  |
|
|
396
|
+
| [commander](https://github.com/tj/commander.js) | CLI argument parsing |  |  |
|
|
397
|
+
| [web-tree-sitter](https://github.com/tree-sitter/tree-sitter) | WASM tree-sitter bindings |  |  |
|
|
398
|
+
|
|
399
|
+
Optional: `@huggingface/transformers` (semantic search), `@modelcontextprotocol/sdk` (MCP server) — lazy-loaded only when needed.
|
|
400
|
+
|
|
387
401
|
## 🤖 AI Agent Integration
|
|
388
402
|
|
|
389
403
|
### MCP Server
|
|
@@ -583,15 +597,16 @@ const { results: fused } = await multiSearchData(
|
|
|
583
597
|
|
|
584
598
|
## 🗺️ Roadmap
|
|
585
599
|
|
|
586
|
-
See **[ROADMAP.md](ROADMAP.md)** for the full development roadmap. Current plan:
|
|
600
|
+
See **[ROADMAP.md](ROADMAP.md)** for the full development roadmap and **[STABILITY.md](STABILITY.md)** for the stability policy and versioning guarantees. Current plan:
|
|
587
601
|
|
|
588
602
|
1. ~~**Rust Core**~~ — **Complete** (v1.3.0) — native tree-sitter parsing via napi-rs, parallel multi-core parsing, incremental re-parsing, import resolution & cycle detection in Rust
|
|
589
603
|
2. ~~**Foundation Hardening**~~ — **Complete** (v1.4.0) — parser registry, 12-tool MCP server with multi-repo support, test coverage 62%→75%, `apiKeyCommand` secret resolution, global repo registry
|
|
590
|
-
3. **
|
|
591
|
-
4. **
|
|
592
|
-
5. **
|
|
593
|
-
6. **
|
|
594
|
-
7. **
|
|
604
|
+
3. **Architectural Refactoring** — parser plugin system, repository pattern, pipeline builder, engine strategy, domain errors, curated API
|
|
605
|
+
4. **Intelligent Embeddings** — LLM-generated descriptions, hybrid search
|
|
606
|
+
5. **Natural Language Queries** — `codegraph ask` command, conversational sessions
|
|
607
|
+
6. **Expanded Language Support** — 8 new languages (12 → 20)
|
|
608
|
+
7. **GitHub Integration & CI** — reusable GitHub Action, PR review, SARIF output
|
|
609
|
+
8. **Visualization & Advanced** — web UI, dead code detection, monorepo support, agentic search
|
|
595
610
|
|
|
596
611
|
## 🤝 Contributing
|
|
597
612
|
|
package/package.json
CHANGED
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@optave/codegraph",
|
|
3
|
-
"version": "2.
|
|
3
|
+
"version": "2.3.0",
|
|
4
4
|
"description": "Local code graph CLI — parse codebases with tree-sitter, build dependency graphs, query them",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "src/index.js",
|
|
7
7
|
"exports": {
|
|
8
8
|
".": {
|
|
9
9
|
"import": "./src/index.js"
|
|
10
|
-
}
|
|
10
|
+
},
|
|
11
|
+
"./package.json": "./package.json"
|
|
11
12
|
},
|
|
12
13
|
"bin": {
|
|
13
14
|
"codegraph": "./src/cli.js"
|
|
@@ -61,10 +62,10 @@
|
|
|
61
62
|
"optionalDependencies": {
|
|
62
63
|
"@huggingface/transformers": "^3.8.1",
|
|
63
64
|
"@modelcontextprotocol/sdk": "^1.0.0",
|
|
64
|
-
"@optave/codegraph-darwin-arm64": "2.
|
|
65
|
-
"@optave/codegraph-darwin-x64": "2.
|
|
66
|
-
"@optave/codegraph-linux-x64-gnu": "2.
|
|
67
|
-
"@optave/codegraph-win32-x64-msvc": "2.
|
|
65
|
+
"@optave/codegraph-darwin-arm64": "2.3.0",
|
|
66
|
+
"@optave/codegraph-darwin-x64": "2.3.0",
|
|
67
|
+
"@optave/codegraph-linux-x64-gnu": "2.3.0",
|
|
68
|
+
"@optave/codegraph-win32-x64-msvc": "2.3.0"
|
|
68
69
|
},
|
|
69
70
|
"devDependencies": {
|
|
70
71
|
"@biomejs/biome": "^2.4.4",
|
package/src/builder.js
CHANGED
|
@@ -1,12 +1,11 @@
|
|
|
1
1
|
import { createHash } from 'node:crypto';
|
|
2
2
|
import fs from 'node:fs';
|
|
3
|
-
import os from 'node:os';
|
|
4
3
|
import path from 'node:path';
|
|
5
4
|
import { loadConfig } from './config.js';
|
|
6
5
|
import { EXTENSIONS, IGNORE_DIRS, normalizePath } from './constants.js';
|
|
7
6
|
import { initSchema, openDb } from './db.js';
|
|
8
7
|
import { readJournal, writeJournalHeader } from './journal.js';
|
|
9
|
-
import { debug, warn } from './logger.js';
|
|
8
|
+
import { debug, info, warn } from './logger.js';
|
|
10
9
|
import { getActiveEngine, parseFilesAuto } from './parser.js';
|
|
11
10
|
import { computeConfidence, resolveImportPath, resolveImportsBatch } from './resolve.js';
|
|
12
11
|
|
|
@@ -345,7 +344,7 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
345
344
|
// Engine selection: 'native', 'wasm', or 'auto' (default)
|
|
346
345
|
const engineOpts = { engine: opts.engine || 'auto' };
|
|
347
346
|
const { name: engineName, version: engineVersion } = getActiveEngine(engineOpts);
|
|
348
|
-
|
|
347
|
+
info(`Using ${engineName} engine${engineVersion ? ` (v${engineVersion})` : ''}`);
|
|
349
348
|
|
|
350
349
|
const aliases = loadPathAliases(rootDir);
|
|
351
350
|
// Merge config aliases
|
|
@@ -358,7 +357,7 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
358
357
|
}
|
|
359
358
|
|
|
360
359
|
if (aliases.baseUrl || Object.keys(aliases.paths).length > 0) {
|
|
361
|
-
|
|
360
|
+
info(
|
|
362
361
|
`Loaded path aliases: baseUrl=${aliases.baseUrl || 'none'}, ${Object.keys(aliases.paths).length} path mappings`,
|
|
363
362
|
);
|
|
364
363
|
}
|
|
@@ -366,7 +365,7 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
366
365
|
const collected = collectFiles(rootDir, [], config, new Set());
|
|
367
366
|
const files = collected.files;
|
|
368
367
|
const discoveredDirs = collected.directories;
|
|
369
|
-
|
|
368
|
+
info(`Found ${files.length} files to parse`);
|
|
370
369
|
|
|
371
370
|
// Check for incremental build
|
|
372
371
|
const { changed, removed, isFullBuild } = incremental
|
|
@@ -397,19 +396,36 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
397
396
|
/* ignore heal errors */
|
|
398
397
|
}
|
|
399
398
|
}
|
|
400
|
-
|
|
399
|
+
info('No changes detected. Graph is up to date.');
|
|
401
400
|
db.close();
|
|
402
401
|
writeJournalHeader(rootDir, Date.now());
|
|
403
402
|
return;
|
|
404
403
|
}
|
|
405
404
|
|
|
405
|
+
// Check if embeddings table exists (created by `embed`, not by initSchema)
|
|
406
|
+
let hasEmbeddings = false;
|
|
407
|
+
try {
|
|
408
|
+
db.prepare('SELECT 1 FROM embeddings LIMIT 1').get();
|
|
409
|
+
hasEmbeddings = true;
|
|
410
|
+
} catch {
|
|
411
|
+
/* table doesn't exist */
|
|
412
|
+
}
|
|
413
|
+
|
|
406
414
|
if (isFullBuild) {
|
|
415
|
+
const deletions =
|
|
416
|
+
'PRAGMA foreign_keys = OFF; DELETE FROM node_metrics; DELETE FROM edges; DELETE FROM nodes; PRAGMA foreign_keys = ON;';
|
|
407
417
|
db.exec(
|
|
408
|
-
|
|
418
|
+
hasEmbeddings
|
|
419
|
+
? `${deletions.replace('PRAGMA foreign_keys = ON;', '')} DELETE FROM embeddings; PRAGMA foreign_keys = ON;`
|
|
420
|
+
: deletions,
|
|
409
421
|
);
|
|
410
422
|
} else {
|
|
411
|
-
|
|
412
|
-
// Remove metrics/edges/nodes for changed and removed files
|
|
423
|
+
info(`Incremental: ${parseChanges.length} changed, ${removed.length} removed`);
|
|
424
|
+
// Remove embeddings/metrics/edges/nodes for changed and removed files
|
|
425
|
+
// Embeddings must be deleted BEFORE nodes (we need node IDs to find them)
|
|
426
|
+
const deleteEmbeddingsForFile = hasEmbeddings
|
|
427
|
+
? db.prepare('DELETE FROM embeddings WHERE node_id IN (SELECT id FROM nodes WHERE file = ?)')
|
|
428
|
+
: null;
|
|
413
429
|
const deleteNodesForFile = db.prepare('DELETE FROM nodes WHERE file = ?');
|
|
414
430
|
const deleteEdgesForFile = db.prepare(`
|
|
415
431
|
DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = @f)
|
|
@@ -419,12 +435,14 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
419
435
|
'DELETE FROM node_metrics WHERE node_id IN (SELECT id FROM nodes WHERE file = ?)',
|
|
420
436
|
);
|
|
421
437
|
for (const relPath of removed) {
|
|
438
|
+
deleteEmbeddingsForFile?.run(relPath);
|
|
422
439
|
deleteEdgesForFile.run({ f: relPath });
|
|
423
440
|
deleteMetricsForFile.run(relPath);
|
|
424
441
|
deleteNodesForFile.run(relPath);
|
|
425
442
|
}
|
|
426
443
|
for (const item of parseChanges) {
|
|
427
444
|
const relPath = item.relPath || normalizePath(path.relative(rootDir, item.file));
|
|
445
|
+
deleteEmbeddingsForFile?.run(relPath);
|
|
428
446
|
deleteEdgesForFile.run({ f: relPath });
|
|
429
447
|
deleteMetricsForFile.run(relPath);
|
|
430
448
|
deleteNodesForFile.run(relPath);
|
|
@@ -528,7 +546,7 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
528
546
|
|
|
529
547
|
const parsed = allSymbols.size;
|
|
530
548
|
const skipped = filesToParse.length - parsed;
|
|
531
|
-
|
|
549
|
+
info(`Parsed ${parsed} files (${skipped} skipped)`);
|
|
532
550
|
|
|
533
551
|
// Clean up removed file hashes
|
|
534
552
|
if (upsertHash && removed.length > 0) {
|
|
@@ -822,15 +840,33 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
822
840
|
}
|
|
823
841
|
|
|
824
842
|
const nodeCount = db.prepare('SELECT COUNT(*) as c FROM nodes').get().c;
|
|
825
|
-
|
|
826
|
-
|
|
843
|
+
info(`Graph built: ${nodeCount} nodes, ${edgeCount} edges`);
|
|
844
|
+
info(`Stored in ${dbPath}`);
|
|
845
|
+
|
|
846
|
+
// Warn about orphaned embeddings that no longer match any node
|
|
847
|
+
if (hasEmbeddings) {
|
|
848
|
+
try {
|
|
849
|
+
const orphaned = db
|
|
850
|
+
.prepare('SELECT COUNT(*) as c FROM embeddings WHERE node_id NOT IN (SELECT id FROM nodes)')
|
|
851
|
+
.get().c;
|
|
852
|
+
if (orphaned > 0) {
|
|
853
|
+
warn(
|
|
854
|
+
`${orphaned} embeddings are orphaned (nodes changed). Run "codegraph embed" to refresh.`,
|
|
855
|
+
);
|
|
856
|
+
}
|
|
857
|
+
} catch {
|
|
858
|
+
/* ignore — embeddings table may have been dropped */
|
|
859
|
+
}
|
|
860
|
+
}
|
|
861
|
+
|
|
827
862
|
db.close();
|
|
828
863
|
|
|
829
864
|
// Write journal header after successful build
|
|
830
865
|
writeJournalHeader(rootDir, Date.now());
|
|
831
866
|
|
|
832
867
|
if (!opts.skipRegistry) {
|
|
833
|
-
const
|
|
868
|
+
const { tmpdir } = await import('node:os');
|
|
869
|
+
const tmpDir = path.resolve(tmpdir());
|
|
834
870
|
const resolvedRoot = path.resolve(rootDir);
|
|
835
871
|
if (resolvedRoot.startsWith(tmpDir)) {
|
|
836
872
|
debug(`Skipping auto-registration for temp directory: ${resolvedRoot}`);
|
package/src/cli.js
CHANGED
|
@@ -2,12 +2,12 @@
|
|
|
2
2
|
|
|
3
3
|
import fs from 'node:fs';
|
|
4
4
|
import path from 'node:path';
|
|
5
|
-
import Database from 'better-sqlite3';
|
|
6
5
|
import { Command } from 'commander';
|
|
7
6
|
import { buildGraph } from './builder.js';
|
|
7
|
+
import { loadConfig } from './config.js';
|
|
8
8
|
import { findCycles, formatCycles } from './cycles.js';
|
|
9
|
-
import {
|
|
10
|
-
import { buildEmbeddings, MODELS, search } from './embedder.js';
|
|
9
|
+
import { openReadonlyOrFail } from './db.js';
|
|
10
|
+
import { buildEmbeddings, EMBEDDING_STRATEGIES, MODELS, search } from './embedder.js';
|
|
11
11
|
import { exportDOT, exportJSON, exportMermaid } from './export.js';
|
|
12
12
|
import { setVerbose } from './logger.js';
|
|
13
13
|
import {
|
|
@@ -36,6 +36,8 @@ import { watchProject } from './watcher.js';
|
|
|
36
36
|
const __cliDir = path.dirname(new URL(import.meta.url).pathname.replace(/^\/([A-Z]:)/i, '$1'));
|
|
37
37
|
const pkg = JSON.parse(fs.readFileSync(path.join(__cliDir, '..', 'package.json'), 'utf-8'));
|
|
38
38
|
|
|
39
|
+
const config = loadConfig(process.cwd());
|
|
40
|
+
|
|
39
41
|
const program = new Command();
|
|
40
42
|
program
|
|
41
43
|
.name('codegraph')
|
|
@@ -48,6 +50,18 @@ program
|
|
|
48
50
|
if (opts.verbose) setVerbose(true);
|
|
49
51
|
});
|
|
50
52
|
|
|
53
|
+
/**
|
|
54
|
+
* Resolve the effective noTests value: CLI flag > config > false.
|
|
55
|
+
* Commander sets opts.tests to false when --no-tests is passed.
|
|
56
|
+
* When --include-tests is passed, always return false (include tests).
|
|
57
|
+
* Otherwise, fall back to config.query.excludeTests.
|
|
58
|
+
*/
|
|
59
|
+
function resolveNoTests(opts) {
|
|
60
|
+
if (opts.includeTests) return false;
|
|
61
|
+
if (opts.tests === false) return true;
|
|
62
|
+
return config.query?.excludeTests || false;
|
|
63
|
+
}
|
|
64
|
+
|
|
51
65
|
program
|
|
52
66
|
.command('build [dir]')
|
|
53
67
|
.description('Parse repo and build graph in .codegraph/graph.db')
|
|
@@ -63,9 +77,10 @@ program
|
|
|
63
77
|
.description('Find a function/class, show callers and callees')
|
|
64
78
|
.option('-d, --db <path>', 'Path to graph.db')
|
|
65
79
|
.option('-T, --no-tests', 'Exclude test/spec files from results')
|
|
80
|
+
.option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
|
|
66
81
|
.option('-j, --json', 'Output as JSON')
|
|
67
82
|
.action((name, opts) => {
|
|
68
|
-
queryName(name, opts.db, { noTests:
|
|
83
|
+
queryName(name, opts.db, { noTests: resolveNoTests(opts), json: opts.json });
|
|
69
84
|
});
|
|
70
85
|
|
|
71
86
|
program
|
|
@@ -73,9 +88,10 @@ program
|
|
|
73
88
|
.description('Show what depends on this file (transitive)')
|
|
74
89
|
.option('-d, --db <path>', 'Path to graph.db')
|
|
75
90
|
.option('-T, --no-tests', 'Exclude test/spec files from results')
|
|
91
|
+
.option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
|
|
76
92
|
.option('-j, --json', 'Output as JSON')
|
|
77
93
|
.action((file, opts) => {
|
|
78
|
-
impactAnalysis(file, opts.db, { noTests:
|
|
94
|
+
impactAnalysis(file, opts.db, { noTests: resolveNoTests(opts), json: opts.json });
|
|
79
95
|
});
|
|
80
96
|
|
|
81
97
|
program
|
|
@@ -84,9 +100,13 @@ program
|
|
|
84
100
|
.option('-d, --db <path>', 'Path to graph.db')
|
|
85
101
|
.option('-n, --limit <number>', 'Number of top nodes', '20')
|
|
86
102
|
.option('-T, --no-tests', 'Exclude test/spec files from results')
|
|
103
|
+
.option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
|
|
87
104
|
.option('-j, --json', 'Output as JSON')
|
|
88
105
|
.action((opts) => {
|
|
89
|
-
moduleMap(opts.db, parseInt(opts.limit, 10), {
|
|
106
|
+
moduleMap(opts.db, parseInt(opts.limit, 10), {
|
|
107
|
+
noTests: resolveNoTests(opts),
|
|
108
|
+
json: opts.json,
|
|
109
|
+
});
|
|
90
110
|
});
|
|
91
111
|
|
|
92
112
|
program
|
|
@@ -94,9 +114,10 @@ program
|
|
|
94
114
|
.description('Show graph health overview: nodes, edges, languages, cycles, hotspots, embeddings')
|
|
95
115
|
.option('-d, --db <path>', 'Path to graph.db')
|
|
96
116
|
.option('-T, --no-tests', 'Exclude test/spec files from results')
|
|
117
|
+
.option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
|
|
97
118
|
.option('-j, --json', 'Output as JSON')
|
|
98
119
|
.action((opts) => {
|
|
99
|
-
stats(opts.db, { noTests:
|
|
120
|
+
stats(opts.db, { noTests: resolveNoTests(opts), json: opts.json });
|
|
100
121
|
});
|
|
101
122
|
|
|
102
123
|
program
|
|
@@ -104,9 +125,10 @@ program
|
|
|
104
125
|
.description('Show what this file imports and what imports it')
|
|
105
126
|
.option('-d, --db <path>', 'Path to graph.db')
|
|
106
127
|
.option('-T, --no-tests', 'Exclude test/spec files from results')
|
|
128
|
+
.option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
|
|
107
129
|
.option('-j, --json', 'Output as JSON')
|
|
108
130
|
.action((file, opts) => {
|
|
109
|
-
fileDeps(file, opts.db, { noTests:
|
|
131
|
+
fileDeps(file, opts.db, { noTests: resolveNoTests(opts), json: opts.json });
|
|
110
132
|
});
|
|
111
133
|
|
|
112
134
|
program
|
|
@@ -117,6 +139,7 @@ program
|
|
|
117
139
|
.option('-f, --file <path>', 'Scope search to functions in this file (partial match)')
|
|
118
140
|
.option('-k, --kind <kind>', 'Filter to a specific symbol kind')
|
|
119
141
|
.option('-T, --no-tests', 'Exclude test/spec files from results')
|
|
142
|
+
.option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
|
|
120
143
|
.option('-j, --json', 'Output as JSON')
|
|
121
144
|
.action((name, opts) => {
|
|
122
145
|
if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) {
|
|
@@ -127,7 +150,7 @@ program
|
|
|
127
150
|
depth: parseInt(opts.depth, 10),
|
|
128
151
|
file: opts.file,
|
|
129
152
|
kind: opts.kind,
|
|
130
|
-
noTests:
|
|
153
|
+
noTests: resolveNoTests(opts),
|
|
131
154
|
json: opts.json,
|
|
132
155
|
});
|
|
133
156
|
});
|
|
@@ -140,6 +163,7 @@ program
|
|
|
140
163
|
.option('-f, --file <path>', 'Scope search to functions in this file (partial match)')
|
|
141
164
|
.option('-k, --kind <kind>', 'Filter to a specific symbol kind')
|
|
142
165
|
.option('-T, --no-tests', 'Exclude test/spec files from results')
|
|
166
|
+
.option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
|
|
143
167
|
.option('-j, --json', 'Output as JSON')
|
|
144
168
|
.action((name, opts) => {
|
|
145
169
|
if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) {
|
|
@@ -150,7 +174,7 @@ program
|
|
|
150
174
|
depth: parseInt(opts.depth, 10),
|
|
151
175
|
file: opts.file,
|
|
152
176
|
kind: opts.kind,
|
|
153
|
-
noTests:
|
|
177
|
+
noTests: resolveNoTests(opts),
|
|
154
178
|
json: opts.json,
|
|
155
179
|
});
|
|
156
180
|
});
|
|
@@ -163,8 +187,9 @@ program
|
|
|
163
187
|
.option('-f, --file <path>', 'Scope search to functions in this file (partial match)')
|
|
164
188
|
.option('-k, --kind <kind>', 'Filter to a specific symbol kind')
|
|
165
189
|
.option('--no-source', 'Metadata only (skip source extraction)')
|
|
166
|
-
.option('--
|
|
190
|
+
.option('--with-test-source', 'Include test source code')
|
|
167
191
|
.option('-T, --no-tests', 'Exclude test/spec files from results')
|
|
192
|
+
.option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
|
|
168
193
|
.option('-j, --json', 'Output as JSON')
|
|
169
194
|
.action((name, opts) => {
|
|
170
195
|
if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) {
|
|
@@ -176,8 +201,8 @@ program
|
|
|
176
201
|
file: opts.file,
|
|
177
202
|
kind: opts.kind,
|
|
178
203
|
noSource: !opts.source,
|
|
179
|
-
noTests:
|
|
180
|
-
includeTests: opts.
|
|
204
|
+
noTests: resolveNoTests(opts),
|
|
205
|
+
includeTests: opts.withTestSource,
|
|
181
206
|
json: opts.json,
|
|
182
207
|
});
|
|
183
208
|
});
|
|
@@ -186,10 +211,16 @@ program
|
|
|
186
211
|
.command('explain <target>')
|
|
187
212
|
.description('Structural summary of a file or function (no LLM needed)')
|
|
188
213
|
.option('-d, --db <path>', 'Path to graph.db')
|
|
214
|
+
.option('--depth <n>', 'Recursively explain dependencies up to N levels deep', '0')
|
|
189
215
|
.option('-T, --no-tests', 'Exclude test/spec files from results')
|
|
216
|
+
.option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
|
|
190
217
|
.option('-j, --json', 'Output as JSON')
|
|
191
218
|
.action((target, opts) => {
|
|
192
|
-
explain(target, opts.db, {
|
|
219
|
+
explain(target, opts.db, {
|
|
220
|
+
depth: parseInt(opts.depth, 10),
|
|
221
|
+
noTests: resolveNoTests(opts),
|
|
222
|
+
json: opts.json,
|
|
223
|
+
});
|
|
193
224
|
});
|
|
194
225
|
|
|
195
226
|
program
|
|
@@ -198,6 +229,7 @@ program
|
|
|
198
229
|
.option('-d, --db <path>', 'Path to graph.db')
|
|
199
230
|
.option('-f, --file <path>', 'File overview: list symbols, imports, exports')
|
|
200
231
|
.option('-T, --no-tests', 'Exclude test/spec files from results')
|
|
232
|
+
.option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
|
|
201
233
|
.option('-j, --json', 'Output as JSON')
|
|
202
234
|
.action((name, opts) => {
|
|
203
235
|
if (!name && !opts.file) {
|
|
@@ -205,7 +237,7 @@ program
|
|
|
205
237
|
process.exit(1);
|
|
206
238
|
}
|
|
207
239
|
const target = opts.file || name;
|
|
208
|
-
where(target, opts.db, { file: !!opts.file, noTests:
|
|
240
|
+
where(target, opts.db, { file: !!opts.file, noTests: resolveNoTests(opts), json: opts.json });
|
|
209
241
|
});
|
|
210
242
|
|
|
211
243
|
program
|
|
@@ -215,6 +247,7 @@ program
|
|
|
215
247
|
.option('--staged', 'Analyze staged changes instead of unstaged')
|
|
216
248
|
.option('--depth <n>', 'Max transitive caller depth', '3')
|
|
217
249
|
.option('-T, --no-tests', 'Exclude test/spec files from results')
|
|
250
|
+
.option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
|
|
218
251
|
.option('-j, --json', 'Output as JSON')
|
|
219
252
|
.option('-f, --format <format>', 'Output format: text, mermaid, json', 'text')
|
|
220
253
|
.action((ref, opts) => {
|
|
@@ -222,7 +255,7 @@ program
|
|
|
222
255
|
ref,
|
|
223
256
|
staged: opts.staged,
|
|
224
257
|
depth: parseInt(opts.depth, 10),
|
|
225
|
-
noTests:
|
|
258
|
+
noTests: resolveNoTests(opts),
|
|
226
259
|
json: opts.json,
|
|
227
260
|
format: opts.format,
|
|
228
261
|
});
|
|
@@ -237,10 +270,16 @@ program
|
|
|
237
270
|
.option('-f, --format <format>', 'Output format: dot, mermaid, json', 'dot')
|
|
238
271
|
.option('--functions', 'Function-level graph instead of file-level')
|
|
239
272
|
.option('-T, --no-tests', 'Exclude test/spec files')
|
|
273
|
+
.option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
|
|
274
|
+
.option('--min-confidence <score>', 'Minimum edge confidence threshold (default: 0.5)', '0.5')
|
|
240
275
|
.option('-o, --output <file>', 'Write to file instead of stdout')
|
|
241
276
|
.action((opts) => {
|
|
242
|
-
const db =
|
|
243
|
-
const exportOpts = {
|
|
277
|
+
const db = openReadonlyOrFail(opts.db);
|
|
278
|
+
const exportOpts = {
|
|
279
|
+
fileLevel: !opts.functions,
|
|
280
|
+
noTests: resolveNoTests(opts),
|
|
281
|
+
minConfidence: parseFloat(opts.minConfidence),
|
|
282
|
+
};
|
|
244
283
|
|
|
245
284
|
let output;
|
|
246
285
|
switch (opts.format) {
|
|
@@ -271,10 +310,11 @@ program
|
|
|
271
310
|
.option('-d, --db <path>', 'Path to graph.db')
|
|
272
311
|
.option('--functions', 'Function-level cycle detection')
|
|
273
312
|
.option('-T, --no-tests', 'Exclude test/spec files')
|
|
313
|
+
.option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
|
|
274
314
|
.option('-j, --json', 'Output as JSON')
|
|
275
315
|
.action((opts) => {
|
|
276
|
-
const db =
|
|
277
|
-
const cycles = findCycles(db, { fileLevel: !opts.functions, noTests:
|
|
316
|
+
const db = openReadonlyOrFail(opts.db);
|
|
317
|
+
const cycles = findCycles(db, { fileLevel: !opts.functions, noTests: resolveNoTests(opts) });
|
|
278
318
|
db.close();
|
|
279
319
|
|
|
280
320
|
if (opts.json) {
|
|
@@ -376,10 +416,13 @@ program
|
|
|
376
416
|
.action(() => {
|
|
377
417
|
console.log('\nAvailable embedding models:\n');
|
|
378
418
|
for (const [key, config] of Object.entries(MODELS)) {
|
|
379
|
-
const def = key === '
|
|
380
|
-
|
|
419
|
+
const def = key === 'minilm' ? ' (default)' : '';
|
|
420
|
+
const ctx = config.contextWindow ? `${config.contextWindow} ctx` : '';
|
|
421
|
+
console.log(
|
|
422
|
+
` ${key.padEnd(12)} ${String(config.dim).padStart(4)}d ${ctx.padEnd(9)} ${config.desc}${def}`,
|
|
423
|
+
);
|
|
381
424
|
}
|
|
382
|
-
console.log('\nUsage: codegraph embed --model <name>');
|
|
425
|
+
console.log('\nUsage: codegraph embed --model <name> --strategy <structured|source>');
|
|
383
426
|
console.log(' codegraph search "query" --model <name>\n');
|
|
384
427
|
});
|
|
385
428
|
|
|
@@ -390,12 +433,23 @@ program
|
|
|
390
433
|
)
|
|
391
434
|
.option(
|
|
392
435
|
'-m, --model <name>',
|
|
393
|
-
'Embedding model: minilm, jina-small, jina-base, jina-code, nomic, nomic-v1.5
|
|
394
|
-
'
|
|
436
|
+
'Embedding model: minilm (default), jina-small, jina-base, jina-code, nomic, nomic-v1.5, bge-large. Run `codegraph models` for details',
|
|
437
|
+
'minilm',
|
|
438
|
+
)
|
|
439
|
+
.option(
|
|
440
|
+
'-s, --strategy <name>',
|
|
441
|
+
`Embedding strategy: ${EMBEDDING_STRATEGIES.join(', ')}. "structured" uses graph context (callers/callees), "source" embeds raw code`,
|
|
442
|
+
'structured',
|
|
395
443
|
)
|
|
396
444
|
.action(async (dir, opts) => {
|
|
445
|
+
if (!EMBEDDING_STRATEGIES.includes(opts.strategy)) {
|
|
446
|
+
console.error(
|
|
447
|
+
`Unknown strategy: ${opts.strategy}. Available: ${EMBEDDING_STRATEGIES.join(', ')}`,
|
|
448
|
+
);
|
|
449
|
+
process.exit(1);
|
|
450
|
+
}
|
|
397
451
|
const root = path.resolve(dir || '.');
|
|
398
|
-
await buildEmbeddings(root, opts.model);
|
|
452
|
+
await buildEmbeddings(root, opts.model, undefined, { strategy: opts.strategy });
|
|
399
453
|
});
|
|
400
454
|
|
|
401
455
|
program
|
|
@@ -405,6 +459,7 @@ program
|
|
|
405
459
|
.option('-m, --model <name>', 'Override embedding model (auto-detects from DB)')
|
|
406
460
|
.option('-n, --limit <number>', 'Max results', '15')
|
|
407
461
|
.option('-T, --no-tests', 'Exclude test/spec files from results')
|
|
462
|
+
.option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
|
|
408
463
|
.option('--min-score <score>', 'Minimum similarity threshold', '0.2')
|
|
409
464
|
.option('-k, --kind <kind>', 'Filter by kind: function, method, class')
|
|
410
465
|
.option('--file <pattern>', 'Filter by file path pattern')
|
|
@@ -412,7 +467,7 @@ program
|
|
|
412
467
|
.action(async (query, opts) => {
|
|
413
468
|
await search(query, opts.db, {
|
|
414
469
|
limit: parseInt(opts.limit, 10),
|
|
415
|
-
noTests:
|
|
470
|
+
noTests: resolveNoTests(opts),
|
|
416
471
|
minScore: parseFloat(opts.minScore),
|
|
417
472
|
model: opts.model,
|
|
418
473
|
kind: opts.kind,
|
|
@@ -430,6 +485,7 @@ program
|
|
|
430
485
|
.option('--depth <n>', 'Max directory depth')
|
|
431
486
|
.option('--sort <metric>', 'Sort by: cohesion | fan-in | fan-out | density | files', 'files')
|
|
432
487
|
.option('-T, --no-tests', 'Exclude test/spec files')
|
|
488
|
+
.option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
|
|
433
489
|
.option('-j, --json', 'Output as JSON')
|
|
434
490
|
.action(async (dir, opts) => {
|
|
435
491
|
const { structureData, formatStructure } = await import('./structure.js');
|
|
@@ -437,7 +493,7 @@ program
|
|
|
437
493
|
directory: dir,
|
|
438
494
|
depth: opts.depth ? parseInt(opts.depth, 10) : undefined,
|
|
439
495
|
sort: opts.sort,
|
|
440
|
-
noTests:
|
|
496
|
+
noTests: resolveNoTests(opts),
|
|
441
497
|
});
|
|
442
498
|
if (opts.json) {
|
|
443
499
|
console.log(JSON.stringify(data, null, 2));
|
|
@@ -456,6 +512,7 @@ program
|
|
|
456
512
|
.option('--metric <metric>', 'fan-in | fan-out | density | coupling', 'fan-in')
|
|
457
513
|
.option('--level <level>', 'file | directory', 'file')
|
|
458
514
|
.option('-T, --no-tests', 'Exclude test/spec files from results')
|
|
515
|
+
.option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
|
|
459
516
|
.option('-j, --json', 'Output as JSON')
|
|
460
517
|
.action(async (opts) => {
|
|
461
518
|
const { hotspotsData, formatHotspots } = await import('./structure.js');
|
|
@@ -463,7 +520,7 @@ program
|
|
|
463
520
|
metric: opts.metric,
|
|
464
521
|
level: opts.level,
|
|
465
522
|
limit: parseInt(opts.limit, 10),
|
|
466
|
-
noTests:
|
|
523
|
+
noTests: resolveNoTests(opts),
|
|
467
524
|
});
|
|
468
525
|
if (opts.json) {
|
|
469
526
|
console.log(JSON.stringify(data, null, 2));
|
package/src/config.js
CHANGED
package/src/embedder.js
CHANGED
|
@@ -4,6 +4,18 @@ import Database from 'better-sqlite3';
|
|
|
4
4
|
import { findDbPath, openReadonlyOrFail } from './db.js';
|
|
5
5
|
import { warn } from './logger.js';
|
|
6
6
|
|
|
7
|
+
/**
|
|
8
|
+
* Split an identifier into readable words.
|
|
9
|
+
* camelCase/PascalCase → "camel Case", snake_case → "snake case", kebab-case → "kebab case"
|
|
10
|
+
*/
|
|
11
|
+
function splitIdentifier(name) {
|
|
12
|
+
return name
|
|
13
|
+
.replace(/([a-z])([A-Z])/g, '$1 $2')
|
|
14
|
+
.replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2')
|
|
15
|
+
.replace(/[_-]+/g, ' ')
|
|
16
|
+
.trim();
|
|
17
|
+
}
|
|
18
|
+
|
|
7
19
|
// Lazy-load transformers (heavy, optional module)
|
|
8
20
|
let pipeline = null;
|
|
9
21
|
let _cos_sim = null;
|
|
@@ -14,48 +26,57 @@ export const MODELS = {
|
|
|
14
26
|
minilm: {
|
|
15
27
|
name: 'Xenova/all-MiniLM-L6-v2',
|
|
16
28
|
dim: 384,
|
|
29
|
+
contextWindow: 256,
|
|
17
30
|
desc: 'Smallest, fastest (~23MB). General text.',
|
|
18
31
|
quantized: true,
|
|
19
32
|
},
|
|
20
33
|
'jina-small': {
|
|
21
34
|
name: 'Xenova/jina-embeddings-v2-small-en',
|
|
22
35
|
dim: 512,
|
|
36
|
+
contextWindow: 8192,
|
|
23
37
|
desc: 'Small, good quality (~33MB). General text.',
|
|
24
38
|
quantized: false,
|
|
25
39
|
},
|
|
26
40
|
'jina-base': {
|
|
27
41
|
name: 'Xenova/jina-embeddings-v2-base-en',
|
|
28
42
|
dim: 768,
|
|
43
|
+
contextWindow: 8192,
|
|
29
44
|
desc: 'Good quality (~137MB). General text, 8192 token context.',
|
|
30
45
|
quantized: false,
|
|
31
46
|
},
|
|
32
47
|
'jina-code': {
|
|
33
48
|
name: 'Xenova/jina-embeddings-v2-base-code',
|
|
34
49
|
dim: 768,
|
|
50
|
+
contextWindow: 8192,
|
|
35
51
|
desc: 'Code-aware (~137MB). Trained on code+text, best for code search.',
|
|
36
52
|
quantized: false,
|
|
37
53
|
},
|
|
38
54
|
nomic: {
|
|
39
55
|
name: 'Xenova/nomic-embed-text-v1',
|
|
40
56
|
dim: 768,
|
|
57
|
+
contextWindow: 8192,
|
|
41
58
|
desc: 'Good local quality (~137MB). 8192 context.',
|
|
42
59
|
quantized: false,
|
|
43
60
|
},
|
|
44
61
|
'nomic-v1.5': {
|
|
45
62
|
name: 'nomic-ai/nomic-embed-text-v1.5',
|
|
46
63
|
dim: 768,
|
|
64
|
+
contextWindow: 8192,
|
|
47
65
|
desc: 'Improved nomic (~137MB). Matryoshka dimensions, 8192 context.',
|
|
48
66
|
quantized: false,
|
|
49
67
|
},
|
|
50
68
|
'bge-large': {
|
|
51
69
|
name: 'Xenova/bge-large-en-v1.5',
|
|
52
70
|
dim: 1024,
|
|
71
|
+
contextWindow: 512,
|
|
53
72
|
desc: 'Best general retrieval (~335MB). Top MTEB scores.',
|
|
54
73
|
quantized: false,
|
|
55
74
|
},
|
|
56
75
|
};
|
|
57
76
|
|
|
58
|
-
export const
|
|
77
|
+
export const EMBEDDING_STRATEGIES = ['structured', 'source'];
|
|
78
|
+
|
|
79
|
+
export const DEFAULT_MODEL = 'minilm';
|
|
59
80
|
const BATCH_SIZE_MAP = {
|
|
60
81
|
minilm: 32,
|
|
61
82
|
'jina-small': 16,
|
|
@@ -77,6 +98,108 @@ function getModelConfig(modelKey) {
|
|
|
77
98
|
return config;
|
|
78
99
|
}
|
|
79
100
|
|
|
101
|
+
/**
 * Rough token estimate (~4 characters per token for code/English).
 * A length-based heuristic — avoids adding a tokenizer dependency.
 *
 * @param {string|null|undefined} text - Text whose size should be estimated.
 * @returns {number} Estimated token count; 0 for empty or missing text.
 */
export function estimateTokens(text) {
  // Missing text has no tokens; without this guard `text.length` would throw.
  if (text == null) return 0;
  return Math.ceil(text.length / 4);
}
|
|
108
|
+
|
|
109
|
+
/**
 * Extract leading comment text (JSDoc, //, #, etc.) above a function line.
 *
 * Walks upward from the line just before `fnLineIndex`, collecting consecutive
 * lines that start with a comment marker. Blank lines between the function and
 * its comment are skipped; a blank line after comment text has been collected,
 * or any non-comment line, ends the scan.
 *
 * @param {string[]} lines - Source file split into lines.
 * @param {number} fnLineIndex - Zero-based index of the function's first line.
 * @param {number} [maxLookback=15] - Maximum number of lines to inspect above the function.
 * @returns {string|null} Cleaned comment text joined with spaces, or null if none found.
 */
function extractLeadingComment(lines, fnLineIndex, maxLookback = 15) {
  if (!Array.isArray(lines)) return null;

  // `//` already covers `///`, so no separate alternative is needed.
  const commentStart = /^(\/\/|\/\*|\*\/|\*|#)/;
  const firstIndex = Math.max(0, fnLineIndex - maxLookback);
  const raw = [];

  for (let i = fnLineIndex - 1; i >= firstIndex; i--) {
    const line = lines[i];
    // The index can point past the end of the file (e.g. the file shrank after
    // the symbol's line was recorded); treat that as "no comment" instead of throwing.
    if (typeof line !== 'string') break;

    const trimmed = line.trim();
    if (commentStart.test(trimmed)) {
      raw.unshift(trimmed);
    } else if (trimmed !== '' || raw.length > 0) {
      // Code line, or a blank line above an already-collected comment.
      break;
    }
  }

  if (raw.length === 0) return null;

  return raw
    .map((line) =>
      line
        .replace(/^\/\*\*?\s?|\*\/$/g, '') // opening /** or /* and closing */
        .replace(/^\*\s?/, '') // middle * lines
        .replace(/^\/\/\/?\s?/, '') // // or ///
        .replace(/^#\s?/, '') // # (Python/Ruby)
        .trim(),
    )
    .filter((l) => l.length > 0)
    .join(' ');
}
|
|
138
|
+
|
|
139
|
+
/**
 * Build graph-enriched text for a symbol using dependency context.
 * Produces compact, semantic text (~100 tokens) instead of full source code.
 *
 * The result is a newline-joined list of: a header line, the parameter list
 * taken from the signature line (best-effort, single-line), up to 10 callee
 * names, up to 10 caller names, and either the leading comment or the first
 * few lines of the symbol's source.
 *
 * @param {{id: *, kind: string, name: string, line: number}} node - Symbol row; `line` is 1-based.
 * @param {string} file - File path used in the header line.
 * @param {string[]} lines - Contents of `file` split into lines.
 * @param {{all: Function}} calleesStmt - Statement whose `all(node.id)` yields rows with a `name` field.
 * @param {{all: Function}} callersStmt - Statement whose `all(node.id)` yields rows with a `name` field.
 * @returns {string} Text to embed for this symbol.
 */
function buildStructuredText(node, file, lines, calleesStmt, callersStmt) {
  const maxNeighbors = 10;
  const readable = splitIdentifier(node.name);
  const parts = [`${node.kind} ${node.name} (${readable}) in ${file}`];
  const startLine = Math.max(0, node.line - 1);

  // Extract parameters from signature (best-effort, single-line)
  const sigLine = lines[startLine] || '';
  const params = sigLine.match(/\(([^)]*)\)/)?.[1]?.trim();
  if (params) {
    parts.push(`Parameters: ${params}`);
  }

  // Graph context: callees and callers share the same formatting and cap.
  const pushNeighbors = (label, stmt) => {
    const rows = stmt.all(node.id);
    if (rows.length === 0) return;
    const names = rows
      .slice(0, maxNeighbors)
      .map((row) => row.name)
      .join(', ');
    parts.push(`${label}: ${names}`);
  };
  pushNeighbors('Calls', calleesStmt);
  pushNeighbors('Called by', callersStmt);

  // Leading comment (high semantic value) or first few lines of code.
  // Only look for a comment when the symbol's line actually exists in `lines`;
  // otherwise the lookup would read past the end of the file or pick up an
  // unrelated trailing comment.
  const lineExists = startLine < lines.length;
  const comment = lineExists ? extractLeadingComment(lines, startLine) : null;
  if (comment) {
    parts.push(comment);
  } else {
    const endLine = Math.min(lines.length, startLine + 4);
    const snippet = lines.slice(startLine, endLine).join('\n').trim();
    if (snippet) parts.push(snippet);
  }

  return parts.join('\n');
}
|
|
189
|
+
|
|
190
|
+
/**
 * Build raw source-code text for a symbol (original strategy).
 *
 * The text is a one-line header followed by the symbol's source lines. When
 * the node has an `end_line`, the source runs from `line` through `end_line`;
 * otherwise 15 lines starting at `line` are used. Both bounds are clamped to
 * the length of `lines`.
 *
 * @param {{kind: string, name: string, line: number, end_line?: number}} node - Symbol row; `line` is 1-based.
 * @param {string} file - File path used in the header line.
 * @param {string[]} lines - Contents of `file` split into lines.
 * @returns {string} Header line plus the symbol's source text.
 */
function buildSourceText(node, file, lines) {
  const firstIdx = Math.max(0, node.line - 1);

  let lastIdx;
  if (node.end_line) {
    lastIdx = Math.min(lines.length, node.end_line);
  } else {
    lastIdx = Math.min(lines.length, firstIdx + 15);
  }

  const body = lines.slice(firstIdx, lastIdx).join('\n');
  const header = `${node.kind} ${node.name} (${splitIdentifier(node.name)}) in ${file}`;
  return `${header}\n${body}`;
}
|
|
202
|
+
|
|
80
203
|
/**
|
|
81
204
|
* Lazy-load @huggingface/transformers.
|
|
82
205
|
* This is an optional dependency — gives a clear error if not installed.
|
|
@@ -103,8 +226,27 @@ async function loadModel(modelKey) {
|
|
|
103
226
|
_cos_sim = transformers.cos_sim;
|
|
104
227
|
|
|
105
228
|
console.log(`Loading embedding model: ${config.name} (${config.dim}d)...`);
|
|
106
|
-
const
|
|
107
|
-
|
|
229
|
+
const pipelineOpts = config.quantized ? { quantized: true } : {};
|
|
230
|
+
try {
|
|
231
|
+
extractor = await pipeline('feature-extraction', config.name, pipelineOpts);
|
|
232
|
+
} catch (err) {
|
|
233
|
+
const msg = err.message || String(err);
|
|
234
|
+
if (msg.includes('Unauthorized') || msg.includes('401') || msg.includes('gated')) {
|
|
235
|
+
console.error(
|
|
236
|
+
`\nModel "${config.name}" requires authentication.\n` +
|
|
237
|
+
`This model is gated on HuggingFace and needs an access token.\n\n` +
|
|
238
|
+
`Options:\n` +
|
|
239
|
+
` 1. Set HF_TOKEN env var: export HF_TOKEN=hf_...\n` +
|
|
240
|
+
` 2. Use a public model instead: codegraph embed --model minilm\n`,
|
|
241
|
+
);
|
|
242
|
+
} else {
|
|
243
|
+
console.error(
|
|
244
|
+
`\nFailed to load model "${config.name}": ${msg}\n` +
|
|
245
|
+
`Try a different model: codegraph embed --model minilm\n`,
|
|
246
|
+
);
|
|
247
|
+
}
|
|
248
|
+
process.exit(1);
|
|
249
|
+
}
|
|
108
250
|
activeModel = config.name;
|
|
109
251
|
console.log('Model loaded.');
|
|
110
252
|
return { extractor, config };
|
|
@@ -172,12 +314,24 @@ function initEmbeddingsSchema(db) {
|
|
|
172
314
|
|
|
173
315
|
/**
|
|
174
316
|
* Build embeddings for all functions/methods/classes in the graph.
|
|
317
|
+
* @param {string} rootDir - Project root directory
|
|
318
|
+
* @param {string} modelKey - Model identifier from MODELS registry
|
|
319
|
+
* @param {string} [customDbPath] - Override path to graph.db
|
|
320
|
+
* @param {object} [options] - Embedding options
|
|
321
|
+
* @param {string} [options.strategy='structured'] - 'structured' (graph-enriched) or 'source' (raw code)
|
|
175
322
|
*/
|
|
176
|
-
export async function buildEmbeddings(rootDir, modelKey, customDbPath) {
|
|
177
|
-
|
|
178
|
-
// fs already imported at top
|
|
323
|
+
export async function buildEmbeddings(rootDir, modelKey, customDbPath, options = {}) {
|
|
324
|
+
const strategy = options.strategy || 'structured';
|
|
179
325
|
const dbPath = customDbPath || findDbPath(null);
|
|
180
326
|
|
|
327
|
+
if (!fs.existsSync(dbPath)) {
|
|
328
|
+
console.error(
|
|
329
|
+
`No codegraph database found at ${dbPath}.\n` +
|
|
330
|
+
`Run "codegraph build" first to analyze your codebase.`,
|
|
331
|
+
);
|
|
332
|
+
process.exit(1);
|
|
333
|
+
}
|
|
334
|
+
|
|
181
335
|
const db = new Database(dbPath);
|
|
182
336
|
initEmbeddingsSchema(db);
|
|
183
337
|
|
|
@@ -190,7 +344,24 @@ export async function buildEmbeddings(rootDir, modelKey, customDbPath) {
|
|
|
190
344
|
)
|
|
191
345
|
.all();
|
|
192
346
|
|
|
193
|
-
console.log(`Building embeddings for ${nodes.length} symbols...`);
|
|
347
|
+
console.log(`Building embeddings for ${nodes.length} symbols (strategy: ${strategy})...`);
|
|
348
|
+
|
|
349
|
+
// Prepare graph-context queries for structured strategy
|
|
350
|
+
let calleesStmt, callersStmt;
|
|
351
|
+
if (strategy === 'structured') {
|
|
352
|
+
calleesStmt = db.prepare(`
|
|
353
|
+
SELECT DISTINCT n.name FROM edges e
|
|
354
|
+
JOIN nodes n ON e.target_id = n.id
|
|
355
|
+
WHERE e.source_id = ? AND e.kind = 'calls'
|
|
356
|
+
ORDER BY n.name
|
|
357
|
+
`);
|
|
358
|
+
callersStmt = db.prepare(`
|
|
359
|
+
SELECT DISTINCT n.name FROM edges e
|
|
360
|
+
JOIN nodes n ON e.source_id = n.id
|
|
361
|
+
WHERE e.target_id = ? AND e.kind = 'calls'
|
|
362
|
+
ORDER BY n.name
|
|
363
|
+
`);
|
|
364
|
+
}
|
|
194
365
|
|
|
195
366
|
const byFile = new Map();
|
|
196
367
|
for (const node of nodes) {
|
|
@@ -201,6 +372,9 @@ export async function buildEmbeddings(rootDir, modelKey, customDbPath) {
|
|
|
201
372
|
const texts = [];
|
|
202
373
|
const nodeIds = [];
|
|
203
374
|
const previews = [];
|
|
375
|
+
const config = getModelConfig(modelKey);
|
|
376
|
+
const contextWindow = config.contextWindow;
|
|
377
|
+
let overflowCount = 0;
|
|
204
378
|
|
|
205
379
|
for (const [file, fileNodes] of byFile) {
|
|
206
380
|
const fullPath = path.join(rootDir, file);
|
|
@@ -213,19 +387,31 @@ export async function buildEmbeddings(rootDir, modelKey, customDbPath) {
|
|
|
213
387
|
}
|
|
214
388
|
|
|
215
389
|
for (const node of fileNodes) {
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
390
|
+
let text =
|
|
391
|
+
strategy === 'structured'
|
|
392
|
+
? buildStructuredText(node, file, lines, calleesStmt, callersStmt)
|
|
393
|
+
: buildSourceText(node, file, lines);
|
|
394
|
+
|
|
395
|
+
// Detect and handle context window overflow
|
|
396
|
+
const tokens = estimateTokens(text);
|
|
397
|
+
if (tokens > contextWindow) {
|
|
398
|
+
overflowCount++;
|
|
399
|
+
const maxChars = contextWindow * 4;
|
|
400
|
+
text = text.slice(0, maxChars);
|
|
401
|
+
}
|
|
221
402
|
|
|
222
|
-
const text = `${node.kind} ${node.name} in ${file}\n${context}`;
|
|
223
403
|
texts.push(text);
|
|
224
404
|
nodeIds.push(node.id);
|
|
225
405
|
previews.push(`${node.name} (${node.kind}) -- ${file}:${node.line}`);
|
|
226
406
|
}
|
|
227
407
|
}
|
|
228
408
|
|
|
409
|
+
if (overflowCount > 0) {
|
|
410
|
+
warn(
|
|
411
|
+
`${overflowCount} symbol(s) exceeded model context window (${contextWindow} tokens) and were truncated`,
|
|
412
|
+
);
|
|
413
|
+
}
|
|
414
|
+
|
|
229
415
|
console.log(`Embedding ${texts.length} symbols...`);
|
|
230
416
|
const { vectors, dim } = await embed(texts, modelKey);
|
|
231
417
|
|
|
@@ -237,16 +423,19 @@ export async function buildEmbeddings(rootDir, modelKey, customDbPath) {
|
|
|
237
423
|
for (let i = 0; i < vectors.length; i++) {
|
|
238
424
|
insert.run(nodeIds[i], Buffer.from(vectors[i].buffer), previews[i]);
|
|
239
425
|
}
|
|
240
|
-
const config = getModelConfig(modelKey);
|
|
241
426
|
insertMeta.run('model', config.name);
|
|
242
427
|
insertMeta.run('dim', String(dim));
|
|
243
428
|
insertMeta.run('count', String(vectors.length));
|
|
429
|
+
insertMeta.run('strategy', strategy);
|
|
244
430
|
insertMeta.run('built_at', new Date().toISOString());
|
|
431
|
+
if (overflowCount > 0) {
|
|
432
|
+
insertMeta.run('truncated_count', String(overflowCount));
|
|
433
|
+
}
|
|
245
434
|
});
|
|
246
435
|
insertAll();
|
|
247
436
|
|
|
248
437
|
console.log(
|
|
249
|
-
`\nStored ${vectors.length} embeddings (${dim}d, ${
|
|
438
|
+
`\nStored ${vectors.length} embeddings (${dim}d, ${config.name}, strategy: ${strategy}) in graph.db`,
|
|
250
439
|
);
|
|
251
440
|
db.close();
|
|
252
441
|
}
|
package/src/export.js
CHANGED
|
@@ -1,12 +1,15 @@
|
|
|
1
1
|
import path from 'node:path';
|
|
2
2
|
import { isTestFile } from './queries.js';
|
|
3
3
|
|
|
4
|
+
const DEFAULT_MIN_CONFIDENCE = 0.5;
|
|
5
|
+
|
|
4
6
|
/**
|
|
5
7
|
* Export the dependency graph in DOT (Graphviz) format.
|
|
6
8
|
*/
|
|
7
9
|
export function exportDOT(db, opts = {}) {
|
|
8
10
|
const fileLevel = opts.fileLevel !== false;
|
|
9
11
|
const noTests = opts.noTests || false;
|
|
12
|
+
const minConf = opts.minConfidence ?? DEFAULT_MIN_CONFIDENCE;
|
|
10
13
|
const lines = [
|
|
11
14
|
'digraph codegraph {',
|
|
12
15
|
' rankdir=LR;',
|
|
@@ -23,8 +26,9 @@ export function exportDOT(db, opts = {}) {
|
|
|
23
26
|
JOIN nodes n1 ON e.source_id = n1.id
|
|
24
27
|
JOIN nodes n2 ON e.target_id = n2.id
|
|
25
28
|
WHERE n1.file != n2.file AND e.kind IN ('imports', 'imports-type', 'calls')
|
|
29
|
+
AND e.confidence >= ?
|
|
26
30
|
`)
|
|
27
|
-
.all();
|
|
31
|
+
.all(minConf);
|
|
28
32
|
if (noTests) edges = edges.filter((e) => !isTestFile(e.source) && !isTestFile(e.target));
|
|
29
33
|
|
|
30
34
|
// Try to use directory nodes from DB (built by structure analysis)
|
|
@@ -102,8 +106,9 @@ export function exportDOT(db, opts = {}) {
|
|
|
102
106
|
JOIN nodes n2 ON e.target_id = n2.id
|
|
103
107
|
WHERE n1.kind IN ('function', 'method', 'class', 'interface', 'type', 'struct', 'enum', 'trait', 'record', 'module') AND n2.kind IN ('function', 'method', 'class', 'interface', 'type', 'struct', 'enum', 'trait', 'record', 'module')
|
|
104
108
|
AND e.kind = 'calls'
|
|
109
|
+
AND e.confidence >= ?
|
|
105
110
|
`)
|
|
106
|
-
.all();
|
|
111
|
+
.all(minConf);
|
|
107
112
|
if (noTests)
|
|
108
113
|
edges = edges.filter((e) => !isTestFile(e.source_file) && !isTestFile(e.target_file));
|
|
109
114
|
|
|
@@ -126,6 +131,7 @@ export function exportDOT(db, opts = {}) {
|
|
|
126
131
|
export function exportMermaid(db, opts = {}) {
|
|
127
132
|
const fileLevel = opts.fileLevel !== false;
|
|
128
133
|
const noTests = opts.noTests || false;
|
|
134
|
+
const minConf = opts.minConfidence ?? DEFAULT_MIN_CONFIDENCE;
|
|
129
135
|
const lines = ['graph LR'];
|
|
130
136
|
|
|
131
137
|
if (fileLevel) {
|
|
@@ -136,8 +142,9 @@ export function exportMermaid(db, opts = {}) {
|
|
|
136
142
|
JOIN nodes n1 ON e.source_id = n1.id
|
|
137
143
|
JOIN nodes n2 ON e.target_id = n2.id
|
|
138
144
|
WHERE n1.file != n2.file AND e.kind IN ('imports', 'imports-type', 'calls')
|
|
145
|
+
AND e.confidence >= ?
|
|
139
146
|
`)
|
|
140
|
-
.all();
|
|
147
|
+
.all(minConf);
|
|
141
148
|
if (noTests) edges = edges.filter((e) => !isTestFile(e.source) && !isTestFile(e.target));
|
|
142
149
|
|
|
143
150
|
for (const { source, target } of edges) {
|
|
@@ -155,8 +162,9 @@ export function exportMermaid(db, opts = {}) {
|
|
|
155
162
|
JOIN nodes n2 ON e.target_id = n2.id
|
|
156
163
|
WHERE n1.kind IN ('function', 'method', 'class', 'interface', 'type', 'struct', 'enum', 'trait', 'record', 'module') AND n2.kind IN ('function', 'method', 'class', 'interface', 'type', 'struct', 'enum', 'trait', 'record', 'module')
|
|
157
164
|
AND e.kind = 'calls'
|
|
165
|
+
AND e.confidence >= ?
|
|
158
166
|
`)
|
|
159
|
-
.all();
|
|
167
|
+
.all(minConf);
|
|
160
168
|
if (noTests)
|
|
161
169
|
edges = edges.filter((e) => !isTestFile(e.source_file) && !isTestFile(e.target_file));
|
|
162
170
|
|
|
@@ -175,6 +183,7 @@ export function exportMermaid(db, opts = {}) {
|
|
|
175
183
|
*/
|
|
176
184
|
export function exportJSON(db, opts = {}) {
|
|
177
185
|
const noTests = opts.noTests || false;
|
|
186
|
+
const minConf = opts.minConfidence ?? DEFAULT_MIN_CONFIDENCE;
|
|
178
187
|
|
|
179
188
|
let nodes = db
|
|
180
189
|
.prepare(`
|
|
@@ -185,13 +194,13 @@ export function exportJSON(db, opts = {}) {
|
|
|
185
194
|
|
|
186
195
|
let edges = db
|
|
187
196
|
.prepare(`
|
|
188
|
-
SELECT DISTINCT n1.file AS source, n2.file AS target, e.kind
|
|
197
|
+
SELECT DISTINCT n1.file AS source, n2.file AS target, e.kind, e.confidence
|
|
189
198
|
FROM edges e
|
|
190
199
|
JOIN nodes n1 ON e.source_id = n1.id
|
|
191
200
|
JOIN nodes n2 ON e.target_id = n2.id
|
|
192
|
-
WHERE n1.file != n2.file
|
|
201
|
+
WHERE n1.file != n2.file AND e.confidence >= ?
|
|
193
202
|
`)
|
|
194
|
-
.all();
|
|
203
|
+
.all(minConf);
|
|
195
204
|
if (noTests) edges = edges.filter((e) => !isTestFile(e.source) && !isTestFile(e.target));
|
|
196
205
|
|
|
197
206
|
return { nodes, edges };
|
package/src/index.js
CHANGED
package/src/queries.js
CHANGED
|
@@ -334,6 +334,7 @@ export function moduleMapData(customDbPath, limit = 20, opts = {}) {
|
|
|
334
334
|
dir: path.dirname(n.file) || '.',
|
|
335
335
|
inEdges: n.in_edges,
|
|
336
336
|
outEdges: n.out_edges,
|
|
337
|
+
coupling: n.in_edges + n.out_edges,
|
|
337
338
|
}));
|
|
338
339
|
|
|
339
340
|
const totalNodes = db.prepare('SELECT COUNT(*) as c FROM nodes').get().c;
|
|
@@ -1263,10 +1264,10 @@ export function moduleMap(customDbPath, limit = 20, opts = {}) {
|
|
|
1263
1264
|
for (const [dir, files] of [...dirs].sort()) {
|
|
1264
1265
|
console.log(` [${dir}/]`);
|
|
1265
1266
|
for (const f of files) {
|
|
1266
|
-
const
|
|
1267
|
-
const bar = '#'.repeat(Math.min(
|
|
1267
|
+
const coupling = f.inEdges + f.outEdges;
|
|
1268
|
+
const bar = '#'.repeat(Math.min(coupling, 40));
|
|
1268
1269
|
console.log(
|
|
1269
|
-
` ${path.basename(f.file).padEnd(35)} <-${String(f.inEdges).padStart(3)} ->${String(f.outEdges).padStart(3)} ${bar}`,
|
|
1270
|
+
` ${path.basename(f.file).padEnd(35)} <-${String(f.inEdges).padStart(3)} ->${String(f.outEdges).padStart(3)} =${String(coupling).padStart(3)} ${bar}`,
|
|
1270
1271
|
);
|
|
1271
1272
|
}
|
|
1272
1273
|
}
|
|
@@ -1920,6 +1921,7 @@ function explainFunctionImpl(db, target, noTests, getFileLines) {
|
|
|
1920
1921
|
export function explainData(target, customDbPath, opts = {}) {
|
|
1921
1922
|
const db = openReadonlyOrFail(customDbPath);
|
|
1922
1923
|
const noTests = opts.noTests || false;
|
|
1924
|
+
const depth = opts.depth || 0;
|
|
1923
1925
|
const kind = isFileLikeTarget(target) ? 'file' : 'function';
|
|
1924
1926
|
|
|
1925
1927
|
const dbPath = findDbPath(customDbPath);
|
|
@@ -1949,6 +1951,37 @@ export function explainData(target, customDbPath, opts = {}) {
|
|
|
1949
1951
|
? explainFileImpl(db, target, getFileLines)
|
|
1950
1952
|
: explainFunctionImpl(db, target, noTests, getFileLines);
|
|
1951
1953
|
|
|
1954
|
+
// Recursive dependency explanation for function targets
|
|
1955
|
+
if (kind === 'function' && depth > 0 && results.length > 0) {
|
|
1956
|
+
const visited = new Set(results.map((r) => `${r.name}:${r.file}:${r.line}`));
|
|
1957
|
+
|
|
1958
|
+
function explainCallees(parentResults, currentDepth) {
|
|
1959
|
+
if (currentDepth <= 0) return;
|
|
1960
|
+
for (const r of parentResults) {
|
|
1961
|
+
const newCallees = [];
|
|
1962
|
+
for (const callee of r.callees) {
|
|
1963
|
+
const key = `${callee.name}:${callee.file}:${callee.line}`;
|
|
1964
|
+
if (visited.has(key)) continue;
|
|
1965
|
+
visited.add(key);
|
|
1966
|
+
const calleeResults = explainFunctionImpl(db, callee.name, noTests, getFileLines);
|
|
1967
|
+
const exact = calleeResults.find(
|
|
1968
|
+
(cr) => cr.file === callee.file && cr.line === callee.line,
|
|
1969
|
+
);
|
|
1970
|
+
if (exact) {
|
|
1971
|
+
exact._depth = (r._depth || 0) + 1;
|
|
1972
|
+
newCallees.push(exact);
|
|
1973
|
+
}
|
|
1974
|
+
}
|
|
1975
|
+
if (newCallees.length > 0) {
|
|
1976
|
+
r.depDetails = newCallees;
|
|
1977
|
+
explainCallees(newCallees, currentDepth - 1);
|
|
1978
|
+
}
|
|
1979
|
+
}
|
|
1980
|
+
}
|
|
1981
|
+
|
|
1982
|
+
explainCallees(results, depth);
|
|
1983
|
+
}
|
|
1984
|
+
|
|
1952
1985
|
db.close();
|
|
1953
1986
|
return { target, kind, results };
|
|
1954
1987
|
}
|
|
@@ -2008,46 +2041,63 @@ export function explain(target, customDbPath, opts = {}) {
|
|
|
2008
2041
|
console.log();
|
|
2009
2042
|
}
|
|
2010
2043
|
} else {
|
|
2011
|
-
|
|
2044
|
+
function printFunctionExplain(r, indent = '') {
|
|
2012
2045
|
const lineRange = r.endLine ? `${r.line}-${r.endLine}` : `${r.line}`;
|
|
2013
2046
|
const lineInfo = r.lineCount ? `${r.lineCount} lines` : '';
|
|
2014
2047
|
const summaryPart = r.summary ? ` | ${r.summary}` : '';
|
|
2015
|
-
|
|
2048
|
+
const depthLevel = r._depth || 0;
|
|
2049
|
+
const heading = depthLevel === 0 ? '#' : '##'.padEnd(depthLevel + 2, '#');
|
|
2050
|
+
console.log(`\n${indent}${heading} ${r.name} (${r.kind}) ${r.file}:${lineRange}`);
|
|
2016
2051
|
if (lineInfo || r.summary) {
|
|
2017
|
-
console.log(
|
|
2052
|
+
console.log(`${indent} ${lineInfo}${summaryPart}`);
|
|
2018
2053
|
}
|
|
2019
2054
|
if (r.signature) {
|
|
2020
|
-
if (r.signature.params != null)
|
|
2021
|
-
|
|
2055
|
+
if (r.signature.params != null)
|
|
2056
|
+
console.log(`${indent} Parameters: (${r.signature.params})`);
|
|
2057
|
+
if (r.signature.returnType) console.log(`${indent} Returns: ${r.signature.returnType}`);
|
|
2022
2058
|
}
|
|
2023
2059
|
|
|
2024
2060
|
if (r.callees.length > 0) {
|
|
2025
|
-
console.log(`\n
|
|
2061
|
+
console.log(`\n${indent} Calls (${r.callees.length}):`);
|
|
2026
2062
|
for (const c of r.callees) {
|
|
2027
|
-
console.log(
|
|
2063
|
+
console.log(`${indent} ${kindIcon(c.kind)} ${c.name} ${c.file}:${c.line}`);
|
|
2028
2064
|
}
|
|
2029
2065
|
}
|
|
2030
2066
|
|
|
2031
2067
|
if (r.callers.length > 0) {
|
|
2032
|
-
console.log(`\n
|
|
2068
|
+
console.log(`\n${indent} Called by (${r.callers.length}):`);
|
|
2033
2069
|
for (const c of r.callers) {
|
|
2034
|
-
console.log(
|
|
2070
|
+
console.log(`${indent} ${kindIcon(c.kind)} ${c.name} ${c.file}:${c.line}`);
|
|
2035
2071
|
}
|
|
2036
2072
|
}
|
|
2037
2073
|
|
|
2038
2074
|
if (r.relatedTests.length > 0) {
|
|
2039
2075
|
const label = r.relatedTests.length === 1 ? 'file' : 'files';
|
|
2040
|
-
console.log(`\n
|
|
2076
|
+
console.log(`\n${indent} Tests (${r.relatedTests.length} ${label}):`);
|
|
2041
2077
|
for (const t of r.relatedTests) {
|
|
2042
|
-
console.log(
|
|
2078
|
+
console.log(`${indent} ${t.file}`);
|
|
2043
2079
|
}
|
|
2044
2080
|
}
|
|
2045
2081
|
|
|
2046
2082
|
if (r.callees.length === 0 && r.callers.length === 0) {
|
|
2047
|
-
console.log(
|
|
2083
|
+
console.log(
|
|
2084
|
+
`${indent} (no call edges found -- may be invoked dynamically or via re-exports)`,
|
|
2085
|
+
);
|
|
2086
|
+
}
|
|
2087
|
+
|
|
2088
|
+
// Render recursive dependency details
|
|
2089
|
+
if (r.depDetails && r.depDetails.length > 0) {
|
|
2090
|
+
console.log(`\n${indent} --- Dependencies (depth ${depthLevel + 1}) ---`);
|
|
2091
|
+
for (const dep of r.depDetails) {
|
|
2092
|
+
printFunctionExplain(dep, `${indent} `);
|
|
2093
|
+
}
|
|
2048
2094
|
}
|
|
2049
2095
|
console.log();
|
|
2050
2096
|
}
|
|
2097
|
+
|
|
2098
|
+
for (const r of data.results) {
|
|
2099
|
+
printFunctionExplain(r);
|
|
2100
|
+
}
|
|
2051
2101
|
}
|
|
2052
2102
|
}
|
|
2053
2103
|
|
package/src/structure.js
CHANGED
|
@@ -231,7 +231,8 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director
|
|
|
231
231
|
*/
|
|
232
232
|
export function structureData(customDbPath, opts = {}) {
|
|
233
233
|
const db = openReadonlyOrFail(customDbPath);
|
|
234
|
-
const
|
|
234
|
+
const rawDir = opts.directory || null;
|
|
235
|
+
const filterDir = rawDir && normalizePath(rawDir) !== '.' ? rawDir : null;
|
|
235
236
|
const maxDepth = opts.depth || null;
|
|
236
237
|
const sortBy = opts.sort || 'files';
|
|
237
238
|
const noTests = opts.noTests || false;
|