@optave/codegraph 2.2.2-dev.c252ef9 → 2.2.3-dev.44e8146
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -6
- package/package.json +5 -5
- package/src/builder.js +49 -13
- package/src/cli.js +83 -25
- package/src/config.js +1 -0
- package/src/embedder.js +196 -15
- package/src/export.js +16 -7
- package/src/index.js +2 -0
- package/src/queries.js +65 -15
- package/src/structure.js +2 -1
package/README.md
CHANGED
|
@@ -583,15 +583,16 @@ const { results: fused } = await multiSearchData(
|
|
|
583
583
|
|
|
584
584
|
## 🗺️ Roadmap
|
|
585
585
|
|
|
586
|
-
See **[ROADMAP.md](ROADMAP.md)** for the full development roadmap. Current plan:
|
|
586
|
+
See **[ROADMAP.md](ROADMAP.md)** for the full development roadmap and **[STABILITY.md](STABILITY.md)** for the stability policy and versioning guarantees. Current plan:
|
|
587
587
|
|
|
588
588
|
1. ~~**Rust Core**~~ ✅ **Complete** (v1.3.0) — native tree-sitter parsing via napi-rs, parallel multi-core parsing, incremental re-parsing, import resolution & cycle detection in Rust
|
|
589
589
|
2. ~~**Foundation Hardening**~~ ✅ **Complete** (v1.4.0) — parser registry, 12-tool MCP server with multi-repo support, test coverage 62%→75%, `apiKeyCommand` secret resolution, global repo registry
|
|
590
|
-
3. **
|
|
591
|
-
4. **
|
|
592
|
-
5. **
|
|
593
|
-
6. **
|
|
594
|
-
7. **
|
|
590
|
+
3. **Architectural Refactoring** — parser plugin system, repository pattern, pipeline builder, engine strategy, domain errors, curated API
|
|
591
|
+
4. **Intelligent Embeddings** — LLM-generated descriptions, hybrid search
|
|
592
|
+
5. **Natural Language Queries** — `codegraph ask` command, conversational sessions
|
|
593
|
+
6. **Expanded Language Support** — 8 new languages (12 → 20)
|
|
594
|
+
7. **GitHub Integration & CI** — reusable GitHub Action, PR review, SARIF output
|
|
595
|
+
8. **Visualization & Advanced** — web UI, dead code detection, monorepo support, agentic search
|
|
595
596
|
|
|
596
597
|
## 🤝 Contributing
|
|
597
598
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@optave/codegraph",
|
|
3
|
-
"version": "2.2.
|
|
3
|
+
"version": "2.2.3-dev.44e8146",
|
|
4
4
|
"description": "Local code graph CLI β parse codebases with tree-sitter, build dependency graphs, query them",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "src/index.js",
|
|
@@ -61,10 +61,10 @@
|
|
|
61
61
|
"optionalDependencies": {
|
|
62
62
|
"@huggingface/transformers": "^3.8.1",
|
|
63
63
|
"@modelcontextprotocol/sdk": "^1.0.0",
|
|
64
|
-
"@optave/codegraph-darwin-arm64": "2.2.
|
|
65
|
-
"@optave/codegraph-darwin-x64": "2.2.
|
|
66
|
-
"@optave/codegraph-linux-x64-gnu": "2.2.
|
|
67
|
-
"@optave/codegraph-win32-x64-msvc": "2.2.
|
|
64
|
+
"@optave/codegraph-darwin-arm64": "2.2.3-dev.44e8146",
|
|
65
|
+
"@optave/codegraph-darwin-x64": "2.2.3-dev.44e8146",
|
|
66
|
+
"@optave/codegraph-linux-x64-gnu": "2.2.3-dev.44e8146",
|
|
67
|
+
"@optave/codegraph-win32-x64-msvc": "2.2.3-dev.44e8146"
|
|
68
68
|
},
|
|
69
69
|
"devDependencies": {
|
|
70
70
|
"@biomejs/biome": "^2.4.4",
|
package/src/builder.js
CHANGED
|
@@ -1,12 +1,11 @@
|
|
|
1
1
|
import { createHash } from 'node:crypto';
|
|
2
2
|
import fs from 'node:fs';
|
|
3
|
-
import os from 'node:os';
|
|
4
3
|
import path from 'node:path';
|
|
5
4
|
import { loadConfig } from './config.js';
|
|
6
5
|
import { EXTENSIONS, IGNORE_DIRS, normalizePath } from './constants.js';
|
|
7
6
|
import { initSchema, openDb } from './db.js';
|
|
8
7
|
import { readJournal, writeJournalHeader } from './journal.js';
|
|
9
|
-
import { debug, warn } from './logger.js';
|
|
8
|
+
import { debug, info, warn } from './logger.js';
|
|
10
9
|
import { getActiveEngine, parseFilesAuto } from './parser.js';
|
|
11
10
|
import { computeConfidence, resolveImportPath, resolveImportsBatch } from './resolve.js';
|
|
12
11
|
|
|
@@ -345,7 +344,7 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
345
344
|
// Engine selection: 'native', 'wasm', or 'auto' (default)
|
|
346
345
|
const engineOpts = { engine: opts.engine || 'auto' };
|
|
347
346
|
const { name: engineName, version: engineVersion } = getActiveEngine(engineOpts);
|
|
348
|
-
|
|
347
|
+
info(`Using ${engineName} engine${engineVersion ? ` (v${engineVersion})` : ''}`);
|
|
349
348
|
|
|
350
349
|
const aliases = loadPathAliases(rootDir);
|
|
351
350
|
// Merge config aliases
|
|
@@ -358,7 +357,7 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
358
357
|
}
|
|
359
358
|
|
|
360
359
|
if (aliases.baseUrl || Object.keys(aliases.paths).length > 0) {
|
|
361
|
-
|
|
360
|
+
info(
|
|
362
361
|
`Loaded path aliases: baseUrl=${aliases.baseUrl || 'none'}, ${Object.keys(aliases.paths).length} path mappings`,
|
|
363
362
|
);
|
|
364
363
|
}
|
|
@@ -366,7 +365,7 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
366
365
|
const collected = collectFiles(rootDir, [], config, new Set());
|
|
367
366
|
const files = collected.files;
|
|
368
367
|
const discoveredDirs = collected.directories;
|
|
369
|
-
|
|
368
|
+
info(`Found ${files.length} files to parse`);
|
|
370
369
|
|
|
371
370
|
// Check for incremental build
|
|
372
371
|
const { changed, removed, isFullBuild } = incremental
|
|
@@ -397,19 +396,36 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
397
396
|
/* ignore heal errors */
|
|
398
397
|
}
|
|
399
398
|
}
|
|
400
|
-
|
|
399
|
+
info('No changes detected. Graph is up to date.');
|
|
401
400
|
db.close();
|
|
402
401
|
writeJournalHeader(rootDir, Date.now());
|
|
403
402
|
return;
|
|
404
403
|
}
|
|
405
404
|
|
|
405
|
+
// Check if embeddings table exists (created by `embed`, not by initSchema)
|
|
406
|
+
let hasEmbeddings = false;
|
|
407
|
+
try {
|
|
408
|
+
db.prepare('SELECT 1 FROM embeddings LIMIT 1').get();
|
|
409
|
+
hasEmbeddings = true;
|
|
410
|
+
} catch {
|
|
411
|
+
/* table doesn't exist */
|
|
412
|
+
}
|
|
413
|
+
|
|
406
414
|
if (isFullBuild) {
|
|
415
|
+
const deletions =
|
|
416
|
+
'PRAGMA foreign_keys = OFF; DELETE FROM node_metrics; DELETE FROM edges; DELETE FROM nodes; PRAGMA foreign_keys = ON;';
|
|
407
417
|
db.exec(
|
|
408
|
-
|
|
418
|
+
hasEmbeddings
|
|
419
|
+
? `${deletions.replace('PRAGMA foreign_keys = ON;', '')} DELETE FROM embeddings; PRAGMA foreign_keys = ON;`
|
|
420
|
+
: deletions,
|
|
409
421
|
);
|
|
410
422
|
} else {
|
|
411
|
-
|
|
412
|
-
// Remove metrics/edges/nodes for changed and removed files
|
|
423
|
+
info(`Incremental: ${parseChanges.length} changed, ${removed.length} removed`);
|
|
424
|
+
// Remove embeddings/metrics/edges/nodes for changed and removed files
|
|
425
|
+
// Embeddings must be deleted BEFORE nodes (we need node IDs to find them)
|
|
426
|
+
const deleteEmbeddingsForFile = hasEmbeddings
|
|
427
|
+
? db.prepare('DELETE FROM embeddings WHERE node_id IN (SELECT id FROM nodes WHERE file = ?)')
|
|
428
|
+
: null;
|
|
413
429
|
const deleteNodesForFile = db.prepare('DELETE FROM nodes WHERE file = ?');
|
|
414
430
|
const deleteEdgesForFile = db.prepare(`
|
|
415
431
|
DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = @f)
|
|
@@ -419,12 +435,14 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
419
435
|
'DELETE FROM node_metrics WHERE node_id IN (SELECT id FROM nodes WHERE file = ?)',
|
|
420
436
|
);
|
|
421
437
|
for (const relPath of removed) {
|
|
438
|
+
deleteEmbeddingsForFile?.run(relPath);
|
|
422
439
|
deleteEdgesForFile.run({ f: relPath });
|
|
423
440
|
deleteMetricsForFile.run(relPath);
|
|
424
441
|
deleteNodesForFile.run(relPath);
|
|
425
442
|
}
|
|
426
443
|
for (const item of parseChanges) {
|
|
427
444
|
const relPath = item.relPath || normalizePath(path.relative(rootDir, item.file));
|
|
445
|
+
deleteEmbeddingsForFile?.run(relPath);
|
|
428
446
|
deleteEdgesForFile.run({ f: relPath });
|
|
429
447
|
deleteMetricsForFile.run(relPath);
|
|
430
448
|
deleteNodesForFile.run(relPath);
|
|
@@ -528,7 +546,7 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
528
546
|
|
|
529
547
|
const parsed = allSymbols.size;
|
|
530
548
|
const skipped = filesToParse.length - parsed;
|
|
531
|
-
|
|
549
|
+
info(`Parsed ${parsed} files (${skipped} skipped)`);
|
|
532
550
|
|
|
533
551
|
// Clean up removed file hashes
|
|
534
552
|
if (upsertHash && removed.length > 0) {
|
|
@@ -822,15 +840,33 @@ export async function buildGraph(rootDir, opts = {}) {
|
|
|
822
840
|
}
|
|
823
841
|
|
|
824
842
|
const nodeCount = db.prepare('SELECT COUNT(*) as c FROM nodes').get().c;
|
|
825
|
-
|
|
826
|
-
|
|
843
|
+
info(`Graph built: ${nodeCount} nodes, ${edgeCount} edges`);
|
|
844
|
+
info(`Stored in ${dbPath}`);
|
|
845
|
+
|
|
846
|
+
// Warn about orphaned embeddings that no longer match any node
|
|
847
|
+
if (hasEmbeddings) {
|
|
848
|
+
try {
|
|
849
|
+
const orphaned = db
|
|
850
|
+
.prepare('SELECT COUNT(*) as c FROM embeddings WHERE node_id NOT IN (SELECT id FROM nodes)')
|
|
851
|
+
.get().c;
|
|
852
|
+
if (orphaned > 0) {
|
|
853
|
+
warn(
|
|
854
|
+
`${orphaned} embeddings are orphaned (nodes changed). Run "codegraph embed" to refresh.`,
|
|
855
|
+
);
|
|
856
|
+
}
|
|
857
|
+
} catch {
|
|
858
|
+
/* ignore — embeddings table may have been dropped */
|
|
859
|
+
}
|
|
860
|
+
}
|
|
861
|
+
|
|
827
862
|
db.close();
|
|
828
863
|
|
|
829
864
|
// Write journal header after successful build
|
|
830
865
|
writeJournalHeader(rootDir, Date.now());
|
|
831
866
|
|
|
832
867
|
if (!opts.skipRegistry) {
|
|
833
|
-
const
|
|
868
|
+
const { tmpdir } = await import('node:os');
|
|
869
|
+
const tmpDir = path.resolve(tmpdir());
|
|
834
870
|
const resolvedRoot = path.resolve(rootDir);
|
|
835
871
|
if (resolvedRoot.startsWith(tmpDir)) {
|
|
836
872
|
debug(`Skipping auto-registration for temp directory: ${resolvedRoot}`);
|
package/src/cli.js
CHANGED
|
@@ -5,9 +5,10 @@ import path from 'node:path';
|
|
|
5
5
|
import Database from 'better-sqlite3';
|
|
6
6
|
import { Command } from 'commander';
|
|
7
7
|
import { buildGraph } from './builder.js';
|
|
8
|
+
import { loadConfig } from './config.js';
|
|
8
9
|
import { findCycles, formatCycles } from './cycles.js';
|
|
9
10
|
import { findDbPath } from './db.js';
|
|
10
|
-
import { buildEmbeddings, MODELS, search } from './embedder.js';
|
|
11
|
+
import { buildEmbeddings, EMBEDDING_STRATEGIES, MODELS, search } from './embedder.js';
|
|
11
12
|
import { exportDOT, exportJSON, exportMermaid } from './export.js';
|
|
12
13
|
import { setVerbose } from './logger.js';
|
|
13
14
|
import {
|
|
@@ -36,6 +37,8 @@ import { watchProject } from './watcher.js';
|
|
|
36
37
|
const __cliDir = path.dirname(new URL(import.meta.url).pathname.replace(/^\/([A-Z]:)/i, '$1'));
|
|
37
38
|
const pkg = JSON.parse(fs.readFileSync(path.join(__cliDir, '..', 'package.json'), 'utf-8'));
|
|
38
39
|
|
|
40
|
+
const config = loadConfig(process.cwd());
|
|
41
|
+
|
|
39
42
|
const program = new Command();
|
|
40
43
|
program
|
|
41
44
|
.name('codegraph')
|
|
@@ -48,6 +51,18 @@ program
|
|
|
48
51
|
if (opts.verbose) setVerbose(true);
|
|
49
52
|
});
|
|
50
53
|
|
|
54
|
+
/**
|
|
55
|
+
* Resolve the effective noTests value: CLI flag > config > false.
|
|
56
|
+
* Commander sets opts.tests to false when --no-tests is passed.
|
|
57
|
+
* When --include-tests is passed, always return false (include tests).
|
|
58
|
+
* Otherwise, fall back to config.query.excludeTests.
|
|
59
|
+
*/
|
|
60
|
+
function resolveNoTests(opts) {
|
|
61
|
+
if (opts.includeTests) return false;
|
|
62
|
+
if (opts.tests === false) return true;
|
|
63
|
+
return config.query?.excludeTests || false;
|
|
64
|
+
}
|
|
65
|
+
|
|
51
66
|
program
|
|
52
67
|
.command('build [dir]')
|
|
53
68
|
.description('Parse repo and build graph in .codegraph/graph.db')
|
|
@@ -63,9 +78,10 @@ program
|
|
|
63
78
|
.description('Find a function/class, show callers and callees')
|
|
64
79
|
.option('-d, --db <path>', 'Path to graph.db')
|
|
65
80
|
.option('-T, --no-tests', 'Exclude test/spec files from results')
|
|
81
|
+
.option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
|
|
66
82
|
.option('-j, --json', 'Output as JSON')
|
|
67
83
|
.action((name, opts) => {
|
|
68
|
-
queryName(name, opts.db, { noTests:
|
|
84
|
+
queryName(name, opts.db, { noTests: resolveNoTests(opts), json: opts.json });
|
|
69
85
|
});
|
|
70
86
|
|
|
71
87
|
program
|
|
@@ -73,9 +89,10 @@ program
|
|
|
73
89
|
.description('Show what depends on this file (transitive)')
|
|
74
90
|
.option('-d, --db <path>', 'Path to graph.db')
|
|
75
91
|
.option('-T, --no-tests', 'Exclude test/spec files from results')
|
|
92
|
+
.option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
|
|
76
93
|
.option('-j, --json', 'Output as JSON')
|
|
77
94
|
.action((file, opts) => {
|
|
78
|
-
impactAnalysis(file, opts.db, { noTests:
|
|
95
|
+
impactAnalysis(file, opts.db, { noTests: resolveNoTests(opts), json: opts.json });
|
|
79
96
|
});
|
|
80
97
|
|
|
81
98
|
program
|
|
@@ -84,9 +101,13 @@ program
|
|
|
84
101
|
.option('-d, --db <path>', 'Path to graph.db')
|
|
85
102
|
.option('-n, --limit <number>', 'Number of top nodes', '20')
|
|
86
103
|
.option('-T, --no-tests', 'Exclude test/spec files from results')
|
|
104
|
+
.option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
|
|
87
105
|
.option('-j, --json', 'Output as JSON')
|
|
88
106
|
.action((opts) => {
|
|
89
|
-
moduleMap(opts.db, parseInt(opts.limit, 10), {
|
|
107
|
+
moduleMap(opts.db, parseInt(opts.limit, 10), {
|
|
108
|
+
noTests: resolveNoTests(opts),
|
|
109
|
+
json: opts.json,
|
|
110
|
+
});
|
|
90
111
|
});
|
|
91
112
|
|
|
92
113
|
program
|
|
@@ -94,9 +115,10 @@ program
|
|
|
94
115
|
.description('Show graph health overview: nodes, edges, languages, cycles, hotspots, embeddings')
|
|
95
116
|
.option('-d, --db <path>', 'Path to graph.db')
|
|
96
117
|
.option('-T, --no-tests', 'Exclude test/spec files from results')
|
|
118
|
+
.option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
|
|
97
119
|
.option('-j, --json', 'Output as JSON')
|
|
98
120
|
.action((opts) => {
|
|
99
|
-
stats(opts.db, { noTests:
|
|
121
|
+
stats(opts.db, { noTests: resolveNoTests(opts), json: opts.json });
|
|
100
122
|
});
|
|
101
123
|
|
|
102
124
|
program
|
|
@@ -104,9 +126,10 @@ program
|
|
|
104
126
|
.description('Show what this file imports and what imports it')
|
|
105
127
|
.option('-d, --db <path>', 'Path to graph.db')
|
|
106
128
|
.option('-T, --no-tests', 'Exclude test/spec files from results')
|
|
129
|
+
.option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
|
|
107
130
|
.option('-j, --json', 'Output as JSON')
|
|
108
131
|
.action((file, opts) => {
|
|
109
|
-
fileDeps(file, opts.db, { noTests:
|
|
132
|
+
fileDeps(file, opts.db, { noTests: resolveNoTests(opts), json: opts.json });
|
|
110
133
|
});
|
|
111
134
|
|
|
112
135
|
program
|
|
@@ -117,6 +140,7 @@ program
|
|
|
117
140
|
.option('-f, --file <path>', 'Scope search to functions in this file (partial match)')
|
|
118
141
|
.option('-k, --kind <kind>', 'Filter to a specific symbol kind')
|
|
119
142
|
.option('-T, --no-tests', 'Exclude test/spec files from results')
|
|
143
|
+
.option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
|
|
120
144
|
.option('-j, --json', 'Output as JSON')
|
|
121
145
|
.action((name, opts) => {
|
|
122
146
|
if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) {
|
|
@@ -127,7 +151,7 @@ program
|
|
|
127
151
|
depth: parseInt(opts.depth, 10),
|
|
128
152
|
file: opts.file,
|
|
129
153
|
kind: opts.kind,
|
|
130
|
-
noTests:
|
|
154
|
+
noTests: resolveNoTests(opts),
|
|
131
155
|
json: opts.json,
|
|
132
156
|
});
|
|
133
157
|
});
|
|
@@ -140,6 +164,7 @@ program
|
|
|
140
164
|
.option('-f, --file <path>', 'Scope search to functions in this file (partial match)')
|
|
141
165
|
.option('-k, --kind <kind>', 'Filter to a specific symbol kind')
|
|
142
166
|
.option('-T, --no-tests', 'Exclude test/spec files from results')
|
|
167
|
+
.option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
|
|
143
168
|
.option('-j, --json', 'Output as JSON')
|
|
144
169
|
.action((name, opts) => {
|
|
145
170
|
if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) {
|
|
@@ -150,7 +175,7 @@ program
|
|
|
150
175
|
depth: parseInt(opts.depth, 10),
|
|
151
176
|
file: opts.file,
|
|
152
177
|
kind: opts.kind,
|
|
153
|
-
noTests:
|
|
178
|
+
noTests: resolveNoTests(opts),
|
|
154
179
|
json: opts.json,
|
|
155
180
|
});
|
|
156
181
|
});
|
|
@@ -163,8 +188,9 @@ program
|
|
|
163
188
|
.option('-f, --file <path>', 'Scope search to functions in this file (partial match)')
|
|
164
189
|
.option('-k, --kind <kind>', 'Filter to a specific symbol kind')
|
|
165
190
|
.option('--no-source', 'Metadata only (skip source extraction)')
|
|
166
|
-
.option('--
|
|
191
|
+
.option('--with-test-source', 'Include test source code')
|
|
167
192
|
.option('-T, --no-tests', 'Exclude test/spec files from results')
|
|
193
|
+
.option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
|
|
168
194
|
.option('-j, --json', 'Output as JSON')
|
|
169
195
|
.action((name, opts) => {
|
|
170
196
|
if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) {
|
|
@@ -176,8 +202,8 @@ program
|
|
|
176
202
|
file: opts.file,
|
|
177
203
|
kind: opts.kind,
|
|
178
204
|
noSource: !opts.source,
|
|
179
|
-
noTests:
|
|
180
|
-
includeTests: opts.
|
|
205
|
+
noTests: resolveNoTests(opts),
|
|
206
|
+
includeTests: opts.withTestSource,
|
|
181
207
|
json: opts.json,
|
|
182
208
|
});
|
|
183
209
|
});
|
|
@@ -186,10 +212,16 @@ program
|
|
|
186
212
|
.command('explain <target>')
|
|
187
213
|
.description('Structural summary of a file or function (no LLM needed)')
|
|
188
214
|
.option('-d, --db <path>', 'Path to graph.db')
|
|
215
|
+
.option('--depth <n>', 'Recursively explain dependencies up to N levels deep', '0')
|
|
189
216
|
.option('-T, --no-tests', 'Exclude test/spec files from results')
|
|
217
|
+
.option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
|
|
190
218
|
.option('-j, --json', 'Output as JSON')
|
|
191
219
|
.action((target, opts) => {
|
|
192
|
-
explain(target, opts.db, {
|
|
220
|
+
explain(target, opts.db, {
|
|
221
|
+
depth: parseInt(opts.depth, 10),
|
|
222
|
+
noTests: resolveNoTests(opts),
|
|
223
|
+
json: opts.json,
|
|
224
|
+
});
|
|
193
225
|
});
|
|
194
226
|
|
|
195
227
|
program
|
|
@@ -198,6 +230,7 @@ program
|
|
|
198
230
|
.option('-d, --db <path>', 'Path to graph.db')
|
|
199
231
|
.option('-f, --file <path>', 'File overview: list symbols, imports, exports')
|
|
200
232
|
.option('-T, --no-tests', 'Exclude test/spec files from results')
|
|
233
|
+
.option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
|
|
201
234
|
.option('-j, --json', 'Output as JSON')
|
|
202
235
|
.action((name, opts) => {
|
|
203
236
|
if (!name && !opts.file) {
|
|
@@ -205,7 +238,7 @@ program
|
|
|
205
238
|
process.exit(1);
|
|
206
239
|
}
|
|
207
240
|
const target = opts.file || name;
|
|
208
|
-
where(target, opts.db, { file: !!opts.file, noTests:
|
|
241
|
+
where(target, opts.db, { file: !!opts.file, noTests: resolveNoTests(opts), json: opts.json });
|
|
209
242
|
});
|
|
210
243
|
|
|
211
244
|
program
|
|
@@ -215,6 +248,7 @@ program
|
|
|
215
248
|
.option('--staged', 'Analyze staged changes instead of unstaged')
|
|
216
249
|
.option('--depth <n>', 'Max transitive caller depth', '3')
|
|
217
250
|
.option('-T, --no-tests', 'Exclude test/spec files from results')
|
|
251
|
+
.option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
|
|
218
252
|
.option('-j, --json', 'Output as JSON')
|
|
219
253
|
.option('-f, --format <format>', 'Output format: text, mermaid, json', 'text')
|
|
220
254
|
.action((ref, opts) => {
|
|
@@ -222,7 +256,7 @@ program
|
|
|
222
256
|
ref,
|
|
223
257
|
staged: opts.staged,
|
|
224
258
|
depth: parseInt(opts.depth, 10),
|
|
225
|
-
noTests:
|
|
259
|
+
noTests: resolveNoTests(opts),
|
|
226
260
|
json: opts.json,
|
|
227
261
|
format: opts.format,
|
|
228
262
|
});
|
|
@@ -237,10 +271,16 @@ program
|
|
|
237
271
|
.option('-f, --format <format>', 'Output format: dot, mermaid, json', 'dot')
|
|
238
272
|
.option('--functions', 'Function-level graph instead of file-level')
|
|
239
273
|
.option('-T, --no-tests', 'Exclude test/spec files')
|
|
274
|
+
.option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
|
|
275
|
+
.option('--min-confidence <score>', 'Minimum edge confidence threshold (default: 0.5)', '0.5')
|
|
240
276
|
.option('-o, --output <file>', 'Write to file instead of stdout')
|
|
241
277
|
.action((opts) => {
|
|
242
278
|
const db = new Database(findDbPath(opts.db), { readonly: true });
|
|
243
|
-
const exportOpts = {
|
|
279
|
+
const exportOpts = {
|
|
280
|
+
fileLevel: !opts.functions,
|
|
281
|
+
noTests: resolveNoTests(opts),
|
|
282
|
+
minConfidence: parseFloat(opts.minConfidence),
|
|
283
|
+
};
|
|
244
284
|
|
|
245
285
|
let output;
|
|
246
286
|
switch (opts.format) {
|
|
@@ -271,10 +311,11 @@ program
|
|
|
271
311
|
.option('-d, --db <path>', 'Path to graph.db')
|
|
272
312
|
.option('--functions', 'Function-level cycle detection')
|
|
273
313
|
.option('-T, --no-tests', 'Exclude test/spec files')
|
|
314
|
+
.option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
|
|
274
315
|
.option('-j, --json', 'Output as JSON')
|
|
275
316
|
.action((opts) => {
|
|
276
317
|
const db = new Database(findDbPath(opts.db), { readonly: true });
|
|
277
|
-
const cycles = findCycles(db, { fileLevel: !opts.functions, noTests:
|
|
318
|
+
const cycles = findCycles(db, { fileLevel: !opts.functions, noTests: resolveNoTests(opts) });
|
|
278
319
|
db.close();
|
|
279
320
|
|
|
280
321
|
if (opts.json) {
|
|
@@ -376,10 +417,13 @@ program
|
|
|
376
417
|
.action(() => {
|
|
377
418
|
console.log('\nAvailable embedding models:\n');
|
|
378
419
|
for (const [key, config] of Object.entries(MODELS)) {
|
|
379
|
-
const def = key === '
|
|
380
|
-
|
|
420
|
+
const def = key === 'minilm' ? ' (default)' : '';
|
|
421
|
+
const ctx = config.contextWindow ? `${config.contextWindow} ctx` : '';
|
|
422
|
+
console.log(
|
|
423
|
+
` ${key.padEnd(12)} ${String(config.dim).padStart(4)}d ${ctx.padEnd(9)} ${config.desc}${def}`,
|
|
424
|
+
);
|
|
381
425
|
}
|
|
382
|
-
console.log('\nUsage: codegraph embed --model <name>');
|
|
426
|
+
console.log('\nUsage: codegraph embed --model <name> --strategy <structured|source>');
|
|
383
427
|
console.log(' codegraph search "query" --model <name>\n');
|
|
384
428
|
});
|
|
385
429
|
|
|
@@ -390,12 +434,23 @@ program
|
|
|
390
434
|
)
|
|
391
435
|
.option(
|
|
392
436
|
'-m, --model <name>',
|
|
393
|
-
'Embedding model: minilm, jina-small, jina-base, jina-code, nomic, nomic-v1.5
|
|
394
|
-
'
|
|
437
|
+
'Embedding model: minilm (default), jina-small, jina-base, jina-code, nomic, nomic-v1.5, bge-large. Run `codegraph models` for details',
|
|
438
|
+
'minilm',
|
|
439
|
+
)
|
|
440
|
+
.option(
|
|
441
|
+
'-s, --strategy <name>',
|
|
442
|
+
`Embedding strategy: ${EMBEDDING_STRATEGIES.join(', ')}. "structured" uses graph context (callers/callees), "source" embeds raw code`,
|
|
443
|
+
'structured',
|
|
395
444
|
)
|
|
396
445
|
.action(async (dir, opts) => {
|
|
446
|
+
if (!EMBEDDING_STRATEGIES.includes(opts.strategy)) {
|
|
447
|
+
console.error(
|
|
448
|
+
`Unknown strategy: ${opts.strategy}. Available: ${EMBEDDING_STRATEGIES.join(', ')}`,
|
|
449
|
+
);
|
|
450
|
+
process.exit(1);
|
|
451
|
+
}
|
|
397
452
|
const root = path.resolve(dir || '.');
|
|
398
|
-
await buildEmbeddings(root, opts.model);
|
|
453
|
+
await buildEmbeddings(root, opts.model, undefined, { strategy: opts.strategy });
|
|
399
454
|
});
|
|
400
455
|
|
|
401
456
|
program
|
|
@@ -405,6 +460,7 @@ program
|
|
|
405
460
|
.option('-m, --model <name>', 'Override embedding model (auto-detects from DB)')
|
|
406
461
|
.option('-n, --limit <number>', 'Max results', '15')
|
|
407
462
|
.option('-T, --no-tests', 'Exclude test/spec files from results')
|
|
463
|
+
.option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
|
|
408
464
|
.option('--min-score <score>', 'Minimum similarity threshold', '0.2')
|
|
409
465
|
.option('-k, --kind <kind>', 'Filter by kind: function, method, class')
|
|
410
466
|
.option('--file <pattern>', 'Filter by file path pattern')
|
|
@@ -412,7 +468,7 @@ program
|
|
|
412
468
|
.action(async (query, opts) => {
|
|
413
469
|
await search(query, opts.db, {
|
|
414
470
|
limit: parseInt(opts.limit, 10),
|
|
415
|
-
noTests:
|
|
471
|
+
noTests: resolveNoTests(opts),
|
|
416
472
|
minScore: parseFloat(opts.minScore),
|
|
417
473
|
model: opts.model,
|
|
418
474
|
kind: opts.kind,
|
|
@@ -430,6 +486,7 @@ program
|
|
|
430
486
|
.option('--depth <n>', 'Max directory depth')
|
|
431
487
|
.option('--sort <metric>', 'Sort by: cohesion | fan-in | fan-out | density | files', 'files')
|
|
432
488
|
.option('-T, --no-tests', 'Exclude test/spec files')
|
|
489
|
+
.option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
|
|
433
490
|
.option('-j, --json', 'Output as JSON')
|
|
434
491
|
.action(async (dir, opts) => {
|
|
435
492
|
const { structureData, formatStructure } = await import('./structure.js');
|
|
@@ -437,7 +494,7 @@ program
|
|
|
437
494
|
directory: dir,
|
|
438
495
|
depth: opts.depth ? parseInt(opts.depth, 10) : undefined,
|
|
439
496
|
sort: opts.sort,
|
|
440
|
-
noTests:
|
|
497
|
+
noTests: resolveNoTests(opts),
|
|
441
498
|
});
|
|
442
499
|
if (opts.json) {
|
|
443
500
|
console.log(JSON.stringify(data, null, 2));
|
|
@@ -456,6 +513,7 @@ program
|
|
|
456
513
|
.option('--metric <metric>', 'fan-in | fan-out | density | coupling', 'fan-in')
|
|
457
514
|
.option('--level <level>', 'file | directory', 'file')
|
|
458
515
|
.option('-T, --no-tests', 'Exclude test/spec files from results')
|
|
516
|
+
.option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
|
|
459
517
|
.option('-j, --json', 'Output as JSON')
|
|
460
518
|
.action(async (opts) => {
|
|
461
519
|
const { hotspotsData, formatHotspots } = await import('./structure.js');
|
|
@@ -463,7 +521,7 @@ program
|
|
|
463
521
|
metric: opts.metric,
|
|
464
522
|
level: opts.level,
|
|
465
523
|
limit: parseInt(opts.limit, 10),
|
|
466
|
-
noTests:
|
|
524
|
+
noTests: resolveNoTests(opts),
|
|
467
525
|
});
|
|
468
526
|
if (opts.json) {
|
|
469
527
|
console.log(JSON.stringify(data, null, 2));
|
package/src/config.js
CHANGED
package/src/embedder.js
CHANGED
|
@@ -4,6 +4,18 @@ import Database from 'better-sqlite3';
|
|
|
4
4
|
import { findDbPath, openReadonlyOrFail } from './db.js';
|
|
5
5
|
import { warn } from './logger.js';
|
|
6
6
|
|
|
7
|
+
/**
|
|
8
|
+
* Split an identifier into readable words.
|
|
9
|
+
* camelCase/PascalCase → "camel Case", snake_case → "snake case", kebab-case → "kebab case"
|
|
10
|
+
*/
|
|
11
|
+
function splitIdentifier(name) {
|
|
12
|
+
return name
|
|
13
|
+
.replace(/([a-z])([A-Z])/g, '$1 $2')
|
|
14
|
+
.replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2')
|
|
15
|
+
.replace(/[_-]+/g, ' ')
|
|
16
|
+
.trim();
|
|
17
|
+
}
|
|
18
|
+
|
|
7
19
|
// Lazy-load transformers (heavy, optional module)
|
|
8
20
|
let pipeline = null;
|
|
9
21
|
let _cos_sim = null;
|
|
@@ -14,48 +26,57 @@ export const MODELS = {
|
|
|
14
26
|
minilm: {
|
|
15
27
|
name: 'Xenova/all-MiniLM-L6-v2',
|
|
16
28
|
dim: 384,
|
|
29
|
+
contextWindow: 256,
|
|
17
30
|
desc: 'Smallest, fastest (~23MB). General text.',
|
|
18
31
|
quantized: true,
|
|
19
32
|
},
|
|
20
33
|
'jina-small': {
|
|
21
34
|
name: 'Xenova/jina-embeddings-v2-small-en',
|
|
22
35
|
dim: 512,
|
|
36
|
+
contextWindow: 8192,
|
|
23
37
|
desc: 'Small, good quality (~33MB). General text.',
|
|
24
38
|
quantized: false,
|
|
25
39
|
},
|
|
26
40
|
'jina-base': {
|
|
27
41
|
name: 'Xenova/jina-embeddings-v2-base-en',
|
|
28
42
|
dim: 768,
|
|
43
|
+
contextWindow: 8192,
|
|
29
44
|
desc: 'Good quality (~137MB). General text, 8192 token context.',
|
|
30
45
|
quantized: false,
|
|
31
46
|
},
|
|
32
47
|
'jina-code': {
|
|
33
48
|
name: 'Xenova/jina-embeddings-v2-base-code',
|
|
34
49
|
dim: 768,
|
|
50
|
+
contextWindow: 8192,
|
|
35
51
|
desc: 'Code-aware (~137MB). Trained on code+text, best for code search.',
|
|
36
52
|
quantized: false,
|
|
37
53
|
},
|
|
38
54
|
nomic: {
|
|
39
55
|
name: 'Xenova/nomic-embed-text-v1',
|
|
40
56
|
dim: 768,
|
|
57
|
+
contextWindow: 8192,
|
|
41
58
|
desc: 'Good local quality (~137MB). 8192 context.',
|
|
42
59
|
quantized: false,
|
|
43
60
|
},
|
|
44
61
|
'nomic-v1.5': {
|
|
45
62
|
name: 'nomic-ai/nomic-embed-text-v1.5',
|
|
46
63
|
dim: 768,
|
|
64
|
+
contextWindow: 8192,
|
|
47
65
|
desc: 'Improved nomic (~137MB). Matryoshka dimensions, 8192 context.',
|
|
48
66
|
quantized: false,
|
|
49
67
|
},
|
|
50
68
|
'bge-large': {
|
|
51
69
|
name: 'Xenova/bge-large-en-v1.5',
|
|
52
70
|
dim: 1024,
|
|
71
|
+
contextWindow: 512,
|
|
53
72
|
desc: 'Best general retrieval (~335MB). Top MTEB scores.',
|
|
54
73
|
quantized: false,
|
|
55
74
|
},
|
|
56
75
|
};
|
|
57
76
|
|
|
58
|
-
export const
|
|
77
|
+
export const EMBEDDING_STRATEGIES = ['structured', 'source'];
|
|
78
|
+
|
|
79
|
+
export const DEFAULT_MODEL = 'minilm';
|
|
59
80
|
const BATCH_SIZE_MAP = {
|
|
60
81
|
minilm: 32,
|
|
61
82
|
'jina-small': 16,
|
|
@@ -77,6 +98,108 @@ function getModelConfig(modelKey) {
|
|
|
77
98
|
return config;
|
|
78
99
|
}
|
|
79
100
|
|
|
101
|
+
/**
|
|
102
|
+
* Rough token estimate (~4 chars per token for code/English).
|
|
103
|
+
* Conservative — avoids adding a tokenizer dependency.
|
|
104
|
+
*/
|
|
105
|
+
export function estimateTokens(text) {
|
|
106
|
+
return Math.ceil(text.length / 4);
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
/**
|
|
110
|
+
* Extract leading comment text (JSDoc, //, #, etc.) above a function line.
|
|
111
|
+
* Returns the cleaned comment text or null if none found.
|
|
112
|
+
*/
|
|
113
|
+
function extractLeadingComment(lines, fnLineIndex) {
|
|
114
|
+
const raw = [];
|
|
115
|
+
for (let i = fnLineIndex - 1; i >= Math.max(0, fnLineIndex - 15); i--) {
|
|
116
|
+
const trimmed = lines[i].trim();
|
|
117
|
+
if (/^(\/\/|\/\*|\*\/|\*|#|\/\/\/)/.test(trimmed)) {
|
|
118
|
+
raw.unshift(trimmed);
|
|
119
|
+
} else if (trimmed === '') {
|
|
120
|
+
if (raw.length > 0) break;
|
|
121
|
+
} else {
|
|
122
|
+
break;
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
if (raw.length === 0) return null;
|
|
126
|
+
return raw
|
|
127
|
+
.map((line) =>
|
|
128
|
+
line
|
|
129
|
+
.replace(/^\/\*\*?\s?|\*\/$/g, '') // opening /** or /* and closing */
|
|
130
|
+
.replace(/^\*\s?/, '') // middle * lines
|
|
131
|
+
.replace(/^\/\/\/?\s?/, '') // // or ///
|
|
132
|
+
.replace(/^#\s?/, '') // # (Python/Ruby)
|
|
133
|
+
.trim(),
|
|
134
|
+
)
|
|
135
|
+
.filter((l) => l.length > 0)
|
|
136
|
+
.join(' ');
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
/**
 * Compose the compact, graph-enriched description used to embed a symbol.
 *
 * The text consists of newline-separated sections, in this order:
 *   1. a header with kind, name, split identifier and file;
 *   2. the parameter list taken from the first parenthesised group on the
 *      declaration line (best-effort, single line only);
 *   3. up to 10 callee names and up to 10 caller names;
 *   4. the leading comment, or the first four source lines when there is none.
 *
 * @param {object} node - Symbol row; reads `id`, `kind`, `name` and `line`.
 * @param {string} file - File path shown in the header.
 * @param {string[]} lines - File content split into lines.
 * @param {object} calleesStmt - Statement whose `all(id)` returns rows with a `name`.
 * @param {object} callersStmt - Statement whose `all(id)` returns rows with a `name`.
 * @returns {string} The assembled text.
 */
function buildStructuredText(node, file, lines, calleesStmt, callersStmt) {
  const sections = [`${node.kind} ${node.name} (${splitIdentifier(node.name)}) in ${file}`];
  const declIndex = Math.max(0, node.line - 1);

  // Parameters: whatever sits inside the first "(...)" on the declaration line.
  const signature = lines[declIndex] || '';
  const params = signature.match(/\(([^)]*)\)/)?.[1]?.trim();
  if (params) {
    sections.push(`Parameters: ${params}`);
  }

  // Graph context: callees first, then callers, each capped at 10 names.
  const neighbours = [
    ['Calls', calleesStmt],
    ['Called by', callersStmt],
  ];
  for (const [label, stmt] of neighbours) {
    const rows = stmt.all(node.id);
    if (rows.length === 0) continue;
    const names = rows.slice(0, 10).map((row) => row.name);
    sections.push(`${label}: ${names.join(', ')}`);
  }

  // Prefer the leading comment; otherwise fall back to the first lines of code.
  const docText = extractLeadingComment(lines, declIndex);
  if (docText) {
    sections.push(docText);
    return sections.join('\n');
  }

  const snippetEnd = Math.min(lines.length, declIndex + 4);
  const snippet = lines.slice(declIndex, snippetEnd).join('\n').trim();
  if (snippet) {
    sections.push(snippet);
  }
  return sections.join('\n');
}
|
|
189
|
+
|
|
190
|
+
/**
 * Compose the raw-source embedding text for a symbol (the original strategy).
 *
 * Produces a header line (kind, name, split identifier, file) followed by the
 * symbol's source: from its declaration line through `end_line` when that is
 * set, otherwise the next 15 lines, never reading past the end of the file.
 *
 * @param {object} node - Symbol row; reads `kind`, `name`, `line` and `end_line`.
 * @param {string} file - File path shown in the header.
 * @param {string[]} lines - File content split into lines.
 * @returns {string} Header and source joined by a newline.
 */
function buildSourceText(node, file, lines) {
  const firstLine = Math.max(0, node.line - 1);

  // Without a recorded end line, fall back to a fixed 15-line window.
  let lastLine = firstLine + 15;
  if (node.end_line) {
    lastLine = node.end_line;
  }
  lastLine = Math.min(lines.length, lastLine);

  const body = lines.slice(firstLine, lastLine).join('\n');
  const header = `${node.kind} ${node.name} (${splitIdentifier(node.name)}) in ${file}`;
  return `${header}\n${body}`;
}
|
|
202
|
+
|
|
80
203
|
/**
|
|
81
204
|
* Lazy-load @huggingface/transformers.
|
|
82
205
|
* This is an optional dependency — gives a clear error if not installed.
|
|
@@ -103,8 +226,27 @@ async function loadModel(modelKey) {
|
|
|
103
226
|
_cos_sim = transformers.cos_sim;
|
|
104
227
|
|
|
105
228
|
console.log(`Loading embedding model: ${config.name} (${config.dim}d)...`);
|
|
106
|
-
const
|
|
107
|
-
|
|
229
|
+
const pipelineOpts = config.quantized ? { quantized: true } : {};
|
|
230
|
+
try {
|
|
231
|
+
extractor = await pipeline('feature-extraction', config.name, pipelineOpts);
|
|
232
|
+
} catch (err) {
|
|
233
|
+
const msg = err.message || String(err);
|
|
234
|
+
if (msg.includes('Unauthorized') || msg.includes('401') || msg.includes('gated')) {
|
|
235
|
+
console.error(
|
|
236
|
+
`\nModel "${config.name}" requires authentication.\n` +
|
|
237
|
+
`This model is gated on HuggingFace and needs an access token.\n\n` +
|
|
238
|
+
`Options:\n` +
|
|
239
|
+
` 1. Set HF_TOKEN env var: export HF_TOKEN=hf_...\n` +
|
|
240
|
+
` 2. Use a public model instead: codegraph embed --model minilm\n`,
|
|
241
|
+
);
|
|
242
|
+
} else {
|
|
243
|
+
console.error(
|
|
244
|
+
`\nFailed to load model "${config.name}": ${msg}\n` +
|
|
245
|
+
`Try a different model: codegraph embed --model minilm\n`,
|
|
246
|
+
);
|
|
247
|
+
}
|
|
248
|
+
process.exit(1);
|
|
249
|
+
}
|
|
108
250
|
activeModel = config.name;
|
|
109
251
|
console.log('Model loaded.');
|
|
110
252
|
return { extractor, config };
|
|
@@ -172,10 +314,14 @@ function initEmbeddingsSchema(db) {
|
|
|
172
314
|
|
|
173
315
|
/**
|
|
174
316
|
* Build embeddings for all functions/methods/classes in the graph.
|
|
317
|
+
* @param {string} rootDir - Project root directory
|
|
318
|
+
* @param {string} modelKey - Model identifier from MODELS registry
|
|
319
|
+
* @param {string} [customDbPath] - Override path to graph.db
|
|
320
|
+
* @param {object} [options] - Embedding options
|
|
321
|
+
* @param {string} [options.strategy='structured'] - 'structured' (graph-enriched) or 'source' (raw code)
|
|
175
322
|
*/
|
|
176
|
-
export async function buildEmbeddings(rootDir, modelKey, customDbPath) {
|
|
177
|
-
|
|
178
|
-
// fs already imported at top
|
|
323
|
+
export async function buildEmbeddings(rootDir, modelKey, customDbPath, options = {}) {
|
|
324
|
+
const strategy = options.strategy || 'structured';
|
|
179
325
|
const dbPath = customDbPath || findDbPath(null);
|
|
180
326
|
|
|
181
327
|
const db = new Database(dbPath);
|
|
@@ -190,7 +336,24 @@ export async function buildEmbeddings(rootDir, modelKey, customDbPath) {
|
|
|
190
336
|
)
|
|
191
337
|
.all();
|
|
192
338
|
|
|
193
|
-
console.log(`Building embeddings for ${nodes.length} symbols...`);
|
|
339
|
+
console.log(`Building embeddings for ${nodes.length} symbols (strategy: ${strategy})...`);
|
|
340
|
+
|
|
341
|
+
// Prepare graph-context queries for structured strategy
|
|
342
|
+
let calleesStmt, callersStmt;
|
|
343
|
+
if (strategy === 'structured') {
|
|
344
|
+
calleesStmt = db.prepare(`
|
|
345
|
+
SELECT DISTINCT n.name FROM edges e
|
|
346
|
+
JOIN nodes n ON e.target_id = n.id
|
|
347
|
+
WHERE e.source_id = ? AND e.kind = 'calls'
|
|
348
|
+
ORDER BY n.name
|
|
349
|
+
`);
|
|
350
|
+
callersStmt = db.prepare(`
|
|
351
|
+
SELECT DISTINCT n.name FROM edges e
|
|
352
|
+
JOIN nodes n ON e.source_id = n.id
|
|
353
|
+
WHERE e.target_id = ? AND e.kind = 'calls'
|
|
354
|
+
ORDER BY n.name
|
|
355
|
+
`);
|
|
356
|
+
}
|
|
194
357
|
|
|
195
358
|
const byFile = new Map();
|
|
196
359
|
for (const node of nodes) {
|
|
@@ -201,6 +364,9 @@ export async function buildEmbeddings(rootDir, modelKey, customDbPath) {
|
|
|
201
364
|
const texts = [];
|
|
202
365
|
const nodeIds = [];
|
|
203
366
|
const previews = [];
|
|
367
|
+
const config = getModelConfig(modelKey);
|
|
368
|
+
const contextWindow = config.contextWindow;
|
|
369
|
+
let overflowCount = 0;
|
|
204
370
|
|
|
205
371
|
for (const [file, fileNodes] of byFile) {
|
|
206
372
|
const fullPath = path.join(rootDir, file);
|
|
@@ -213,19 +379,31 @@ export async function buildEmbeddings(rootDir, modelKey, customDbPath) {
|
|
|
213
379
|
}
|
|
214
380
|
|
|
215
381
|
for (const node of fileNodes) {
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
382
|
+
let text =
|
|
383
|
+
strategy === 'structured'
|
|
384
|
+
? buildStructuredText(node, file, lines, calleesStmt, callersStmt)
|
|
385
|
+
: buildSourceText(node, file, lines);
|
|
386
|
+
|
|
387
|
+
// Detect and handle context window overflow
|
|
388
|
+
const tokens = estimateTokens(text);
|
|
389
|
+
if (tokens > contextWindow) {
|
|
390
|
+
overflowCount++;
|
|
391
|
+
const maxChars = contextWindow * 4;
|
|
392
|
+
text = text.slice(0, maxChars);
|
|
393
|
+
}
|
|
221
394
|
|
|
222
|
-
const text = `${node.kind} ${node.name} in ${file}\n${context}`;
|
|
223
395
|
texts.push(text);
|
|
224
396
|
nodeIds.push(node.id);
|
|
225
397
|
previews.push(`${node.name} (${node.kind}) -- ${file}:${node.line}`);
|
|
226
398
|
}
|
|
227
399
|
}
|
|
228
400
|
|
|
401
|
+
if (overflowCount > 0) {
|
|
402
|
+
warn(
|
|
403
|
+
`${overflowCount} symbol(s) exceeded model context window (${contextWindow} tokens) and were truncated`,
|
|
404
|
+
);
|
|
405
|
+
}
|
|
406
|
+
|
|
229
407
|
console.log(`Embedding ${texts.length} symbols...`);
|
|
230
408
|
const { vectors, dim } = await embed(texts, modelKey);
|
|
231
409
|
|
|
@@ -237,16 +415,19 @@ export async function buildEmbeddings(rootDir, modelKey, customDbPath) {
|
|
|
237
415
|
for (let i = 0; i < vectors.length; i++) {
|
|
238
416
|
insert.run(nodeIds[i], Buffer.from(vectors[i].buffer), previews[i]);
|
|
239
417
|
}
|
|
240
|
-
const config = getModelConfig(modelKey);
|
|
241
418
|
insertMeta.run('model', config.name);
|
|
242
419
|
insertMeta.run('dim', String(dim));
|
|
243
420
|
insertMeta.run('count', String(vectors.length));
|
|
421
|
+
insertMeta.run('strategy', strategy);
|
|
244
422
|
insertMeta.run('built_at', new Date().toISOString());
|
|
423
|
+
if (overflowCount > 0) {
|
|
424
|
+
insertMeta.run('truncated_count', String(overflowCount));
|
|
425
|
+
}
|
|
245
426
|
});
|
|
246
427
|
insertAll();
|
|
247
428
|
|
|
248
429
|
console.log(
|
|
249
|
-
`\nStored ${vectors.length} embeddings (${dim}d, ${
|
|
430
|
+
`\nStored ${vectors.length} embeddings (${dim}d, ${config.name}, strategy: ${strategy}) in graph.db`,
|
|
250
431
|
);
|
|
251
432
|
db.close();
|
|
252
433
|
}
|
package/src/export.js
CHANGED
|
@@ -1,12 +1,15 @@
|
|
|
1
1
|
import path from 'node:path';
|
|
2
2
|
import { isTestFile } from './queries.js';
|
|
3
3
|
|
|
4
|
+
const DEFAULT_MIN_CONFIDENCE = 0.5;
|
|
5
|
+
|
|
4
6
|
/**
|
|
5
7
|
* Export the dependency graph in DOT (Graphviz) format.
|
|
6
8
|
*/
|
|
7
9
|
export function exportDOT(db, opts = {}) {
|
|
8
10
|
const fileLevel = opts.fileLevel !== false;
|
|
9
11
|
const noTests = opts.noTests || false;
|
|
12
|
+
const minConf = opts.minConfidence ?? DEFAULT_MIN_CONFIDENCE;
|
|
10
13
|
const lines = [
|
|
11
14
|
'digraph codegraph {',
|
|
12
15
|
' rankdir=LR;',
|
|
@@ -23,8 +26,9 @@ export function exportDOT(db, opts = {}) {
|
|
|
23
26
|
JOIN nodes n1 ON e.source_id = n1.id
|
|
24
27
|
JOIN nodes n2 ON e.target_id = n2.id
|
|
25
28
|
WHERE n1.file != n2.file AND e.kind IN ('imports', 'imports-type', 'calls')
|
|
29
|
+
AND e.confidence >= ?
|
|
26
30
|
`)
|
|
27
|
-
.all();
|
|
31
|
+
.all(minConf);
|
|
28
32
|
if (noTests) edges = edges.filter((e) => !isTestFile(e.source) && !isTestFile(e.target));
|
|
29
33
|
|
|
30
34
|
// Try to use directory nodes from DB (built by structure analysis)
|
|
@@ -102,8 +106,9 @@ export function exportDOT(db, opts = {}) {
|
|
|
102
106
|
JOIN nodes n2 ON e.target_id = n2.id
|
|
103
107
|
WHERE n1.kind IN ('function', 'method', 'class', 'interface', 'type', 'struct', 'enum', 'trait', 'record', 'module') AND n2.kind IN ('function', 'method', 'class', 'interface', 'type', 'struct', 'enum', 'trait', 'record', 'module')
|
|
104
108
|
AND e.kind = 'calls'
|
|
109
|
+
AND e.confidence >= ?
|
|
105
110
|
`)
|
|
106
|
-
.all();
|
|
111
|
+
.all(minConf);
|
|
107
112
|
if (noTests)
|
|
108
113
|
edges = edges.filter((e) => !isTestFile(e.source_file) && !isTestFile(e.target_file));
|
|
109
114
|
|
|
@@ -126,6 +131,7 @@ export function exportDOT(db, opts = {}) {
|
|
|
126
131
|
export function exportMermaid(db, opts = {}) {
|
|
127
132
|
const fileLevel = opts.fileLevel !== false;
|
|
128
133
|
const noTests = opts.noTests || false;
|
|
134
|
+
const minConf = opts.minConfidence ?? DEFAULT_MIN_CONFIDENCE;
|
|
129
135
|
const lines = ['graph LR'];
|
|
130
136
|
|
|
131
137
|
if (fileLevel) {
|
|
@@ -136,8 +142,9 @@ export function exportMermaid(db, opts = {}) {
|
|
|
136
142
|
JOIN nodes n1 ON e.source_id = n1.id
|
|
137
143
|
JOIN nodes n2 ON e.target_id = n2.id
|
|
138
144
|
WHERE n1.file != n2.file AND e.kind IN ('imports', 'imports-type', 'calls')
|
|
145
|
+
AND e.confidence >= ?
|
|
139
146
|
`)
|
|
140
|
-
.all();
|
|
147
|
+
.all(minConf);
|
|
141
148
|
if (noTests) edges = edges.filter((e) => !isTestFile(e.source) && !isTestFile(e.target));
|
|
142
149
|
|
|
143
150
|
for (const { source, target } of edges) {
|
|
@@ -155,8 +162,9 @@ export function exportMermaid(db, opts = {}) {
|
|
|
155
162
|
JOIN nodes n2 ON e.target_id = n2.id
|
|
156
163
|
WHERE n1.kind IN ('function', 'method', 'class', 'interface', 'type', 'struct', 'enum', 'trait', 'record', 'module') AND n2.kind IN ('function', 'method', 'class', 'interface', 'type', 'struct', 'enum', 'trait', 'record', 'module')
|
|
157
164
|
AND e.kind = 'calls'
|
|
165
|
+
AND e.confidence >= ?
|
|
158
166
|
`)
|
|
159
|
-
.all();
|
|
167
|
+
.all(minConf);
|
|
160
168
|
if (noTests)
|
|
161
169
|
edges = edges.filter((e) => !isTestFile(e.source_file) && !isTestFile(e.target_file));
|
|
162
170
|
|
|
@@ -175,6 +183,7 @@ export function exportMermaid(db, opts = {}) {
|
|
|
175
183
|
*/
|
|
176
184
|
export function exportJSON(db, opts = {}) {
|
|
177
185
|
const noTests = opts.noTests || false;
|
|
186
|
+
const minConf = opts.minConfidence ?? DEFAULT_MIN_CONFIDENCE;
|
|
178
187
|
|
|
179
188
|
let nodes = db
|
|
180
189
|
.prepare(`
|
|
@@ -185,13 +194,13 @@ export function exportJSON(db, opts = {}) {
|
|
|
185
194
|
|
|
186
195
|
let edges = db
|
|
187
196
|
.prepare(`
|
|
188
|
-
SELECT DISTINCT n1.file AS source, n2.file AS target, e.kind
|
|
197
|
+
SELECT DISTINCT n1.file AS source, n2.file AS target, e.kind, e.confidence
|
|
189
198
|
FROM edges e
|
|
190
199
|
JOIN nodes n1 ON e.source_id = n1.id
|
|
191
200
|
JOIN nodes n2 ON e.target_id = n2.id
|
|
192
|
-
WHERE n1.file != n2.file
|
|
201
|
+
WHERE n1.file != n2.file AND e.confidence >= ?
|
|
193
202
|
`)
|
|
194
|
-
.all();
|
|
203
|
+
.all(minConf);
|
|
195
204
|
if (noTests) edges = edges.filter((e) => !isTestFile(e.source) && !isTestFile(e.target));
|
|
196
205
|
|
|
197
206
|
return { nodes, edges };
|
package/src/index.js
CHANGED
package/src/queries.js
CHANGED
|
@@ -334,6 +334,7 @@ export function moduleMapData(customDbPath, limit = 20, opts = {}) {
|
|
|
334
334
|
dir: path.dirname(n.file) || '.',
|
|
335
335
|
inEdges: n.in_edges,
|
|
336
336
|
outEdges: n.out_edges,
|
|
337
|
+
coupling: n.in_edges + n.out_edges,
|
|
337
338
|
}));
|
|
338
339
|
|
|
339
340
|
const totalNodes = db.prepare('SELECT COUNT(*) as c FROM nodes').get().c;
|
|
@@ -1263,10 +1264,10 @@ export function moduleMap(customDbPath, limit = 20, opts = {}) {
|
|
|
1263
1264
|
for (const [dir, files] of [...dirs].sort()) {
|
|
1264
1265
|
console.log(` [${dir}/]`);
|
|
1265
1266
|
for (const f of files) {
|
|
1266
|
-
const
|
|
1267
|
-
const bar = '#'.repeat(Math.min(
|
|
1267
|
+
const coupling = f.inEdges + f.outEdges;
|
|
1268
|
+
const bar = '#'.repeat(Math.min(coupling, 40));
|
|
1268
1269
|
console.log(
|
|
1269
|
-
` ${path.basename(f.file).padEnd(35)} <-${String(f.inEdges).padStart(3)} ->${String(f.outEdges).padStart(3)} ${bar}`,
|
|
1270
|
+
` ${path.basename(f.file).padEnd(35)} <-${String(f.inEdges).padStart(3)} ->${String(f.outEdges).padStart(3)} =${String(coupling).padStart(3)} ${bar}`,
|
|
1270
1271
|
);
|
|
1271
1272
|
}
|
|
1272
1273
|
}
|
|
@@ -1920,6 +1921,7 @@ function explainFunctionImpl(db, target, noTests, getFileLines) {
|
|
|
1920
1921
|
export function explainData(target, customDbPath, opts = {}) {
|
|
1921
1922
|
const db = openReadonlyOrFail(customDbPath);
|
|
1922
1923
|
const noTests = opts.noTests || false;
|
|
1924
|
+
const depth = opts.depth || 0;
|
|
1923
1925
|
const kind = isFileLikeTarget(target) ? 'file' : 'function';
|
|
1924
1926
|
|
|
1925
1927
|
const dbPath = findDbPath(customDbPath);
|
|
@@ -1949,6 +1951,37 @@ export function explainData(target, customDbPath, opts = {}) {
|
|
|
1949
1951
|
? explainFileImpl(db, target, getFileLines)
|
|
1950
1952
|
: explainFunctionImpl(db, target, noTests, getFileLines);
|
|
1951
1953
|
|
|
1954
|
+
// Recursive dependency explanation for function targets
|
|
1955
|
+
if (kind === 'function' && depth > 0 && results.length > 0) {
|
|
1956
|
+
const visited = new Set(results.map((r) => `${r.name}:${r.file}:${r.line}`));
|
|
1957
|
+
|
|
1958
|
+
function explainCallees(parentResults, currentDepth) {
|
|
1959
|
+
if (currentDepth <= 0) return;
|
|
1960
|
+
for (const r of parentResults) {
|
|
1961
|
+
const newCallees = [];
|
|
1962
|
+
for (const callee of r.callees) {
|
|
1963
|
+
const key = `${callee.name}:${callee.file}:${callee.line}`;
|
|
1964
|
+
if (visited.has(key)) continue;
|
|
1965
|
+
visited.add(key);
|
|
1966
|
+
const calleeResults = explainFunctionImpl(db, callee.name, noTests, getFileLines);
|
|
1967
|
+
const exact = calleeResults.find(
|
|
1968
|
+
(cr) => cr.file === callee.file && cr.line === callee.line,
|
|
1969
|
+
);
|
|
1970
|
+
if (exact) {
|
|
1971
|
+
exact._depth = (r._depth || 0) + 1;
|
|
1972
|
+
newCallees.push(exact);
|
|
1973
|
+
}
|
|
1974
|
+
}
|
|
1975
|
+
if (newCallees.length > 0) {
|
|
1976
|
+
r.depDetails = newCallees;
|
|
1977
|
+
explainCallees(newCallees, currentDepth - 1);
|
|
1978
|
+
}
|
|
1979
|
+
}
|
|
1980
|
+
}
|
|
1981
|
+
|
|
1982
|
+
explainCallees(results, depth);
|
|
1983
|
+
}
|
|
1984
|
+
|
|
1952
1985
|
db.close();
|
|
1953
1986
|
return { target, kind, results };
|
|
1954
1987
|
}
|
|
@@ -2008,46 +2041,63 @@ export function explain(target, customDbPath, opts = {}) {
|
|
|
2008
2041
|
console.log();
|
|
2009
2042
|
}
|
|
2010
2043
|
} else {
|
|
2011
|
-
|
|
2044
|
+
function printFunctionExplain(r, indent = '') {
|
|
2012
2045
|
const lineRange = r.endLine ? `${r.line}-${r.endLine}` : `${r.line}`;
|
|
2013
2046
|
const lineInfo = r.lineCount ? `${r.lineCount} lines` : '';
|
|
2014
2047
|
const summaryPart = r.summary ? ` | ${r.summary}` : '';
|
|
2015
|
-
|
|
2048
|
+
const depthLevel = r._depth || 0;
|
|
2049
|
+
const heading = depthLevel === 0 ? '#' : '##'.padEnd(depthLevel + 2, '#');
|
|
2050
|
+
console.log(`\n${indent}${heading} ${r.name} (${r.kind}) ${r.file}:${lineRange}`);
|
|
2016
2051
|
if (lineInfo || r.summary) {
|
|
2017
|
-
console.log(
|
|
2052
|
+
console.log(`${indent} ${lineInfo}${summaryPart}`);
|
|
2018
2053
|
}
|
|
2019
2054
|
if (r.signature) {
|
|
2020
|
-
if (r.signature.params != null)
|
|
2021
|
-
|
|
2055
|
+
if (r.signature.params != null)
|
|
2056
|
+
console.log(`${indent} Parameters: (${r.signature.params})`);
|
|
2057
|
+
if (r.signature.returnType) console.log(`${indent} Returns: ${r.signature.returnType}`);
|
|
2022
2058
|
}
|
|
2023
2059
|
|
|
2024
2060
|
if (r.callees.length > 0) {
|
|
2025
|
-
console.log(`\n
|
|
2061
|
+
console.log(`\n${indent} Calls (${r.callees.length}):`);
|
|
2026
2062
|
for (const c of r.callees) {
|
|
2027
|
-
console.log(
|
|
2063
|
+
console.log(`${indent} ${kindIcon(c.kind)} ${c.name} ${c.file}:${c.line}`);
|
|
2028
2064
|
}
|
|
2029
2065
|
}
|
|
2030
2066
|
|
|
2031
2067
|
if (r.callers.length > 0) {
|
|
2032
|
-
console.log(`\n
|
|
2068
|
+
console.log(`\n${indent} Called by (${r.callers.length}):`);
|
|
2033
2069
|
for (const c of r.callers) {
|
|
2034
|
-
console.log(
|
|
2070
|
+
console.log(`${indent} ${kindIcon(c.kind)} ${c.name} ${c.file}:${c.line}`);
|
|
2035
2071
|
}
|
|
2036
2072
|
}
|
|
2037
2073
|
|
|
2038
2074
|
if (r.relatedTests.length > 0) {
|
|
2039
2075
|
const label = r.relatedTests.length === 1 ? 'file' : 'files';
|
|
2040
|
-
console.log(`\n
|
|
2076
|
+
console.log(`\n${indent} Tests (${r.relatedTests.length} ${label}):`);
|
|
2041
2077
|
for (const t of r.relatedTests) {
|
|
2042
|
-
console.log(
|
|
2078
|
+
console.log(`${indent} ${t.file}`);
|
|
2043
2079
|
}
|
|
2044
2080
|
}
|
|
2045
2081
|
|
|
2046
2082
|
if (r.callees.length === 0 && r.callers.length === 0) {
|
|
2047
|
-
console.log(
|
|
2083
|
+
console.log(
|
|
2084
|
+
`${indent} (no call edges found -- may be invoked dynamically or via re-exports)`,
|
|
2085
|
+
);
|
|
2086
|
+
}
|
|
2087
|
+
|
|
2088
|
+
// Render recursive dependency details
|
|
2089
|
+
if (r.depDetails && r.depDetails.length > 0) {
|
|
2090
|
+
console.log(`\n${indent} --- Dependencies (depth ${depthLevel + 1}) ---`);
|
|
2091
|
+
for (const dep of r.depDetails) {
|
|
2092
|
+
printFunctionExplain(dep, `${indent} `);
|
|
2093
|
+
}
|
|
2048
2094
|
}
|
|
2049
2095
|
console.log();
|
|
2050
2096
|
}
|
|
2097
|
+
|
|
2098
|
+
for (const r of data.results) {
|
|
2099
|
+
printFunctionExplain(r);
|
|
2100
|
+
}
|
|
2051
2101
|
}
|
|
2052
2102
|
}
|
|
2053
2103
|
|
package/src/structure.js
CHANGED
|
@@ -231,7 +231,8 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director
|
|
|
231
231
|
*/
|
|
232
232
|
export function structureData(customDbPath, opts = {}) {
|
|
233
233
|
const db = openReadonlyOrFail(customDbPath);
|
|
234
|
-
const
|
|
234
|
+
const rawDir = opts.directory || null;
|
|
235
|
+
const filterDir = rawDir && normalizePath(rawDir) !== '.' ? rawDir : null;
|
|
235
236
|
const maxDepth = opts.depth || null;
|
|
236
237
|
const sortBy = opts.sort || 'files';
|
|
237
238
|
const noTests = opts.noTests || false;
|