@optave/codegraph 2.2.2-dev.c252ef9 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -373,7 +373,7 @@ Codegraph also extracts symbols from common callback patterns: Commander `.comma
373
373
 
374
374
  ## 📊 Performance
375
375
 
376
- Self-measured on every release via CI ([full history](generated/BENCHMARKS.md)):
376
+ Self-measured on every release via CI ([build benchmarks](generated/BUILD-BENCHMARKS.md) | [embedding benchmarks](generated/EMBEDDING-BENCHMARKS.md)):
377
377
 
378
378
  | Metric | Latest |
379
379
  |---|---|
@@ -384,6 +384,20 @@ Self-measured on every release via CI ([full history](generated/BENCHMARKS.md)):
384
384
 
385
385
  Metrics are normalized per file for cross-version comparability. Times above are for a full initial build — incremental rebuilds only re-parse changed files.
386
386
 
387
+ ### Lightweight Footprint
388
+
389
+ <a href="https://www.npmjs.com/package/@optave/codegraph"><img src="https://img.shields.io/npm/unpacked-size/@optave/codegraph?style=flat-square&label=unpacked%20size" alt="npm unpacked size" /></a>
390
+
391
+ Only **3 runtime dependencies** — everything else is optional or a devDependency:
392
+
393
+ | Dependency | What it does | | |
394
+ |---|---|---|---|
395
+ | [better-sqlite3](https://github.com/WiseLibs/better-sqlite3) | Fast, synchronous SQLite driver | ![GitHub stars](https://img.shields.io/github/stars/WiseLibs/better-sqlite3?style=flat-square&label=%E2%AD%90) | ![npm downloads](https://img.shields.io/npm/dw/better-sqlite3?style=flat-square&label=%F0%9F%93%A5%2Fwk) |
396
+ | [commander](https://github.com/tj/commander.js) | CLI argument parsing | ![GitHub stars](https://img.shields.io/github/stars/tj/commander.js?style=flat-square&label=%E2%AD%90) | ![npm downloads](https://img.shields.io/npm/dw/commander?style=flat-square&label=%F0%9F%93%A5%2Fwk) |
397
+ | [web-tree-sitter](https://github.com/tree-sitter/tree-sitter) | WASM tree-sitter bindings | ![GitHub stars](https://img.shields.io/github/stars/tree-sitter/tree-sitter?style=flat-square&label=%E2%AD%90) | ![npm downloads](https://img.shields.io/npm/dw/web-tree-sitter?style=flat-square&label=%F0%9F%93%A5%2Fwk) |
398
+
399
+ Optional: `@huggingface/transformers` (semantic search), `@modelcontextprotocol/sdk` (MCP server) — lazy-loaded only when needed.
400
+
387
401
  ## 🤖 AI Agent Integration
388
402
 
389
403
  ### MCP Server
@@ -583,15 +597,16 @@ const { results: fused } = await multiSearchData(
583
597
 
584
598
  ## 🗺️ Roadmap
585
599
 
586
- See **[ROADMAP.md](ROADMAP.md)** for the full development roadmap. Current plan:
600
+ See **[ROADMAP.md](ROADMAP.md)** for the full development roadmap and **[STABILITY.md](STABILITY.md)** for the stability policy and versioning guarantees. Current plan:
587
601
 
588
602
  1. ~~**Rust Core**~~ — **Complete** (v1.3.0) — native tree-sitter parsing via napi-rs, parallel multi-core parsing, incremental re-parsing, import resolution & cycle detection in Rust
589
603
  2. ~~**Foundation Hardening**~~ — **Complete** (v1.4.0) — parser registry, 12-tool MCP server with multi-repo support, test coverage 62%→75%, `apiKeyCommand` secret resolution, global repo registry
590
- 3. **Intelligent Embeddings** — LLM-generated descriptions, hybrid search
591
- 4. **Natural Language Queries** — `codegraph ask` command, conversational sessions
592
- 5. **Expanded Language Support** — 8 new languages (12 → 20)
593
- 6. **GitHub Integration & CI** — reusable GitHub Action, PR review, SARIF output
594
- 7. **Visualization & Advanced** — web UI, dead code detection, monorepo support, agentic search
604
+ 3. **Architectural Refactoring** — parser plugin system, repository pattern, pipeline builder, engine strategy, domain errors, curated API
605
+ 4. **Intelligent Embeddings** — LLM-generated descriptions, hybrid search
606
+ 5. **Natural Language Queries** — `codegraph ask` command, conversational sessions
607
+ 6. **Expanded Language Support** — 8 new languages (12 → 20)
608
+ 7. **GitHub Integration & CI** — reusable GitHub Action, PR review, SARIF output
609
+ 8. **Visualization & Advanced** — web UI, dead code detection, monorepo support, agentic search
595
610
 
596
611
  ## 🤝 Contributing
597
612
 
package/package.json CHANGED
@@ -1,13 +1,14 @@
1
1
  {
2
2
  "name": "@optave/codegraph",
3
- "version": "2.2.2-dev.c252ef9",
3
+ "version": "2.3.0",
4
4
  "description": "Local code graph CLI — parse codebases with tree-sitter, build dependency graphs, query them",
5
5
  "type": "module",
6
6
  "main": "src/index.js",
7
7
  "exports": {
8
8
  ".": {
9
9
  "import": "./src/index.js"
10
- }
10
+ },
11
+ "./package.json": "./package.json"
11
12
  },
12
13
  "bin": {
13
14
  "codegraph": "./src/cli.js"
@@ -61,10 +62,10 @@
61
62
  "optionalDependencies": {
62
63
  "@huggingface/transformers": "^3.8.1",
63
64
  "@modelcontextprotocol/sdk": "^1.0.0",
64
- "@optave/codegraph-darwin-arm64": "2.2.2-dev.c252ef9",
65
- "@optave/codegraph-darwin-x64": "2.2.2-dev.c252ef9",
66
- "@optave/codegraph-linux-x64-gnu": "2.2.2-dev.c252ef9",
67
- "@optave/codegraph-win32-x64-msvc": "2.2.2-dev.c252ef9"
65
+ "@optave/codegraph-darwin-arm64": "2.3.0",
66
+ "@optave/codegraph-darwin-x64": "2.3.0",
67
+ "@optave/codegraph-linux-x64-gnu": "2.3.0",
68
+ "@optave/codegraph-win32-x64-msvc": "2.3.0"
68
69
  },
69
70
  "devDependencies": {
70
71
  "@biomejs/biome": "^2.4.4",
package/src/builder.js CHANGED
@@ -1,12 +1,11 @@
1
1
  import { createHash } from 'node:crypto';
2
2
  import fs from 'node:fs';
3
- import os from 'node:os';
4
3
  import path from 'node:path';
5
4
  import { loadConfig } from './config.js';
6
5
  import { EXTENSIONS, IGNORE_DIRS, normalizePath } from './constants.js';
7
6
  import { initSchema, openDb } from './db.js';
8
7
  import { readJournal, writeJournalHeader } from './journal.js';
9
- import { debug, warn } from './logger.js';
8
+ import { debug, info, warn } from './logger.js';
10
9
  import { getActiveEngine, parseFilesAuto } from './parser.js';
11
10
  import { computeConfidence, resolveImportPath, resolveImportsBatch } from './resolve.js';
12
11
 
@@ -345,7 +344,7 @@ export async function buildGraph(rootDir, opts = {}) {
345
344
  // Engine selection: 'native', 'wasm', or 'auto' (default)
346
345
  const engineOpts = { engine: opts.engine || 'auto' };
347
346
  const { name: engineName, version: engineVersion } = getActiveEngine(engineOpts);
348
- console.log(`Using ${engineName} engine${engineVersion ? ` (v${engineVersion})` : ''}`);
347
+ info(`Using ${engineName} engine${engineVersion ? ` (v${engineVersion})` : ''}`);
349
348
 
350
349
  const aliases = loadPathAliases(rootDir);
351
350
  // Merge config aliases
@@ -358,7 +357,7 @@ export async function buildGraph(rootDir, opts = {}) {
358
357
  }
359
358
 
360
359
  if (aliases.baseUrl || Object.keys(aliases.paths).length > 0) {
361
- console.log(
360
+ info(
362
361
  `Loaded path aliases: baseUrl=${aliases.baseUrl || 'none'}, ${Object.keys(aliases.paths).length} path mappings`,
363
362
  );
364
363
  }
@@ -366,7 +365,7 @@ export async function buildGraph(rootDir, opts = {}) {
366
365
  const collected = collectFiles(rootDir, [], config, new Set());
367
366
  const files = collected.files;
368
367
  const discoveredDirs = collected.directories;
369
- console.log(`Found ${files.length} files to parse`);
368
+ info(`Found ${files.length} files to parse`);
370
369
 
371
370
  // Check for incremental build
372
371
  const { changed, removed, isFullBuild } = incremental
@@ -397,19 +396,36 @@ export async function buildGraph(rootDir, opts = {}) {
397
396
  /* ignore heal errors */
398
397
  }
399
398
  }
400
- console.log('No changes detected. Graph is up to date.');
399
+ info('No changes detected. Graph is up to date.');
401
400
  db.close();
402
401
  writeJournalHeader(rootDir, Date.now());
403
402
  return;
404
403
  }
405
404
 
405
+ // Check if embeddings table exists (created by `embed`, not by initSchema)
406
+ let hasEmbeddings = false;
407
+ try {
408
+ db.prepare('SELECT 1 FROM embeddings LIMIT 1').get();
409
+ hasEmbeddings = true;
410
+ } catch {
411
+ /* table doesn't exist */
412
+ }
413
+
406
414
  if (isFullBuild) {
415
+ const deletions =
416
+ 'PRAGMA foreign_keys = OFF; DELETE FROM node_metrics; DELETE FROM edges; DELETE FROM nodes; PRAGMA foreign_keys = ON;';
407
417
  db.exec(
408
- 'PRAGMA foreign_keys = OFF; DELETE FROM node_metrics; DELETE FROM edges; DELETE FROM nodes; PRAGMA foreign_keys = ON;',
418
+ hasEmbeddings
419
+ ? `${deletions.replace('PRAGMA foreign_keys = ON;', '')} DELETE FROM embeddings; PRAGMA foreign_keys = ON;`
420
+ : deletions,
409
421
  );
410
422
  } else {
411
- console.log(`Incremental: ${parseChanges.length} changed, ${removed.length} removed`);
412
- // Remove metrics/edges/nodes for changed and removed files
423
+ info(`Incremental: ${parseChanges.length} changed, ${removed.length} removed`);
424
+ // Remove embeddings/metrics/edges/nodes for changed and removed files
425
+ // Embeddings must be deleted BEFORE nodes (we need node IDs to find them)
426
+ const deleteEmbeddingsForFile = hasEmbeddings
427
+ ? db.prepare('DELETE FROM embeddings WHERE node_id IN (SELECT id FROM nodes WHERE file = ?)')
428
+ : null;
413
429
  const deleteNodesForFile = db.prepare('DELETE FROM nodes WHERE file = ?');
414
430
  const deleteEdgesForFile = db.prepare(`
415
431
  DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = @f)
@@ -419,12 +435,14 @@ export async function buildGraph(rootDir, opts = {}) {
419
435
  'DELETE FROM node_metrics WHERE node_id IN (SELECT id FROM nodes WHERE file = ?)',
420
436
  );
421
437
  for (const relPath of removed) {
438
+ deleteEmbeddingsForFile?.run(relPath);
422
439
  deleteEdgesForFile.run({ f: relPath });
423
440
  deleteMetricsForFile.run(relPath);
424
441
  deleteNodesForFile.run(relPath);
425
442
  }
426
443
  for (const item of parseChanges) {
427
444
  const relPath = item.relPath || normalizePath(path.relative(rootDir, item.file));
445
+ deleteEmbeddingsForFile?.run(relPath);
428
446
  deleteEdgesForFile.run({ f: relPath });
429
447
  deleteMetricsForFile.run(relPath);
430
448
  deleteNodesForFile.run(relPath);
@@ -528,7 +546,7 @@ export async function buildGraph(rootDir, opts = {}) {
528
546
 
529
547
  const parsed = allSymbols.size;
530
548
  const skipped = filesToParse.length - parsed;
531
- console.log(`Parsed ${parsed} files (${skipped} skipped)`);
549
+ info(`Parsed ${parsed} files (${skipped} skipped)`);
532
550
 
533
551
  // Clean up removed file hashes
534
552
  if (upsertHash && removed.length > 0) {
@@ -822,15 +840,33 @@ export async function buildGraph(rootDir, opts = {}) {
822
840
  }
823
841
 
824
842
  const nodeCount = db.prepare('SELECT COUNT(*) as c FROM nodes').get().c;
825
- console.log(`Graph built: ${nodeCount} nodes, ${edgeCount} edges`);
826
- console.log(`Stored in ${dbPath}`);
843
+ info(`Graph built: ${nodeCount} nodes, ${edgeCount} edges`);
844
+ info(`Stored in ${dbPath}`);
845
+
846
+ // Warn about orphaned embeddings that no longer match any node
847
+ if (hasEmbeddings) {
848
+ try {
849
+ const orphaned = db
850
+ .prepare('SELECT COUNT(*) as c FROM embeddings WHERE node_id NOT IN (SELECT id FROM nodes)')
851
+ .get().c;
852
+ if (orphaned > 0) {
853
+ warn(
854
+ `${orphaned} embeddings are orphaned (nodes changed). Run "codegraph embed" to refresh.`,
855
+ );
856
+ }
857
+ } catch {
858
+ /* ignore — embeddings table may have been dropped */
859
+ }
860
+ }
861
+
827
862
  db.close();
828
863
 
829
864
  // Write journal header after successful build
830
865
  writeJournalHeader(rootDir, Date.now());
831
866
 
832
867
  if (!opts.skipRegistry) {
833
- const tmpDir = path.resolve(os.tmpdir());
868
+ const { tmpdir } = await import('node:os');
869
+ const tmpDir = path.resolve(tmpdir());
834
870
  const resolvedRoot = path.resolve(rootDir);
835
871
  if (resolvedRoot.startsWith(tmpDir)) {
836
872
  debug(`Skipping auto-registration for temp directory: ${resolvedRoot}`);
package/src/cli.js CHANGED
@@ -2,12 +2,12 @@
2
2
 
3
3
  import fs from 'node:fs';
4
4
  import path from 'node:path';
5
- import Database from 'better-sqlite3';
6
5
  import { Command } from 'commander';
7
6
  import { buildGraph } from './builder.js';
7
+ import { loadConfig } from './config.js';
8
8
  import { findCycles, formatCycles } from './cycles.js';
9
- import { findDbPath } from './db.js';
10
- import { buildEmbeddings, MODELS, search } from './embedder.js';
9
+ import { openReadonlyOrFail } from './db.js';
10
+ import { buildEmbeddings, EMBEDDING_STRATEGIES, MODELS, search } from './embedder.js';
11
11
  import { exportDOT, exportJSON, exportMermaid } from './export.js';
12
12
  import { setVerbose } from './logger.js';
13
13
  import {
@@ -36,6 +36,8 @@ import { watchProject } from './watcher.js';
36
36
  const __cliDir = path.dirname(new URL(import.meta.url).pathname.replace(/^\/([A-Z]:)/i, '$1'));
37
37
  const pkg = JSON.parse(fs.readFileSync(path.join(__cliDir, '..', 'package.json'), 'utf-8'));
38
38
 
39
+ const config = loadConfig(process.cwd());
40
+
39
41
  const program = new Command();
40
42
  program
41
43
  .name('codegraph')
@@ -48,6 +50,18 @@ program
48
50
  if (opts.verbose) setVerbose(true);
49
51
  });
50
52
 
53
+ /**
54
+ * Resolve the effective noTests value: CLI flag > config > false.
55
+ * Commander sets opts.tests to false when --no-tests is passed.
56
+ * When --include-tests is passed, always return false (include tests).
57
+ * Otherwise, fall back to config.query.excludeTests.
58
+ */
59
+ function resolveNoTests(opts) {
60
+ if (opts.includeTests) return false;
61
+ if (opts.tests === false) return true;
62
+ return config.query?.excludeTests || false;
63
+ }
64
+
51
65
  program
52
66
  .command('build [dir]')
53
67
  .description('Parse repo and build graph in .codegraph/graph.db')
@@ -63,9 +77,10 @@ program
63
77
  .description('Find a function/class, show callers and callees')
64
78
  .option('-d, --db <path>', 'Path to graph.db')
65
79
  .option('-T, --no-tests', 'Exclude test/spec files from results')
80
+ .option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
66
81
  .option('-j, --json', 'Output as JSON')
67
82
  .action((name, opts) => {
68
- queryName(name, opts.db, { noTests: !opts.tests, json: opts.json });
83
+ queryName(name, opts.db, { noTests: resolveNoTests(opts), json: opts.json });
69
84
  });
70
85
 
71
86
  program
@@ -73,9 +88,10 @@ program
73
88
  .description('Show what depends on this file (transitive)')
74
89
  .option('-d, --db <path>', 'Path to graph.db')
75
90
  .option('-T, --no-tests', 'Exclude test/spec files from results')
91
+ .option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
76
92
  .option('-j, --json', 'Output as JSON')
77
93
  .action((file, opts) => {
78
- impactAnalysis(file, opts.db, { noTests: !opts.tests, json: opts.json });
94
+ impactAnalysis(file, opts.db, { noTests: resolveNoTests(opts), json: opts.json });
79
95
  });
80
96
 
81
97
  program
@@ -84,9 +100,13 @@ program
84
100
  .option('-d, --db <path>', 'Path to graph.db')
85
101
  .option('-n, --limit <number>', 'Number of top nodes', '20')
86
102
  .option('-T, --no-tests', 'Exclude test/spec files from results')
103
+ .option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
87
104
  .option('-j, --json', 'Output as JSON')
88
105
  .action((opts) => {
89
- moduleMap(opts.db, parseInt(opts.limit, 10), { noTests: !opts.tests, json: opts.json });
106
+ moduleMap(opts.db, parseInt(opts.limit, 10), {
107
+ noTests: resolveNoTests(opts),
108
+ json: opts.json,
109
+ });
90
110
  });
91
111
 
92
112
  program
@@ -94,9 +114,10 @@ program
94
114
  .description('Show graph health overview: nodes, edges, languages, cycles, hotspots, embeddings')
95
115
  .option('-d, --db <path>', 'Path to graph.db')
96
116
  .option('-T, --no-tests', 'Exclude test/spec files from results')
117
+ .option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
97
118
  .option('-j, --json', 'Output as JSON')
98
119
  .action((opts) => {
99
- stats(opts.db, { noTests: !opts.tests, json: opts.json });
120
+ stats(opts.db, { noTests: resolveNoTests(opts), json: opts.json });
100
121
  });
101
122
 
102
123
  program
@@ -104,9 +125,10 @@ program
104
125
  .description('Show what this file imports and what imports it')
105
126
  .option('-d, --db <path>', 'Path to graph.db')
106
127
  .option('-T, --no-tests', 'Exclude test/spec files from results')
128
+ .option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
107
129
  .option('-j, --json', 'Output as JSON')
108
130
  .action((file, opts) => {
109
- fileDeps(file, opts.db, { noTests: !opts.tests, json: opts.json });
131
+ fileDeps(file, opts.db, { noTests: resolveNoTests(opts), json: opts.json });
110
132
  });
111
133
 
112
134
  program
@@ -117,6 +139,7 @@ program
117
139
  .option('-f, --file <path>', 'Scope search to functions in this file (partial match)')
118
140
  .option('-k, --kind <kind>', 'Filter to a specific symbol kind')
119
141
  .option('-T, --no-tests', 'Exclude test/spec files from results')
142
+ .option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
120
143
  .option('-j, --json', 'Output as JSON')
121
144
  .action((name, opts) => {
122
145
  if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) {
@@ -127,7 +150,7 @@ program
127
150
  depth: parseInt(opts.depth, 10),
128
151
  file: opts.file,
129
152
  kind: opts.kind,
130
- noTests: !opts.tests,
153
+ noTests: resolveNoTests(opts),
131
154
  json: opts.json,
132
155
  });
133
156
  });
@@ -140,6 +163,7 @@ program
140
163
  .option('-f, --file <path>', 'Scope search to functions in this file (partial match)')
141
164
  .option('-k, --kind <kind>', 'Filter to a specific symbol kind')
142
165
  .option('-T, --no-tests', 'Exclude test/spec files from results')
166
+ .option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
143
167
  .option('-j, --json', 'Output as JSON')
144
168
  .action((name, opts) => {
145
169
  if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) {
@@ -150,7 +174,7 @@ program
150
174
  depth: parseInt(opts.depth, 10),
151
175
  file: opts.file,
152
176
  kind: opts.kind,
153
- noTests: !opts.tests,
177
+ noTests: resolveNoTests(opts),
154
178
  json: opts.json,
155
179
  });
156
180
  });
@@ -163,8 +187,9 @@ program
163
187
  .option('-f, --file <path>', 'Scope search to functions in this file (partial match)')
164
188
  .option('-k, --kind <kind>', 'Filter to a specific symbol kind')
165
189
  .option('--no-source', 'Metadata only (skip source extraction)')
166
- .option('--include-tests', 'Include test source code')
190
+ .option('--with-test-source', 'Include test source code')
167
191
  .option('-T, --no-tests', 'Exclude test/spec files from results')
192
+ .option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
168
193
  .option('-j, --json', 'Output as JSON')
169
194
  .action((name, opts) => {
170
195
  if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) {
@@ -176,8 +201,8 @@ program
176
201
  file: opts.file,
177
202
  kind: opts.kind,
178
203
  noSource: !opts.source,
179
- noTests: !opts.tests,
180
- includeTests: opts.includeTests,
204
+ noTests: resolveNoTests(opts),
205
+ includeTests: opts.withTestSource,
181
206
  json: opts.json,
182
207
  });
183
208
  });
@@ -186,10 +211,16 @@ program
186
211
  .command('explain <target>')
187
212
  .description('Structural summary of a file or function (no LLM needed)')
188
213
  .option('-d, --db <path>', 'Path to graph.db')
214
+ .option('--depth <n>', 'Recursively explain dependencies up to N levels deep', '0')
189
215
  .option('-T, --no-tests', 'Exclude test/spec files from results')
216
+ .option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
190
217
  .option('-j, --json', 'Output as JSON')
191
218
  .action((target, opts) => {
192
- explain(target, opts.db, { noTests: !opts.tests, json: opts.json });
219
+ explain(target, opts.db, {
220
+ depth: parseInt(opts.depth, 10),
221
+ noTests: resolveNoTests(opts),
222
+ json: opts.json,
223
+ });
193
224
  });
194
225
 
195
226
  program
@@ -198,6 +229,7 @@ program
198
229
  .option('-d, --db <path>', 'Path to graph.db')
199
230
  .option('-f, --file <path>', 'File overview: list symbols, imports, exports')
200
231
  .option('-T, --no-tests', 'Exclude test/spec files from results')
232
+ .option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
201
233
  .option('-j, --json', 'Output as JSON')
202
234
  .action((name, opts) => {
203
235
  if (!name && !opts.file) {
@@ -205,7 +237,7 @@ program
205
237
  process.exit(1);
206
238
  }
207
239
  const target = opts.file || name;
208
- where(target, opts.db, { file: !!opts.file, noTests: !opts.tests, json: opts.json });
240
+ where(target, opts.db, { file: !!opts.file, noTests: resolveNoTests(opts), json: opts.json });
209
241
  });
210
242
 
211
243
  program
@@ -215,6 +247,7 @@ program
215
247
  .option('--staged', 'Analyze staged changes instead of unstaged')
216
248
  .option('--depth <n>', 'Max transitive caller depth', '3')
217
249
  .option('-T, --no-tests', 'Exclude test/spec files from results')
250
+ .option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
218
251
  .option('-j, --json', 'Output as JSON')
219
252
  .option('-f, --format <format>', 'Output format: text, mermaid, json', 'text')
220
253
  .action((ref, opts) => {
@@ -222,7 +255,7 @@ program
222
255
  ref,
223
256
  staged: opts.staged,
224
257
  depth: parseInt(opts.depth, 10),
225
- noTests: !opts.tests,
258
+ noTests: resolveNoTests(opts),
226
259
  json: opts.json,
227
260
  format: opts.format,
228
261
  });
@@ -237,10 +270,16 @@ program
237
270
  .option('-f, --format <format>', 'Output format: dot, mermaid, json', 'dot')
238
271
  .option('--functions', 'Function-level graph instead of file-level')
239
272
  .option('-T, --no-tests', 'Exclude test/spec files')
273
+ .option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
274
+ .option('--min-confidence <score>', 'Minimum edge confidence threshold (default: 0.5)', '0.5')
240
275
  .option('-o, --output <file>', 'Write to file instead of stdout')
241
276
  .action((opts) => {
242
- const db = new Database(findDbPath(opts.db), { readonly: true });
243
- const exportOpts = { fileLevel: !opts.functions, noTests: !opts.tests };
277
+ const db = openReadonlyOrFail(opts.db);
278
+ const exportOpts = {
279
+ fileLevel: !opts.functions,
280
+ noTests: resolveNoTests(opts),
281
+ minConfidence: parseFloat(opts.minConfidence),
282
+ };
244
283
 
245
284
  let output;
246
285
  switch (opts.format) {
@@ -271,10 +310,11 @@ program
271
310
  .option('-d, --db <path>', 'Path to graph.db')
272
311
  .option('--functions', 'Function-level cycle detection')
273
312
  .option('-T, --no-tests', 'Exclude test/spec files')
313
+ .option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
274
314
  .option('-j, --json', 'Output as JSON')
275
315
  .action((opts) => {
276
- const db = new Database(findDbPath(opts.db), { readonly: true });
277
- const cycles = findCycles(db, { fileLevel: !opts.functions, noTests: !opts.tests });
316
+ const db = openReadonlyOrFail(opts.db);
317
+ const cycles = findCycles(db, { fileLevel: !opts.functions, noTests: resolveNoTests(opts) });
278
318
  db.close();
279
319
 
280
320
  if (opts.json) {
@@ -376,10 +416,13 @@ program
376
416
  .action(() => {
377
417
  console.log('\nAvailable embedding models:\n');
378
418
  for (const [key, config] of Object.entries(MODELS)) {
379
- const def = key === 'nomic-v1.5' ? ' (default)' : '';
380
- console.log(` ${key.padEnd(12)} ${String(config.dim).padStart(4)}d ${config.desc}${def}`);
419
+ const def = key === 'minilm' ? ' (default)' : '';
420
+ const ctx = config.contextWindow ? `${config.contextWindow} ctx` : '';
421
+ console.log(
422
+ ` ${key.padEnd(12)} ${String(config.dim).padStart(4)}d ${ctx.padEnd(9)} ${config.desc}${def}`,
423
+ );
381
424
  }
382
- console.log('\nUsage: codegraph embed --model <name>');
425
+ console.log('\nUsage: codegraph embed --model <name> --strategy <structured|source>');
383
426
  console.log(' codegraph search "query" --model <name>\n');
384
427
  });
385
428
 
@@ -390,12 +433,23 @@ program
390
433
  )
391
434
  .option(
392
435
  '-m, --model <name>',
393
- 'Embedding model: minilm, jina-small, jina-base, jina-code, nomic, nomic-v1.5 (default), bge-large. Run `codegraph models` for details',
394
- 'nomic-v1.5',
436
+ 'Embedding model: minilm (default), jina-small, jina-base, jina-code, nomic, nomic-v1.5, bge-large. Run `codegraph models` for details',
437
+ 'minilm',
438
+ )
439
+ .option(
440
+ '-s, --strategy <name>',
441
+ `Embedding strategy: ${EMBEDDING_STRATEGIES.join(', ')}. "structured" uses graph context (callers/callees), "source" embeds raw code`,
442
+ 'structured',
395
443
  )
396
444
  .action(async (dir, opts) => {
445
+ if (!EMBEDDING_STRATEGIES.includes(opts.strategy)) {
446
+ console.error(
447
+ `Unknown strategy: ${opts.strategy}. Available: ${EMBEDDING_STRATEGIES.join(', ')}`,
448
+ );
449
+ process.exit(1);
450
+ }
397
451
  const root = path.resolve(dir || '.');
398
- await buildEmbeddings(root, opts.model);
452
+ await buildEmbeddings(root, opts.model, undefined, { strategy: opts.strategy });
399
453
  });
400
454
 
401
455
  program
@@ -405,6 +459,7 @@ program
405
459
  .option('-m, --model <name>', 'Override embedding model (auto-detects from DB)')
406
460
  .option('-n, --limit <number>', 'Max results', '15')
407
461
  .option('-T, --no-tests', 'Exclude test/spec files from results')
462
+ .option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
408
463
  .option('--min-score <score>', 'Minimum similarity threshold', '0.2')
409
464
  .option('-k, --kind <kind>', 'Filter by kind: function, method, class')
410
465
  .option('--file <pattern>', 'Filter by file path pattern')
@@ -412,7 +467,7 @@ program
412
467
  .action(async (query, opts) => {
413
468
  await search(query, opts.db, {
414
469
  limit: parseInt(opts.limit, 10),
415
- noTests: !opts.tests,
470
+ noTests: resolveNoTests(opts),
416
471
  minScore: parseFloat(opts.minScore),
417
472
  model: opts.model,
418
473
  kind: opts.kind,
@@ -430,6 +485,7 @@ program
430
485
  .option('--depth <n>', 'Max directory depth')
431
486
  .option('--sort <metric>', 'Sort by: cohesion | fan-in | fan-out | density | files', 'files')
432
487
  .option('-T, --no-tests', 'Exclude test/spec files')
488
+ .option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
433
489
  .option('-j, --json', 'Output as JSON')
434
490
  .action(async (dir, opts) => {
435
491
  const { structureData, formatStructure } = await import('./structure.js');
@@ -437,7 +493,7 @@ program
437
493
  directory: dir,
438
494
  depth: opts.depth ? parseInt(opts.depth, 10) : undefined,
439
495
  sort: opts.sort,
440
- noTests: !opts.tests,
496
+ noTests: resolveNoTests(opts),
441
497
  });
442
498
  if (opts.json) {
443
499
  console.log(JSON.stringify(data, null, 2));
@@ -456,6 +512,7 @@ program
456
512
  .option('--metric <metric>', 'fan-in | fan-out | density | coupling', 'fan-in')
457
513
  .option('--level <level>', 'file | directory', 'file')
458
514
  .option('-T, --no-tests', 'Exclude test/spec files from results')
515
+ .option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
459
516
  .option('-j, --json', 'Output as JSON')
460
517
  .action(async (opts) => {
461
518
  const { hotspotsData, formatHotspots } = await import('./structure.js');
@@ -463,7 +520,7 @@ program
463
520
  metric: opts.metric,
464
521
  level: opts.level,
465
522
  limit: parseInt(opts.limit, 10),
466
- noTests: !opts.tests,
523
+ noTests: resolveNoTests(opts),
467
524
  });
468
525
  if (opts.json) {
469
526
  console.log(JSON.stringify(data, null, 2));
package/src/config.js CHANGED
@@ -18,6 +18,7 @@ export const DEFAULTS = {
18
18
  query: {
19
19
  defaultDepth: 3,
20
20
  defaultLimit: 20,
21
+ excludeTests: false,
21
22
  },
22
23
  embeddings: { model: 'nomic-v1.5', llmProvider: null },
23
24
  llm: { provider: null, model: null, baseUrl: null, apiKey: null, apiKeyCommand: null },
package/src/embedder.js CHANGED
@@ -4,6 +4,18 @@ import Database from 'better-sqlite3';
4
4
  import { findDbPath, openReadonlyOrFail } from './db.js';
5
5
  import { warn } from './logger.js';
6
6
 
7
+ /**
8
+ * Split an identifier into readable words.
9
+ * camelCase/PascalCase → "camel Case", snake_case → "snake case", kebab-case → "kebab case"
10
+ */
11
+ function splitIdentifier(name) {
12
+ return name
13
+ .replace(/([a-z])([A-Z])/g, '$1 $2')
14
+ .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2')
15
+ .replace(/[_-]+/g, ' ')
16
+ .trim();
17
+ }
18
+
7
19
  // Lazy-load transformers (heavy, optional module)
8
20
  let pipeline = null;
9
21
  let _cos_sim = null;
@@ -14,48 +26,57 @@ export const MODELS = {
14
26
  minilm: {
15
27
  name: 'Xenova/all-MiniLM-L6-v2',
16
28
  dim: 384,
29
+ contextWindow: 256,
17
30
  desc: 'Smallest, fastest (~23MB). General text.',
18
31
  quantized: true,
19
32
  },
20
33
  'jina-small': {
21
34
  name: 'Xenova/jina-embeddings-v2-small-en',
22
35
  dim: 512,
36
+ contextWindow: 8192,
23
37
  desc: 'Small, good quality (~33MB). General text.',
24
38
  quantized: false,
25
39
  },
26
40
  'jina-base': {
27
41
  name: 'Xenova/jina-embeddings-v2-base-en',
28
42
  dim: 768,
43
+ contextWindow: 8192,
29
44
  desc: 'Good quality (~137MB). General text, 8192 token context.',
30
45
  quantized: false,
31
46
  },
32
47
  'jina-code': {
33
48
  name: 'Xenova/jina-embeddings-v2-base-code',
34
49
  dim: 768,
50
+ contextWindow: 8192,
35
51
  desc: 'Code-aware (~137MB). Trained on code+text, best for code search.',
36
52
  quantized: false,
37
53
  },
38
54
  nomic: {
39
55
  name: 'Xenova/nomic-embed-text-v1',
40
56
  dim: 768,
57
+ contextWindow: 8192,
41
58
  desc: 'Good local quality (~137MB). 8192 context.',
42
59
  quantized: false,
43
60
  },
44
61
  'nomic-v1.5': {
45
62
  name: 'nomic-ai/nomic-embed-text-v1.5',
46
63
  dim: 768,
64
+ contextWindow: 8192,
47
65
  desc: 'Improved nomic (~137MB). Matryoshka dimensions, 8192 context.',
48
66
  quantized: false,
49
67
  },
50
68
  'bge-large': {
51
69
  name: 'Xenova/bge-large-en-v1.5',
52
70
  dim: 1024,
71
+ contextWindow: 512,
53
72
  desc: 'Best general retrieval (~335MB). Top MTEB scores.',
54
73
  quantized: false,
55
74
  },
56
75
  };
57
76
 
58
- export const DEFAULT_MODEL = 'nomic-v1.5';
77
+ export const EMBEDDING_STRATEGIES = ['structured', 'source'];
78
+
79
+ export const DEFAULT_MODEL = 'minilm';
59
80
  const BATCH_SIZE_MAP = {
60
81
  minilm: 32,
61
82
  'jina-small': 16,
@@ -77,6 +98,108 @@ function getModelConfig(modelKey) {
77
98
  return config;
78
99
  }
79
100
 
101
/**
 * Rough token estimate (~4 characters per token for code/English text).
 * Heuristic only — avoids adding a tokenizer dependency.
 *
 * @param {string} text - Text to measure; null/undefined are treated as empty.
 * @returns {number} Estimated token count (0 for empty or missing text).
 */
export function estimateTokens(text) {
  // Missing text has no tokens; returning 0 avoids a TypeError on `.length`.
  if (text == null) return 0;
  return Math.ceil(text.length / 4);
}
108
+
109
/**
 * Extract the comment block (JSDoc, //, ///, #, ...) sitting above a definition.
 *
 * Scans upward from the line before `fnLineIndex`, inspecting at most 15 lines.
 * Blank lines between the definition and the comment are skipped; once at least
 * one comment line has been collected, a blank line ends the scan. Any other
 * non-comment line ends the scan immediately.
 *
 * @param {string[]} lines - File content split into lines.
 * @param {number} fnLineIndex - Zero-based index of the definition's first line.
 * @returns {string|null} Comment text with the comment markers stripped and the
 *   lines joined by single spaces, or null when no comment text is found.
 */
function extractLeadingComment(lines, fnLineIndex) {
  // The index can point past the end of the file when the recorded line number
  // no longer matches the file on disk; there is nothing to read in that case.
  if (fnLineIndex > lines.length) return null;

  // `//` also covers `///`, so no separate alternative is needed.
  const commentLine = /^(\/\/|\/\*|\*\/|\*|#)/;
  const lowest = Math.max(0, fnLineIndex - 15);

  const raw = [];
  for (let i = fnLineIndex - 1; i >= lowest; i--) {
    const trimmed = lines[i].trim();
    if (commentLine.test(trimmed)) {
      raw.unshift(trimmed);
    } else if (trimmed !== '' || raw.length > 0) {
      // Code line, or a blank line above an already-collected comment
      break;
    }
  }
  if (raw.length === 0) return null;

  const text = raw
    .map((line) =>
      line
        .replace(/^\/\*\*?\s?|\*\/$/g, '') // opening /** or /* and closing */
        .replace(/^\*\s?/, '') // middle * lines
        .replace(/^\/\/\/?\s?/, '') // // or ///
        .replace(/^#\s?/, '') // # (Python/Ruby)
        .trim(),
    )
    .filter((line) => line.length > 0)
    .join(' ');

  // Only bare markers were collected (e.g. a lone closing */): report "no comment"
  return text || null;
}
138
+
139
/**
 * Compose the compact, graph-aware description of a symbol that is embedded
 * in place of its full source.
 *
 * The result is a newline-joined list of: a header line, the parameter list found
 * on the signature line (if any), up to 10 callee names, up to 10 caller names,
 * and finally either the leading comment or, when there is none, the first lines
 * (at most 4) of the definition.
 *
 * @param {object} node - Symbol row; `id`, `name`, `kind` and `line` are read
 *   (`line - 1` is used as the zero-based index into `lines`).
 * @param {string} file - File path shown in the header line.
 * @param {string[]} lines - Content of `file` split into lines.
 * @param {object} calleesStmt - Statement whose `.all(id)` yields `{ name }` rows for callees.
 * @param {object} callersStmt - Statement whose `.all(id)` yields `{ name }` rows for callers.
 * @returns {string} The text to embed.
 */
function buildStructuredText(node, file, lines, calleesStmt, callersStmt) {
  const sections = [`${node.kind} ${node.name} (${splitIdentifier(node.name)}) in ${file}`];
  const firstIdx = Math.max(0, node.line - 1);

  // Best-effort parameter capture: only the first (...) group on the signature line
  const signature = lines[firstIdx] || '';
  const params = signature.match(/\(([^)]*)\)/)?.[1]?.trim();
  if (params) {
    sections.push(`Parameters: ${params}`);
  }

  // Neighbour names from the call graph, each list limited to 10 entries
  const neighbours = [
    ['Calls', calleesStmt],
    ['Called by', callersStmt],
  ];
  for (const [label, stmt] of neighbours) {
    const rows = stmt.all(node.id);
    if (rows.length === 0) continue;
    const names = rows.slice(0, 10).map((row) => row.name);
    sections.push(`${label}: ${names.join(', ')}`);
  }

  // A leading comment carries the most meaning; otherwise fall back to a short code snippet
  const docText = extractLeadingComment(lines, firstIdx);
  if (docText) {
    sections.push(docText);
    return sections.join('\n');
  }

  const lastIdx = Math.min(lines.length, firstIdx + 4);
  const head = lines.slice(firstIdx, lastIdx).join('\n').trim();
  if (head) {
    sections.push(head);
  }
  return sections.join('\n');
}
189
+
190
/**
 * Compose the raw-source embedding text for a symbol (the original strategy):
 * a header line followed by the symbol's own lines taken from the file.
 *
 * @param {object} node - Symbol row; `name`, `kind`, `line` and optional `end_line` are read.
 * @param {string} file - File path shown in the header line.
 * @param {string[]} lines - Content of `file` split into lines.
 * @returns {string} Header and source lines joined by a newline.
 */
function buildSourceText(node, file, lines) {
  const from = Math.max(0, node.line - 1);
  // Without a recorded end line, fall back to a 15-line window
  const wantedEnd = node.end_line ? node.end_line : from + 15;
  const to = Math.min(lines.length, wantedEnd);
  const body = lines.slice(from, to).join('\n');
  const header = `${node.kind} ${node.name} (${splitIdentifier(node.name)}) in ${file}`;
  return `${header}\n${body}`;
}
202
+
80
203
  /**
81
204
  * Lazy-load @huggingface/transformers.
82
205
  * This is an optional dependency — gives a clear error if not installed.
@@ -103,8 +226,27 @@ async function loadModel(modelKey) {
103
226
  _cos_sim = transformers.cos_sim;
104
227
 
105
228
  console.log(`Loading embedding model: ${config.name} (${config.dim}d)...`);
106
- const opts = config.quantized ? { quantized: true } : {};
107
- extractor = await pipeline('feature-extraction', config.name, opts);
229
+ const pipelineOpts = config.quantized ? { quantized: true } : {};
230
+ try {
231
+ extractor = await pipeline('feature-extraction', config.name, pipelineOpts);
232
+ } catch (err) {
233
+ const msg = err.message || String(err);
234
+ if (msg.includes('Unauthorized') || msg.includes('401') || msg.includes('gated')) {
235
+ console.error(
236
+ `\nModel "${config.name}" requires authentication.\n` +
237
+ `This model is gated on HuggingFace and needs an access token.\n\n` +
238
+ `Options:\n` +
239
+ ` 1. Set HF_TOKEN env var: export HF_TOKEN=hf_...\n` +
240
+ ` 2. Use a public model instead: codegraph embed --model minilm\n`,
241
+ );
242
+ } else {
243
+ console.error(
244
+ `\nFailed to load model "${config.name}": ${msg}\n` +
245
+ `Try a different model: codegraph embed --model minilm\n`,
246
+ );
247
+ }
248
+ process.exit(1);
249
+ }
108
250
  activeModel = config.name;
109
251
  console.log('Model loaded.');
110
252
  return { extractor, config };
@@ -172,12 +314,24 @@ function initEmbeddingsSchema(db) {
172
314
 
173
315
  /**
174
316
  * Build embeddings for all functions/methods/classes in the graph.
317
+ * @param {string} rootDir - Project root directory
318
+ * @param {string} modelKey - Model identifier from MODELS registry
319
+ * @param {string} [customDbPath] - Override path to graph.db
320
+ * @param {object} [options] - Embedding options
321
+ * @param {string} [options.strategy='structured'] - 'structured' (graph-enriched) or 'source' (raw code)
175
322
  */
176
- export async function buildEmbeddings(rootDir, modelKey, customDbPath) {
177
- // path already imported at top
178
- // fs already imported at top
323
+ export async function buildEmbeddings(rootDir, modelKey, customDbPath, options = {}) {
324
+ const strategy = options.strategy || 'structured';
179
325
  const dbPath = customDbPath || findDbPath(null);
180
326
 
327
+ if (!fs.existsSync(dbPath)) {
328
+ console.error(
329
+ `No codegraph database found at ${dbPath}.\n` +
330
+ `Run "codegraph build" first to analyze your codebase.`,
331
+ );
332
+ process.exit(1);
333
+ }
334
+
181
335
  const db = new Database(dbPath);
182
336
  initEmbeddingsSchema(db);
183
337
 
@@ -190,7 +344,24 @@ export async function buildEmbeddings(rootDir, modelKey, customDbPath) {
190
344
  )
191
345
  .all();
192
346
 
193
- console.log(`Building embeddings for ${nodes.length} symbols...`);
347
+ console.log(`Building embeddings for ${nodes.length} symbols (strategy: ${strategy})...`);
348
+
349
+ // Prepare graph-context queries for structured strategy
350
+ let calleesStmt, callersStmt;
351
+ if (strategy === 'structured') {
352
+ calleesStmt = db.prepare(`
353
+ SELECT DISTINCT n.name FROM edges e
354
+ JOIN nodes n ON e.target_id = n.id
355
+ WHERE e.source_id = ? AND e.kind = 'calls'
356
+ ORDER BY n.name
357
+ `);
358
+ callersStmt = db.prepare(`
359
+ SELECT DISTINCT n.name FROM edges e
360
+ JOIN nodes n ON e.source_id = n.id
361
+ WHERE e.target_id = ? AND e.kind = 'calls'
362
+ ORDER BY n.name
363
+ `);
364
+ }
194
365
 
195
366
  const byFile = new Map();
196
367
  for (const node of nodes) {
@@ -201,6 +372,9 @@ export async function buildEmbeddings(rootDir, modelKey, customDbPath) {
201
372
  const texts = [];
202
373
  const nodeIds = [];
203
374
  const previews = [];
375
+ const config = getModelConfig(modelKey);
376
+ const contextWindow = config.contextWindow;
377
+ let overflowCount = 0;
204
378
 
205
379
  for (const [file, fileNodes] of byFile) {
206
380
  const fullPath = path.join(rootDir, file);
@@ -213,19 +387,31 @@ export async function buildEmbeddings(rootDir, modelKey, customDbPath) {
213
387
  }
214
388
 
215
389
  for (const node of fileNodes) {
216
- const startLine = Math.max(0, node.line - 1);
217
- const endLine = node.end_line
218
- ? Math.min(lines.length, node.end_line)
219
- : Math.min(lines.length, startLine + 15);
220
- const context = lines.slice(startLine, endLine).join('\n');
390
+ let text =
391
+ strategy === 'structured'
392
+ ? buildStructuredText(node, file, lines, calleesStmt, callersStmt)
393
+ : buildSourceText(node, file, lines);
394
+
395
+ // Detect and handle context window overflow
396
+ const tokens = estimateTokens(text);
397
+ if (tokens > contextWindow) {
398
+ overflowCount++;
399
+ const maxChars = contextWindow * 4;
400
+ text = text.slice(0, maxChars);
401
+ }
221
402
 
222
- const text = `${node.kind} ${node.name} in ${file}\n${context}`;
223
403
  texts.push(text);
224
404
  nodeIds.push(node.id);
225
405
  previews.push(`${node.name} (${node.kind}) -- ${file}:${node.line}`);
226
406
  }
227
407
  }
228
408
 
409
+ if (overflowCount > 0) {
410
+ warn(
411
+ `${overflowCount} symbol(s) exceeded model context window (${contextWindow} tokens) and were truncated`,
412
+ );
413
+ }
414
+
229
415
  console.log(`Embedding ${texts.length} symbols...`);
230
416
  const { vectors, dim } = await embed(texts, modelKey);
231
417
 
@@ -237,16 +423,19 @@ export async function buildEmbeddings(rootDir, modelKey, customDbPath) {
237
423
  for (let i = 0; i < vectors.length; i++) {
238
424
  insert.run(nodeIds[i], Buffer.from(vectors[i].buffer), previews[i]);
239
425
  }
240
- const config = getModelConfig(modelKey);
241
426
  insertMeta.run('model', config.name);
242
427
  insertMeta.run('dim', String(dim));
243
428
  insertMeta.run('count', String(vectors.length));
429
+ insertMeta.run('strategy', strategy);
244
430
  insertMeta.run('built_at', new Date().toISOString());
431
+ if (overflowCount > 0) {
432
+ insertMeta.run('truncated_count', String(overflowCount));
433
+ }
245
434
  });
246
435
  insertAll();
247
436
 
248
437
  console.log(
249
- `\nStored ${vectors.length} embeddings (${dim}d, ${getModelConfig(modelKey).name}) in graph.db`,
438
+ `\nStored ${vectors.length} embeddings (${dim}d, ${config.name}, strategy: ${strategy}) in graph.db`,
250
439
  );
251
440
  db.close();
252
441
  }
package/src/export.js CHANGED
@@ -1,12 +1,15 @@
1
1
  import path from 'node:path';
2
2
  import { isTestFile } from './queries.js';
3
3
 
4
+ const DEFAULT_MIN_CONFIDENCE = 0.5;
5
+
4
6
  /**
5
7
  * Export the dependency graph in DOT (Graphviz) format.
6
8
  */
7
9
  export function exportDOT(db, opts = {}) {
8
10
  const fileLevel = opts.fileLevel !== false;
9
11
  const noTests = opts.noTests || false;
12
+ const minConf = opts.minConfidence ?? DEFAULT_MIN_CONFIDENCE;
10
13
  const lines = [
11
14
  'digraph codegraph {',
12
15
  ' rankdir=LR;',
@@ -23,8 +26,9 @@ export function exportDOT(db, opts = {}) {
23
26
  JOIN nodes n1 ON e.source_id = n1.id
24
27
  JOIN nodes n2 ON e.target_id = n2.id
25
28
  WHERE n1.file != n2.file AND e.kind IN ('imports', 'imports-type', 'calls')
29
+ AND e.confidence >= ?
26
30
  `)
27
- .all();
31
+ .all(minConf);
28
32
  if (noTests) edges = edges.filter((e) => !isTestFile(e.source) && !isTestFile(e.target));
29
33
 
30
34
  // Try to use directory nodes from DB (built by structure analysis)
@@ -102,8 +106,9 @@ export function exportDOT(db, opts = {}) {
102
106
  JOIN nodes n2 ON e.target_id = n2.id
103
107
  WHERE n1.kind IN ('function', 'method', 'class', 'interface', 'type', 'struct', 'enum', 'trait', 'record', 'module') AND n2.kind IN ('function', 'method', 'class', 'interface', 'type', 'struct', 'enum', 'trait', 'record', 'module')
104
108
  AND e.kind = 'calls'
109
+ AND e.confidence >= ?
105
110
  `)
106
- .all();
111
+ .all(minConf);
107
112
  if (noTests)
108
113
  edges = edges.filter((e) => !isTestFile(e.source_file) && !isTestFile(e.target_file));
109
114
 
@@ -126,6 +131,7 @@ export function exportDOT(db, opts = {}) {
126
131
  export function exportMermaid(db, opts = {}) {
127
132
  const fileLevel = opts.fileLevel !== false;
128
133
  const noTests = opts.noTests || false;
134
+ const minConf = opts.minConfidence ?? DEFAULT_MIN_CONFIDENCE;
129
135
  const lines = ['graph LR'];
130
136
 
131
137
  if (fileLevel) {
@@ -136,8 +142,9 @@ export function exportMermaid(db, opts = {}) {
136
142
  JOIN nodes n1 ON e.source_id = n1.id
137
143
  JOIN nodes n2 ON e.target_id = n2.id
138
144
  WHERE n1.file != n2.file AND e.kind IN ('imports', 'imports-type', 'calls')
145
+ AND e.confidence >= ?
139
146
  `)
140
- .all();
147
+ .all(minConf);
141
148
  if (noTests) edges = edges.filter((e) => !isTestFile(e.source) && !isTestFile(e.target));
142
149
 
143
150
  for (const { source, target } of edges) {
@@ -155,8 +162,9 @@ export function exportMermaid(db, opts = {}) {
155
162
  JOIN nodes n2 ON e.target_id = n2.id
156
163
  WHERE n1.kind IN ('function', 'method', 'class', 'interface', 'type', 'struct', 'enum', 'trait', 'record', 'module') AND n2.kind IN ('function', 'method', 'class', 'interface', 'type', 'struct', 'enum', 'trait', 'record', 'module')
157
164
  AND e.kind = 'calls'
165
+ AND e.confidence >= ?
158
166
  `)
159
- .all();
167
+ .all(minConf);
160
168
  if (noTests)
161
169
  edges = edges.filter((e) => !isTestFile(e.source_file) && !isTestFile(e.target_file));
162
170
 
@@ -175,6 +183,7 @@ export function exportMermaid(db, opts = {}) {
175
183
  */
176
184
  export function exportJSON(db, opts = {}) {
177
185
  const noTests = opts.noTests || false;
186
+ const minConf = opts.minConfidence ?? DEFAULT_MIN_CONFIDENCE;
178
187
 
179
188
  let nodes = db
180
189
  .prepare(`
@@ -185,13 +194,13 @@ export function exportJSON(db, opts = {}) {
185
194
 
186
195
  let edges = db
187
196
  .prepare(`
188
- SELECT DISTINCT n1.file AS source, n2.file AS target, e.kind
197
+ SELECT DISTINCT n1.file AS source, n2.file AS target, e.kind, e.confidence
189
198
  FROM edges e
190
199
  JOIN nodes n1 ON e.source_id = n1.id
191
200
  JOIN nodes n2 ON e.target_id = n2.id
192
- WHERE n1.file != n2.file
201
+ WHERE n1.file != n2.file AND e.confidence >= ?
193
202
  `)
194
- .all();
203
+ .all(minConf);
195
204
  if (noTests) edges = edges.filter((e) => !isTestFile(e.source) && !isTestFile(e.target));
196
205
 
197
206
  return { nodes, edges };
package/src/index.js CHANGED
@@ -21,7 +21,9 @@ export {
21
21
  buildEmbeddings,
22
22
  cosineSim,
23
23
  DEFAULT_MODEL,
24
+ EMBEDDING_STRATEGIES,
24
25
  embed,
26
+ estimateTokens,
25
27
  MODELS,
26
28
  multiSearchData,
27
29
  search,
package/src/queries.js CHANGED
@@ -334,6 +334,7 @@ export function moduleMapData(customDbPath, limit = 20, opts = {}) {
334
334
  dir: path.dirname(n.file) || '.',
335
335
  inEdges: n.in_edges,
336
336
  outEdges: n.out_edges,
337
+ coupling: n.in_edges + n.out_edges,
337
338
  }));
338
339
 
339
340
  const totalNodes = db.prepare('SELECT COUNT(*) as c FROM nodes').get().c;
@@ -1263,10 +1264,10 @@ export function moduleMap(customDbPath, limit = 20, opts = {}) {
1263
1264
  for (const [dir, files] of [...dirs].sort()) {
1264
1265
  console.log(` [${dir}/]`);
1265
1266
  for (const f of files) {
1266
- const total = f.inEdges + f.outEdges;
1267
- const bar = '#'.repeat(Math.min(total, 40));
1267
+ const coupling = f.inEdges + f.outEdges;
1268
+ const bar = '#'.repeat(Math.min(coupling, 40));
1268
1269
  console.log(
1269
- ` ${path.basename(f.file).padEnd(35)} <-${String(f.inEdges).padStart(3)} ->${String(f.outEdges).padStart(3)} ${bar}`,
1270
+ ` ${path.basename(f.file).padEnd(35)} <-${String(f.inEdges).padStart(3)} ->${String(f.outEdges).padStart(3)} =${String(coupling).padStart(3)} ${bar}`,
1270
1271
  );
1271
1272
  }
1272
1273
  }
@@ -1920,6 +1921,7 @@ function explainFunctionImpl(db, target, noTests, getFileLines) {
1920
1921
  export function explainData(target, customDbPath, opts = {}) {
1921
1922
  const db = openReadonlyOrFail(customDbPath);
1922
1923
  const noTests = opts.noTests || false;
1924
+ const depth = opts.depth || 0;
1923
1925
  const kind = isFileLikeTarget(target) ? 'file' : 'function';
1924
1926
 
1925
1927
  const dbPath = findDbPath(customDbPath);
@@ -1949,6 +1951,37 @@ export function explainData(target, customDbPath, opts = {}) {
1949
1951
  ? explainFileImpl(db, target, getFileLines)
1950
1952
  : explainFunctionImpl(db, target, noTests, getFileLines);
1951
1953
 
1954
+ // Recursive dependency explanation for function targets
1955
+ if (kind === 'function' && depth > 0 && results.length > 0) {
1956
+ const visited = new Set(results.map((r) => `${r.name}:${r.file}:${r.line}`));
1957
+
1958
+ function explainCallees(parentResults, currentDepth) {
1959
+ if (currentDepth <= 0) return;
1960
+ for (const r of parentResults) {
1961
+ const newCallees = [];
1962
+ for (const callee of r.callees) {
1963
+ const key = `${callee.name}:${callee.file}:${callee.line}`;
1964
+ if (visited.has(key)) continue;
1965
+ visited.add(key);
1966
+ const calleeResults = explainFunctionImpl(db, callee.name, noTests, getFileLines);
1967
+ const exact = calleeResults.find(
1968
+ (cr) => cr.file === callee.file && cr.line === callee.line,
1969
+ );
1970
+ if (exact) {
1971
+ exact._depth = (r._depth || 0) + 1;
1972
+ newCallees.push(exact);
1973
+ }
1974
+ }
1975
+ if (newCallees.length > 0) {
1976
+ r.depDetails = newCallees;
1977
+ explainCallees(newCallees, currentDepth - 1);
1978
+ }
1979
+ }
1980
+ }
1981
+
1982
+ explainCallees(results, depth);
1983
+ }
1984
+
1952
1985
  db.close();
1953
1986
  return { target, kind, results };
1954
1987
  }
@@ -2008,46 +2041,63 @@ export function explain(target, customDbPath, opts = {}) {
2008
2041
  console.log();
2009
2042
  }
2010
2043
  } else {
2011
- for (const r of data.results) {
2044
+ function printFunctionExplain(r, indent = '') {
2012
2045
  const lineRange = r.endLine ? `${r.line}-${r.endLine}` : `${r.line}`;
2013
2046
  const lineInfo = r.lineCount ? `${r.lineCount} lines` : '';
2014
2047
  const summaryPart = r.summary ? ` | ${r.summary}` : '';
2015
- console.log(`\n# ${r.name} (${r.kind}) ${r.file}:${lineRange}`);
2048
+ const depthLevel = r._depth || 0;
2049
+ const heading = depthLevel === 0 ? '#' : '##'.padEnd(depthLevel + 2, '#');
2050
+ console.log(`\n${indent}${heading} ${r.name} (${r.kind}) ${r.file}:${lineRange}`);
2016
2051
  if (lineInfo || r.summary) {
2017
- console.log(` ${lineInfo}${summaryPart}`);
2052
+ console.log(`${indent} ${lineInfo}${summaryPart}`);
2018
2053
  }
2019
2054
  if (r.signature) {
2020
- if (r.signature.params != null) console.log(` Parameters: (${r.signature.params})`);
2021
- if (r.signature.returnType) console.log(` Returns: ${r.signature.returnType}`);
2055
+ if (r.signature.params != null)
2056
+ console.log(`${indent} Parameters: (${r.signature.params})`);
2057
+ if (r.signature.returnType) console.log(`${indent} Returns: ${r.signature.returnType}`);
2022
2058
  }
2023
2059
 
2024
2060
  if (r.callees.length > 0) {
2025
- console.log(`\n## Calls (${r.callees.length})`);
2061
+ console.log(`\n${indent} Calls (${r.callees.length}):`);
2026
2062
  for (const c of r.callees) {
2027
- console.log(` ${kindIcon(c.kind)} ${c.name} ${c.file}:${c.line}`);
2063
+ console.log(`${indent} ${kindIcon(c.kind)} ${c.name} ${c.file}:${c.line}`);
2028
2064
  }
2029
2065
  }
2030
2066
 
2031
2067
  if (r.callers.length > 0) {
2032
- console.log(`\n## Called by (${r.callers.length})`);
2068
+ console.log(`\n${indent} Called by (${r.callers.length}):`);
2033
2069
  for (const c of r.callers) {
2034
- console.log(` ${kindIcon(c.kind)} ${c.name} ${c.file}:${c.line}`);
2070
+ console.log(`${indent} ${kindIcon(c.kind)} ${c.name} ${c.file}:${c.line}`);
2035
2071
  }
2036
2072
  }
2037
2073
 
2038
2074
  if (r.relatedTests.length > 0) {
2039
2075
  const label = r.relatedTests.length === 1 ? 'file' : 'files';
2040
- console.log(`\n## Tests (${r.relatedTests.length} ${label})`);
2076
+ console.log(`\n${indent} Tests (${r.relatedTests.length} ${label}):`);
2041
2077
  for (const t of r.relatedTests) {
2042
- console.log(` ${t.file}`);
2078
+ console.log(`${indent} ${t.file}`);
2043
2079
  }
2044
2080
  }
2045
2081
 
2046
2082
  if (r.callees.length === 0 && r.callers.length === 0) {
2047
- console.log(` (no call edges found -- may be invoked dynamically or via re-exports)`);
2083
+ console.log(
2084
+ `${indent} (no call edges found -- may be invoked dynamically or via re-exports)`,
2085
+ );
2086
+ }
2087
+
2088
+ // Render recursive dependency details
2089
+ if (r.depDetails && r.depDetails.length > 0) {
2090
+ console.log(`\n${indent} --- Dependencies (depth ${depthLevel + 1}) ---`);
2091
+ for (const dep of r.depDetails) {
2092
+ printFunctionExplain(dep, `${indent} `);
2093
+ }
2048
2094
  }
2049
2095
  console.log();
2050
2096
  }
2097
+
2098
+ for (const r of data.results) {
2099
+ printFunctionExplain(r);
2100
+ }
2051
2101
  }
2052
2102
  }
2053
2103
 
package/src/structure.js CHANGED
@@ -231,7 +231,8 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director
231
231
  */
232
232
  export function structureData(customDbPath, opts = {}) {
233
233
  const db = openReadonlyOrFail(customDbPath);
234
- const filterDir = opts.directory || null;
234
+ const rawDir = opts.directory || null;
235
+ const filterDir = rawDir && normalizePath(rawDir) !== '.' ? rawDir : null;
235
236
  const maxDepth = opts.depth || null;
236
237
  const sortBy = opts.sort || 'files';
237
238
  const noTests = opts.noTests || false;