@optave/codegraph 2.2.2-dev.c252ef9 → 2.2.3-dev.44e8146

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -583,15 +583,16 @@ const { results: fused } = await multiSearchData(
583
583
 
584
584
  ## 🗺️ Roadmap
585
585
 
586
- See **[ROADMAP.md](ROADMAP.md)** for the full development roadmap. Current plan:
586
+ See **[ROADMAP.md](ROADMAP.md)** for the full development roadmap and **[STABILITY.md](STABILITY.md)** for the stability policy and versioning guarantees. Current plan:
587
587
 
588
588
  1. ~~**Rust Core**~~ — **Complete** (v1.3.0) — native tree-sitter parsing via napi-rs, parallel multi-core parsing, incremental re-parsing, import resolution & cycle detection in Rust
589
589
  2. ~~**Foundation Hardening**~~ — **Complete** (v1.4.0) — parser registry, 12-tool MCP server with multi-repo support, test coverage 62%→75%, `apiKeyCommand` secret resolution, global repo registry
590
- 3. **Intelligent Embeddings** — LLM-generated descriptions, hybrid search
591
- 4. **Natural Language Queries** — `codegraph ask` command, conversational sessions
592
- 5. **Expanded Language Support** — 8 new languages (12 → 20)
593
- 6. **GitHub Integration & CI** — reusable GitHub Action, PR review, SARIF output
594
- 7. **Visualization & Advanced** — web UI, dead code detection, monorepo support, agentic search
590
+ 3. **Architectural Refactoring** — parser plugin system, repository pattern, pipeline builder, engine strategy, domain errors, curated API
591
+ 4. **Intelligent Embeddings** — LLM-generated descriptions, hybrid search
592
+ 5. **Natural Language Queries** — `codegraph ask` command, conversational sessions
593
+ 6. **Expanded Language Support** — 8 new languages (12 → 20)
594
+ 7. **GitHub Integration & CI** — reusable GitHub Action, PR review, SARIF output
595
+ 8. **Visualization & Advanced** — web UI, dead code detection, monorepo support, agentic search
595
596
 
596
597
  ## 🤝 Contributing
597
598
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@optave/codegraph",
3
- "version": "2.2.2-dev.c252ef9",
3
+ "version": "2.2.3-dev.44e8146",
4
4
  "description": "Local code graph CLI β€” parse codebases with tree-sitter, build dependency graphs, query them",
5
5
  "type": "module",
6
6
  "main": "src/index.js",
@@ -61,10 +61,10 @@
61
61
  "optionalDependencies": {
62
62
  "@huggingface/transformers": "^3.8.1",
63
63
  "@modelcontextprotocol/sdk": "^1.0.0",
64
- "@optave/codegraph-darwin-arm64": "2.2.2-dev.c252ef9",
65
- "@optave/codegraph-darwin-x64": "2.2.2-dev.c252ef9",
66
- "@optave/codegraph-linux-x64-gnu": "2.2.2-dev.c252ef9",
67
- "@optave/codegraph-win32-x64-msvc": "2.2.2-dev.c252ef9"
64
+ "@optave/codegraph-darwin-arm64": "2.2.3-dev.44e8146",
65
+ "@optave/codegraph-darwin-x64": "2.2.3-dev.44e8146",
66
+ "@optave/codegraph-linux-x64-gnu": "2.2.3-dev.44e8146",
67
+ "@optave/codegraph-win32-x64-msvc": "2.2.3-dev.44e8146"
68
68
  },
69
69
  "devDependencies": {
70
70
  "@biomejs/biome": "^2.4.4",
package/src/builder.js CHANGED
@@ -1,12 +1,11 @@
1
1
  import { createHash } from 'node:crypto';
2
2
  import fs from 'node:fs';
3
- import os from 'node:os';
4
3
  import path from 'node:path';
5
4
  import { loadConfig } from './config.js';
6
5
  import { EXTENSIONS, IGNORE_DIRS, normalizePath } from './constants.js';
7
6
  import { initSchema, openDb } from './db.js';
8
7
  import { readJournal, writeJournalHeader } from './journal.js';
9
- import { debug, warn } from './logger.js';
8
+ import { debug, info, warn } from './logger.js';
10
9
  import { getActiveEngine, parseFilesAuto } from './parser.js';
11
10
  import { computeConfidence, resolveImportPath, resolveImportsBatch } from './resolve.js';
12
11
 
@@ -345,7 +344,7 @@ export async function buildGraph(rootDir, opts = {}) {
345
344
  // Engine selection: 'native', 'wasm', or 'auto' (default)
346
345
  const engineOpts = { engine: opts.engine || 'auto' };
347
346
  const { name: engineName, version: engineVersion } = getActiveEngine(engineOpts);
348
- console.log(`Using ${engineName} engine${engineVersion ? ` (v${engineVersion})` : ''}`);
347
+ info(`Using ${engineName} engine${engineVersion ? ` (v${engineVersion})` : ''}`);
349
348
 
350
349
  const aliases = loadPathAliases(rootDir);
351
350
  // Merge config aliases
@@ -358,7 +357,7 @@ export async function buildGraph(rootDir, opts = {}) {
358
357
  }
359
358
 
360
359
  if (aliases.baseUrl || Object.keys(aliases.paths).length > 0) {
361
- console.log(
360
+ info(
362
361
  `Loaded path aliases: baseUrl=${aliases.baseUrl || 'none'}, ${Object.keys(aliases.paths).length} path mappings`,
363
362
  );
364
363
  }
@@ -366,7 +365,7 @@ export async function buildGraph(rootDir, opts = {}) {
366
365
  const collected = collectFiles(rootDir, [], config, new Set());
367
366
  const files = collected.files;
368
367
  const discoveredDirs = collected.directories;
369
- console.log(`Found ${files.length} files to parse`);
368
+ info(`Found ${files.length} files to parse`);
370
369
 
371
370
  // Check for incremental build
372
371
  const { changed, removed, isFullBuild } = incremental
@@ -397,19 +396,36 @@ export async function buildGraph(rootDir, opts = {}) {
397
396
  /* ignore heal errors */
398
397
  }
399
398
  }
400
- console.log('No changes detected. Graph is up to date.');
399
+ info('No changes detected. Graph is up to date.');
401
400
  db.close();
402
401
  writeJournalHeader(rootDir, Date.now());
403
402
  return;
404
403
  }
405
404
 
405
+ // Check if embeddings table exists (created by `embed`, not by initSchema)
406
+ let hasEmbeddings = false;
407
+ try {
408
+ db.prepare('SELECT 1 FROM embeddings LIMIT 1').get();
409
+ hasEmbeddings = true;
410
+ } catch {
411
+ /* table doesn't exist */
412
+ }
413
+
406
414
  if (isFullBuild) {
415
+ const deletions =
416
+ 'PRAGMA foreign_keys = OFF; DELETE FROM node_metrics; DELETE FROM edges; DELETE FROM nodes; PRAGMA foreign_keys = ON;';
407
417
  db.exec(
408
- 'PRAGMA foreign_keys = OFF; DELETE FROM node_metrics; DELETE FROM edges; DELETE FROM nodes; PRAGMA foreign_keys = ON;',
418
+ hasEmbeddings
419
+ ? `${deletions.replace('PRAGMA foreign_keys = ON;', '')} DELETE FROM embeddings; PRAGMA foreign_keys = ON;`
420
+ : deletions,
409
421
  );
410
422
  } else {
411
- console.log(`Incremental: ${parseChanges.length} changed, ${removed.length} removed`);
412
- // Remove metrics/edges/nodes for changed and removed files
423
+ info(`Incremental: ${parseChanges.length} changed, ${removed.length} removed`);
424
+ // Remove embeddings/metrics/edges/nodes for changed and removed files
425
+ // Embeddings must be deleted BEFORE nodes (we need node IDs to find them)
426
+ const deleteEmbeddingsForFile = hasEmbeddings
427
+ ? db.prepare('DELETE FROM embeddings WHERE node_id IN (SELECT id FROM nodes WHERE file = ?)')
428
+ : null;
413
429
  const deleteNodesForFile = db.prepare('DELETE FROM nodes WHERE file = ?');
414
430
  const deleteEdgesForFile = db.prepare(`
415
431
  DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = @f)
@@ -419,12 +435,14 @@ export async function buildGraph(rootDir, opts = {}) {
419
435
  'DELETE FROM node_metrics WHERE node_id IN (SELECT id FROM nodes WHERE file = ?)',
420
436
  );
421
437
  for (const relPath of removed) {
438
+ deleteEmbeddingsForFile?.run(relPath);
422
439
  deleteEdgesForFile.run({ f: relPath });
423
440
  deleteMetricsForFile.run(relPath);
424
441
  deleteNodesForFile.run(relPath);
425
442
  }
426
443
  for (const item of parseChanges) {
427
444
  const relPath = item.relPath || normalizePath(path.relative(rootDir, item.file));
445
+ deleteEmbeddingsForFile?.run(relPath);
428
446
  deleteEdgesForFile.run({ f: relPath });
429
447
  deleteMetricsForFile.run(relPath);
430
448
  deleteNodesForFile.run(relPath);
@@ -528,7 +546,7 @@ export async function buildGraph(rootDir, opts = {}) {
528
546
 
529
547
  const parsed = allSymbols.size;
530
548
  const skipped = filesToParse.length - parsed;
531
- console.log(`Parsed ${parsed} files (${skipped} skipped)`);
549
+ info(`Parsed ${parsed} files (${skipped} skipped)`);
532
550
 
533
551
  // Clean up removed file hashes
534
552
  if (upsertHash && removed.length > 0) {
@@ -822,15 +840,33 @@ export async function buildGraph(rootDir, opts = {}) {
822
840
  }
823
841
 
824
842
  const nodeCount = db.prepare('SELECT COUNT(*) as c FROM nodes').get().c;
825
- console.log(`Graph built: ${nodeCount} nodes, ${edgeCount} edges`);
826
- console.log(`Stored in ${dbPath}`);
843
+ info(`Graph built: ${nodeCount} nodes, ${edgeCount} edges`);
844
+ info(`Stored in ${dbPath}`);
845
+
846
+ // Warn about orphaned embeddings that no longer match any node
847
+ if (hasEmbeddings) {
848
+ try {
849
+ const orphaned = db
850
+ .prepare('SELECT COUNT(*) as c FROM embeddings WHERE node_id NOT IN (SELECT id FROM nodes)')
851
+ .get().c;
852
+ if (orphaned > 0) {
853
+ warn(
854
+ `${orphaned} embeddings are orphaned (nodes changed). Run "codegraph embed" to refresh.`,
855
+ );
856
+ }
857
+ } catch {
858
+ /* ignore — embeddings table may have been dropped */
859
+ }
860
+ }
861
+
827
862
  db.close();
828
863
 
829
864
  // Write journal header after successful build
830
865
  writeJournalHeader(rootDir, Date.now());
831
866
 
832
867
  if (!opts.skipRegistry) {
833
- const tmpDir = path.resolve(os.tmpdir());
868
+ const { tmpdir } = await import('node:os');
869
+ const tmpDir = path.resolve(tmpdir());
834
870
  const resolvedRoot = path.resolve(rootDir);
835
871
  if (resolvedRoot.startsWith(tmpDir)) {
836
872
  debug(`Skipping auto-registration for temp directory: ${resolvedRoot}`);
package/src/cli.js CHANGED
@@ -5,9 +5,10 @@ import path from 'node:path';
5
5
  import Database from 'better-sqlite3';
6
6
  import { Command } from 'commander';
7
7
  import { buildGraph } from './builder.js';
8
+ import { loadConfig } from './config.js';
8
9
  import { findCycles, formatCycles } from './cycles.js';
9
10
  import { findDbPath } from './db.js';
10
- import { buildEmbeddings, MODELS, search } from './embedder.js';
11
+ import { buildEmbeddings, EMBEDDING_STRATEGIES, MODELS, search } from './embedder.js';
11
12
  import { exportDOT, exportJSON, exportMermaid } from './export.js';
12
13
  import { setVerbose } from './logger.js';
13
14
  import {
@@ -36,6 +37,8 @@ import { watchProject } from './watcher.js';
36
37
  const __cliDir = path.dirname(new URL(import.meta.url).pathname.replace(/^\/([A-Z]:)/i, '$1'));
37
38
  const pkg = JSON.parse(fs.readFileSync(path.join(__cliDir, '..', 'package.json'), 'utf-8'));
38
39
 
40
+ const config = loadConfig(process.cwd());
41
+
39
42
  const program = new Command();
40
43
  program
41
44
  .name('codegraph')
@@ -48,6 +51,18 @@ program
48
51
  if (opts.verbose) setVerbose(true);
49
52
  });
50
53
 
54
+ /**
55
+ * Resolve the effective noTests value: CLI flag > config > false.
56
+ * Commander sets opts.tests to false when --no-tests is passed.
57
+ * When --include-tests is passed, always return false (include tests).
58
+ * Otherwise, fall back to config.query.excludeTests.
59
+ */
60
+ function resolveNoTests(opts) {
61
+ if (opts.includeTests) return false;
62
+ if (opts.tests === false) return true;
63
+ return config.query?.excludeTests || false;
64
+ }
65
+
51
66
  program
52
67
  .command('build [dir]')
53
68
  .description('Parse repo and build graph in .codegraph/graph.db')
@@ -63,9 +78,10 @@ program
63
78
  .description('Find a function/class, show callers and callees')
64
79
  .option('-d, --db <path>', 'Path to graph.db')
65
80
  .option('-T, --no-tests', 'Exclude test/spec files from results')
81
+ .option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
66
82
  .option('-j, --json', 'Output as JSON')
67
83
  .action((name, opts) => {
68
- queryName(name, opts.db, { noTests: !opts.tests, json: opts.json });
84
+ queryName(name, opts.db, { noTests: resolveNoTests(opts), json: opts.json });
69
85
  });
70
86
 
71
87
  program
@@ -73,9 +89,10 @@ program
73
89
  .description('Show what depends on this file (transitive)')
74
90
  .option('-d, --db <path>', 'Path to graph.db')
75
91
  .option('-T, --no-tests', 'Exclude test/spec files from results')
92
+ .option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
76
93
  .option('-j, --json', 'Output as JSON')
77
94
  .action((file, opts) => {
78
- impactAnalysis(file, opts.db, { noTests: !opts.tests, json: opts.json });
95
+ impactAnalysis(file, opts.db, { noTests: resolveNoTests(opts), json: opts.json });
79
96
  });
80
97
 
81
98
  program
@@ -84,9 +101,13 @@ program
84
101
  .option('-d, --db <path>', 'Path to graph.db')
85
102
  .option('-n, --limit <number>', 'Number of top nodes', '20')
86
103
  .option('-T, --no-tests', 'Exclude test/spec files from results')
104
+ .option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
87
105
  .option('-j, --json', 'Output as JSON')
88
106
  .action((opts) => {
89
- moduleMap(opts.db, parseInt(opts.limit, 10), { noTests: !opts.tests, json: opts.json });
107
+ moduleMap(opts.db, parseInt(opts.limit, 10), {
108
+ noTests: resolveNoTests(opts),
109
+ json: opts.json,
110
+ });
90
111
  });
91
112
 
92
113
  program
@@ -94,9 +115,10 @@ program
94
115
  .description('Show graph health overview: nodes, edges, languages, cycles, hotspots, embeddings')
95
116
  .option('-d, --db <path>', 'Path to graph.db')
96
117
  .option('-T, --no-tests', 'Exclude test/spec files from results')
118
+ .option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
97
119
  .option('-j, --json', 'Output as JSON')
98
120
  .action((opts) => {
99
- stats(opts.db, { noTests: !opts.tests, json: opts.json });
121
+ stats(opts.db, { noTests: resolveNoTests(opts), json: opts.json });
100
122
  });
101
123
 
102
124
  program
@@ -104,9 +126,10 @@ program
104
126
  .description('Show what this file imports and what imports it')
105
127
  .option('-d, --db <path>', 'Path to graph.db')
106
128
  .option('-T, --no-tests', 'Exclude test/spec files from results')
129
+ .option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
107
130
  .option('-j, --json', 'Output as JSON')
108
131
  .action((file, opts) => {
109
- fileDeps(file, opts.db, { noTests: !opts.tests, json: opts.json });
132
+ fileDeps(file, opts.db, { noTests: resolveNoTests(opts), json: opts.json });
110
133
  });
111
134
 
112
135
  program
@@ -117,6 +140,7 @@ program
117
140
  .option('-f, --file <path>', 'Scope search to functions in this file (partial match)')
118
141
  .option('-k, --kind <kind>', 'Filter to a specific symbol kind')
119
142
  .option('-T, --no-tests', 'Exclude test/spec files from results')
143
+ .option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
120
144
  .option('-j, --json', 'Output as JSON')
121
145
  .action((name, opts) => {
122
146
  if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) {
@@ -127,7 +151,7 @@ program
127
151
  depth: parseInt(opts.depth, 10),
128
152
  file: opts.file,
129
153
  kind: opts.kind,
130
- noTests: !opts.tests,
154
+ noTests: resolveNoTests(opts),
131
155
  json: opts.json,
132
156
  });
133
157
  });
@@ -140,6 +164,7 @@ program
140
164
  .option('-f, --file <path>', 'Scope search to functions in this file (partial match)')
141
165
  .option('-k, --kind <kind>', 'Filter to a specific symbol kind')
142
166
  .option('-T, --no-tests', 'Exclude test/spec files from results')
167
+ .option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
143
168
  .option('-j, --json', 'Output as JSON')
144
169
  .action((name, opts) => {
145
170
  if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) {
@@ -150,7 +175,7 @@ program
150
175
  depth: parseInt(opts.depth, 10),
151
176
  file: opts.file,
152
177
  kind: opts.kind,
153
- noTests: !opts.tests,
178
+ noTests: resolveNoTests(opts),
154
179
  json: opts.json,
155
180
  });
156
181
  });
@@ -163,8 +188,9 @@ program
163
188
  .option('-f, --file <path>', 'Scope search to functions in this file (partial match)')
164
189
  .option('-k, --kind <kind>', 'Filter to a specific symbol kind')
165
190
  .option('--no-source', 'Metadata only (skip source extraction)')
166
- .option('--include-tests', 'Include test source code')
191
+ .option('--with-test-source', 'Include test source code')
167
192
  .option('-T, --no-tests', 'Exclude test/spec files from results')
193
+ .option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
168
194
  .option('-j, --json', 'Output as JSON')
169
195
  .action((name, opts) => {
170
196
  if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) {
@@ -176,8 +202,8 @@ program
176
202
  file: opts.file,
177
203
  kind: opts.kind,
178
204
  noSource: !opts.source,
179
- noTests: !opts.tests,
180
- includeTests: opts.includeTests,
205
+ noTests: resolveNoTests(opts),
206
+ includeTests: opts.withTestSource,
181
207
  json: opts.json,
182
208
  });
183
209
  });
@@ -186,10 +212,16 @@ program
186
212
  .command('explain <target>')
187
213
  .description('Structural summary of a file or function (no LLM needed)')
188
214
  .option('-d, --db <path>', 'Path to graph.db')
215
+ .option('--depth <n>', 'Recursively explain dependencies up to N levels deep', '0')
189
216
  .option('-T, --no-tests', 'Exclude test/spec files from results')
217
+ .option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
190
218
  .option('-j, --json', 'Output as JSON')
191
219
  .action((target, opts) => {
192
- explain(target, opts.db, { noTests: !opts.tests, json: opts.json });
220
+ explain(target, opts.db, {
221
+ depth: parseInt(opts.depth, 10),
222
+ noTests: resolveNoTests(opts),
223
+ json: opts.json,
224
+ });
193
225
  });
194
226
 
195
227
  program
@@ -198,6 +230,7 @@ program
198
230
  .option('-d, --db <path>', 'Path to graph.db')
199
231
  .option('-f, --file <path>', 'File overview: list symbols, imports, exports')
200
232
  .option('-T, --no-tests', 'Exclude test/spec files from results')
233
+ .option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
201
234
  .option('-j, --json', 'Output as JSON')
202
235
  .action((name, opts) => {
203
236
  if (!name && !opts.file) {
@@ -205,7 +238,7 @@ program
205
238
  process.exit(1);
206
239
  }
207
240
  const target = opts.file || name;
208
- where(target, opts.db, { file: !!opts.file, noTests: !opts.tests, json: opts.json });
241
+ where(target, opts.db, { file: !!opts.file, noTests: resolveNoTests(opts), json: opts.json });
209
242
  });
210
243
 
211
244
  program
@@ -215,6 +248,7 @@ program
215
248
  .option('--staged', 'Analyze staged changes instead of unstaged')
216
249
  .option('--depth <n>', 'Max transitive caller depth', '3')
217
250
  .option('-T, --no-tests', 'Exclude test/spec files from results')
251
+ .option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
218
252
  .option('-j, --json', 'Output as JSON')
219
253
  .option('-f, --format <format>', 'Output format: text, mermaid, json', 'text')
220
254
  .action((ref, opts) => {
@@ -222,7 +256,7 @@ program
222
256
  ref,
223
257
  staged: opts.staged,
224
258
  depth: parseInt(opts.depth, 10),
225
- noTests: !opts.tests,
259
+ noTests: resolveNoTests(opts),
226
260
  json: opts.json,
227
261
  format: opts.format,
228
262
  });
@@ -237,10 +271,16 @@ program
237
271
  .option('-f, --format <format>', 'Output format: dot, mermaid, json', 'dot')
238
272
  .option('--functions', 'Function-level graph instead of file-level')
239
273
  .option('-T, --no-tests', 'Exclude test/spec files')
274
+ .option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
275
+ .option('--min-confidence <score>', 'Minimum edge confidence threshold (default: 0.5)', '0.5')
240
276
  .option('-o, --output <file>', 'Write to file instead of stdout')
241
277
  .action((opts) => {
242
278
  const db = new Database(findDbPath(opts.db), { readonly: true });
243
- const exportOpts = { fileLevel: !opts.functions, noTests: !opts.tests };
279
+ const exportOpts = {
280
+ fileLevel: !opts.functions,
281
+ noTests: resolveNoTests(opts),
282
+ minConfidence: parseFloat(opts.minConfidence),
283
+ };
244
284
 
245
285
  let output;
246
286
  switch (opts.format) {
@@ -271,10 +311,11 @@ program
271
311
  .option('-d, --db <path>', 'Path to graph.db')
272
312
  .option('--functions', 'Function-level cycle detection')
273
313
  .option('-T, --no-tests', 'Exclude test/spec files')
314
+ .option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
274
315
  .option('-j, --json', 'Output as JSON')
275
316
  .action((opts) => {
276
317
  const db = new Database(findDbPath(opts.db), { readonly: true });
277
- const cycles = findCycles(db, { fileLevel: !opts.functions, noTests: !opts.tests });
318
+ const cycles = findCycles(db, { fileLevel: !opts.functions, noTests: resolveNoTests(opts) });
278
319
  db.close();
279
320
 
280
321
  if (opts.json) {
@@ -376,10 +417,13 @@ program
376
417
  .action(() => {
377
418
  console.log('\nAvailable embedding models:\n');
378
419
  for (const [key, config] of Object.entries(MODELS)) {
379
- const def = key === 'nomic-v1.5' ? ' (default)' : '';
380
- console.log(` ${key.padEnd(12)} ${String(config.dim).padStart(4)}d ${config.desc}${def}`);
420
+ const def = key === 'minilm' ? ' (default)' : '';
421
+ const ctx = config.contextWindow ? `${config.contextWindow} ctx` : '';
422
+ console.log(
423
+ ` ${key.padEnd(12)} ${String(config.dim).padStart(4)}d ${ctx.padEnd(9)} ${config.desc}${def}`,
424
+ );
381
425
  }
382
- console.log('\nUsage: codegraph embed --model <name>');
426
+ console.log('\nUsage: codegraph embed --model <name> --strategy <structured|source>');
383
427
  console.log(' codegraph search "query" --model <name>\n');
384
428
  });
385
429
 
@@ -390,12 +434,23 @@ program
390
434
  )
391
435
  .option(
392
436
  '-m, --model <name>',
393
- 'Embedding model: minilm, jina-small, jina-base, jina-code, nomic, nomic-v1.5 (default), bge-large. Run `codegraph models` for details',
394
- 'nomic-v1.5',
437
+ 'Embedding model: minilm (default), jina-small, jina-base, jina-code, nomic, nomic-v1.5, bge-large. Run `codegraph models` for details',
438
+ 'minilm',
439
+ )
440
+ .option(
441
+ '-s, --strategy <name>',
442
+ `Embedding strategy: ${EMBEDDING_STRATEGIES.join(', ')}. "structured" uses graph context (callers/callees), "source" embeds raw code`,
443
+ 'structured',
395
444
  )
396
445
  .action(async (dir, opts) => {
446
+ if (!EMBEDDING_STRATEGIES.includes(opts.strategy)) {
447
+ console.error(
448
+ `Unknown strategy: ${opts.strategy}. Available: ${EMBEDDING_STRATEGIES.join(', ')}`,
449
+ );
450
+ process.exit(1);
451
+ }
397
452
  const root = path.resolve(dir || '.');
398
- await buildEmbeddings(root, opts.model);
453
+ await buildEmbeddings(root, opts.model, undefined, { strategy: opts.strategy });
399
454
  });
400
455
 
401
456
  program
@@ -405,6 +460,7 @@ program
405
460
  .option('-m, --model <name>', 'Override embedding model (auto-detects from DB)')
406
461
  .option('-n, --limit <number>', 'Max results', '15')
407
462
  .option('-T, --no-tests', 'Exclude test/spec files from results')
463
+ .option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
408
464
  .option('--min-score <score>', 'Minimum similarity threshold', '0.2')
409
465
  .option('-k, --kind <kind>', 'Filter by kind: function, method, class')
410
466
  .option('--file <pattern>', 'Filter by file path pattern')
@@ -412,7 +468,7 @@ program
412
468
  .action(async (query, opts) => {
413
469
  await search(query, opts.db, {
414
470
  limit: parseInt(opts.limit, 10),
415
- noTests: !opts.tests,
471
+ noTests: resolveNoTests(opts),
416
472
  minScore: parseFloat(opts.minScore),
417
473
  model: opts.model,
418
474
  kind: opts.kind,
@@ -430,6 +486,7 @@ program
430
486
  .option('--depth <n>', 'Max directory depth')
431
487
  .option('--sort <metric>', 'Sort by: cohesion | fan-in | fan-out | density | files', 'files')
432
488
  .option('-T, --no-tests', 'Exclude test/spec files')
489
+ .option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
433
490
  .option('-j, --json', 'Output as JSON')
434
491
  .action(async (dir, opts) => {
435
492
  const { structureData, formatStructure } = await import('./structure.js');
@@ -437,7 +494,7 @@ program
437
494
  directory: dir,
438
495
  depth: opts.depth ? parseInt(opts.depth, 10) : undefined,
439
496
  sort: opts.sort,
440
- noTests: !opts.tests,
497
+ noTests: resolveNoTests(opts),
441
498
  });
442
499
  if (opts.json) {
443
500
  console.log(JSON.stringify(data, null, 2));
@@ -456,6 +513,7 @@ program
456
513
  .option('--metric <metric>', 'fan-in | fan-out | density | coupling', 'fan-in')
457
514
  .option('--level <level>', 'file | directory', 'file')
458
515
  .option('-T, --no-tests', 'Exclude test/spec files from results')
516
+ .option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
459
517
  .option('-j, --json', 'Output as JSON')
460
518
  .action(async (opts) => {
461
519
  const { hotspotsData, formatHotspots } = await import('./structure.js');
@@ -463,7 +521,7 @@ program
463
521
  metric: opts.metric,
464
522
  level: opts.level,
465
523
  limit: parseInt(opts.limit, 10),
466
- noTests: !opts.tests,
524
+ noTests: resolveNoTests(opts),
467
525
  });
468
526
  if (opts.json) {
469
527
  console.log(JSON.stringify(data, null, 2));
package/src/config.js CHANGED
@@ -18,6 +18,7 @@ export const DEFAULTS = {
18
18
  query: {
19
19
  defaultDepth: 3,
20
20
  defaultLimit: 20,
21
+ excludeTests: false,
21
22
  },
22
23
  embeddings: { model: 'nomic-v1.5', llmProvider: null },
23
24
  llm: { provider: null, model: null, baseUrl: null, apiKey: null, apiKeyCommand: null },
package/src/embedder.js CHANGED
@@ -4,6 +4,18 @@ import Database from 'better-sqlite3';
4
4
  import { findDbPath, openReadonlyOrFail } from './db.js';
5
5
  import { warn } from './logger.js';
6
6
 
7
+ /**
8
+ * Split an identifier into readable words.
9
+ * camelCase/PascalCase → "camel Case", snake_case → "snake case", kebab-case → "kebab case"
10
+ */
11
+ function splitIdentifier(name) {
12
+ return name
13
+ .replace(/([a-z])([A-Z])/g, '$1 $2')
14
+ .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2')
15
+ .replace(/[_-]+/g, ' ')
16
+ .trim();
17
+ }
18
+
7
19
  // Lazy-load transformers (heavy, optional module)
8
20
  let pipeline = null;
9
21
  let _cos_sim = null;
@@ -14,48 +26,57 @@ export const MODELS = {
14
26
  minilm: {
15
27
  name: 'Xenova/all-MiniLM-L6-v2',
16
28
  dim: 384,
29
+ contextWindow: 256,
17
30
  desc: 'Smallest, fastest (~23MB). General text.',
18
31
  quantized: true,
19
32
  },
20
33
  'jina-small': {
21
34
  name: 'Xenova/jina-embeddings-v2-small-en',
22
35
  dim: 512,
36
+ contextWindow: 8192,
23
37
  desc: 'Small, good quality (~33MB). General text.',
24
38
  quantized: false,
25
39
  },
26
40
  'jina-base': {
27
41
  name: 'Xenova/jina-embeddings-v2-base-en',
28
42
  dim: 768,
43
+ contextWindow: 8192,
29
44
  desc: 'Good quality (~137MB). General text, 8192 token context.',
30
45
  quantized: false,
31
46
  },
32
47
  'jina-code': {
33
48
  name: 'Xenova/jina-embeddings-v2-base-code',
34
49
  dim: 768,
50
+ contextWindow: 8192,
35
51
  desc: 'Code-aware (~137MB). Trained on code+text, best for code search.',
36
52
  quantized: false,
37
53
  },
38
54
  nomic: {
39
55
  name: 'Xenova/nomic-embed-text-v1',
40
56
  dim: 768,
57
+ contextWindow: 8192,
41
58
  desc: 'Good local quality (~137MB). 8192 context.',
42
59
  quantized: false,
43
60
  },
44
61
  'nomic-v1.5': {
45
62
  name: 'nomic-ai/nomic-embed-text-v1.5',
46
63
  dim: 768,
64
+ contextWindow: 8192,
47
65
  desc: 'Improved nomic (~137MB). Matryoshka dimensions, 8192 context.',
48
66
  quantized: false,
49
67
  },
50
68
  'bge-large': {
51
69
  name: 'Xenova/bge-large-en-v1.5',
52
70
  dim: 1024,
71
+ contextWindow: 512,
53
72
  desc: 'Best general retrieval (~335MB). Top MTEB scores.',
54
73
  quantized: false,
55
74
  },
56
75
  };
57
76
 
58
- export const DEFAULT_MODEL = 'nomic-v1.5';
77
+ export const EMBEDDING_STRATEGIES = ['structured', 'source'];
78
+
79
+ export const DEFAULT_MODEL = 'minilm';
59
80
  const BATCH_SIZE_MAP = {
60
81
  minilm: 32,
61
82
  'jina-small': 16,
@@ -77,6 +98,108 @@ function getModelConfig(modelKey) {
77
98
  return config;
78
99
  }
79
100
 
101
+ /**
102
+ * Rough token estimate (~4 chars per token for code/English).
103
+ * Conservative — avoids adding a tokenizer dependency.
104
+ */
105
+ export function estimateTokens(text) {
106
+ return Math.ceil(text.length / 4);
107
+ }
108
+
109
+ /**
110
+ * Extract leading comment text (JSDoc, //, #, etc.) above a function line.
111
+ * Returns the cleaned comment text or null if none found.
112
+ */
113
+ function extractLeadingComment(lines, fnLineIndex) {
114
+ const raw = [];
115
+ for (let i = fnLineIndex - 1; i >= Math.max(0, fnLineIndex - 15); i--) {
116
+ const trimmed = lines[i].trim();
117
+ if (/^(\/\/|\/\*|\*\/|\*|#|\/\/\/)/.test(trimmed)) {
118
+ raw.unshift(trimmed);
119
+ } else if (trimmed === '') {
120
+ if (raw.length > 0) break;
121
+ } else {
122
+ break;
123
+ }
124
+ }
125
+ if (raw.length === 0) return null;
126
+ return raw
127
+ .map((line) =>
128
+ line
129
+ .replace(/^\/\*\*?\s?|\*\/$/g, '') // opening /** or /* and closing */
130
+ .replace(/^\*\s?/, '') // middle * lines
131
+ .replace(/^\/\/\/?\s?/, '') // // or ///
132
+ .replace(/^#\s?/, '') // # (Python/Ruby)
133
+ .trim(),
134
+ )
135
+ .filter((l) => l.length > 0)
136
+ .join(' ');
137
+ }
138
+
139
+ /**
140
+ * Build graph-enriched text for a symbol using dependency context.
141
+ * Produces compact, semantic text (~100 tokens) instead of full source code.
142
+ */
143
+ function buildStructuredText(node, file, lines, calleesStmt, callersStmt) {
144
+ const readable = splitIdentifier(node.name);
145
+ const parts = [`${node.kind} ${node.name} (${readable}) in ${file}`];
146
+ const startLine = Math.max(0, node.line - 1);
147
+
148
+ // Extract parameters from signature (best-effort, single-line)
149
+ const sigLine = lines[startLine] || '';
150
+ const paramMatch = sigLine.match(/\(([^)]*)\)/);
151
+ if (paramMatch?.[1]?.trim()) {
152
+ parts.push(`Parameters: ${paramMatch[1].trim()}`);
153
+ }
154
+
155
+ // Graph context: callees (capped at 10)
156
+ const callees = calleesStmt.all(node.id);
157
+ if (callees.length > 0) {
158
+ parts.push(
159
+ `Calls: ${callees
160
+ .slice(0, 10)
161
+ .map((c) => c.name)
162
+ .join(', ')}`,
163
+ );
164
+ }
165
+
166
+ // Graph context: callers (capped at 10)
167
+ const callers = callersStmt.all(node.id);
168
+ if (callers.length > 0) {
169
+ parts.push(
170
+ `Called by: ${callers
171
+ .slice(0, 10)
172
+ .map((c) => c.name)
173
+ .join(', ')}`,
174
+ );
175
+ }
176
+
177
+ // Leading comment (high semantic value) or first few lines of code
178
+ const comment = extractLeadingComment(lines, startLine);
179
+ if (comment) {
180
+ parts.push(comment);
181
+ } else {
182
+ const endLine = Math.min(lines.length, startLine + 4);
183
+ const snippet = lines.slice(startLine, endLine).join('\n').trim();
184
+ if (snippet) parts.push(snippet);
185
+ }
186
+
187
+ return parts.join('\n');
188
+ }
189
+
190
+ /**
191
+ * Build raw source-code text for a symbol (original strategy).
192
+ */
193
+ function buildSourceText(node, file, lines) {
194
+ const startLine = Math.max(0, node.line - 1);
195
+ const endLine = node.end_line
196
+ ? Math.min(lines.length, node.end_line)
197
+ : Math.min(lines.length, startLine + 15);
198
+ const context = lines.slice(startLine, endLine).join('\n');
199
+ const readable = splitIdentifier(node.name);
200
+ return `${node.kind} ${node.name} (${readable}) in ${file}\n${context}`;
201
+ }
202
+
80
203
  /**
81
204
  * Lazy-load @huggingface/transformers.
82
205
  * This is an optional dependency — gives a clear error if not installed.
@@ -103,8 +226,27 @@ async function loadModel(modelKey) {
103
226
  _cos_sim = transformers.cos_sim;
104
227
 
105
228
  console.log(`Loading embedding model: ${config.name} (${config.dim}d)...`);
106
- const opts = config.quantized ? { quantized: true } : {};
107
- extractor = await pipeline('feature-extraction', config.name, opts);
229
+ const pipelineOpts = config.quantized ? { quantized: true } : {};
230
+ try {
231
+ extractor = await pipeline('feature-extraction', config.name, pipelineOpts);
232
+ } catch (err) {
233
+ const msg = err.message || String(err);
234
+ if (msg.includes('Unauthorized') || msg.includes('401') || msg.includes('gated')) {
235
+ console.error(
236
+ `\nModel "${config.name}" requires authentication.\n` +
237
+ `This model is gated on HuggingFace and needs an access token.\n\n` +
238
+ `Options:\n` +
239
+ ` 1. Set HF_TOKEN env var: export HF_TOKEN=hf_...\n` +
240
+ ` 2. Use a public model instead: codegraph embed --model minilm\n`,
241
+ );
242
+ } else {
243
+ console.error(
244
+ `\nFailed to load model "${config.name}": ${msg}\n` +
245
+ `Try a different model: codegraph embed --model minilm\n`,
246
+ );
247
+ }
248
+ process.exit(1);
249
+ }
108
250
  activeModel = config.name;
109
251
  console.log('Model loaded.');
110
252
  return { extractor, config };
@@ -172,10 +314,14 @@ function initEmbeddingsSchema(db) {
172
314
 
173
315
  /**
174
316
  * Build embeddings for all functions/methods/classes in the graph.
317
+ * @param {string} rootDir - Project root directory
318
+ * @param {string} modelKey - Model identifier from MODELS registry
319
+ * @param {string} [customDbPath] - Override path to graph.db
320
+ * @param {object} [options] - Embedding options
321
+ * @param {string} [options.strategy='structured'] - 'structured' (graph-enriched) or 'source' (raw code)
175
322
  */
176
- export async function buildEmbeddings(rootDir, modelKey, customDbPath) {
177
- // path already imported at top
178
- // fs already imported at top
323
+ export async function buildEmbeddings(rootDir, modelKey, customDbPath, options = {}) {
324
+ const strategy = options.strategy || 'structured';
179
325
  const dbPath = customDbPath || findDbPath(null);
180
326
 
181
327
  const db = new Database(dbPath);
@@ -190,7 +336,24 @@ export async function buildEmbeddings(rootDir, modelKey, customDbPath) {
190
336
  )
191
337
  .all();
192
338
 
193
- console.log(`Building embeddings for ${nodes.length} symbols...`);
339
+ console.log(`Building embeddings for ${nodes.length} symbols (strategy: ${strategy})...`);
340
+
341
+ // Prepare graph-context queries for structured strategy
342
+ let calleesStmt, callersStmt;
343
+ if (strategy === 'structured') {
344
+ calleesStmt = db.prepare(`
345
+ SELECT DISTINCT n.name FROM edges e
346
+ JOIN nodes n ON e.target_id = n.id
347
+ WHERE e.source_id = ? AND e.kind = 'calls'
348
+ ORDER BY n.name
349
+ `);
350
+ callersStmt = db.prepare(`
351
+ SELECT DISTINCT n.name FROM edges e
352
+ JOIN nodes n ON e.source_id = n.id
353
+ WHERE e.target_id = ? AND e.kind = 'calls'
354
+ ORDER BY n.name
355
+ `);
356
+ }
194
357
 
195
358
  const byFile = new Map();
196
359
  for (const node of nodes) {
@@ -201,6 +364,9 @@ export async function buildEmbeddings(rootDir, modelKey, customDbPath) {
201
364
  const texts = [];
202
365
  const nodeIds = [];
203
366
  const previews = [];
367
+ const config = getModelConfig(modelKey);
368
+ const contextWindow = config.contextWindow;
369
+ let overflowCount = 0;
204
370
 
205
371
  for (const [file, fileNodes] of byFile) {
206
372
  const fullPath = path.join(rootDir, file);
@@ -213,19 +379,31 @@ export async function buildEmbeddings(rootDir, modelKey, customDbPath) {
213
379
  }
214
380
 
215
381
  for (const node of fileNodes) {
216
- const startLine = Math.max(0, node.line - 1);
217
- const endLine = node.end_line
218
- ? Math.min(lines.length, node.end_line)
219
- : Math.min(lines.length, startLine + 15);
220
- const context = lines.slice(startLine, endLine).join('\n');
382
+ let text =
383
+ strategy === 'structured'
384
+ ? buildStructuredText(node, file, lines, calleesStmt, callersStmt)
385
+ : buildSourceText(node, file, lines);
386
+
387
+ // Detect and handle context window overflow
388
+ const tokens = estimateTokens(text);
389
+ if (tokens > contextWindow) {
390
+ overflowCount++;
391
+ const maxChars = contextWindow * 4;
392
+ text = text.slice(0, maxChars);
393
+ }
221
394
 
222
- const text = `${node.kind} ${node.name} in ${file}\n${context}`;
223
395
  texts.push(text);
224
396
  nodeIds.push(node.id);
225
397
  previews.push(`${node.name} (${node.kind}) -- ${file}:${node.line}`);
226
398
  }
227
399
  }
228
400
 
401
+ if (overflowCount > 0) {
402
+ warn(
403
+ `${overflowCount} symbol(s) exceeded model context window (${contextWindow} tokens) and were truncated`,
404
+ );
405
+ }
406
+
229
407
  console.log(`Embedding ${texts.length} symbols...`);
230
408
  const { vectors, dim } = await embed(texts, modelKey);
231
409
 
@@ -237,16 +415,19 @@ export async function buildEmbeddings(rootDir, modelKey, customDbPath) {
237
415
  for (let i = 0; i < vectors.length; i++) {
238
416
  insert.run(nodeIds[i], Buffer.from(vectors[i].buffer), previews[i]);
239
417
  }
240
- const config = getModelConfig(modelKey);
241
418
  insertMeta.run('model', config.name);
242
419
  insertMeta.run('dim', String(dim));
243
420
  insertMeta.run('count', String(vectors.length));
421
+ insertMeta.run('strategy', strategy);
244
422
  insertMeta.run('built_at', new Date().toISOString());
423
+ if (overflowCount > 0) {
424
+ insertMeta.run('truncated_count', String(overflowCount));
425
+ }
245
426
  });
246
427
  insertAll();
247
428
 
248
429
  console.log(
249
- `\nStored ${vectors.length} embeddings (${dim}d, ${getModelConfig(modelKey).name}) in graph.db`,
430
+ `\nStored ${vectors.length} embeddings (${dim}d, ${config.name}, strategy: ${strategy}) in graph.db`,
250
431
  );
251
432
  db.close();
252
433
  }
package/src/export.js CHANGED
@@ -1,12 +1,15 @@
1
1
  import path from 'node:path';
2
2
  import { isTestFile } from './queries.js';
3
3
 
4
+ const DEFAULT_MIN_CONFIDENCE = 0.5;
5
+
4
6
  /**
5
7
  * Export the dependency graph in DOT (Graphviz) format.
6
8
  */
7
9
  export function exportDOT(db, opts = {}) {
8
10
  const fileLevel = opts.fileLevel !== false;
9
11
  const noTests = opts.noTests || false;
12
+ const minConf = opts.minConfidence ?? DEFAULT_MIN_CONFIDENCE;
10
13
  const lines = [
11
14
  'digraph codegraph {',
12
15
  ' rankdir=LR;',
@@ -23,8 +26,9 @@ export function exportDOT(db, opts = {}) {
23
26
  JOIN nodes n1 ON e.source_id = n1.id
24
27
  JOIN nodes n2 ON e.target_id = n2.id
25
28
  WHERE n1.file != n2.file AND e.kind IN ('imports', 'imports-type', 'calls')
29
+ AND e.confidence >= ?
26
30
  `)
27
- .all();
31
+ .all(minConf);
28
32
  if (noTests) edges = edges.filter((e) => !isTestFile(e.source) && !isTestFile(e.target));
29
33
 
30
34
  // Try to use directory nodes from DB (built by structure analysis)
@@ -102,8 +106,9 @@ export function exportDOT(db, opts = {}) {
102
106
  JOIN nodes n2 ON e.target_id = n2.id
103
107
  WHERE n1.kind IN ('function', 'method', 'class', 'interface', 'type', 'struct', 'enum', 'trait', 'record', 'module') AND n2.kind IN ('function', 'method', 'class', 'interface', 'type', 'struct', 'enum', 'trait', 'record', 'module')
104
108
  AND e.kind = 'calls'
109
+ AND e.confidence >= ?
105
110
  `)
106
- .all();
111
+ .all(minConf);
107
112
  if (noTests)
108
113
  edges = edges.filter((e) => !isTestFile(e.source_file) && !isTestFile(e.target_file));
109
114
 
@@ -126,6 +131,7 @@ export function exportDOT(db, opts = {}) {
126
131
  export function exportMermaid(db, opts = {}) {
127
132
  const fileLevel = opts.fileLevel !== false;
128
133
  const noTests = opts.noTests || false;
134
+ const minConf = opts.minConfidence ?? DEFAULT_MIN_CONFIDENCE;
129
135
  const lines = ['graph LR'];
130
136
 
131
137
  if (fileLevel) {
@@ -136,8 +142,9 @@ export function exportMermaid(db, opts = {}) {
136
142
  JOIN nodes n1 ON e.source_id = n1.id
137
143
  JOIN nodes n2 ON e.target_id = n2.id
138
144
  WHERE n1.file != n2.file AND e.kind IN ('imports', 'imports-type', 'calls')
145
+ AND e.confidence >= ?
139
146
  `)
140
- .all();
147
+ .all(minConf);
141
148
  if (noTests) edges = edges.filter((e) => !isTestFile(e.source) && !isTestFile(e.target));
142
149
 
143
150
  for (const { source, target } of edges) {
@@ -155,8 +162,9 @@ export function exportMermaid(db, opts = {}) {
155
162
  JOIN nodes n2 ON e.target_id = n2.id
156
163
  WHERE n1.kind IN ('function', 'method', 'class', 'interface', 'type', 'struct', 'enum', 'trait', 'record', 'module') AND n2.kind IN ('function', 'method', 'class', 'interface', 'type', 'struct', 'enum', 'trait', 'record', 'module')
157
164
  AND e.kind = 'calls'
165
+ AND e.confidence >= ?
158
166
  `)
159
- .all();
167
+ .all(minConf);
160
168
  if (noTests)
161
169
  edges = edges.filter((e) => !isTestFile(e.source_file) && !isTestFile(e.target_file));
162
170
 
@@ -175,6 +183,7 @@ export function exportMermaid(db, opts = {}) {
175
183
  */
176
184
  export function exportJSON(db, opts = {}) {
177
185
  const noTests = opts.noTests || false;
186
+ const minConf = opts.minConfidence ?? DEFAULT_MIN_CONFIDENCE;
178
187
 
179
188
  let nodes = db
180
189
  .prepare(`
@@ -185,13 +194,13 @@ export function exportJSON(db, opts = {}) {
185
194
 
186
195
  let edges = db
187
196
  .prepare(`
188
- SELECT DISTINCT n1.file AS source, n2.file AS target, e.kind
197
+ SELECT DISTINCT n1.file AS source, n2.file AS target, e.kind, e.confidence
189
198
  FROM edges e
190
199
  JOIN nodes n1 ON e.source_id = n1.id
191
200
  JOIN nodes n2 ON e.target_id = n2.id
192
- WHERE n1.file != n2.file
201
+ WHERE n1.file != n2.file AND e.confidence >= ?
193
202
  `)
194
- .all();
203
+ .all(minConf);
195
204
  if (noTests) edges = edges.filter((e) => !isTestFile(e.source) && !isTestFile(e.target));
196
205
 
197
206
  return { nodes, edges };
package/src/index.js CHANGED
@@ -21,7 +21,9 @@ export {
21
21
  buildEmbeddings,
22
22
  cosineSim,
23
23
  DEFAULT_MODEL,
24
+ EMBEDDING_STRATEGIES,
24
25
  embed,
26
+ estimateTokens,
25
27
  MODELS,
26
28
  multiSearchData,
27
29
  search,
package/src/queries.js CHANGED
@@ -334,6 +334,7 @@ export function moduleMapData(customDbPath, limit = 20, opts = {}) {
334
334
  dir: path.dirname(n.file) || '.',
335
335
  inEdges: n.in_edges,
336
336
  outEdges: n.out_edges,
337
+ coupling: n.in_edges + n.out_edges,
337
338
  }));
338
339
 
339
340
  const totalNodes = db.prepare('SELECT COUNT(*) as c FROM nodes').get().c;
@@ -1263,10 +1264,10 @@ export function moduleMap(customDbPath, limit = 20, opts = {}) {
1263
1264
  for (const [dir, files] of [...dirs].sort()) {
1264
1265
  console.log(` [${dir}/]`);
1265
1266
  for (const f of files) {
1266
- const total = f.inEdges + f.outEdges;
1267
- const bar = '#'.repeat(Math.min(total, 40));
1267
+ const coupling = f.inEdges + f.outEdges;
1268
+ const bar = '#'.repeat(Math.min(coupling, 40));
1268
1269
  console.log(
1269
- ` ${path.basename(f.file).padEnd(35)} <-${String(f.inEdges).padStart(3)} ->${String(f.outEdges).padStart(3)} ${bar}`,
1270
+ ` ${path.basename(f.file).padEnd(35)} <-${String(f.inEdges).padStart(3)} ->${String(f.outEdges).padStart(3)} =${String(coupling).padStart(3)} ${bar}`,
1270
1271
  );
1271
1272
  }
1272
1273
  }
@@ -1920,6 +1921,7 @@ function explainFunctionImpl(db, target, noTests, getFileLines) {
1920
1921
  export function explainData(target, customDbPath, opts = {}) {
1921
1922
  const db = openReadonlyOrFail(customDbPath);
1922
1923
  const noTests = opts.noTests || false;
1924
+ const depth = opts.depth || 0;
1923
1925
  const kind = isFileLikeTarget(target) ? 'file' : 'function';
1924
1926
 
1925
1927
  const dbPath = findDbPath(customDbPath);
@@ -1949,6 +1951,37 @@ export function explainData(target, customDbPath, opts = {}) {
1949
1951
  ? explainFileImpl(db, target, getFileLines)
1950
1952
  : explainFunctionImpl(db, target, noTests, getFileLines);
1951
1953
 
1954
+ // Recursive dependency explanation for function targets
1955
+ if (kind === 'function' && depth > 0 && results.length > 0) {
1956
+ const visited = new Set(results.map((r) => `${r.name}:${r.file}:${r.line}`));
1957
+
1958
+ function explainCallees(parentResults, currentDepth) {
1959
+ if (currentDepth <= 0) return;
1960
+ for (const r of parentResults) {
1961
+ const newCallees = [];
1962
+ for (const callee of r.callees) {
1963
+ const key = `${callee.name}:${callee.file}:${callee.line}`;
1964
+ if (visited.has(key)) continue;
1965
+ visited.add(key);
1966
+ const calleeResults = explainFunctionImpl(db, callee.name, noTests, getFileLines);
1967
+ const exact = calleeResults.find(
1968
+ (cr) => cr.file === callee.file && cr.line === callee.line,
1969
+ );
1970
+ if (exact) {
1971
+ exact._depth = (r._depth || 0) + 1;
1972
+ newCallees.push(exact);
1973
+ }
1974
+ }
1975
+ if (newCallees.length > 0) {
1976
+ r.depDetails = newCallees;
1977
+ explainCallees(newCallees, currentDepth - 1);
1978
+ }
1979
+ }
1980
+ }
1981
+
1982
+ explainCallees(results, depth);
1983
+ }
1984
+
1952
1985
  db.close();
1953
1986
  return { target, kind, results };
1954
1987
  }
@@ -2008,46 +2041,63 @@ export function explain(target, customDbPath, opts = {}) {
2008
2041
  console.log();
2009
2042
  }
2010
2043
  } else {
2011
- for (const r of data.results) {
2044
+ function printFunctionExplain(r, indent = '') {
2012
2045
  const lineRange = r.endLine ? `${r.line}-${r.endLine}` : `${r.line}`;
2013
2046
  const lineInfo = r.lineCount ? `${r.lineCount} lines` : '';
2014
2047
  const summaryPart = r.summary ? ` | ${r.summary}` : '';
2015
- console.log(`\n# ${r.name} (${r.kind}) ${r.file}:${lineRange}`);
2048
+ const depthLevel = r._depth || 0;
2049
+ const heading = depthLevel === 0 ? '#' : '##'.padEnd(depthLevel + 2, '#');
2050
+ console.log(`\n${indent}${heading} ${r.name} (${r.kind}) ${r.file}:${lineRange}`);
2016
2051
  if (lineInfo || r.summary) {
2017
- console.log(` ${lineInfo}${summaryPart}`);
2052
+ console.log(`${indent} ${lineInfo}${summaryPart}`);
2018
2053
  }
2019
2054
  if (r.signature) {
2020
- if (r.signature.params != null) console.log(` Parameters: (${r.signature.params})`);
2021
- if (r.signature.returnType) console.log(` Returns: ${r.signature.returnType}`);
2055
+ if (r.signature.params != null)
2056
+ console.log(`${indent} Parameters: (${r.signature.params})`);
2057
+ if (r.signature.returnType) console.log(`${indent} Returns: ${r.signature.returnType}`);
2022
2058
  }
2023
2059
 
2024
2060
  if (r.callees.length > 0) {
2025
- console.log(`\n## Calls (${r.callees.length})`);
2061
+ console.log(`\n${indent} Calls (${r.callees.length}):`);
2026
2062
  for (const c of r.callees) {
2027
- console.log(` ${kindIcon(c.kind)} ${c.name} ${c.file}:${c.line}`);
2063
+ console.log(`${indent} ${kindIcon(c.kind)} ${c.name} ${c.file}:${c.line}`);
2028
2064
  }
2029
2065
  }
2030
2066
 
2031
2067
  if (r.callers.length > 0) {
2032
- console.log(`\n## Called by (${r.callers.length})`);
2068
+ console.log(`\n${indent} Called by (${r.callers.length}):`);
2033
2069
  for (const c of r.callers) {
2034
- console.log(` ${kindIcon(c.kind)} ${c.name} ${c.file}:${c.line}`);
2070
+ console.log(`${indent} ${kindIcon(c.kind)} ${c.name} ${c.file}:${c.line}`);
2035
2071
  }
2036
2072
  }
2037
2073
 
2038
2074
  if (r.relatedTests.length > 0) {
2039
2075
  const label = r.relatedTests.length === 1 ? 'file' : 'files';
2040
- console.log(`\n## Tests (${r.relatedTests.length} ${label})`);
2076
+ console.log(`\n${indent} Tests (${r.relatedTests.length} ${label}):`);
2041
2077
  for (const t of r.relatedTests) {
2042
- console.log(` ${t.file}`);
2078
+ console.log(`${indent} ${t.file}`);
2043
2079
  }
2044
2080
  }
2045
2081
 
2046
2082
  if (r.callees.length === 0 && r.callers.length === 0) {
2047
- console.log(` (no call edges found -- may be invoked dynamically or via re-exports)`);
2083
+ console.log(
2084
+ `${indent} (no call edges found -- may be invoked dynamically or via re-exports)`,
2085
+ );
2086
+ }
2087
+
2088
+ // Render recursive dependency details
2089
+ if (r.depDetails && r.depDetails.length > 0) {
2090
+ console.log(`\n${indent} --- Dependencies (depth ${depthLevel + 1}) ---`);
2091
+ for (const dep of r.depDetails) {
2092
+ printFunctionExplain(dep, `${indent} `);
2093
+ }
2048
2094
  }
2049
2095
  console.log();
2050
2096
  }
2097
+
2098
+ for (const r of data.results) {
2099
+ printFunctionExplain(r);
2100
+ }
2051
2101
  }
2052
2102
  }
2053
2103
 
package/src/structure.js CHANGED
@@ -231,7 +231,8 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director
231
231
  */
232
232
  export function structureData(customDbPath, opts = {}) {
233
233
  const db = openReadonlyOrFail(customDbPath);
234
- const filterDir = opts.directory || null;
234
+ const rawDir = opts.directory || null;
235
+ const filterDir = rawDir && normalizePath(rawDir) !== '.' ? rawDir : null;
235
236
  const maxDepth = opts.depth || null;
236
237
  const sortBy = opts.sort || 'files';
237
238
  const noTests = opts.noTests || false;