npm - @optave/codegraph - Versions diffs - 2.2.1 → 2.2.3-dev.44e8146 - Mend

@optave/codegraph 2.2.1 → 2.2.3-dev.44e8146

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

package/README.md +117 -126
package/package.json +5 -5
package/src/builder.js +96 -18
package/src/cli.js +85 -25
package/src/config.js +1 -0
package/src/embedder.js +196 -15
package/src/export.js +16 -7
package/src/extractors/javascript.js +6 -8
package/src/index.js +3 -0
package/src/mcp.js +21 -7
package/src/queries.js +222 -18
package/src/structure.js +2 -1
package/src/watcher.js +2 -1

package/src/cli.js CHANGED

@@ -5,9 +5,10 @@ import path from 'node:path';
 import Database from 'better-sqlite3';
 import { Command } from 'commander';
 import { buildGraph } from './builder.js';
+import { loadConfig } from './config.js';
 import { findCycles, formatCycles } from './cycles.js';
 import { findDbPath } from './db.js';
-import { buildEmbeddings, MODELS, search } from './embedder.js';
+import { buildEmbeddings, EMBEDDING_STRATEGIES, MODELS, search } from './embedder.js';
 import { exportDOT, exportJSON, exportMermaid } from './export.js';
 import { setVerbose } from './logger.js';
 import {
@@ -36,6 +37,8 @@ import { watchProject } from './watcher.js';
 const __cliDir = path.dirname(new URL(import.meta.url).pathname.replace(/^\/([A-Z]:)/i, '$1'));
 const pkg = JSON.parse(fs.readFileSync(path.join(__cliDir, '..', 'package.json'), 'utf-8'));
+const config = loadConfig(process.cwd());
 const program = new Command();
 program
   .name('codegraph')
@@ -48,6 +51,18 @@ program
     if (opts.verbose) setVerbose(true);
   });
+/**
+ * Resolve the effective noTests value: CLI flag > config > false.
+ * Commander sets opts.tests to false when --no-tests is passed.
+ * When --include-tests is passed, always return false (include tests).
+ * Otherwise, fall back to config.query.excludeTests.
+ */
+function resolveNoTests(opts) {
+  if (opts.includeTests) return false;
+  if (opts.tests === false) return true;
+  return config.query?.excludeTests || false;
+}
 program
   .command('build [dir]')
   .description('Parse repo and build graph in .codegraph/graph.db')
@@ -63,9 +78,10 @@ program
   .description('Find a function/class, show callers and callees')
   .option('-d, --db <path>', 'Path to graph.db')
   .option('-T, --no-tests', 'Exclude test/spec files from results')
+  .option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
   .option('-j, --json', 'Output as JSON')
   .action((name, opts) => {
-    queryName(name, opts.db, { noTests: !opts.tests, json: opts.json });
+    queryName(name, opts.db, { noTests: resolveNoTests(opts), json: opts.json });
   });
 program
@@ -73,9 +89,10 @@ program
   .description('Show what depends on this file (transitive)')
   .option('-d, --db <path>', 'Path to graph.db')
   .option('-T, --no-tests', 'Exclude test/spec files from results')
+  .option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
   .option('-j, --json', 'Output as JSON')
   .action((file, opts) => {
-    impactAnalysis(file, opts.db, { noTests: !opts.tests, json: opts.json });
+    impactAnalysis(file, opts.db, { noTests: resolveNoTests(opts), json: opts.json });
   });
 program
@@ -84,9 +101,13 @@ program
   .option('-d, --db <path>', 'Path to graph.db')
   .option('-n, --limit <number>', 'Number of top nodes', '20')
   .option('-T, --no-tests', 'Exclude test/spec files from results')
+  .option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
   .option('-j, --json', 'Output as JSON')
   .action((opts) => {
-    moduleMap(opts.db, parseInt(opts.limit, 10), { noTests: !opts.tests, json: opts.json });
+    moduleMap(opts.db, parseInt(opts.limit, 10), {
+      noTests: resolveNoTests(opts),
+      json: opts.json,
+    });
   });
 program
@@ -94,9 +115,10 @@ program
   .description('Show graph health overview: nodes, edges, languages, cycles, hotspots, embeddings')
   .option('-d, --db <path>', 'Path to graph.db')
   .option('-T, --no-tests', 'Exclude test/spec files from results')
+  .option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
   .option('-j, --json', 'Output as JSON')
   .action((opts) => {
-    stats(opts.db, { noTests: !opts.tests, json: opts.json });
+    stats(opts.db, { noTests: resolveNoTests(opts), json: opts.json });
   });
 program
@@ -104,9 +126,10 @@ program
   .description('Show what this file imports and what imports it')
   .option('-d, --db <path>', 'Path to graph.db')
   .option('-T, --no-tests', 'Exclude test/spec files from results')
+  .option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
   .option('-j, --json', 'Output as JSON')
   .action((file, opts) => {
-    fileDeps(file, opts.db, { noTests: !opts.tests, json: opts.json });
+    fileDeps(file, opts.db, { noTests: resolveNoTests(opts), json: opts.json });
   });
 program
@@ -117,6 +140,7 @@ program
   .option('-f, --file <path>', 'Scope search to functions in this file (partial match)')
   .option('-k, --kind <kind>', 'Filter to a specific symbol kind')
   .option('-T, --no-tests', 'Exclude test/spec files from results')
+  .option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
   .option('-j, --json', 'Output as JSON')
   .action((name, opts) => {
     if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) {
@@ -127,7 +151,7 @@ program
       depth: parseInt(opts.depth, 10),
       file: opts.file,
       kind: opts.kind,
-      noTests: !opts.tests,
+      noTests: resolveNoTests(opts),
       json: opts.json,
     });
   });
@@ -140,6 +164,7 @@ program
   .option('-f, --file <path>', 'Scope search to functions in this file (partial match)')
   .option('-k, --kind <kind>', 'Filter to a specific symbol kind')
   .option('-T, --no-tests', 'Exclude test/spec files from results')
+  .option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
   .option('-j, --json', 'Output as JSON')
   .action((name, opts) => {
     if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) {
@@ -150,7 +175,7 @@ program
       depth: parseInt(opts.depth, 10),
       file: opts.file,
       kind: opts.kind,
-      noTests: !opts.tests,
+      noTests: resolveNoTests(opts),
       json: opts.json,
     });
   });
@@ -163,8 +188,9 @@ program
   .option('-f, --file <path>', 'Scope search to functions in this file (partial match)')
   .option('-k, --kind <kind>', 'Filter to a specific symbol kind')
   .option('--no-source', 'Metadata only (skip source extraction)')
-  .option('--include-tests', 'Include test source code')
+  .option('--with-test-source', 'Include test source code')
   .option('-T, --no-tests', 'Exclude test/spec files from results')
+  .option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
   .option('-j, --json', 'Output as JSON')
   .action((name, opts) => {
     if (opts.kind && !ALL_SYMBOL_KINDS.includes(opts.kind)) {
@@ -176,8 +202,8 @@ program
       file: opts.file,
       kind: opts.kind,
       noSource: !opts.source,
-      noTests: !opts.tests,
-      includeTests: opts.includeTests,
+      noTests: resolveNoTests(opts),
+      includeTests: opts.withTestSource,
       json: opts.json,
     });
   });
@@ -186,10 +212,16 @@ program
   .command('explain <target>')
   .description('Structural summary of a file or function (no LLM needed)')
   .option('-d, --db <path>', 'Path to graph.db')
+  .option('--depth <n>', 'Recursively explain dependencies up to N levels deep', '0')
   .option('-T, --no-tests', 'Exclude test/spec files from results')
+  .option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
   .option('-j, --json', 'Output as JSON')
   .action((target, opts) => {
-    explain(target, opts.db, { noTests: !opts.tests, json: opts.json });
+    explain(target, opts.db, {
+      depth: parseInt(opts.depth, 10),
+      noTests: resolveNoTests(opts),
+      json: opts.json,
+    });
   });
 program
@@ -198,6 +230,7 @@ program
   .option('-d, --db <path>', 'Path to graph.db')
   .option('-f, --file <path>', 'File overview: list symbols, imports, exports')
   .option('-T, --no-tests', 'Exclude test/spec files from results')
+  .option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
   .option('-j, --json', 'Output as JSON')
   .action((name, opts) => {
     if (!name && !opts.file) {
@@ -205,7 +238,7 @@ program
       process.exit(1);
     }
     const target = opts.file || name;
-    where(target, opts.db, { file: !!opts.file, noTests: !opts.tests, json: opts.json });
+    where(target, opts.db, { file: !!opts.file, noTests: resolveNoTests(opts), json: opts.json });
   });
 program
@@ -215,14 +248,17 @@ program
   .option('--staged', 'Analyze staged changes instead of unstaged')
   .option('--depth <n>', 'Max transitive caller depth', '3')
   .option('-T, --no-tests', 'Exclude test/spec files from results')
+  .option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
   .option('-j, --json', 'Output as JSON')
+  .option('-f, --format <format>', 'Output format: text, mermaid, json', 'text')
   .action((ref, opts) => {
     diffImpact(opts.db, {
       ref,
       staged: opts.staged,
       depth: parseInt(opts.depth, 10),
-      noTests: !opts.tests,
+      noTests: resolveNoTests(opts),
       json: opts.json,
+      format: opts.format,
     });
   });
@@ -235,10 +271,16 @@ program
   .option('-f, --format <format>', 'Output format: dot, mermaid, json', 'dot')
   .option('--functions', 'Function-level graph instead of file-level')
   .option('-T, --no-tests', 'Exclude test/spec files')
+  .option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
+  .option('--min-confidence <score>', 'Minimum edge confidence threshold (default: 0.5)', '0.5')
   .option('-o, --output <file>', 'Write to file instead of stdout')
   .action((opts) => {
     const db = new Database(findDbPath(opts.db), { readonly: true });
-    const exportOpts = { fileLevel: !opts.functions, noTests: !opts.tests };
+    const exportOpts = {
+      fileLevel: !opts.functions,
+      noTests: resolveNoTests(opts),
+      minConfidence: parseFloat(opts.minConfidence),
+    };
     let output;
     switch (opts.format) {
@@ -269,10 +311,11 @@ program
   .option('-d, --db <path>', 'Path to graph.db')
   .option('--functions', 'Function-level cycle detection')
   .option('-T, --no-tests', 'Exclude test/spec files')
+  .option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
   .option('-j, --json', 'Output as JSON')
   .action((opts) => {
     const db = new Database(findDbPath(opts.db), { readonly: true });
-    const cycles = findCycles(db, { fileLevel: !opts.functions, noTests: !opts.tests });
+    const cycles = findCycles(db, { fileLevel: !opts.functions, noTests: resolveNoTests(opts) });
     db.close();
     if (opts.json) {
@@ -374,10 +417,13 @@ program
   .action(() => {
     console.log('\nAvailable embedding models:\n');
     for (const [key, config] of Object.entries(MODELS)) {
-      const def = key === 'nomic-v1.5' ? ' (default)' : '';
-      console.log(`  ${key.padEnd(12)} ${String(config.dim).padStart(4)}d  ${config.desc}${def}`);
+      const def = key === 'minilm' ? ' (default)' : '';
+      const ctx = config.contextWindow ? `${config.contextWindow} ctx` : '';
+      console.log(
+        `  ${key.padEnd(12)} ${String(config.dim).padStart(4)}d  ${ctx.padEnd(9)} ${config.desc}${def}`,
+      );
     }
-    console.log('\nUsage: codegraph embed --model <name>');
+    console.log('\nUsage: codegraph embed --model <name> --strategy <structured|source>');
     console.log('       codegraph search "query" --model <name>\n');
   });
@@ -388,12 +434,23 @@ program
   )
   .option(
     '-m, --model <name>',
-    'Embedding model: minilm, jina-small, jina-base, jina-code, nomic, nomic-v1.5 (default), bge-large. Run `codegraph models` for details',
-    'nomic-v1.5',
+    'Embedding model: minilm (default), jina-small, jina-base, jina-code, nomic, nomic-v1.5, bge-large. Run `codegraph models` for details',
+    'minilm',
+  )
+  .option(
+    '-s, --strategy <name>',
+    `Embedding strategy: ${EMBEDDING_STRATEGIES.join(', ')}. "structured" uses graph context (callers/callees), "source" embeds raw code`,
+    'structured',
   )
   .action(async (dir, opts) => {
+    if (!EMBEDDING_STRATEGIES.includes(opts.strategy)) {
+      console.error(
+        `Unknown strategy: ${opts.strategy}. Available: ${EMBEDDING_STRATEGIES.join(', ')}`,
+      );
+      process.exit(1);
+    }
     const root = path.resolve(dir || '.');
-    await buildEmbeddings(root, opts.model);
+    await buildEmbeddings(root, opts.model, undefined, { strategy: opts.strategy });
   });
 program
@@ -403,6 +460,7 @@ program
   .option('-m, --model <name>', 'Override embedding model (auto-detects from DB)')
   .option('-n, --limit <number>', 'Max results', '15')
   .option('-T, --no-tests', 'Exclude test/spec files from results')
+  .option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
   .option('--min-score <score>', 'Minimum similarity threshold', '0.2')
   .option('-k, --kind <kind>', 'Filter by kind: function, method, class')
   .option('--file <pattern>', 'Filter by file path pattern')
@@ -410,7 +468,7 @@ program
   .action(async (query, opts) => {
     await search(query, opts.db, {
       limit: parseInt(opts.limit, 10),
-      noTests: !opts.tests,
+      noTests: resolveNoTests(opts),
       minScore: parseFloat(opts.minScore),
       model: opts.model,
       kind: opts.kind,
@@ -428,6 +486,7 @@ program
   .option('--depth <n>', 'Max directory depth')
   .option('--sort <metric>', 'Sort by: cohesion | fan-in | fan-out | density | files', 'files')
   .option('-T, --no-tests', 'Exclude test/spec files')
+  .option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
   .option('-j, --json', 'Output as JSON')
   .action(async (dir, opts) => {
     const { structureData, formatStructure } = await import('./structure.js');
@@ -435,7 +494,7 @@ program
       directory: dir,
       depth: opts.depth ? parseInt(opts.depth, 10) : undefined,
       sort: opts.sort,
-      noTests: !opts.tests,
+      noTests: resolveNoTests(opts),
     });
     if (opts.json) {
       console.log(JSON.stringify(data, null, 2));
@@ -454,6 +513,7 @@ program
   .option('--metric <metric>', 'fan-in | fan-out | density | coupling', 'fan-in')
   .option('--level <level>', 'file | directory', 'file')
   .option('-T, --no-tests', 'Exclude test/spec files from results')
+  .option('--include-tests', 'Include test/spec files (overrides excludeTests config)')
   .option('-j, --json', 'Output as JSON')
   .action(async (opts) => {
     const { hotspotsData, formatHotspots } = await import('./structure.js');
@@ -461,7 +521,7 @@ program
       metric: opts.metric,
       level: opts.level,
       limit: parseInt(opts.limit, 10),
-      noTests: !opts.tests,
+      noTests: resolveNoTests(opts),
     });
     if (opts.json) {
       console.log(JSON.stringify(data, null, 2));

package/src/config.js CHANGED

@@ -18,6 +18,7 @@ export const DEFAULTS = {
   query: {
     defaultDepth: 3,
     defaultLimit: 20,
+    excludeTests: false,
   },
   embeddings: { model: 'nomic-v1.5', llmProvider: null },
   llm: { provider: null, model: null, baseUrl: null, apiKey: null, apiKeyCommand: null },

package/src/embedder.js CHANGED

@@ -4,6 +4,18 @@ import Database from 'better-sqlite3';
 import { findDbPath, openReadonlyOrFail } from './db.js';
 import { warn } from './logger.js';
+/**
+ * Split an identifier into readable words.
+ * camelCase/PascalCase → "camel Case", snake_case → "snake case", kebab-case → "kebab case"
+ */
+function splitIdentifier(name) {
+  return name
+    .replace(/([a-z])([A-Z])/g, '$1 $2')
+    .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2')
+    .replace(/[_-]+/g, ' ')
+    .trim();
+}
 // Lazy-load transformers (heavy, optional module)
 let pipeline = null;
 let _cos_sim = null;
@@ -14,48 +26,57 @@ export const MODELS = {
   minilm: {
     name: 'Xenova/all-MiniLM-L6-v2',
     dim: 384,
+    contextWindow: 256,
     desc: 'Smallest, fastest (~23MB). General text.',
     quantized: true,
   },
   'jina-small': {
     name: 'Xenova/jina-embeddings-v2-small-en',
     dim: 512,
+    contextWindow: 8192,
     desc: 'Small, good quality (~33MB). General text.',
     quantized: false,
   },
   'jina-base': {
     name: 'Xenova/jina-embeddings-v2-base-en',
     dim: 768,
+    contextWindow: 8192,
     desc: 'Good quality (~137MB). General text, 8192 token context.',
     quantized: false,
   },
   'jina-code': {
     name: 'Xenova/jina-embeddings-v2-base-code',
     dim: 768,
+    contextWindow: 8192,
     desc: 'Code-aware (~137MB). Trained on code+text, best for code search.',
     quantized: false,
   },
   nomic: {
     name: 'Xenova/nomic-embed-text-v1',
     dim: 768,
+    contextWindow: 8192,
     desc: 'Good local quality (~137MB). 8192 context.',
     quantized: false,
   },
   'nomic-v1.5': {
     name: 'nomic-ai/nomic-embed-text-v1.5',
     dim: 768,
+    contextWindow: 8192,
     desc: 'Improved nomic (~137MB). Matryoshka dimensions, 8192 context.',
     quantized: false,
   },
   'bge-large': {
     name: 'Xenova/bge-large-en-v1.5',
     dim: 1024,
+    contextWindow: 512,
     desc: 'Best general retrieval (~335MB). Top MTEB scores.',
     quantized: false,
   },
 };
-export const DEFAULT_MODEL = 'nomic-v1.5';
+export const EMBEDDING_STRATEGIES = ['structured', 'source'];
+export const DEFAULT_MODEL = 'minilm';
 const BATCH_SIZE_MAP = {
   minilm: 32,
   'jina-small': 16,
@@ -77,6 +98,108 @@ function getModelConfig(modelKey) {
   return config;
 }
+/**
+ * Rough token estimate (~4 chars per token for code/English).
+ * Conservative — avoids adding a tokenizer dependency.
+ */
+export function estimateTokens(text) {
+  return Math.ceil(text.length / 4);
+}
+/**
+ * Extract leading comment text (JSDoc, //, #, etc.) above a function line.
+ * Returns the cleaned comment text or null if none found.
+ */
+function extractLeadingComment(lines, fnLineIndex) {
+  const raw = [];
+  for (let i = fnLineIndex - 1; i >= Math.max(0, fnLineIndex - 15); i--) {
+    const trimmed = lines[i].trim();
+    if (/^(\/\/|\/\*|\*\/|\*|#|\/\/\/)/.test(trimmed)) {
+      raw.unshift(trimmed);
+    } else if (trimmed === '') {
+      if (raw.length > 0) break;
+    } else {
+      break;
+    }
+  }
+  if (raw.length === 0) return null;
+  return raw
+    .map((line) =>
+      line
+        .replace(/^\/\*\*?\s?|\*\/$/g, '') // opening /** or /* and closing */
+        .replace(/^\*\s?/, '') // middle * lines
+        .replace(/^\/\/\/?\s?/, '') // // or ///
+        .replace(/^#\s?/, '') // # (Python/Ruby)
+        .trim(),
+    )
+    .filter((l) => l.length > 0)
+    .join(' ');
+}
+/**
+ * Build graph-enriched text for a symbol using dependency context.
+ * Produces compact, semantic text (~100 tokens) instead of full source code.
+ */
+function buildStructuredText(node, file, lines, calleesStmt, callersStmt) {
+  const readable = splitIdentifier(node.name);
+  const parts = [`${node.kind} ${node.name} (${readable}) in ${file}`];
+  const startLine = Math.max(0, node.line - 1);
+  // Extract parameters from signature (best-effort, single-line)
+  const sigLine = lines[startLine] || '';
+  const paramMatch = sigLine.match(/\(([^)]*)\)/);
+  if (paramMatch?.[1]?.trim()) {
+    parts.push(`Parameters: ${paramMatch[1].trim()}`);
+  }
+  // Graph context: callees (capped at 10)
+  const callees = calleesStmt.all(node.id);
+  if (callees.length > 0) {
+    parts.push(
+      `Calls: ${callees
+        .slice(0, 10)
+        .map((c) => c.name)
+        .join(', ')}`,
+    );
+  }
+  // Graph context: callers (capped at 10)
+  const callers = callersStmt.all(node.id);
+  if (callers.length > 0) {
+    parts.push(
+      `Called by: ${callers
+        .slice(0, 10)
+        .map((c) => c.name)
+        .join(', ')}`,
+    );
+  }
+  // Leading comment (high semantic value) or first few lines of code
+  const comment = extractLeadingComment(lines, startLine);
+  if (comment) {
+    parts.push(comment);
+  } else {
+    const endLine = Math.min(lines.length, startLine + 4);
+    const snippet = lines.slice(startLine, endLine).join('\n').trim();
+    if (snippet) parts.push(snippet);
+  }
+  return parts.join('\n');
+}
+/**
+ * Build raw source-code text for a symbol (original strategy).
+ */
+function buildSourceText(node, file, lines) {
+  const startLine = Math.max(0, node.line - 1);
+  const endLine = node.end_line
+    ? Math.min(lines.length, node.end_line)
+    : Math.min(lines.length, startLine + 15);
+  const context = lines.slice(startLine, endLine).join('\n');
+  const readable = splitIdentifier(node.name);
+  return `${node.kind} ${node.name} (${readable}) in ${file}\n${context}`;
+}
 /**
  * Lazy-load @huggingface/transformers.
  * This is an optional dependency — gives a clear error if not installed.
@@ -103,8 +226,27 @@ async function loadModel(modelKey) {
   _cos_sim = transformers.cos_sim;
   console.log(`Loading embedding model: ${config.name} (${config.dim}d)...`);
-  const opts = config.quantized ? { quantized: true } : {};
-  extractor = await pipeline('feature-extraction', config.name, opts);
+  const pipelineOpts = config.quantized ? { quantized: true } : {};
+  try {
+    extractor = await pipeline('feature-extraction', config.name, pipelineOpts);
+  } catch (err) {
+    const msg = err.message || String(err);
+    if (msg.includes('Unauthorized') || msg.includes('401') || msg.includes('gated')) {
+      console.error(
+        `\nModel "${config.name}" requires authentication.\n` +
+          `This model is gated on HuggingFace and needs an access token.\n\n` +
+          `Options:\n` +
+          `  1. Set HF_TOKEN env var: export HF_TOKEN=hf_...\n` +
+          `  2. Use a public model instead: codegraph embed --model minilm\n`,
+      );
+    } else {
+      console.error(
+        `\nFailed to load model "${config.name}": ${msg}\n` +
+          `Try a different model: codegraph embed --model minilm\n`,
+      );
+    }
+    process.exit(1);
+  }
   activeModel = config.name;
   console.log('Model loaded.');
   return { extractor, config };
@@ -172,10 +314,14 @@ function initEmbeddingsSchema(db) {
 /**
  * Build embeddings for all functions/methods/classes in the graph.
+ * @param {string} rootDir - Project root directory
+ * @param {string} modelKey - Model identifier from MODELS registry
+ * @param {string} [customDbPath] - Override path to graph.db
+ * @param {object} [options] - Embedding options
+ * @param {string} [options.strategy='structured'] - 'structured' (graph-enriched) or 'source' (raw code)
  */
-export async function buildEmbeddings(rootDir, modelKey, customDbPath) {
-  // path already imported at top
-  // fs already imported at top
+export async function buildEmbeddings(rootDir, modelKey, customDbPath, options = {}) {
+  const strategy = options.strategy || 'structured';
   const dbPath = customDbPath || findDbPath(null);
   const db = new Database(dbPath);
@@ -190,7 +336,24 @@ export async function buildEmbeddings(rootDir, modelKey, customDbPath) {
     )
     .all();
-  console.log(`Building embeddings for ${nodes.length} symbols...`);
+  console.log(`Building embeddings for ${nodes.length} symbols (strategy: ${strategy})...`);
+  // Prepare graph-context queries for structured strategy
+  let calleesStmt, callersStmt;
+  if (strategy === 'structured') {
+    calleesStmt = db.prepare(`
+      SELECT DISTINCT n.name FROM edges e
+      JOIN nodes n ON e.target_id = n.id
+      WHERE e.source_id = ? AND e.kind = 'calls'
+      ORDER BY n.name
+    `);
+    callersStmt = db.prepare(`
+      SELECT DISTINCT n.name FROM edges e
+      JOIN nodes n ON e.source_id = n.id
+      WHERE e.target_id = ? AND e.kind = 'calls'
+      ORDER BY n.name
+    `);
+  }
   const byFile = new Map();
   for (const node of nodes) {
@@ -201,6 +364,9 @@ export async function buildEmbeddings(rootDir, modelKey, customDbPath) {
   const texts = [];
   const nodeIds = [];
   const previews = [];
+  const config = getModelConfig(modelKey);
+  const contextWindow = config.contextWindow;
+  let overflowCount = 0;
   for (const [file, fileNodes] of byFile) {
     const fullPath = path.join(rootDir, file);
@@ -213,19 +379,31 @@ export async function buildEmbeddings(rootDir, modelKey, customDbPath) {
     }
     for (const node of fileNodes) {
-      const startLine = Math.max(0, node.line - 1);
-      const endLine = node.end_line
-        ? Math.min(lines.length, node.end_line)
-        : Math.min(lines.length, startLine + 15);
-      const context = lines.slice(startLine, endLine).join('\n');
+      let text =
+        strategy === 'structured'
+          ? buildStructuredText(node, file, lines, calleesStmt, callersStmt)
+          : buildSourceText(node, file, lines);
+      // Detect and handle context window overflow
+      const tokens = estimateTokens(text);
+      if (tokens > contextWindow) {
+        overflowCount++;
+        const maxChars = contextWindow * 4;
+        text = text.slice(0, maxChars);
+      }
-      const text = `${node.kind} ${node.name} in ${file}\n${context}`;
       texts.push(text);
       nodeIds.push(node.id);
       previews.push(`${node.name} (${node.kind}) -- ${file}:${node.line}`);
     }
   }
+  if (overflowCount > 0) {
+    warn(
+      `${overflowCount} symbol(s) exceeded model context window (${contextWindow} tokens) and were truncated`,
+    );
+  }
   console.log(`Embedding ${texts.length} symbols...`);
   const { vectors, dim } = await embed(texts, modelKey);
@@ -237,16 +415,19 @@ export async function buildEmbeddings(rootDir, modelKey, customDbPath) {
     for (let i = 0; i < vectors.length; i++) {
       insert.run(nodeIds[i], Buffer.from(vectors[i].buffer), previews[i]);
     }
-    const config = getModelConfig(modelKey);
     insertMeta.run('model', config.name);
     insertMeta.run('dim', String(dim));
     insertMeta.run('count', String(vectors.length));
+    insertMeta.run('strategy', strategy);
     insertMeta.run('built_at', new Date().toISOString());
+    if (overflowCount > 0) {
+      insertMeta.run('truncated_count', String(overflowCount));
+    }
   });
   insertAll();
   console.log(
-    `\nStored ${vectors.length} embeddings (${dim}d, ${getModelConfig(modelKey).name}) in graph.db`,
+    `\nStored ${vectors.length} embeddings (${dim}d, ${config.name}, strategy: ${strategy}) in graph.db`,
   );
   db.close();
 }