@optave/codegraph 3.1.2 → 3.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (194) hide show
  1. package/README.md +19 -21
  2. package/package.json +10 -7
  3. package/src/analysis/context.js +408 -0
  4. package/src/analysis/dependencies.js +341 -0
  5. package/src/analysis/exports.js +130 -0
  6. package/src/analysis/impact.js +463 -0
  7. package/src/analysis/module-map.js +322 -0
  8. package/src/analysis/roles.js +45 -0
  9. package/src/analysis/symbol-lookup.js +232 -0
  10. package/src/ast-analysis/shared.js +5 -4
  11. package/src/batch.js +2 -1
  12. package/src/builder/context.js +85 -0
  13. package/src/builder/helpers.js +218 -0
  14. package/src/builder/incremental.js +178 -0
  15. package/src/builder/pipeline.js +130 -0
  16. package/src/builder/stages/build-edges.js +297 -0
  17. package/src/builder/stages/build-structure.js +113 -0
  18. package/src/builder/stages/collect-files.js +44 -0
  19. package/src/builder/stages/detect-changes.js +413 -0
  20. package/src/builder/stages/finalize.js +139 -0
  21. package/src/builder/stages/insert-nodes.js +195 -0
  22. package/src/builder/stages/parse-files.js +28 -0
  23. package/src/builder/stages/resolve-imports.js +143 -0
  24. package/src/builder/stages/run-analyses.js +44 -0
  25. package/src/builder.js +10 -1472
  26. package/src/cfg.js +1 -2
  27. package/src/cli/commands/ast.js +26 -0
  28. package/src/cli/commands/audit.js +46 -0
  29. package/src/cli/commands/batch.js +68 -0
  30. package/src/cli/commands/branch-compare.js +21 -0
  31. package/src/cli/commands/build.js +26 -0
  32. package/src/cli/commands/cfg.js +30 -0
  33. package/src/cli/commands/check.js +79 -0
  34. package/src/cli/commands/children.js +31 -0
  35. package/src/cli/commands/co-change.js +65 -0
  36. package/src/cli/commands/communities.js +23 -0
  37. package/src/cli/commands/complexity.js +45 -0
  38. package/src/cli/commands/context.js +34 -0
  39. package/src/cli/commands/cycles.js +28 -0
  40. package/src/cli/commands/dataflow.js +32 -0
  41. package/src/cli/commands/deps.js +16 -0
  42. package/src/cli/commands/diff-impact.js +30 -0
  43. package/src/cli/commands/embed.js +30 -0
  44. package/src/cli/commands/export.js +75 -0
  45. package/src/cli/commands/exports.js +18 -0
  46. package/src/cli/commands/flow.js +36 -0
  47. package/src/cli/commands/fn-impact.js +30 -0
  48. package/src/cli/commands/impact.js +16 -0
  49. package/src/cli/commands/info.js +76 -0
  50. package/src/cli/commands/map.js +19 -0
  51. package/src/cli/commands/mcp.js +18 -0
  52. package/src/cli/commands/models.js +19 -0
  53. package/src/cli/commands/owners.js +25 -0
  54. package/src/cli/commands/path.js +36 -0
  55. package/src/cli/commands/plot.js +80 -0
  56. package/src/cli/commands/query.js +49 -0
  57. package/src/cli/commands/registry.js +100 -0
  58. package/src/cli/commands/roles.js +34 -0
  59. package/src/cli/commands/search.js +42 -0
  60. package/src/cli/commands/sequence.js +32 -0
  61. package/src/cli/commands/snapshot.js +61 -0
  62. package/src/cli/commands/stats.js +15 -0
  63. package/src/cli/commands/structure.js +32 -0
  64. package/src/cli/commands/triage.js +78 -0
  65. package/src/cli/commands/watch.js +12 -0
  66. package/src/cli/commands/where.js +24 -0
  67. package/src/cli/index.js +118 -0
  68. package/src/cli/shared/options.js +39 -0
  69. package/src/cli/shared/output.js +1 -0
  70. package/src/cli.js +11 -1514
  71. package/src/commands/check.js +5 -5
  72. package/src/commands/manifesto.js +3 -3
  73. package/src/commands/structure.js +1 -1
  74. package/src/communities.js +15 -87
  75. package/src/complexity.js +1 -1
  76. package/src/cycles.js +30 -85
  77. package/src/dataflow.js +1 -2
  78. package/src/db/connection.js +4 -4
  79. package/src/db/migrations.js +41 -0
  80. package/src/db/query-builder.js +6 -5
  81. package/src/db/repository/base.js +201 -0
  82. package/src/db/repository/cached-stmt.js +19 -0
  83. package/src/db/repository/cfg.js +27 -38
  84. package/src/db/repository/cochange.js +16 -3
  85. package/src/db/repository/complexity.js +11 -6
  86. package/src/db/repository/dataflow.js +6 -1
  87. package/src/db/repository/edges.js +120 -98
  88. package/src/db/repository/embeddings.js +14 -3
  89. package/src/db/repository/graph-read.js +32 -9
  90. package/src/db/repository/in-memory-repository.js +584 -0
  91. package/src/db/repository/index.js +6 -1
  92. package/src/db/repository/nodes.js +110 -40
  93. package/src/db/repository/sqlite-repository.js +219 -0
  94. package/src/db.js +5 -0
  95. package/src/embeddings/generator.js +163 -0
  96. package/src/embeddings/index.js +13 -0
  97. package/src/embeddings/models.js +218 -0
  98. package/src/embeddings/search/cli-formatter.js +151 -0
  99. package/src/embeddings/search/filters.js +46 -0
  100. package/src/embeddings/search/hybrid.js +121 -0
  101. package/src/embeddings/search/keyword.js +68 -0
  102. package/src/embeddings/search/prepare.js +66 -0
  103. package/src/embeddings/search/semantic.js +145 -0
  104. package/src/embeddings/stores/fts5.js +27 -0
  105. package/src/embeddings/stores/sqlite-blob.js +24 -0
  106. package/src/embeddings/strategies/source.js +14 -0
  107. package/src/embeddings/strategies/structured.js +43 -0
  108. package/src/embeddings/strategies/text-utils.js +43 -0
  109. package/src/errors.js +78 -0
  110. package/src/export.js +217 -520
  111. package/src/extractors/csharp.js +10 -2
  112. package/src/extractors/go.js +3 -1
  113. package/src/extractors/helpers.js +71 -0
  114. package/src/extractors/java.js +9 -2
  115. package/src/extractors/javascript.js +38 -1
  116. package/src/extractors/php.js +3 -1
  117. package/src/extractors/python.js +14 -3
  118. package/src/extractors/rust.js +3 -1
  119. package/src/graph/algorithms/bfs.js +49 -0
  120. package/src/graph/algorithms/centrality.js +16 -0
  121. package/src/graph/algorithms/index.js +5 -0
  122. package/src/graph/algorithms/louvain.js +26 -0
  123. package/src/graph/algorithms/shortest-path.js +41 -0
  124. package/src/graph/algorithms/tarjan.js +49 -0
  125. package/src/graph/builders/dependency.js +91 -0
  126. package/src/graph/builders/index.js +3 -0
  127. package/src/graph/builders/structure.js +40 -0
  128. package/src/graph/builders/temporal.js +33 -0
  129. package/src/graph/classifiers/index.js +2 -0
  130. package/src/graph/classifiers/risk.js +85 -0
  131. package/src/graph/classifiers/roles.js +64 -0
  132. package/src/graph/index.js +13 -0
  133. package/src/graph/model.js +230 -0
  134. package/src/index.js +33 -204
  135. package/src/infrastructure/result-formatter.js +2 -21
  136. package/src/mcp/index.js +2 -0
  137. package/src/mcp/middleware.js +26 -0
  138. package/src/mcp/server.js +128 -0
  139. package/src/mcp/tool-registry.js +801 -0
  140. package/src/mcp/tools/ast-query.js +14 -0
  141. package/src/mcp/tools/audit.js +21 -0
  142. package/src/mcp/tools/batch-query.js +11 -0
  143. package/src/mcp/tools/branch-compare.js +10 -0
  144. package/src/mcp/tools/cfg.js +21 -0
  145. package/src/mcp/tools/check.js +43 -0
  146. package/src/mcp/tools/co-changes.js +20 -0
  147. package/src/mcp/tools/code-owners.js +12 -0
  148. package/src/mcp/tools/communities.js +15 -0
  149. package/src/mcp/tools/complexity.js +18 -0
  150. package/src/mcp/tools/context.js +17 -0
  151. package/src/mcp/tools/dataflow.js +26 -0
  152. package/src/mcp/tools/diff-impact.js +24 -0
  153. package/src/mcp/tools/execution-flow.js +26 -0
  154. package/src/mcp/tools/export-graph.js +57 -0
  155. package/src/mcp/tools/file-deps.js +12 -0
  156. package/src/mcp/tools/file-exports.js +13 -0
  157. package/src/mcp/tools/find-cycles.js +15 -0
  158. package/src/mcp/tools/fn-impact.js +15 -0
  159. package/src/mcp/tools/impact-analysis.js +12 -0
  160. package/src/mcp/tools/index.js +71 -0
  161. package/src/mcp/tools/list-functions.js +14 -0
  162. package/src/mcp/tools/list-repos.js +11 -0
  163. package/src/mcp/tools/module-map.js +6 -0
  164. package/src/mcp/tools/node-roles.js +14 -0
  165. package/src/mcp/tools/path.js +12 -0
  166. package/src/mcp/tools/query.js +30 -0
  167. package/src/mcp/tools/semantic-search.js +65 -0
  168. package/src/mcp/tools/sequence.js +17 -0
  169. package/src/mcp/tools/structure.js +15 -0
  170. package/src/mcp/tools/symbol-children.js +14 -0
  171. package/src/mcp/tools/triage.js +35 -0
  172. package/src/mcp/tools/where.js +13 -0
  173. package/src/mcp.js +2 -1470
  174. package/src/native.js +34 -10
  175. package/src/parser.js +53 -2
  176. package/src/presentation/colors.js +44 -0
  177. package/src/presentation/export.js +444 -0
  178. package/src/presentation/result-formatter.js +21 -0
  179. package/src/presentation/sequence-renderer.js +43 -0
  180. package/src/presentation/table.js +47 -0
  181. package/src/presentation/viewer.js +634 -0
  182. package/src/queries.js +35 -2276
  183. package/src/resolve.js +1 -1
  184. package/src/sequence.js +2 -38
  185. package/src/shared/file-utils.js +153 -0
  186. package/src/shared/generators.js +125 -0
  187. package/src/shared/hierarchy.js +27 -0
  188. package/src/shared/normalize.js +59 -0
  189. package/src/snapshot.js +6 -5
  190. package/src/structure.js +15 -40
  191. package/src/triage.js +20 -72
  192. package/src/viewer.js +35 -656
  193. package/src/watcher.js +8 -148
  194. package/src/embedder.js +0 -1097
package/src/embedder.js DELETED
@@ -1,1097 +0,0 @@
1
- import { execFileSync } from 'node:child_process';
2
- import fs from 'node:fs';
3
- import path from 'node:path';
4
- import { createInterface } from 'node:readline';
5
- import {
6
- closeDb,
7
- findCalleeNames,
8
- findCallerNames,
9
- findDbPath,
10
- openDb,
11
- openReadonlyOrFail,
12
- } from './db.js';
13
- import { info, warn } from './logger.js';
14
- import { normalizeSymbol } from './queries.js';
15
-
16
/**
 * Break an identifier into space-separated words.
 *
 * Inserts a space at lower→upper case boundaries ("camelCase" → "camel Case"),
 * between an acronym run and a following capitalized word
 * ("HTMLParser" → "HTML Parser"), and replaces runs of `_` / `-` with a
 * single space. Leading and trailing whitespace is trimmed.
 *
 * @param {string} name - Identifier to split.
 * @returns {string} The identifier as readable words.
 */
function splitIdentifier(name) {
  const lowerToUpper = /([a-z])([A-Z])/g;
  const acronymToWord = /([A-Z]+)([A-Z][a-z])/g;
  const separatorRun = /[_-]+/g;

  let words = name.replace(lowerToUpper, '$1 $2');
  words = words.replace(acronymToWord, '$1 $2');
  words = words.replace(separatorRun, ' ');
  return words.trim();
}
27
-
28
/**
 * Match a file path against a glob pattern. Zero dependencies.
 *
 * Supported wildcards:
 *   - `*`   any run of characters within one path segment (never crosses `/`)
 *   - `?`   exactly one character other than `/`
 *   - `**`  any run of characters, including `/`
 *   - `**` followed by `/` at the start of the pattern or right after a `/`:
 *           zero or more whole directories, so `src/**` + `/*.js` style
 *           patterns also match files directly inside `src`
 *
 * Backslashes in both the path and the pattern are treated as `/`.
 * Every other regex metacharacter in the pattern is matched literally
 * (character classes such as `[abc]` are not supported).
 *
 * @param {string} filePath - Path to test.
 * @param {string} pattern - Glob pattern.
 * @returns {boolean} True when the whole path matches the pattern.
 */
function globMatch(filePath, pattern) {
  // Normalize separators to forward slashes on both sides.
  const normalized = filePath.replace(/\\/g, '/');
  const globPattern = pattern.replace(/\\/g, '/');

  // Single pass: translate glob tokens and escape regex specials together so
  // that generated regex syntax is never re-processed by a later step.
  const source = globPattern.replace(/\*\*\/|\*\*|\*|\?|[.+^${}()|[\]]/g, (token, offset) => {
    switch (token) {
      case '**/': {
        // Only a globstar that forms a whole path segment may match zero dirs.
        const isWholeSegment = offset === 0 || globPattern[offset - 1] === '/';
        return isWholeSegment ? '(?:.*/)?' : '.*/';
      }
      case '**':
        return '.*';
      case '*':
        return '[^/]*';
      case '?':
        return '[^/]';
      default:
        return `\\${token}`;
    }
  });

  try {
    return new RegExp(`^${source}$`).test(normalized);
  } catch {
    // Malformed pattern — fall back to substring match
    return normalized.includes(pattern);
  }
}
49
-
50
// Lazily-populated module state for the optional transformers dependency.
// All four bindings stay null until loadModel() runs:
//   pipeline    - the `pipeline` factory taken from the transformers module
//   _cos_sim    - the module's `cos_sim` export (assigned in loadModel; no
//                 reads are visible in this part of the file)
//   extractor   - the feature-extraction pipeline instance currently loaded
//   activeModel - the `name` of the model `extractor` was created for
let pipeline = null;
let _cos_sim = null;
let extractor = null;
let activeModel = null;

/**
 * Registry of supported embedding models, keyed by the short model key.
 *
 * Per-entry fields as used in this file:
 *   name          - model identifier passed to the transformers pipeline
 *   dim           - embedding vector length (used to slice pipeline output)
 *   contextWindow - token budget; longer texts are truncated before embedding
 *   desc          - human-readable description
 *   quantized     - when true, the pipeline is created with { quantized: true }
 */
export const MODELS = {
  minilm: {
    name: 'Xenova/all-MiniLM-L6-v2',
    dim: 384,
    contextWindow: 256,
    desc: 'Smallest, fastest (~23MB). General text.',
    quantized: true,
  },
  'jina-small': {
    name: 'Xenova/jina-embeddings-v2-small-en',
    dim: 512,
    contextWindow: 8192,
    desc: 'Small, good quality (~33MB). General text.',
    quantized: false,
  },
  'jina-base': {
    name: 'Xenova/jina-embeddings-v2-base-en',
    dim: 768,
    contextWindow: 8192,
    desc: 'Good quality (~137MB). General text, 8192 token context.',
    quantized: false,
  },
  'jina-code': {
    name: 'Xenova/jina-embeddings-v2-base-code',
    dim: 768,
    contextWindow: 8192,
    desc: 'Code-aware (~137MB). Trained on code+text, best for code search.',
    quantized: false,
  },
  nomic: {
    name: 'Xenova/nomic-embed-text-v1',
    dim: 768,
    contextWindow: 8192,
    desc: 'Good local quality (~137MB). 8192 context.',
    quantized: false,
  },
  'nomic-v1.5': {
    name: 'nomic-ai/nomic-embed-text-v1.5',
    dim: 768,
    contextWindow: 8192,
    desc: 'Improved nomic (~137MB). Matryoshka dimensions, 8192 context.',
    quantized: false,
  },
  'bge-large': {
    name: 'Xenova/bge-large-en-v1.5',
    dim: 1024,
    contextWindow: 512,
    desc: 'Best general retrieval (~335MB). Top MTEB scores.',
    quantized: false,
  },
};

// Text-building strategies accepted by buildEmbeddings():
// 'structured' (graph-enriched summary) or 'source' (raw code).
export const EMBEDDING_STRATEGIES = ['structured', 'source'];

// Model key used whenever a caller does not specify one.
export const DEFAULT_MODEL = 'nomic-v1.5';

// Number of texts sent to the model per pipeline call, keyed by model key.
const BATCH_SIZE_MAP = {
  minilm: 32,
  'jina-small': 16,
  'jina-base': 8,
  'jina-code': 8,
  nomic: 8,
  'nomic-v1.5': 8,
  'bge-large': 4,
};

// Fallback batch size for model keys missing from BATCH_SIZE_MAP.
const DEFAULT_BATCH_SIZE = 32;
121
-
122
/**
 * Look up the registry entry for a model key.
 *
 * Falls back to DEFAULT_MODEL when `modelKey` is falsy. An unknown key prints
 * the list of available keys to stderr and terminates the process with exit
 * code 1.
 *
 * @param {string} [modelKey] - Key into MODELS.
 * @returns {object} The matching MODELS entry.
 */
function getModelConfig(modelKey) {
  const key = modelKey || DEFAULT_MODEL;
  // Own-property check: a plain `MODELS[key]` lookup would also resolve
  // inherited names such as "constructor" or "toString" and treat them as
  // valid model configs.
  const isKnown = Object.prototype.hasOwnProperty.call(MODELS, key);
  const config = isKnown ? MODELS[key] : undefined;
  if (!config) {
    console.error(`Unknown model: ${key}. Available: ${Object.keys(MODELS).join(', ')}`);
    process.exit(1);
  }
  return config;
}
131
-
132
/**
 * Approximate the token count of a string without a tokenizer.
 * Assumes roughly four characters per token and rounds up.
 *
 * @param {string} text - Text to measure.
 * @returns {number} Estimated number of tokens.
 */
export function estimateTokens(text) {
  const CHARS_PER_TOKEN = 4;
  const charCount = text.length;
  return Math.ceil(charCount / CHARS_PER_TOKEN);
}
139
-
140
/**
 * Collect the comment block sitting directly above a function line.
 *
 * Scans upward from `fnLineIndex - 1`, looking at no more than 15 lines.
 * Lines that start with a comment marker (`//`, `///`, `/*`, `*`, `*` + `/`
 * or `#`) are collected. Blank lines are skipped until the first comment line
 * is seen; after that a blank line ends the scan. Any other line ends the
 * scan immediately.
 *
 * @param {string[]} lines - All lines of the file.
 * @param {number} fnLineIndex - Zero-based index of the function's line.
 * @returns {string|null} Comment text with markers stripped and lines joined
 *   by single spaces, or null when no comment was found.
 */
function extractLeadingComment(lines, fnLineIndex) {
  if (fnLineIndex > lines.length) return null;

  const commentMarker = /^(\/\/|\/\*|\*\/|\*|#|\/\/\/)/;
  const lowestIndex = Math.max(0, fnLineIndex - 15);

  // Gathered bottom-up, so this holds the comment lines in reverse order.
  const bottomUp = [];
  let cursor = fnLineIndex - 1;
  while (cursor >= lowestIndex) {
    const index = cursor;
    cursor -= 1;
    if (index >= lines.length) continue;

    const candidate = lines[index].trim();
    if (commentMarker.test(candidate)) {
      bottomUp.push(candidate);
      continue;
    }
    // A blank gap is tolerated only before the first comment line is found.
    const isSkippableGap = candidate === '' && bottomUp.length === 0;
    if (!isSkippableGap) break;
  }

  if (bottomUp.length === 0) return null;

  const cleaned = [];
  for (const commentLine of bottomUp.reverse()) {
    let text = commentLine.replace(/^\/\*\*?\s?|\*\/$/g, ''); // opening /** or /* and closing */
    text = text.replace(/^\*\s?/, ''); // middle * lines
    text = text.replace(/^\/\/\/?\s?/, ''); // // or ///
    text = text.replace(/^#\s?/, ''); // # (Python/Ruby)
    text = text.trim();
    if (text.length > 0) cleaned.push(text);
  }
  return cleaned.join(' ');
}
171
-
172
/**
 * Compose a compact, graph-enriched description of a symbol for embedding.
 *
 * The result is newline-separated and contains, in order:
 *   1. a header: kind, name, the name split into words, and the file;
 *   2. the parameter list found in the first `(...)` on the symbol's own line;
 *   3. up to 10 callee names, then up to 10 caller names, from the graph;
 *   4. the leading comment above the symbol, or — when there is none — the
 *      first four lines of the symbol's source.
 * Sections 2-4 are omitted when they would be empty.
 *
 * @param {object} node - Graph node; reads `id`, `name`, `kind`, `line` (1-based).
 * @param {string} file - File path shown in the header.
 * @param {string[]} lines - Lines of the file containing the symbol.
 * @param {object} db - Database handle passed to the caller/callee lookups.
 * @returns {string} Text to embed.
 */
function buildStructuredText(node, file, lines, db) {
  const sections = [`${node.kind} ${node.name} (${splitIdentifier(node.name)}) in ${file}`];
  const startLine = Math.max(0, node.line - 1);

  // Best-effort parameter extraction: only a single-line signature is handled.
  const signature = lines[startLine] || '';
  const params = signature.match(/\(([^)]*)\)/)?.[1]?.trim();
  if (params) {
    sections.push(`Parameters: ${params}`);
  }

  // Graph neighbours, callees first, each list capped at 10 names.
  const neighbourLookups = [
    ['Calls', findCalleeNames],
    ['Called by', findCallerNames],
  ];
  for (const [label, lookup] of neighbourLookups) {
    const names = lookup(db, node.id);
    if (names.length > 0) {
      sections.push(`${label}: ${names.slice(0, 10).join(', ')}`);
    }
  }

  // Prefer the leading comment; otherwise fall back to a short code snippet.
  const comment = extractLeadingComment(lines, startLine);
  if (comment) {
    sections.push(comment);
    return sections.join('\n');
  }
  const snippetEnd = Math.min(lines.length, startLine + 4);
  const snippet = lines.slice(startLine, snippetEnd).join('\n').trim();
  if (snippet) sections.push(snippet);

  return sections.join('\n');
}
212
-
213
/**
 * Compose raw-source embedding text for a symbol (the 'source' strategy).
 *
 * Emits a header line (kind, name, name split into words, file) followed by
 * the symbol's source: from its start line through `node.end_line`, or 15
 * lines when `end_line` is not set. The range is clamped to the file length.
 *
 * @param {object} node - Graph node; reads `name`, `kind`, `line`, `end_line`.
 * @param {string} file - File path shown in the header.
 * @param {string[]} lines - Lines of the file containing the symbol.
 * @returns {string} Text to embed.
 */
function buildSourceText(node, file, lines) {
  const firstLine = Math.max(0, node.line - 1);

  let lastLine;
  if (node.end_line) {
    lastLine = Math.min(lines.length, node.end_line);
  } else {
    lastLine = Math.min(lines.length, firstLine + 15);
  }

  const body = lines.slice(firstLine, lastLine).join('\n');
  const header = `${node.kind} ${node.name} (${splitIdentifier(node.name)}) in ${file}`;
  return `${header}\n${body}`;
}
225
-
226
/**
 * Ask the user whether a missing package should be installed, and install it
 * with `npm install` when they answer "y".
 *
 * The prompt is written to stderr. In non-TTY environments (CI, piped stdin)
 * no prompt is shown and the promise resolves to false immediately.
 *
 * @param {string} packageName - Package to install. Must be a trusted,
 *   program-supplied name: on Windows it is passed through a shell.
 * @returns {Promise<boolean>} True when npm completed successfully, false when
 *   the user declined, the session is non-interactive, or the install failed.
 */
function promptInstall(packageName) {
  if (!process.stdin.isTTY) return Promise.resolve(false);

  return new Promise((resolve) => {
    const rl = createInterface({ input: process.stdin, output: process.stderr });
    rl.question(`Semantic search requires ${packageName}. Install it now? [y/N] `, (answer) => {
      rl.close();
      if (answer.trim().toLowerCase() !== 'y') {
        resolve(false);
        return;
      }
      try {
        execFileSync('npm', ['install', packageName], {
          stdio: 'inherit',
          timeout: 300_000,
          // On Windows npm is a .cmd shim, which execFile cannot launch
          // without a shell; other platforms keep the shell-free exec.
          shell: process.platform === 'win32',
        });
        resolve(true);
      } catch {
        // Install failed or timed out; npm's own output was already shown
        // through the inherited stdio.
        resolve(false);
      }
    });
  });
}
251
-
252
/**
 * Import @huggingface/transformers on demand.
 *
 * When the first import fails, the user is offered an interactive install
 * (see promptInstall). After a successful install the import is retried; if
 * the retry fails, or the install was declined or unavailable, an error is
 * printed and the process exits with code 1.
 *
 * @returns {Promise<object>} The imported transformers module.
 */
async function loadTransformers() {
  try {
    return await import('@huggingface/transformers');
  } catch {
    // First import failed — recover via the install prompt below.
  }

  const pkg = '@huggingface/transformers';
  const wasInstalled = await promptInstall(pkg);
  if (wasInstalled) {
    try {
      return await import(pkg);
    } catch {
      console.error(`\n${pkg} was installed but failed to load. Please check your environment.`);
      process.exit(1);
    }
  }

  console.error(`Semantic search requires ${pkg}.\nInstall it with: npm install ${pkg}`);
  process.exit(1);
}
275
-
276
/**
 * Release the currently loaded model, if any.
 *
 * Awaits the extractor's `dispose()` and then clears the cached extractor.
 * The active-model marker is reset in every case, so calling this with no
 * model loaded is a harmless no-op.
 *
 * @returns {Promise<void>}
 */
export async function disposeModel() {
  const loaded = extractor;
  if (loaded) {
    await loaded.dispose();
    extractor = null;
  }
  activeModel = null;
}
287
-
288
/**
 * Ensure the embedding model for `modelKey` is loaded and return it.
 *
 * Reuses the cached extractor when it already belongs to the requested model.
 * Otherwise the previous model is disposed, the transformers module is
 * loaded, and a 'feature-extraction' pipeline is created. A failure to create
 * the pipeline prints guidance (authentication help for gated models, a
 * generic hint otherwise) and exits the process with code 1.
 *
 * @param {string} [modelKey] - Key into MODELS; resolved by getModelConfig.
 * @returns {Promise<{extractor: Function, config: object}>}
 */
async function loadModel(modelKey) {
  const config = getModelConfig(modelKey);

  const isCached = Boolean(extractor) && activeModel === config.name;
  if (isCached) return { extractor, config };

  // Free the previous model before loading a different one.
  await disposeModel();

  const transformers = await loadTransformers();
  pipeline = transformers.pipeline;
  _cos_sim = transformers.cos_sim;

  info(`Loading embedding model: ${config.name} (${config.dim}d)...`);
  const pipelineOpts = {};
  if (config.quantized) pipelineOpts.quantized = true;

  try {
    extractor = await pipeline('feature-extraction', config.name, pipelineOpts);
  } catch (err) {
    const msg = err.message || String(err);
    const authMarkers = ['Unauthorized', '401', 'gated'];
    const needsAuth = authMarkers.some((marker) => msg.includes(marker));

    const report = needsAuth
      ? [
          `\nModel "${config.name}" requires authentication.\n`,
          `This model is gated on HuggingFace and needs an access token.\n\n`,
          `Options:\n`,
          ` 1. Set HF_TOKEN env var: export HF_TOKEN=hf_...\n`,
          ` 2. Use a public model instead: codegraph embed --model minilm\n`,
        ]
      : [
          `\nFailed to load model "${config.name}": ${msg}\n`,
          `Try a different model: codegraph embed --model minilm\n`,
        ];
    console.error(report.join(''));
    process.exit(1);
  }

  activeModel = config.name;
  info('Model loaded.');
  return { extractor, config };
}
326
-
327
/**
 * Embed a list of texts with the selected model.
 *
 * Texts are sent to the model in batches (size looked up per model key) with
 * mean pooling and normalization enabled. Each text's vector is copied out of
 * the flat output into its own Float32Array of length `dim`. When more than
 * one batch is needed, a progress counter is written to stdout.
 *
 * @param {string[]} texts - Texts to embed.
 * @param {string} [modelKey] - Key into MODELS; defaults to DEFAULT_MODEL.
 * @returns {Promise<{vectors: Float32Array[], dim: number}>}
 */
export async function embed(texts, modelKey) {
  const { extractor: runModel, config } = await loadModel(modelKey);
  const { dim } = config;
  const batchSize = BATCH_SIZE_MAP[modelKey || DEFAULT_MODEL] || DEFAULT_BATCH_SIZE;
  const vectors = [];

  for (let offset = 0; offset < texts.length; offset += batchSize) {
    const chunk = texts.slice(offset, offset + batchSize);
    const output = await runModel(chunk, { pooling: 'mean', normalize: true });

    // The output holds the batch's vectors back to back: row r starts at r * dim.
    chunk.forEach((_text, row) => {
      const base = row * dim;
      vectors.push(Float32Array.from({ length: dim }, (_unused, col) => output.data[base + col]));
    });

    if (texts.length > batchSize) {
      const done = Math.min(offset + batchSize, texts.length);
      process.stdout.write(` Embedded ${done}/${texts.length}\r`);
    }
  }

  return { vectors, dim };
}
356
-
357
/**
 * Cosine similarity between two numeric vectors.
 *
 * Iterates over the length of `a`; both vectors are expected to have the same
 * length. Returns 0 when the similarity is undefined — either vector has zero
 * magnitude (including empty input), or the norms are not a positive number —
 * instead of propagating NaN into score comparisons and sorts.
 *
 * @param {ArrayLike<number>} a - First vector.
 * @param {ArrayLike<number>} b - Second vector.
 * @returns {number} Similarity in [-1, 1], or 0 when undefined.
 */
export function cosineSim(a, b) {
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  const denominator = Math.sqrt(normA) * Math.sqrt(normB);
  // `!(x > 0)` also catches NaN, which a plain `=== 0` check would miss.
  if (!(denominator > 0)) return 0;
  return dot / denominator;
}
371
-
372
/**
 * Create the tables used for embeddings and keyword search if they are
 * missing. Safe to call repeatedly.
 *
 * Creates `embeddings` (one vector per node), `embedding_meta` (key/value
 * build metadata), adds the `full_text` column to `embeddings` when it is not
 * there yet, and creates the `fts_index` FTS5 table.
 *
 * @param {object} db - Database handle exposing `exec(sql)`.
 * @throws Rethrows any `ALTER TABLE` failure other than "duplicate column".
 */
function initEmbeddingsSchema(db) {
  db.exec(`
    CREATE TABLE IF NOT EXISTS embeddings (
      node_id INTEGER PRIMARY KEY,
      vector BLOB NOT NULL,
      text_preview TEXT,
      FOREIGN KEY(node_id) REFERENCES nodes(id)
    );
    CREATE TABLE IF NOT EXISTS embedding_meta (
      key TEXT PRIMARY KEY,
      value TEXT
    );
  `);

  // Add full_text column. Only the "already exists" failure is expected here;
  // anything else (locked or read-only database, corrupt schema) is a real
  // error and must not be hidden.
  try {
    db.exec('ALTER TABLE embeddings ADD COLUMN full_text TEXT');
  } catch (err) {
    const message = String(err?.message ?? err);
    if (!/duplicate column name/i.test(message)) throw err;
  }

  // FTS5 virtual table for BM25 keyword search
  db.exec(`
    CREATE VIRTUAL TABLE IF NOT EXISTS fts_index USING fts5(
      name,
      content,
      tokenize='unicode61'
    );
  `);
}
402
-
403
/**
 * Build embeddings for all functions/methods/classes in the graph.
 *
 * Reads every function, method and class node, builds one text per symbol
 * using the chosen strategy, embeds the texts, and stores vectors, previews,
 * full text, an FTS index row and build metadata in the database.
 *
 * Previously stored embeddings are replaced atomically: the old rows are
 * deleted inside the same transaction that writes the new ones, so an
 * unknown model key, a model-load failure or an embedding error leaves the
 * existing index untouched.
 *
 * Exits the process with code 1 when the database file does not exist.
 * Files that cannot be read are skipped with a warning.
 *
 * @param {string} rootDir - Project root directory
 * @param {string} modelKey - Model identifier from MODELS registry
 * @param {string} [customDbPath] - Override path to graph.db
 * @param {object} [options] - Embedding options
 * @param {string} [options.strategy='structured'] - 'structured' (graph-enriched) or 'source' (raw code)
 * @returns {Promise<void>}
 */
export async function buildEmbeddings(rootDir, modelKey, customDbPath, options = {}) {
  const strategy = options.strategy || 'structured';
  const dbPath = customDbPath || findDbPath(null);

  if (!fs.existsSync(dbPath)) {
    console.error(
      `No codegraph database found at ${dbPath}.\n` +
        `Run "codegraph build" first to analyze your codebase.`,
    );
    process.exit(1);
  }

  // Resolve the model before opening the database: an unknown key terminates
  // the process, and that must happen before anything is modified.
  const config = getModelConfig(modelKey);
  const contextWindow = config.contextWindow;

  const db = openDb(dbPath);
  try {
    initEmbeddingsSchema(db);

    const nodes = db
      .prepare(
        `SELECT * FROM nodes WHERE kind IN ('function', 'method', 'class') ORDER BY file, line`,
      )
      .all();

    console.log(`Building embeddings for ${nodes.length} symbols (strategy: ${strategy})...`);

    // Group by file so each source file is read from disk only once.
    const byFile = new Map();
    for (const node of nodes) {
      if (!byFile.has(node.file)) byFile.set(node.file, []);
      byFile.get(node.file).push(node);
    }

    const texts = [];
    const nodeIds = [];
    const nodeNames = [];
    const previews = [];
    let overflowCount = 0;

    for (const [file, fileNodes] of byFile) {
      const fullPath = path.join(rootDir, file);
      let lines;
      try {
        lines = fs.readFileSync(fullPath, 'utf-8').split('\n');
      } catch (err) {
        warn(`Cannot read ${file} for embeddings: ${err.message}`);
        continue;
      }

      for (const node of fileNodes) {
        let text =
          strategy === 'structured'
            ? buildStructuredText(node, file, lines, db)
            : buildSourceText(node, file, lines);

        // Truncate texts whose estimated token count exceeds the model's
        // context window (same ~4 chars/token ratio as estimateTokens).
        if (estimateTokens(text) > contextWindow) {
          overflowCount++;
          text = text.slice(0, contextWindow * 4);
        }

        texts.push(text);
        nodeIds.push(node.id);
        nodeNames.push(node.name);
        previews.push(`${node.name} (${node.kind}) -- ${file}:${node.line}`);
      }
    }

    if (overflowCount > 0) {
      warn(
        `${overflowCount} symbol(s) exceeded model context window (${contextWindow} tokens) and were truncated`,
      );
    }

    console.log(`Embedding ${texts.length} symbols...`);
    const { vectors, dim } = await embed(texts, modelKey);

    const insert = db.prepare(
      'INSERT OR REPLACE INTO embeddings (node_id, vector, text_preview, full_text) VALUES (?, ?, ?, ?)',
    );
    const insertFts = db.prepare('INSERT INTO fts_index(rowid, name, content) VALUES (?, ?, ?)');
    const insertMeta = db.prepare(
      'INSERT OR REPLACE INTO embedding_meta (key, value) VALUES (?, ?)',
    );

    const replaceAll = db.transaction(() => {
      // Clearing old data here, not up front, makes the rebuild all-or-nothing.
      db.exec('DELETE FROM embeddings');
      db.exec('DELETE FROM embedding_meta');
      db.exec('DELETE FROM fts_index');

      for (let i = 0; i < vectors.length; i++) {
        const vec = vectors[i];
        // Respect the view's offset/length in case it does not span its buffer.
        const blob = Buffer.from(vec.buffer, vec.byteOffset, vec.byteLength);
        insert.run(nodeIds[i], blob, previews[i], texts[i]);
        insertFts.run(nodeIds[i], nodeNames[i], texts[i]);
      }
      insertMeta.run('model', config.name);
      insertMeta.run('dim', String(dim));
      insertMeta.run('count', String(vectors.length));
      insertMeta.run('fts_count', String(vectors.length));
      insertMeta.run('strategy', strategy);
      insertMeta.run('built_at', new Date().toISOString());
      if (overflowCount > 0) {
        insertMeta.run('truncated_count', String(overflowCount));
      }
    });
    replaceAll();

    console.log(
      `\nStored ${vectors.length} embeddings (${dim}d, ${config.name}, strategy: ${strategy}) in graph.db`,
    );
  } finally {
    // Release the handle on failure as well as on success.
    closeDb(db);
  }
}
519
-
520
/**
 * Shared setup for search functions: opens DB, validates embeddings/model, loads rows.
 *
 * On success the open database handle is returned to the caller, who is
 * responsible for closing it. When null is returned, or when an unexpected
 * error is thrown, the handle has already been closed here.
 *
 * @param {string} [customDbPath] - Override path to the database.
 * @param {object} [opts]
 * @param {string} [opts.model] - Model key; defaults to the stored model.
 * @param {string} [opts.kind] - Only return nodes of this kind.
 * @param {string} [opts.filePattern] - Substring, or glob when it contains
 *   `*`, `?`, `[` or `]`, that the node's file must match.
 * @param {boolean} [opts.noTests] - Drop rows from test/spec/story files.
 * @returns {{db: object, rows: object[], modelKey: (string|null), storedDim: (number|null)}|null}
 *   Null when there are no embeddings (a hint is printed).
 */
function _prepareSearch(customDbPath, opts = {}) {
  const db = openReadonlyOrFail(customDbPath);

  let context = null;
  try {
    context = _loadSearchContext(db, opts);
  } finally {
    // `context` is still null on both the "nothing to search" path and when
    // the loader threw; in either case nobody else will close the handle.
    if (context === null) db.close();
  }
  return context;
}

/**
 * Validate the embeddings tables and load the candidate rows for a search.
 * Returns null (after printing a hint) when there is nothing to search.
 */
function _loadSearchContext(db, opts) {
  let count;
  try {
    count = db.prepare('SELECT COUNT(*) as c FROM embeddings').get().c;
  } catch {
    console.log('No embeddings table found. Run `codegraph embed` first.');
    return null;
  }
  if (count === 0) {
    console.log('No embeddings found. Run `codegraph embed` first.');
    return null;
  }

  let storedModel = null;
  let storedDim = null;
  try {
    const modelRow = db.prepare("SELECT value FROM embedding_meta WHERE key = 'model'").get();
    const dimRow = db.prepare("SELECT value FROM embedding_meta WHERE key = 'dim'").get();
    if (modelRow) storedModel = modelRow.value;
    if (dimRow) storedDim = parseInt(dimRow.value, 10);
  } catch {
    /* old DB without meta table */
  }

  // Without an explicit model, map the stored model name back to its key.
  let modelKey = opts.model || null;
  if (!modelKey && storedModel) {
    for (const [key, config] of Object.entries(MODELS)) {
      if (config.name === storedModel) {
        modelKey = key;
        break;
      }
    }
  }

  // Pre-filter: allow filtering by kind or file pattern to reduce search space
  const noTests = opts.noTests || false;
  const TEST_PATTERN = /\.(test|spec)\.|__test__|__tests__|\.stories\./;
  let sql = `
    SELECT e.node_id, e.vector, e.text_preview, n.name, n.kind, n.file, n.line, n.end_line, n.role
    FROM embeddings e
    JOIN nodes n ON e.node_id = n.id
  `;
  const params = [];
  const conditions = [];
  if (opts.kind) {
    conditions.push('n.kind = ?');
    params.push(opts.kind);
  }
  const isGlob = opts.filePattern && /[*?[\]]/.test(opts.filePattern);
  if (opts.filePattern && !isGlob) {
    // Escape LIKE wildcards so "_" and "%" in a path are matched literally
    // (otherwise "my_file" would also match "my-file").
    const literal = opts.filePattern.replace(/[\\%_]/g, '\\$&');
    conditions.push("n.file LIKE ? ESCAPE '\\'");
    params.push(`%${literal}%`);
  }
  if (conditions.length > 0) {
    sql += ` WHERE ${conditions.join(' AND ')}`;
  }

  let rows = db.prepare(sql).all(...params);
  if (isGlob) {
    // Glob patterns cannot be expressed in SQL here; filter in memory.
    rows = rows.filter((row) => globMatch(row.file, opts.filePattern));
  }
  if (noTests) {
    rows = rows.filter((row) => !TEST_PATTERN.test(row.file));
  }

  return { db, rows, modelKey, storedDim };
}
595
-
596
/**
 * Run one semantic query against the stored embeddings and return the data
 * (nothing is printed on success).
 *
 * The query is embedded with the resolved model, compared to every candidate
 * row by cosine similarity, filtered by `minScore`, sorted best-first and cut
 * to `limit`. The database handle is always closed before returning.
 *
 * @param {string} query - Natural-language query.
 * @param {string} [customDbPath] - Override path to the database.
 * @param {object} [opts] - Passed through to the search setup; also reads
 *   `limit` (default 15) and `minScore` (default 0.2).
 * @returns {Promise<{results: object[]}|null>} Normalized symbols, each with
 *   a `similarity` field; null when there are no embeddings or the query
 *   vector dimension does not match the stored one.
 */
export async function searchData(query, customDbPath, opts = {}) {
  const limit = opts.limit || 15;
  const minScore = opts.minScore || 0.2;

  const prepared = _prepareSearch(customDbPath, opts);
  if (!prepared) return null;
  const { db, rows, modelKey, storedDim } = prepared;

  try {
    const embedded = await embed([query], modelKey);
    const dim = embedded.dim;
    const queryVec = embedded.vectors[0];

    const dimMismatch = storedDim && dim !== storedDim;
    if (dimMismatch) {
      console.log(
        `Warning: query model dimension (${dim}) doesn't match stored embeddings (${storedDim}).`,
      );
      console.log(` Re-run \`codegraph embed\` with the same model, or use --model to match.`);
      return null;
    }

    const hc = new Map();
    const matches = [];
    for (const row of rows) {
      // Copy the stored blob into a fresh buffer before viewing it as floats.
      const stored = new Float32Array(new Uint8Array(row.vector).buffer);
      const similarity = cosineSim(queryVec, stored);
      // Written as a negated >= so a NaN score is rejected too.
      if (!(similarity >= minScore)) continue;
      matches.push({ ...normalizeSymbol(row, db, hc), similarity });
    }

    matches.sort((left, right) => right.similarity - left.similarity);
    return { results: matches.slice(0, limit) };
  } finally {
    db.close();
  }
}
642
-
643
- /**
644
- * Multi-query semantic search with Reciprocal Rank Fusion (RRF).
645
- * Returns { results: [{ name, kind, file, line, rrf, queryScores }] } or null on failure.
646
- */
647
- export async function multiSearchData(queries, customDbPath, opts = {}) {
648
- const limit = opts.limit || 15;
649
- const minScore = opts.minScore || 0.2;
650
- const k = opts.rrfK || 60;
651
-
652
- const prepared = _prepareSearch(customDbPath, opts);
653
- if (!prepared) return null;
654
- const { db, rows, modelKey, storedDim } = prepared;
655
-
656
- try {
657
- const { vectors: queryVecs, dim } = await embed(queries, modelKey);
658
-
659
- // Warn about similar queries that may bias RRF results
660
- const SIMILARITY_WARN_THRESHOLD = 0.85;
661
- for (let i = 0; i < queryVecs.length; i++) {
662
- for (let j = i + 1; j < queryVecs.length; j++) {
663
- const sim = cosineSim(queryVecs[i], queryVecs[j]);
664
- if (sim >= SIMILARITY_WARN_THRESHOLD) {
665
- warn(
666
- `Queries "${queries[i]}" and "${queries[j]}" are very similar ` +
667
- `(${(sim * 100).toFixed(0)}% cosine similarity). ` +
668
- `This may bias RRF results toward their shared matches. ` +
669
- `Consider using more distinct queries.`,
670
- );
671
- }
672
- }
673
- }
674
-
675
- if (storedDim && dim !== storedDim) {
676
- console.log(
677
- `Warning: query model dimension (${dim}) doesn't match stored embeddings (${storedDim}).`,
678
- );
679
- console.log(` Re-run \`codegraph embed\` with the same model, or use --model to match.`);
680
- return null;
681
- }
682
-
683
- // Parse row vectors once
684
- const rowVecs = rows.map((row) => new Float32Array(new Uint8Array(row.vector).buffer));
685
-
686
- // For each query: compute similarities, filter by minScore, rank
687
- const perQueryRanked = queries.map((_query, qi) => {
688
- const scored = [];
689
- for (let ri = 0; ri < rows.length; ri++) {
690
- const sim = cosineSim(queryVecs[qi], rowVecs[ri]);
691
- if (sim >= minScore) {
692
- scored.push({ rowIndex: ri, similarity: sim });
693
- }
694
- }
695
- scored.sort((a, b) => b.similarity - a.similarity);
696
- // Assign 1-indexed ranks
697
- return scored.map((item, rank) => ({ ...item, rank: rank + 1 }));
698
- });
699
-
700
- // Fuse results using RRF: for each unique row, sum 1/(k + rank_i) across queries
701
- const fusionMap = new Map(); // rowIndex -> { rrfScore, queryScores[] }
702
- for (let qi = 0; qi < queries.length; qi++) {
703
- for (const item of perQueryRanked[qi]) {
704
- if (!fusionMap.has(item.rowIndex)) {
705
- fusionMap.set(item.rowIndex, { rrfScore: 0, queryScores: [] });
706
- }
707
- const entry = fusionMap.get(item.rowIndex);
708
- entry.rrfScore += 1 / (k + item.rank);
709
- entry.queryScores.push({
710
- query: queries[qi],
711
- similarity: item.similarity,
712
- rank: item.rank,
713
- });
714
- }
715
- }
716
-
717
- // Build results sorted by RRF score
718
- const hc = new Map();
719
- const results = [];
720
- for (const [rowIndex, entry] of fusionMap) {
721
- const row = rows[rowIndex];
722
- results.push({
723
- ...normalizeSymbol(row, db, hc),
724
- rrf: entry.rrfScore,
725
- queryScores: entry.queryScores,
726
- });
727
- }
728
-
729
- results.sort((a, b) => b.rrf - a.rrf);
730
- return { results: results.slice(0, limit) };
731
- } finally {
732
- db.close();
733
- }
734
- }
735
-
736
- /**
737
- * Sanitize a user query for FTS5 MATCH syntax.
738
- * Wraps each token as an implicit OR and escapes special FTS5 characters.
739
- */
740
- function sanitizeFtsQuery(query) {
741
- // Remove FTS5 special chars that could cause syntax errors
742
- const cleaned = query.replace(/[*"():^{}~<>]/g, ' ').trim();
743
- if (!cleaned) return null;
744
- // Split into tokens, wrap with OR for multi-token queries
745
- const tokens = cleaned.split(/\s+/).filter((t) => t.length > 0);
746
- if (tokens.length === 0) return null;
747
- if (tokens.length === 1) return `"${tokens[0]}"`;
748
- return tokens.map((t) => `"${t}"`).join(' OR ');
749
- }
750
-
751
- /**
752
- * Check if the FTS5 index exists in the database.
753
- * Returns true if fts_index table exists and has rows, false otherwise.
754
- */
755
- function hasFtsIndex(db) {
756
- try {
757
- const row = db.prepare('SELECT COUNT(*) as c FROM fts_index').get();
758
- return row.c > 0;
759
- } catch {
760
- return false;
761
- }
762
- }
763
-
764
- /**
765
- * BM25 keyword search via FTS5.
766
- * Returns { results: [{ name, kind, file, line, bm25Score }] } or null if no FTS5 index.
767
- */
768
- export function ftsSearchData(query, customDbPath, opts = {}) {
769
- const limit = opts.limit || 15;
770
- const noTests = opts.noTests || false;
771
- const TEST_PATTERN = /\.(test|spec)\.|__test__|__tests__|\.stories\./;
772
-
773
- const db = openReadonlyOrFail(customDbPath);
774
-
775
- try {
776
- if (!hasFtsIndex(db)) {
777
- return null;
778
- }
779
-
780
- const ftsQuery = sanitizeFtsQuery(query);
781
- if (!ftsQuery) {
782
- return { results: [] };
783
- }
784
-
785
- let sql = `
786
- SELECT f.rowid AS node_id, rank AS bm25_score,
787
- n.name, n.kind, n.file, n.line, n.end_line, n.role
788
- FROM fts_index f
789
- JOIN nodes n ON f.rowid = n.id
790
- WHERE fts_index MATCH ?
791
- `;
792
- const params = [ftsQuery];
793
-
794
- if (opts.kind) {
795
- sql += ' AND n.kind = ?';
796
- params.push(opts.kind);
797
- }
798
-
799
- const isGlob = opts.filePattern && /[*?[\]]/.test(opts.filePattern);
800
- if (opts.filePattern && !isGlob) {
801
- sql += ' AND n.file LIKE ?';
802
- params.push(`%${opts.filePattern}%`);
803
- }
804
-
805
- sql += ' ORDER BY rank LIMIT ?';
806
- params.push(limit * 5); // fetch generous set for post-filtering
807
-
808
- let rows;
809
- try {
810
- rows = db.prepare(sql).all(...params);
811
- } catch {
812
- // Invalid FTS5 query syntax — return empty
813
- return { results: [] };
814
- }
815
-
816
- if (isGlob) {
817
- rows = rows.filter((row) => globMatch(row.file, opts.filePattern));
818
- }
819
- if (noTests) {
820
- rows = rows.filter((row) => !TEST_PATTERN.test(row.file));
821
- }
822
-
823
- const hc = new Map();
824
- const results = rows.slice(0, limit).map((row) => ({
825
- ...normalizeSymbol(row, db, hc),
826
- bm25Score: -row.bm25_score, // FTS5 rank is negative; negate for display
827
- }));
828
-
829
- return { results };
830
- } finally {
831
- db.close();
832
- }
833
- }
834
-
835
- /**
836
- * Hybrid BM25 + semantic search with RRF fusion.
837
- * Returns { results: [{ name, kind, file, line, rrf, bm25Score, bm25Rank, similarity, semanticRank }] }
838
- * or null if no FTS5 index (caller should fall back to semantic-only).
839
- */
840
- export async function hybridSearchData(query, customDbPath, opts = {}) {
841
- const limit = opts.limit || 15;
842
- const k = opts.rrfK || 60;
843
- const topK = (opts.limit || 15) * 5;
844
-
845
- // Split semicolons for multi-query support
846
- const queries =
847
- typeof query === 'string'
848
- ? query
849
- .split(';')
850
- .map((q) => q.trim())
851
- .filter((q) => q.length > 0)
852
- : [query];
853
-
854
- // Check FTS5 availability first (sync, cheap)
855
- const checkDb = openReadonlyOrFail(customDbPath);
856
- const ftsAvailable = hasFtsIndex(checkDb);
857
- checkDb.close();
858
- if (!ftsAvailable) return null;
859
-
860
- // Collect ranked lists: for each query, one BM25 list + one semantic list
861
- const rankedLists = [];
862
-
863
- for (const q of queries) {
864
- // BM25 ranked list (sync)
865
- const bm25Data = ftsSearchData(q, customDbPath, { ...opts, limit: topK });
866
- if (bm25Data?.results) {
867
- rankedLists.push(
868
- bm25Data.results.map((r, idx) => ({
869
- key: `${r.name}:${r.file}:${r.line}`,
870
- rank: idx + 1,
871
- source: 'bm25',
872
- ...r,
873
- })),
874
- );
875
- }
876
-
877
- // Semantic ranked list (async)
878
- const semData = await searchData(q, customDbPath, {
879
- ...opts,
880
- limit: topK,
881
- minScore: opts.minScore || 0.2,
882
- });
883
- if (semData?.results) {
884
- rankedLists.push(
885
- semData.results.map((r, idx) => ({
886
- key: `${r.name}:${r.file}:${r.line}`,
887
- rank: idx + 1,
888
- source: 'semantic',
889
- ...r,
890
- })),
891
- );
892
- }
893
- }
894
-
895
- // RRF fusion across all ranked lists
896
- const fusionMap = new Map();
897
- for (const list of rankedLists) {
898
- for (const item of list) {
899
- if (!fusionMap.has(item.key)) {
900
- fusionMap.set(item.key, {
901
- name: item.name,
902
- kind: item.kind,
903
- file: item.file,
904
- line: item.line,
905
- endLine: item.endLine ?? null,
906
- role: item.role ?? null,
907
- fileHash: item.fileHash ?? null,
908
- rrfScore: 0,
909
- bm25Score: null,
910
- bm25Rank: null,
911
- similarity: null,
912
- semanticRank: null,
913
- });
914
- }
915
- const entry = fusionMap.get(item.key);
916
- entry.rrfScore += 1 / (k + item.rank);
917
- if (item.source === 'bm25') {
918
- if (entry.bm25Rank === null || item.rank < entry.bm25Rank) {
919
- entry.bm25Score = item.bm25Score;
920
- entry.bm25Rank = item.rank;
921
- }
922
- } else {
923
- if (entry.semanticRank === null || item.rank < entry.semanticRank) {
924
- entry.similarity = item.similarity;
925
- entry.semanticRank = item.rank;
926
- }
927
- }
928
- }
929
- }
930
-
931
- const results = [...fusionMap.values()]
932
- .sort((a, b) => b.rrfScore - a.rrfScore)
933
- .slice(0, limit)
934
- .map((e) => ({
935
- name: e.name,
936
- kind: e.kind,
937
- file: e.file,
938
- line: e.line,
939
- endLine: e.endLine,
940
- role: e.role,
941
- fileHash: e.fileHash,
942
- rrf: e.rrfScore,
943
- bm25Score: e.bm25Score,
944
- bm25Rank: e.bm25Rank,
945
- similarity: e.similarity,
946
- semanticRank: e.semanticRank,
947
- }));
948
-
949
- return { results };
950
- }
951
-
952
- /**
953
- * Search with mode support — CLI wrapper with multi-query detection.
954
- * Modes: 'hybrid' (default), 'semantic', 'keyword'
955
- */
956
- export async function search(query, customDbPath, opts = {}) {
957
- const mode = opts.mode || 'hybrid';
958
-
959
- // Split by semicolons, trim, filter empties
960
- const queries = query
961
- .split(';')
962
- .map((q) => q.trim())
963
- .filter((q) => q.length > 0);
964
-
965
- const kindIcon = (kind) => (kind === 'function' ? 'f' : kind === 'class' ? '*' : 'o');
966
-
967
- // ─── Keyword-only mode ──────────────────────────────────────────────
968
- if (mode === 'keyword') {
969
- const singleQuery = queries.length === 1 ? queries[0] : query;
970
- const data = ftsSearchData(singleQuery, customDbPath, opts);
971
- if (!data) {
972
- console.log('No FTS5 index found. Run `codegraph embed` to build the keyword index.');
973
- return;
974
- }
975
-
976
- if (opts.json) {
977
- console.log(JSON.stringify(data, null, 2));
978
- return;
979
- }
980
-
981
- console.log(`\nKeyword search: "${singleQuery}" (BM25)\n`);
982
- if (data.results.length === 0) {
983
- console.log(' No results found.');
984
- } else {
985
- for (const r of data.results) {
986
- console.log(
987
- ` BM25 ${r.bm25Score.toFixed(2)} ${kindIcon(r.kind)} ${r.name} -- ${r.file}:${r.line}`,
988
- );
989
- }
990
- }
991
- console.log(`\n ${data.results.length} results shown\n`);
992
- return;
993
- }
994
-
995
- // ─── Semantic-only mode ─────────────────────────────────────────────
996
- if (mode === 'semantic') {
997
- if (queries.length <= 1) {
998
- const singleQuery = queries[0] || query;
999
- const data = await searchData(singleQuery, customDbPath, opts);
1000
- if (!data) return;
1001
-
1002
- if (opts.json) {
1003
- console.log(JSON.stringify(data, null, 2));
1004
- return;
1005
- }
1006
-
1007
- console.log(`\nSemantic search: "${singleQuery}"\n`);
1008
- if (data.results.length === 0) {
1009
- console.log(' No results above threshold.');
1010
- } else {
1011
- for (const r of data.results) {
1012
- const bar = '#'.repeat(Math.round(r.similarity * 20));
1013
- console.log(` ${(r.similarity * 100).toFixed(1)}% ${bar}`);
1014
- console.log(` ${kindIcon(r.kind)} ${r.name} -- ${r.file}:${r.line}`);
1015
- }
1016
- }
1017
- console.log(`\n ${data.results.length} results shown\n`);
1018
- } else {
1019
- const data = await multiSearchData(queries, customDbPath, opts);
1020
- if (!data) return;
1021
-
1022
- if (opts.json) {
1023
- console.log(JSON.stringify(data, null, 2));
1024
- return;
1025
- }
1026
-
1027
- console.log(`\nMulti-query semantic search (RRF, k=${opts.rrfK || 60}):`);
1028
- for (let i = 0; i < queries.length; i++) console.log(` [${i + 1}] "${queries[i]}"`);
1029
- console.log();
1030
- if (data.results.length === 0) {
1031
- console.log(' No results above threshold.');
1032
- } else {
1033
- for (const r of data.results) {
1034
- console.log(
1035
- ` RRF ${r.rrf.toFixed(4)} ${kindIcon(r.kind)} ${r.name} -- ${r.file}:${r.line}`,
1036
- );
1037
- for (const qs of r.queryScores) {
1038
- const bar = '#'.repeat(Math.round(qs.similarity * 20));
1039
- console.log(
1040
- ` [${queries.indexOf(qs.query) + 1}] ${(qs.similarity * 100).toFixed(1)}% ${bar} (rank ${qs.rank})`,
1041
- );
1042
- }
1043
- }
1044
- }
1045
- console.log(`\n ${data.results.length} results shown\n`);
1046
- }
1047
- return;
1048
- }
1049
-
1050
- // ─── Hybrid mode (default) ──────────────────────────────────────────
1051
- const data = await hybridSearchData(query, customDbPath, opts);
1052
-
1053
- if (!data) {
1054
- // No FTS5 index — fall back to semantic-only
1055
- warn(
1056
- 'FTS5 index not found — using semantic search only. Re-run `codegraph embed` to enable hybrid mode.',
1057
- );
1058
- return search(query, customDbPath, { ...opts, mode: 'semantic' });
1059
- }
1060
-
1061
- if (opts.json) {
1062
- console.log(JSON.stringify(data, null, 2));
1063
- return;
1064
- }
1065
-
1066
- const rrfK = opts.rrfK || 60;
1067
- if (queries.length <= 1) {
1068
- const singleQuery = queries[0] || query;
1069
- console.log(`\nHybrid search: "${singleQuery}" (BM25 + semantic, RRF k=${rrfK})\n`);
1070
- } else {
1071
- console.log(`\nHybrid multi-query search (BM25 + semantic, RRF k=${rrfK}):`);
1072
- for (let i = 0; i < queries.length; i++) console.log(` [${i + 1}] "${queries[i]}"`);
1073
- console.log();
1074
- }
1075
-
1076
- if (data.results.length === 0) {
1077
- console.log(' No results found.');
1078
- } else {
1079
- for (const r of data.results) {
1080
- console.log(
1081
- ` RRF ${r.rrf.toFixed(4)} ${kindIcon(r.kind)} ${r.name} -- ${r.file}:${r.line}`,
1082
- );
1083
- const parts = [];
1084
- if (r.bm25Rank != null) {
1085
- parts.push(`BM25: rank ${r.bm25Rank} (score ${r.bm25Score.toFixed(2)})`);
1086
- }
1087
- if (r.semanticRank != null) {
1088
- parts.push(`Semantic: rank ${r.semanticRank} (${(r.similarity * 100).toFixed(1)}%)`);
1089
- }
1090
- if (parts.length > 0) {
1091
- console.log(` ${parts.join(' | ')}`);
1092
- }
1093
- }
1094
- }
1095
-
1096
- console.log(`\n ${data.results.length} results shown\n`);
1097
- }