@optave/codegraph 3.1.2 → 3.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (194) hide show
  1. package/README.md +19 -21
  2. package/package.json +10 -7
  3. package/src/analysis/context.js +408 -0
  4. package/src/analysis/dependencies.js +341 -0
  5. package/src/analysis/exports.js +130 -0
  6. package/src/analysis/impact.js +463 -0
  7. package/src/analysis/module-map.js +322 -0
  8. package/src/analysis/roles.js +45 -0
  9. package/src/analysis/symbol-lookup.js +232 -0
  10. package/src/ast-analysis/shared.js +5 -4
  11. package/src/batch.js +2 -1
  12. package/src/builder/context.js +85 -0
  13. package/src/builder/helpers.js +218 -0
  14. package/src/builder/incremental.js +178 -0
  15. package/src/builder/pipeline.js +130 -0
  16. package/src/builder/stages/build-edges.js +297 -0
  17. package/src/builder/stages/build-structure.js +113 -0
  18. package/src/builder/stages/collect-files.js +44 -0
  19. package/src/builder/stages/detect-changes.js +413 -0
  20. package/src/builder/stages/finalize.js +139 -0
  21. package/src/builder/stages/insert-nodes.js +195 -0
  22. package/src/builder/stages/parse-files.js +28 -0
  23. package/src/builder/stages/resolve-imports.js +143 -0
  24. package/src/builder/stages/run-analyses.js +44 -0
  25. package/src/builder.js +10 -1472
  26. package/src/cfg.js +1 -2
  27. package/src/cli/commands/ast.js +26 -0
  28. package/src/cli/commands/audit.js +46 -0
  29. package/src/cli/commands/batch.js +68 -0
  30. package/src/cli/commands/branch-compare.js +21 -0
  31. package/src/cli/commands/build.js +26 -0
  32. package/src/cli/commands/cfg.js +30 -0
  33. package/src/cli/commands/check.js +79 -0
  34. package/src/cli/commands/children.js +31 -0
  35. package/src/cli/commands/co-change.js +65 -0
  36. package/src/cli/commands/communities.js +23 -0
  37. package/src/cli/commands/complexity.js +45 -0
  38. package/src/cli/commands/context.js +34 -0
  39. package/src/cli/commands/cycles.js +28 -0
  40. package/src/cli/commands/dataflow.js +32 -0
  41. package/src/cli/commands/deps.js +16 -0
  42. package/src/cli/commands/diff-impact.js +30 -0
  43. package/src/cli/commands/embed.js +30 -0
  44. package/src/cli/commands/export.js +75 -0
  45. package/src/cli/commands/exports.js +18 -0
  46. package/src/cli/commands/flow.js +36 -0
  47. package/src/cli/commands/fn-impact.js +30 -0
  48. package/src/cli/commands/impact.js +16 -0
  49. package/src/cli/commands/info.js +76 -0
  50. package/src/cli/commands/map.js +19 -0
  51. package/src/cli/commands/mcp.js +18 -0
  52. package/src/cli/commands/models.js +19 -0
  53. package/src/cli/commands/owners.js +25 -0
  54. package/src/cli/commands/path.js +36 -0
  55. package/src/cli/commands/plot.js +80 -0
  56. package/src/cli/commands/query.js +49 -0
  57. package/src/cli/commands/registry.js +100 -0
  58. package/src/cli/commands/roles.js +34 -0
  59. package/src/cli/commands/search.js +42 -0
  60. package/src/cli/commands/sequence.js +32 -0
  61. package/src/cli/commands/snapshot.js +61 -0
  62. package/src/cli/commands/stats.js +15 -0
  63. package/src/cli/commands/structure.js +32 -0
  64. package/src/cli/commands/triage.js +78 -0
  65. package/src/cli/commands/watch.js +12 -0
  66. package/src/cli/commands/where.js +24 -0
  67. package/src/cli/index.js +118 -0
  68. package/src/cli/shared/options.js +39 -0
  69. package/src/cli/shared/output.js +1 -0
  70. package/src/cli.js +11 -1514
  71. package/src/commands/check.js +5 -5
  72. package/src/commands/manifesto.js +3 -3
  73. package/src/commands/structure.js +1 -1
  74. package/src/communities.js +15 -87
  75. package/src/complexity.js +1 -1
  76. package/src/cycles.js +30 -85
  77. package/src/dataflow.js +1 -2
  78. package/src/db/connection.js +4 -4
  79. package/src/db/migrations.js +41 -0
  80. package/src/db/query-builder.js +6 -5
  81. package/src/db/repository/base.js +201 -0
  82. package/src/db/repository/cached-stmt.js +19 -0
  83. package/src/db/repository/cfg.js +27 -38
  84. package/src/db/repository/cochange.js +16 -3
  85. package/src/db/repository/complexity.js +11 -6
  86. package/src/db/repository/dataflow.js +6 -1
  87. package/src/db/repository/edges.js +120 -98
  88. package/src/db/repository/embeddings.js +14 -3
  89. package/src/db/repository/graph-read.js +32 -9
  90. package/src/db/repository/in-memory-repository.js +584 -0
  91. package/src/db/repository/index.js +6 -1
  92. package/src/db/repository/nodes.js +110 -40
  93. package/src/db/repository/sqlite-repository.js +219 -0
  94. package/src/db.js +5 -0
  95. package/src/embeddings/generator.js +163 -0
  96. package/src/embeddings/index.js +13 -0
  97. package/src/embeddings/models.js +218 -0
  98. package/src/embeddings/search/cli-formatter.js +151 -0
  99. package/src/embeddings/search/filters.js +46 -0
  100. package/src/embeddings/search/hybrid.js +121 -0
  101. package/src/embeddings/search/keyword.js +68 -0
  102. package/src/embeddings/search/prepare.js +66 -0
  103. package/src/embeddings/search/semantic.js +145 -0
  104. package/src/embeddings/stores/fts5.js +27 -0
  105. package/src/embeddings/stores/sqlite-blob.js +24 -0
  106. package/src/embeddings/strategies/source.js +14 -0
  107. package/src/embeddings/strategies/structured.js +43 -0
  108. package/src/embeddings/strategies/text-utils.js +43 -0
  109. package/src/errors.js +78 -0
  110. package/src/export.js +217 -520
  111. package/src/extractors/csharp.js +10 -2
  112. package/src/extractors/go.js +3 -1
  113. package/src/extractors/helpers.js +71 -0
  114. package/src/extractors/java.js +9 -2
  115. package/src/extractors/javascript.js +38 -1
  116. package/src/extractors/php.js +3 -1
  117. package/src/extractors/python.js +14 -3
  118. package/src/extractors/rust.js +3 -1
  119. package/src/graph/algorithms/bfs.js +49 -0
  120. package/src/graph/algorithms/centrality.js +16 -0
  121. package/src/graph/algorithms/index.js +5 -0
  122. package/src/graph/algorithms/louvain.js +26 -0
  123. package/src/graph/algorithms/shortest-path.js +41 -0
  124. package/src/graph/algorithms/tarjan.js +49 -0
  125. package/src/graph/builders/dependency.js +91 -0
  126. package/src/graph/builders/index.js +3 -0
  127. package/src/graph/builders/structure.js +40 -0
  128. package/src/graph/builders/temporal.js +33 -0
  129. package/src/graph/classifiers/index.js +2 -0
  130. package/src/graph/classifiers/risk.js +85 -0
  131. package/src/graph/classifiers/roles.js +64 -0
  132. package/src/graph/index.js +13 -0
  133. package/src/graph/model.js +230 -0
  134. package/src/index.js +33 -204
  135. package/src/infrastructure/result-formatter.js +2 -21
  136. package/src/mcp/index.js +2 -0
  137. package/src/mcp/middleware.js +26 -0
  138. package/src/mcp/server.js +128 -0
  139. package/src/mcp/tool-registry.js +801 -0
  140. package/src/mcp/tools/ast-query.js +14 -0
  141. package/src/mcp/tools/audit.js +21 -0
  142. package/src/mcp/tools/batch-query.js +11 -0
  143. package/src/mcp/tools/branch-compare.js +10 -0
  144. package/src/mcp/tools/cfg.js +21 -0
  145. package/src/mcp/tools/check.js +43 -0
  146. package/src/mcp/tools/co-changes.js +20 -0
  147. package/src/mcp/tools/code-owners.js +12 -0
  148. package/src/mcp/tools/communities.js +15 -0
  149. package/src/mcp/tools/complexity.js +18 -0
  150. package/src/mcp/tools/context.js +17 -0
  151. package/src/mcp/tools/dataflow.js +26 -0
  152. package/src/mcp/tools/diff-impact.js +24 -0
  153. package/src/mcp/tools/execution-flow.js +26 -0
  154. package/src/mcp/tools/export-graph.js +57 -0
  155. package/src/mcp/tools/file-deps.js +12 -0
  156. package/src/mcp/tools/file-exports.js +13 -0
  157. package/src/mcp/tools/find-cycles.js +15 -0
  158. package/src/mcp/tools/fn-impact.js +15 -0
  159. package/src/mcp/tools/impact-analysis.js +12 -0
  160. package/src/mcp/tools/index.js +71 -0
  161. package/src/mcp/tools/list-functions.js +14 -0
  162. package/src/mcp/tools/list-repos.js +11 -0
  163. package/src/mcp/tools/module-map.js +6 -0
  164. package/src/mcp/tools/node-roles.js +14 -0
  165. package/src/mcp/tools/path.js +12 -0
  166. package/src/mcp/tools/query.js +30 -0
  167. package/src/mcp/tools/semantic-search.js +65 -0
  168. package/src/mcp/tools/sequence.js +17 -0
  169. package/src/mcp/tools/structure.js +15 -0
  170. package/src/mcp/tools/symbol-children.js +14 -0
  171. package/src/mcp/tools/triage.js +35 -0
  172. package/src/mcp/tools/where.js +13 -0
  173. package/src/mcp.js +2 -1470
  174. package/src/native.js +34 -10
  175. package/src/parser.js +53 -2
  176. package/src/presentation/colors.js +44 -0
  177. package/src/presentation/export.js +444 -0
  178. package/src/presentation/result-formatter.js +21 -0
  179. package/src/presentation/sequence-renderer.js +43 -0
  180. package/src/presentation/table.js +47 -0
  181. package/src/presentation/viewer.js +634 -0
  182. package/src/queries.js +35 -2276
  183. package/src/resolve.js +1 -1
  184. package/src/sequence.js +2 -38
  185. package/src/shared/file-utils.js +153 -0
  186. package/src/shared/generators.js +125 -0
  187. package/src/shared/hierarchy.js +27 -0
  188. package/src/shared/normalize.js +59 -0
  189. package/src/snapshot.js +6 -5
  190. package/src/structure.js +15 -40
  191. package/src/triage.js +20 -72
  192. package/src/viewer.js +35 -656
  193. package/src/watcher.js +8 -148
  194. package/src/embedder.js +0 -1097
@@ -0,0 +1,218 @@
1
+ import { execFileSync } from 'node:child_process';
2
+ import { createInterface } from 'node:readline';
3
+ import { ConfigError, EngineError } from '../errors.js';
4
+ import { info } from '../logger.js';
5
+
// Module-level cache for the lazily loaded transformers runtime (heavy, optional module).
// All three start out null and are filled in by loadModel().

// `pipeline` factory taken from the transformers module once it has been imported.
let pipeline = null;
// Currently loaded feature-extraction pipeline instance, if any.
let extractor = null;
// Model name (config.name) that `extractor` was created for.
let activeModel = null;
10
+
/**
 * Registry of supported embedding models, keyed by the short name accepted on
 * the command line (`--model <key>`).
 *
 * Each entry carries:
 *   - name:          model identifier handed to the transformers pipeline
 *   - dim:           length of one embedding vector
 *   - contextWindow: context window size recorded for the model
 *   - desc:          human-readable summary
 *   - quantized:     when true, the pipeline is created with `{ quantized: true }`
 */
export const MODELS = {
  minilm: {
    name: 'Xenova/all-MiniLM-L6-v2',
    dim: 384,
    contextWindow: 256,
    desc: 'Smallest, fastest (~23MB). General text.',
    quantized: true,
  },

  // Jina v2 family
  'jina-small': {
    name: 'Xenova/jina-embeddings-v2-small-en',
    dim: 512,
    contextWindow: 8192,
    desc: 'Small, good quality (~33MB). General text.',
    quantized: false,
  },
  'jina-base': {
    name: 'Xenova/jina-embeddings-v2-base-en',
    dim: 768,
    contextWindow: 8192,
    desc: 'Good quality (~137MB). General text, 8192 token context.',
    quantized: false,
  },
  'jina-code': {
    name: 'Xenova/jina-embeddings-v2-base-code',
    dim: 768,
    contextWindow: 8192,
    desc: 'Code-aware (~137MB). Trained on code+text, best for code search.',
    quantized: false,
  },

  // Nomic family
  nomic: {
    name: 'Xenova/nomic-embed-text-v1',
    dim: 768,
    contextWindow: 8192,
    desc: 'Good local quality (~137MB). 8192 context.',
    quantized: false,
  },
  'nomic-v1.5': {
    name: 'nomic-ai/nomic-embed-text-v1.5',
    dim: 768,
    contextWindow: 8192,
    desc: 'Improved nomic (~137MB). Matryoshka dimensions, 8192 context.',
    quantized: false,
  },

  'bge-large': {
    name: 'Xenova/bge-large-en-v1.5',
    dim: 1024,
    contextWindow: 512,
    desc: 'Best general retrieval (~335MB). Top MTEB scores.',
    quantized: false,
  },
};
62
+
/** Names of the available embedding strategies. */
export const EMBEDDING_STRATEGIES = ['structured', 'source'];

/** Key into MODELS used whenever the caller does not name a model. */
export const DEFAULT_MODEL = 'nomic-v1.5';

// Number of texts sent to the extractor per call in embed(), keyed by model key.
const BATCH_SIZE_MAP = {
  minilm: 32,
  'jina-small': 16,
  'jina-base': 8,
  'jina-code': 8,
  nomic: 8,
  'nomic-v1.5': 8,
  'bge-large': 4,
};

// Batch size used by embed() for any model key missing from BATCH_SIZE_MAP.
const DEFAULT_BATCH_SIZE = 32;
76
+
/**
 * Resolve a model key to its entry in MODELS.
 *
 * @internal Used by generator.js — not part of the public barrel.
 * @param {string} [modelKey] - Key into MODELS; falls back to DEFAULT_MODEL when falsy.
 * @returns {object} The matching MODELS entry.
 * @throws {ConfigError} When the key is not a model defined in MODELS.
 */
export function getModelConfig(modelKey) {
  const key = modelKey || DEFAULT_MODEL;
  // Own-property check: a plain `MODELS[key]` lookup would also resolve names
  // inherited from Object.prototype ("constructor", "toString", ...) and hand
  // back a function instead of rejecting the key.
  if (!Object.prototype.hasOwnProperty.call(MODELS, key)) {
    throw new ConfigError(`Unknown model: ${key}. Available: ${Object.keys(MODELS).join(', ')}`);
  }
  return MODELS[key];
}
86
+
/**
 * Prompt the user to install a missing package interactively.
 *
 * Skips the prompt entirely in non-TTY environments (CI, piped stdin).
 * The prompt is written to stderr; `npm install` runs with inherited stdio
 * and a 5 minute timeout.
 *
 * @internal Not part of the public barrel.
 * @param {string} packageName - Package to pass to `npm install`.
 * @returns {Promise<boolean>} true if the install command succeeded; false if
 *   the user declined, the prompt was closed without an answer, or npm failed.
 */
export function promptInstall(packageName) {
  if (!process.stdin.isTTY) return Promise.resolve(false);

  return new Promise((resolve) => {
    const rl = createInterface({ input: process.stdin, output: process.stderr });

    // If the interface closes before an answer arrives (e.g. Ctrl+D / stdin
    // EOF) the question callback never runs; settle as "not installed"
    // instead of leaving the promise pending forever.
    let answered = false;
    rl.once('close', () => {
      if (!answered) resolve(false);
    });

    rl.question(`Semantic search requires ${packageName}. Install it now? [y/N] `, (answer) => {
      answered = true;
      rl.close();
      if (answer.trim().toLowerCase() !== 'y') return resolve(false);
      try {
        execFileSync('npm', ['install', packageName], {
          stdio: 'inherit',
          timeout: 300_000,
          // On Windows npm is a .cmd shim, which cannot be spawned without a
          // shell. NOTE(review): packageName reaches the shell on Windows, so
          // callers must only pass trusted package names.
          shell: process.platform === 'win32',
        });
        resolve(true);
      } catch {
        // Best effort: any npm failure is reported to the caller as "not installed".
        resolve(false);
      }
    });
  });
}
113
+
/**
 * Lazy-load @huggingface/transformers.
 *
 * If the import fails, the user is offered an interactive install via
 * promptInstall() and the import is retried once after a successful install.
 * When no install happens (declined, failed, or non-TTY environment) an
 * EngineError with installation instructions is thrown.
 *
 * @internal Not part of the public barrel.
 * @returns {Promise<object>} The imported module namespace.
 * @throws {EngineError} When the package is unavailable, or was installed but
 *   still fails to load. The underlying import error is attached as `cause`.
 */
export async function loadTransformers() {
  const pkg = '@huggingface/transformers';

  let importErr;
  try {
    return await import('@huggingface/transformers');
  } catch (err) {
    // Keep the original failure so it can be surfaced as `cause` below
    // instead of being silently discarded.
    importErr = err;
  }

  const installed = await promptInstall(pkg);
  if (!installed) {
    throw new EngineError(
      `Semantic search requires ${pkg}.\nInstall it with: npm install ${pkg}`,
      { cause: importErr },
    );
  }

  try {
    return await import(pkg);
  } catch (loadErr) {
    throw new EngineError(
      `${pkg} was installed but failed to load. Please check your environment.`,
      { cause: loadErr },
    );
  }
}
139
+
/**
 * Dispose the currently loaded extractor and forget which model is active.
 * Safe to call when no model is loaded (no-op apart from clearing state).
 *
 * The cached state is cleared even when `dispose()` rejects, so a failed
 * disposal cannot leave the module pointing at a stale extractor; the
 * rejection itself still propagates to the caller.
 *
 * @returns {Promise<void>}
 */
export async function disposeModel() {
  try {
    if (extractor) {
      await extractor.dispose();
    }
  } finally {
    extractor = null;
    activeModel = null;
  }
}
151
+
/**
 * Translate a pipeline creation failure into an EngineError with guidance.
 * Failures whose message mentions "Unauthorized", "401" or "gated" get the
 * authentication hint; everything else gets the generic message.
 */
function toModelLoadError(modelName, err) {
  const msg = err.message || String(err);
  const looksLikeAuthFailure = ['Unauthorized', '401', 'gated'].some((needle) =>
    msg.includes(needle),
  );

  if (looksLikeAuthFailure) {
    return new EngineError(
      `Model "${modelName}" requires authentication.\n` +
        `This model is gated on HuggingFace and needs an access token.\n\n` +
        `Options:\n` +
        ` 1. Set HF_TOKEN env var: export HF_TOKEN=hf_...\n` +
        ` 2. Use a public model instead: codegraph embed --model minilm`,
      { cause: err },
    );
  }

  return new EngineError(
    `Failed to load model "${modelName}": ${msg}\n` +
      `Try a different model: codegraph embed --model minilm`,
    { cause: err },
  );
}

/**
 * Return the extractor for the requested model, creating it if needed.
 *
 * The extractor is cached at module level: asking again for the model that is
 * already active returns the cached instance, while asking for a different
 * one disposes the previous extractor first.
 *
 * @param {string} [modelKey] - Key into MODELS (resolved by getModelConfig).
 * @returns {Promise<{extractor: Function, config: object}>}
 * @throws {EngineError} When the pipeline cannot be created.
 */
async function loadModel(modelKey) {
  const config = getModelConfig(modelKey);

  const alreadyActive = extractor && activeModel === config.name;
  if (alreadyActive) return { extractor, config };

  // A different (or no) model is active: release it before loading the new one.
  await disposeModel();

  ({ pipeline } = await loadTransformers());

  info(`Loading embedding model: ${config.name} (${config.dim}d)...`);

  const pipelineOpts = {};
  if (config.quantized) pipelineOpts.quantized = true;

  try {
    extractor = await pipeline('feature-extraction', config.name, pipelineOpts);
  } catch (err) {
    throw toModelLoadError(config.name, err);
  }

  activeModel = config.name;
  info('Model loaded.');
  return { extractor, config };
}
189
+
/**
 * Generate embeddings for an array of texts.
 *
 * Texts are sent to the extractor in batches (size from BATCH_SIZE_MAP, or
 * DEFAULT_BATCH_SIZE for unlisted keys) with mean pooling and normalization.
 * Each text's vector is copied out of the flat `output.data` buffer as a
 * `dim`-long slice.
 *
 * @param {string[]} texts - Texts to embed.
 * @param {string} [modelKey] - Key into MODELS; DEFAULT_MODEL when omitted.
 * @returns {Promise<{vectors: Float32Array[], dim: number}>}
 */
export async function embed(texts, modelKey) {
  const { extractor: runExtractor, config } = await loadModel(modelKey);
  const { dim } = config;
  const batchSize = BATCH_SIZE_MAP[modelKey || DEFAULT_MODEL] || DEFAULT_BATCH_SIZE;
  const vectors = [];

  let offset = 0;
  while (offset < texts.length) {
    const batch = texts.slice(offset, offset + batchSize);
    const output = await runExtractor(batch, { pooling: 'mean', normalize: true });

    // Row `row` of the batch occupies output.data[row * dim, (row + 1) * dim).
    for (let row = 0; row < batch.length; row += 1) {
      const base = row * dim;
      const vec = new Float32Array(dim);
      for (let col = 0; col < dim; col += 1) {
        vec[col] = output.data[base + col];
      }
      vectors.push(vec);
    }

    // Progress is only reported when the input spans more than one batch.
    if (texts.length > batchSize) {
      const done = Math.min(offset + batchSize, texts.length);
      process.stdout.write(` Embedded ${done}/${texts.length}\r`);
    }

    offset += batchSize;
  }

  return { vectors, dim };
}
@@ -0,0 +1,151 @@
1
+ import { warn } from '../../logger.js';
2
+ import { hybridSearchData } from './hybrid.js';
3
+ import { ftsSearchData } from './keyword.js';
4
+ import { multiSearchData, searchData } from './semantic.js';
5
+
/** One-character marker for a symbol kind: 'f' function, '*' class, 'o' anything else. */
function kindIcon(kind) {
  if (kind === 'function') return 'f';
  return kind === 'class' ? '*' : 'o';
}

/** Bar of '#' characters, 20 characters for a similarity of 1. */
function similarityBar(similarity) {
  return '#'.repeat(Math.round(similarity * 20));
}

/** Print the payload as pretty-printed JSON. */
function printJson(data) {
  console.log(JSON.stringify(data, null, 2));
}

/** Print the numbered list of sub-queries followed by a blank line. */
function printNumberedQueries(queries) {
  queries.forEach((q, idx) => console.log(` [${idx + 1}] "${q}"`));
  console.log();
}

/** Print the trailing "N results shown" line. */
function printResultCount(data) {
  console.log(`\n ${data.results.length} results shown\n`);
}

/** Keyword-only mode: BM25 search through the FTS5 index. */
function keywordSearch(query, queries, customDbPath, opts) {
  // With several sub-queries the raw (unsplit) query string is searched as-is.
  const term = queries.length === 1 ? queries[0] : query;
  const data = ftsSearchData(term, customDbPath, opts);
  if (!data) {
    console.log('No FTS5 index found. Run `codegraph embed` to build the keyword index.');
    return;
  }
  if (opts.json) {
    printJson(data);
    return;
  }

  console.log(`\nKeyword search: "${term}" (BM25)\n`);
  if (data.results.length === 0) {
    console.log(' No results found.');
  }
  for (const r of data.results) {
    console.log(
      ` BM25 ${r.bm25Score.toFixed(2)} ${kindIcon(r.kind)} ${r.name} -- ${r.file}:${r.line}`,
    );
  }
  printResultCount(data);
}

/** Semantic-only mode with zero or one sub-query. */
async function singleSemanticSearch(query, queries, customDbPath, opts) {
  const term = queries[0] || query;
  const data = await searchData(term, customDbPath, opts);
  if (!data) return;
  if (opts.json) {
    printJson(data);
    return;
  }

  console.log(`\nSemantic search: "${term}"\n`);
  if (data.results.length === 0) {
    console.log(' No results above threshold.');
  }
  for (const r of data.results) {
    console.log(` ${(r.similarity * 100).toFixed(1)}% ${similarityBar(r.similarity)}`);
    console.log(` ${kindIcon(r.kind)} ${r.name} -- ${r.file}:${r.line}`);
  }
  printResultCount(data);
}

/** Semantic-only mode with several sub-queries (results fused with RRF). */
async function multiSemanticSearch(queries, customDbPath, opts) {
  const data = await multiSearchData(queries, customDbPath, opts);
  if (!data) return;
  if (opts.json) {
    printJson(data);
    return;
  }

  console.log(`\nMulti-query semantic search (RRF, k=${opts.rrfK || 60}):`);
  printNumberedQueries(queries);
  if (data.results.length === 0) {
    console.log(' No results above threshold.');
  }
  for (const r of data.results) {
    console.log(
      ` RRF ${r.rrf.toFixed(4)} ${kindIcon(r.kind)} ${r.name} -- ${r.file}:${r.line}`,
    );
    for (const qs of r.queryScores) {
      const queryNumber = queries.indexOf(qs.query) + 1;
      const percent = (qs.similarity * 100).toFixed(1);
      console.log(
        ` [${queryNumber}] ${percent}% ${similarityBar(qs.similarity)} (rank ${qs.rank})`,
      );
    }
  }
  printResultCount(data);
}

/** Hybrid mode: BM25 + semantic; falls back to semantic-only without an FTS5 index. */
async function hybridSearch(query, queries, customDbPath, opts) {
  const data = await hybridSearchData(query, customDbPath, opts);
  if (!data) {
    // No FTS5 index — fall back to semantic-only
    warn(
      'FTS5 index not found — using semantic search only. Re-run `codegraph embed` to enable hybrid mode.',
    );
    return search(query, customDbPath, { ...opts, mode: 'semantic' });
  }
  if (opts.json) {
    printJson(data);
    return;
  }

  const rrfK = opts.rrfK || 60;
  if (queries.length <= 1) {
    const term = queries[0] || query;
    console.log(`\nHybrid search: "${term}" (BM25 + semantic, RRF k=${rrfK})\n`);
  } else {
    console.log(`\nHybrid multi-query search (BM25 + semantic, RRF k=${rrfK}):`);
    printNumberedQueries(queries);
  }

  if (data.results.length === 0) {
    console.log(' No results found.');
  }
  for (const r of data.results) {
    console.log(
      ` RRF ${r.rrf.toFixed(4)} ${kindIcon(r.kind)} ${r.name} -- ${r.file}:${r.line}`,
    );
    const parts = [];
    if (r.bm25Rank != null) {
      parts.push(`BM25: rank ${r.bm25Rank} (score ${r.bm25Score.toFixed(2)})`);
    }
    if (r.semanticRank != null) {
      parts.push(`Semantic: rank ${r.semanticRank} (${(r.similarity * 100).toFixed(1)}%)`);
    }
    if (parts.length > 0) {
      console.log(` ${parts.join(' | ')}`);
    }
  }
  printResultCount(data);
}

/**
 * Search with mode support — CLI wrapper with multi-query detection.
 *
 * The query is split on semicolons into sub-queries (blank pieces dropped).
 * Results are printed to the console, as JSON when `opts.json` is set.
 *
 * @param {string} query - Raw query; may contain several ';'-separated queries.
 * @param {string} [customDbPath] - Forwarded to the underlying search functions.
 * @param {object} [opts]
 * @param {string} [opts.mode] - 'hybrid' (default), 'semantic' or 'keyword'.
 * @param {boolean} [opts.json] - Print the raw result object as JSON.
 * @param {number} [opts.rrfK] - RRF constant shown in headers (default 60).
 * @returns {Promise<void>}
 */
export async function search(query, customDbPath, opts = {}) {
  const mode = opts.mode || 'hybrid';
  const queries = query
    .split(';')
    .map((part) => part.trim())
    .filter((part) => part.length > 0);

  if (mode === 'keyword') {
    keywordSearch(query, queries, customDbPath, opts);
    return;
  }

  if (mode === 'semantic') {
    if (queries.length <= 1) {
      await singleSemanticSearch(query, queries, customDbPath, opts);
    } else {
      await multiSemanticSearch(queries, customDbPath, opts);
    }
    return;
  }

  await hybridSearch(query, queries, customDbPath, opts);
}
@@ -0,0 +1,46 @@
/**
 * Match a file path against a glob pattern. Zero dependencies.
 *
 * Supported wildcards:
 *   - `*`   any run of characters within one path segment (never crosses `/`)
 *   - `?`   exactly one character other than `/`
 *   - `**`  any run of characters, including `/`
 *   - a `**` followed by `/` at the start of a segment matches zero or more
 *     whole directories, so `src/**` + `/*.js` matches both `src/a.js` and
 *     `src/x/y/a.js`
 * Every other character (including `[` and `]`) is matched literally.
 * Backslashes in both arguments are treated as path separators.
 *
 * @param {string} filePath - Path to test.
 * @param {string} pattern - Glob pattern.
 * @returns {boolean} true when the whole path matches the pattern.
 */
export function globMatch(filePath, pattern) {
  // Normalize separators to forward slashes
  const normalized = filePath.replace(/\\/g, '/');
  const glob = pattern.replace(/\\/g, '/');

  let source = '';
  for (let i = 0; i < glob.length; i++) {
    const ch = glob[i];
    if (ch === '*' && glob[i + 1] === '*') {
      const atSegmentStart = i === 0 || glob[i - 1] === '/';
      if (atSegmentStart && glob[i + 2] === '/') {
        // Whole-segment globstar: the directory part (and its slash) is
        // optional, so the pattern also matches with zero directories.
        source += '(?:.*/)?';
        i += 2;
      } else {
        source += '.*';
        i += 1;
      }
    } else if (ch === '*') {
      source += '[^/]*';
    } else if (ch === '?') {
      source += '[^/]';
    } else {
      // Escape regex specials so the character matches literally
      source += ch.replace(/[.+^${}()|[\]\\]/, '\\$&');
    }
  }

  try {
    return new RegExp(`^${source}$`).test(normalized);
  } catch {
    // Malformed pattern — fall back to substring match
    return normalized.includes(pattern);
  }
}
22
+
// Paths containing ".test." / ".spec.", "__test__" / "__tests__" or ".stories." count as test files.
const TEST_PATTERN = /\.(test|spec)\.|__test__|__tests__|\.stories\./;

/**
 * Apply post-query filters (glob pattern, noTests) to a set of rows.
 * The input array is never mutated; when no filter applies it is returned as-is.
 *
 * @param {Array} rows - Rows with at least a `file` property
 * @param {object} opts
 * @param {string} [opts.filePattern] - Glob pattern (only applied if it contains glob chars)
 * @param {boolean} [opts.noTests] - Exclude test/spec files
 * @param {boolean} [opts.isGlob] - Pre-computed: does filePattern contain glob chars?
 * @returns {Array}
 */
export function applyFilters(rows, opts = {}) {
  const { filePattern, noTests } = opts;

  // Trust the caller's pre-computed flag when given; otherwise sniff the pattern.
  const useGlob =
    opts.isGlob !== undefined ? opts.isGlob : filePattern && /[*?[\]]/.test(filePattern);

  const afterGlob = useGlob ? rows.filter((row) => globMatch(row.file, filePattern)) : rows;
  return noTests ? afterGlob.filter((row) => !TEST_PATTERN.test(row.file)) : afterGlob;
}
@@ -0,0 +1,121 @@
1
+ import { openReadonlyOrFail } from '../../db.js';
2
+ import { hasFtsIndex } from '../stores/fts5.js';
3
+ import { ftsSearchData } from './keyword.js';
4
+ import { searchData } from './semantic.js';
5
+
/**
 * Hybrid BM25 + semantic search with RRF fusion.
 *
 * For every ';'-separated sub-query, one BM25 ranked list and one semantic
 * ranked list are collected (each up to 5x the requested limit). All lists are
 * then fused: a symbol, identified by `name:file:line`, scores the sum of
 * `1 / (k + rank)` over every list it appears in.
 *
 * @param {string} query - Query text; semicolons separate sub-queries.
 * @param {string} [customDbPath] - Forwarded to the DB/open and search helpers.
 * @param {object} [opts]
 * @param {number} [opts.limit] - Maximum number of fused results (default 15).
 * @param {number} [opts.rrfK] - RRF constant k (default 60).
 * @param {number} [opts.minScore] - Semantic score floor passed to searchData (default 0.2).
 * @returns {Promise<{results: Array<{name, kind, file, line, endLine, role, fileHash,
 *   rrf, bm25Score, bm25Rank, similarity, semanticRank}>} | null>}
 *   null if no FTS5 index (caller should fall back to semantic-only).
 */
export async function hybridSearchData(query, customDbPath, opts = {}) {
  const limit = opts.limit || 15;
  const k = opts.rrfK || 60;
  const topK = limit * 5;

  // Split semicolons for multi-query support
  const queries =
    typeof query === 'string'
      ? query
          .split(';')
          .map((q) => q.trim())
          .filter((q) => q.length > 0)
      : [query];

  // Check FTS5 availability first (sync, cheap). The probe handle is closed
  // in `finally` so it cannot leak if the check itself throws.
  const checkDb = openReadonlyOrFail(customDbPath);
  let ftsAvailable;
  try {
    ftsAvailable = hasFtsIndex(checkDb);
  } finally {
    checkDb.close();
  }
  if (!ftsAvailable) return null;

  // Tag each result with its fusion key, 1-based rank and originating list.
  // The bookkeeping fields are written AFTER the spread so a same-named field
  // on a result row can never overwrite them.
  const toRankedList = (results, source) =>
    results.map((r, idx) => ({
      ...r,
      key: `${r.name}:${r.file}:${r.line}`,
      rank: idx + 1,
      source,
    }));

  // Collect ranked lists: for each query, one BM25 list + one semantic list
  const rankedLists = [];

  for (const q of queries) {
    // BM25 ranked list (sync)
    const bm25Data = ftsSearchData(q, customDbPath, { ...opts, limit: topK });
    if (bm25Data?.results) {
      rankedLists.push(toRankedList(bm25Data.results, 'bm25'));
    }

    // Semantic ranked list (async)
    const semData = await searchData(q, customDbPath, {
      ...opts,
      limit: topK,
      minScore: opts.minScore || 0.2,
    });
    if (semData?.results) {
      rankedLists.push(toRankedList(semData.results, 'semantic'));
    }
  }

  // RRF fusion across all ranked lists
  const fusionMap = new Map();
  for (const list of rankedLists) {
    for (const item of list) {
      if (!fusionMap.has(item.key)) {
        fusionMap.set(item.key, {
          name: item.name,
          kind: item.kind,
          file: item.file,
          line: item.line,
          endLine: item.endLine ?? null,
          role: item.role ?? null,
          fileHash: item.fileHash ?? null,
          rrfScore: 0,
          bm25Score: null,
          bm25Rank: null,
          similarity: null,
          semanticRank: null,
        });
      }
      const entry = fusionMap.get(item.key);
      entry.rrfScore += 1 / (k + item.rank);

      // Across sub-queries, keep the best (lowest) rank per source and the
      // score that came with it.
      if (item.source === 'bm25') {
        if (entry.bm25Rank === null || item.rank < entry.bm25Rank) {
          entry.bm25Score = item.bm25Score;
          entry.bm25Rank = item.rank;
        }
      } else {
        if (entry.semanticRank === null || item.rank < entry.semanticRank) {
          entry.similarity = item.similarity;
          entry.semanticRank = item.rank;
        }
      }
    }
  }

  const results = [...fusionMap.values()]
    .sort((a, b) => b.rrfScore - a.rrfScore)
    .slice(0, limit)
    .map((e) => ({
      name: e.name,
      kind: e.kind,
      file: e.file,
      line: e.line,
      endLine: e.endLine,
      role: e.role,
      fileHash: e.fileHash,
      rrf: e.rrfScore,
      bm25Score: e.bm25Score,
      bm25Rank: e.bm25Rank,
      similarity: e.similarity,
      semanticRank: e.semanticRank,
    }));

  return { results };
}
@@ -0,0 +1,68 @@
1
+ import { openReadonlyOrFail } from '../../db.js';
2
+ import { normalizeSymbol } from '../../queries.js';
3
+ import { hasFtsIndex, sanitizeFtsQuery } from '../stores/fts5.js';
4
+ import { applyFilters } from './filters.js';
5
+
/**
 * BM25 keyword search via FTS5.
 *
 * Opens the database read-only for the duration of the call and always closes
 * it. `opts.kind` and a non-glob `opts.filePattern` are applied in SQL; glob
 * patterns and `opts.noTests` are applied afterwards by applyFilters(), which
 * is why 5x the requested limit is fetched.
 *
 * @param {string} query - Raw query; passed through sanitizeFtsQuery().
 * @param {string} [customDbPath] - Forwarded to openReadonlyOrFail().
 * @param {object} [opts]
 * @param {number} [opts.limit] - Maximum number of results (default 15).
 * @param {string} [opts.kind] - Restrict to one node kind.
 * @param {string} [opts.filePattern] - Substring or glob filter on the file path.
 * @returns {{results: Array}|null} Each result is a normalized symbol plus
 *   `bm25Score`; null if there is no FTS5 index.
 */
export function ftsSearchData(query, customDbPath, opts = {}) {
  const maxResults = opts.limit || 15;
  const db = openReadonlyOrFail(customDbPath);

  try {
    if (!hasFtsIndex(db)) return null;

    const ftsQuery = sanitizeFtsQuery(query);
    if (!ftsQuery) return { results: [] };

    const { kind, filePattern } = opts;
    const isGlob = filePattern && /[*?[\]]/.test(filePattern);

    let sql = `
      SELECT f.rowid AS node_id, rank AS bm25_score,
      n.name, n.kind, n.file, n.line, n.end_line, n.role
      FROM fts_index f
      JOIN nodes n ON f.rowid = n.id
      WHERE fts_index MATCH ?
    `;
    const params = [ftsQuery];

    if (kind) {
      sql += ' AND n.kind = ?';
      params.push(kind);
    }
    // Plain (non-glob) patterns become a substring match in SQL.
    if (filePattern && !isGlob) {
      sql += ' AND n.file LIKE ?';
      params.push(`%${filePattern}%`);
    }
    sql += ' ORDER BY rank LIMIT ?';
    params.push(maxResults * 5); // fetch generous set for post-filtering

    let candidates;
    try {
      candidates = db.prepare(sql).all(...params);
    } catch {
      // Invalid FTS5 query syntax — return empty
      return { results: [] };
    }

    const kept = applyFilters(candidates, { ...opts, isGlob }).slice(0, maxResults);

    const hc = new Map();
    const results = [];
    for (const row of kept) {
      results.push({
        ...normalizeSymbol(row, db, hc),
        bm25Score: -row.bm25_score, // FTS5 rank is negative; negate for display
      });
    }
    return { results };
  } finally {
    db.close();
  }
}
@@ -0,0 +1,66 @@
1
+ import { getEmbeddingCount, getEmbeddingMeta } from '../../db/repository/embeddings.js';
2
+ import { openReadonlyOrFail } from '../../db.js';
3
+ import { MODELS } from '../models.js';
4
+ import { applyFilters } from './filters.js';
5
+
/**
 * Shared setup for search functions: opens DB, validates embeddings/model, loads rows.
 *
 * When the caller gives no `opts.model`, the model key is looked up from the
 * model name stored in the embedding metadata. `opts.kind` and a non-glob
 * `opts.filePattern` narrow the rows in SQL; glob patterns and `opts.noTests`
 * are applied afterwards by applyFilters().
 *
 * @param {string} [customDbPath] - Forwarded to openReadonlyOrFail().
 * @param {object} [opts]
 * @returns {{db, rows, modelKey, storedDim}|null} null when there are no
 *   embeddings (a message is printed and the DB is closed). On exception the
 *   DB is closed too, so callers only close `db` on the happy path.
 */
export function prepareSearch(customDbPath, opts = {}) {
  const db = openReadonlyOrFail(customDbPath);

  try {
    if (getEmbeddingCount(db) === 0) {
      console.log('No embeddings found. Run `codegraph embed` first.');
      db.close();
      return null;
    }

    const storedModel = getEmbeddingMeta(db, 'model') || null;
    const dimStr = getEmbeddingMeta(db, 'dim');
    const storedDim = dimStr ? parseInt(dimStr, 10) : null;

    // An explicit opts.model wins; otherwise map the stored model name back to its key.
    let modelKey = opts.model || null;
    if (!modelKey && storedModel) {
      const match = Object.entries(MODELS).find(([, config]) => config.name === storedModel);
      if (match) {
        modelKey = match[0];
      }
    }

    // Pre-filter: allow filtering by kind or file pattern to reduce search space
    const { kind, filePattern } = opts;
    const isGlob = filePattern && /[*?[\]]/.test(filePattern);
    const conditions = [];
    const params = [];
    if (kind) {
      conditions.push('n.kind = ?');
      params.push(kind);
    }
    if (filePattern && !isGlob) {
      conditions.push('n.file LIKE ?');
      params.push(`%${filePattern}%`);
    }

    let sql = `
      SELECT e.node_id, e.vector, e.text_preview, n.name, n.kind, n.file, n.line, n.end_line, n.role
      FROM embeddings e
      JOIN nodes n ON e.node_id = n.id
    `;
    if (conditions.length > 0) {
      sql += ` WHERE ${conditions.join(' AND ')}`;
    }

    const rows = applyFilters(db.prepare(sql).all(...params), { ...opts, isGlob });
    return { db, rows, modelKey, storedDim };
  } catch (err) {
    db.close();
    throw err;
  }
}