@optave/codegraph 3.1.3 → 3.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (232) hide show
  1. package/README.md +38 -84
  2. package/package.json +13 -8
  3. package/src/ast-analysis/engine.js +32 -12
  4. package/src/ast-analysis/shared.js +6 -5
  5. package/src/cli/commands/ast.js +22 -0
  6. package/src/cli/commands/audit.js +45 -0
  7. package/src/cli/commands/batch.js +68 -0
  8. package/src/cli/commands/branch-compare.js +21 -0
  9. package/src/cli/commands/build.js +26 -0
  10. package/src/cli/commands/cfg.js +26 -0
  11. package/src/cli/commands/check.js +74 -0
  12. package/src/cli/commands/children.js +28 -0
  13. package/src/cli/commands/co-change.js +67 -0
  14. package/src/cli/commands/communities.js +19 -0
  15. package/src/cli/commands/complexity.js +46 -0
  16. package/src/cli/commands/context.js +30 -0
  17. package/src/cli/commands/cycles.js +32 -0
  18. package/src/cli/commands/dataflow.js +28 -0
  19. package/src/cli/commands/deps.js +12 -0
  20. package/src/cli/commands/diff-impact.js +26 -0
  21. package/src/cli/commands/embed.js +30 -0
  22. package/src/cli/commands/export.js +78 -0
  23. package/src/cli/commands/exports.js +14 -0
  24. package/src/cli/commands/flow.js +32 -0
  25. package/src/cli/commands/fn-impact.js +26 -0
  26. package/src/cli/commands/impact.js +12 -0
  27. package/src/cli/commands/info.js +76 -0
  28. package/src/cli/commands/map.js +19 -0
  29. package/src/cli/commands/mcp.js +18 -0
  30. package/src/cli/commands/models.js +19 -0
  31. package/src/cli/commands/owners.js +25 -0
  32. package/src/cli/commands/path.js +36 -0
  33. package/src/cli/commands/plot.js +89 -0
  34. package/src/cli/commands/query.js +45 -0
  35. package/src/cli/commands/registry.js +100 -0
  36. package/src/cli/commands/roles.js +30 -0
  37. package/src/cli/commands/search.js +42 -0
  38. package/src/cli/commands/sequence.js +28 -0
  39. package/src/cli/commands/snapshot.js +66 -0
  40. package/src/cli/commands/stats.js +15 -0
  41. package/src/cli/commands/structure.js +33 -0
  42. package/src/cli/commands/triage.js +78 -0
  43. package/src/cli/commands/watch.js +12 -0
  44. package/src/cli/commands/where.js +20 -0
  45. package/src/cli/index.js +124 -0
  46. package/src/cli/shared/open-graph.js +13 -0
  47. package/src/cli/shared/options.js +59 -0
  48. package/src/cli/shared/output.js +1 -0
  49. package/src/cli.js +11 -1522
  50. package/src/db/connection.js +130 -7
  51. package/src/{db.js → db/index.js} +17 -5
  52. package/src/db/migrations.js +42 -1
  53. package/src/db/query-builder.js +20 -12
  54. package/src/db/repository/base.js +201 -0
  55. package/src/db/repository/graph-read.js +7 -4
  56. package/src/db/repository/in-memory-repository.js +575 -0
  57. package/src/db/repository/index.js +5 -1
  58. package/src/db/repository/nodes.js +60 -6
  59. package/src/db/repository/sqlite-repository.js +219 -0
  60. package/src/domain/analysis/context.js +408 -0
  61. package/src/domain/analysis/dependencies.js +341 -0
  62. package/src/domain/analysis/exports.js +134 -0
  63. package/src/domain/analysis/impact.js +466 -0
  64. package/src/domain/analysis/module-map.js +322 -0
  65. package/src/domain/analysis/roles.js +45 -0
  66. package/src/domain/analysis/symbol-lookup.js +238 -0
  67. package/src/domain/graph/builder/context.js +85 -0
  68. package/src/domain/graph/builder/helpers.js +218 -0
  69. package/src/domain/graph/builder/incremental.js +178 -0
  70. package/src/domain/graph/builder/pipeline.js +130 -0
  71. package/src/domain/graph/builder/stages/build-edges.js +297 -0
  72. package/src/domain/graph/builder/stages/build-structure.js +113 -0
  73. package/src/domain/graph/builder/stages/collect-files.js +44 -0
  74. package/src/domain/graph/builder/stages/detect-changes.js +413 -0
  75. package/src/domain/graph/builder/stages/finalize.js +139 -0
  76. package/src/domain/graph/builder/stages/insert-nodes.js +195 -0
  77. package/src/domain/graph/builder/stages/parse-files.js +28 -0
  78. package/src/domain/graph/builder/stages/resolve-imports.js +143 -0
  79. package/src/domain/graph/builder/stages/run-analyses.js +44 -0
  80. package/src/domain/graph/builder.js +11 -0
  81. package/src/{change-journal.js → domain/graph/change-journal.js} +1 -1
  82. package/src/domain/graph/cycles.js +82 -0
  83. package/src/{journal.js → domain/graph/journal.js} +1 -1
  84. package/src/{resolve.js → domain/graph/resolve.js} +3 -3
  85. package/src/{watcher.js → domain/graph/watcher.js} +10 -150
  86. package/src/{parser.js → domain/parser.js} +5 -5
  87. package/src/domain/queries.js +48 -0
  88. package/src/domain/search/generator.js +163 -0
  89. package/src/domain/search/index.js +13 -0
  90. package/src/domain/search/models.js +218 -0
  91. package/src/domain/search/search/cli-formatter.js +151 -0
  92. package/src/domain/search/search/filters.js +46 -0
  93. package/src/domain/search/search/hybrid.js +121 -0
  94. package/src/domain/search/search/keyword.js +68 -0
  95. package/src/domain/search/search/prepare.js +66 -0
  96. package/src/domain/search/search/semantic.js +145 -0
  97. package/src/domain/search/stores/fts5.js +27 -0
  98. package/src/domain/search/stores/sqlite-blob.js +24 -0
  99. package/src/domain/search/strategies/source.js +14 -0
  100. package/src/domain/search/strategies/structured.js +43 -0
  101. package/src/domain/search/strategies/text-utils.js +43 -0
  102. package/src/extractors/csharp.js +10 -2
  103. package/src/extractors/go.js +3 -1
  104. package/src/extractors/helpers.js +71 -0
  105. package/src/extractors/java.js +9 -2
  106. package/src/extractors/javascript.js +39 -2
  107. package/src/extractors/php.js +3 -1
  108. package/src/extractors/python.js +14 -3
  109. package/src/extractors/rust.js +3 -1
  110. package/src/{ast.js → features/ast.js} +8 -8
  111. package/src/{audit.js → features/audit.js} +16 -44
  112. package/src/{batch.js → features/batch.js} +6 -5
  113. package/src/{boundaries.js → features/boundaries.js} +2 -2
  114. package/src/{branch-compare.js → features/branch-compare.js} +3 -3
  115. package/src/{cfg.js → features/cfg.js} +11 -12
  116. package/src/{check.js → features/check.js} +13 -30
  117. package/src/{cochange.js → features/cochange.js} +5 -5
  118. package/src/{communities.js → features/communities.js} +18 -90
  119. package/src/{complexity.js → features/complexity.js} +13 -13
  120. package/src/{dataflow.js → features/dataflow.js} +12 -13
  121. package/src/features/export.js +378 -0
  122. package/src/{flow.js → features/flow.js} +4 -4
  123. package/src/features/graph-enrichment.js +327 -0
  124. package/src/{manifesto.js → features/manifesto.js} +6 -6
  125. package/src/{owners.js → features/owners.js} +2 -2
  126. package/src/{sequence.js → features/sequence.js} +16 -52
  127. package/src/{snapshot.js → features/snapshot.js} +8 -7
  128. package/src/{structure.js → features/structure.js} +20 -45
  129. package/src/{triage.js → features/triage.js} +27 -79
  130. package/src/graph/algorithms/bfs.js +49 -0
  131. package/src/graph/algorithms/centrality.js +16 -0
  132. package/src/graph/algorithms/index.js +5 -0
  133. package/src/graph/algorithms/louvain.js +26 -0
  134. package/src/graph/algorithms/shortest-path.js +41 -0
  135. package/src/graph/algorithms/tarjan.js +49 -0
  136. package/src/graph/builders/dependency.js +110 -0
  137. package/src/graph/builders/index.js +3 -0
  138. package/src/graph/builders/structure.js +40 -0
  139. package/src/graph/builders/temporal.js +33 -0
  140. package/src/graph/classifiers/index.js +2 -0
  141. package/src/graph/classifiers/risk.js +85 -0
  142. package/src/graph/classifiers/roles.js +64 -0
  143. package/src/graph/index.js +13 -0
  144. package/src/graph/model.js +230 -0
  145. package/src/index.cjs +16 -0
  146. package/src/index.js +42 -219
  147. package/src/{native.js → infrastructure/native.js} +3 -1
  148. package/src/infrastructure/result-formatter.js +2 -21
  149. package/src/mcp/index.js +2 -0
  150. package/src/mcp/middleware.js +26 -0
  151. package/src/mcp/server.js +128 -0
  152. package/src/{mcp.js → mcp/tool-registry.js} +6 -675
  153. package/src/mcp/tools/ast-query.js +14 -0
  154. package/src/mcp/tools/audit.js +21 -0
  155. package/src/mcp/tools/batch-query.js +11 -0
  156. package/src/mcp/tools/branch-compare.js +12 -0
  157. package/src/mcp/tools/cfg.js +21 -0
  158. package/src/mcp/tools/check.js +43 -0
  159. package/src/mcp/tools/co-changes.js +20 -0
  160. package/src/mcp/tools/code-owners.js +12 -0
  161. package/src/mcp/tools/communities.js +15 -0
  162. package/src/mcp/tools/complexity.js +18 -0
  163. package/src/mcp/tools/context.js +17 -0
  164. package/src/mcp/tools/dataflow.js +26 -0
  165. package/src/mcp/tools/diff-impact.js +24 -0
  166. package/src/mcp/tools/execution-flow.js +26 -0
  167. package/src/mcp/tools/export-graph.js +57 -0
  168. package/src/mcp/tools/file-deps.js +12 -0
  169. package/src/mcp/tools/file-exports.js +13 -0
  170. package/src/mcp/tools/find-cycles.js +15 -0
  171. package/src/mcp/tools/fn-impact.js +15 -0
  172. package/src/mcp/tools/impact-analysis.js +12 -0
  173. package/src/mcp/tools/index.js +71 -0
  174. package/src/mcp/tools/list-functions.js +14 -0
  175. package/src/mcp/tools/list-repos.js +11 -0
  176. package/src/mcp/tools/module-map.js +6 -0
  177. package/src/mcp/tools/node-roles.js +14 -0
  178. package/src/mcp/tools/path.js +12 -0
  179. package/src/mcp/tools/query.js +30 -0
  180. package/src/mcp/tools/semantic-search.js +65 -0
  181. package/src/mcp/tools/sequence.js +17 -0
  182. package/src/mcp/tools/structure.js +15 -0
  183. package/src/mcp/tools/symbol-children.js +14 -0
  184. package/src/mcp/tools/triage.js +35 -0
  185. package/src/mcp/tools/where.js +13 -0
  186. package/src/{commands → presentation}/audit.js +2 -2
  187. package/src/{commands → presentation}/batch.js +1 -1
  188. package/src/{commands → presentation}/branch-compare.js +2 -2
  189. package/src/{commands → presentation}/cfg.js +1 -1
  190. package/src/{commands → presentation}/check.js +6 -6
  191. package/src/presentation/colors.js +44 -0
  192. package/src/{commands → presentation}/communities.js +1 -1
  193. package/src/{commands → presentation}/complexity.js +1 -1
  194. package/src/{commands → presentation}/dataflow.js +1 -1
  195. package/src/presentation/export.js +444 -0
  196. package/src/{commands → presentation}/flow.js +2 -2
  197. package/src/{commands → presentation}/manifesto.js +4 -4
  198. package/src/{commands → presentation}/owners.js +1 -1
  199. package/src/presentation/queries-cli/exports.js +46 -0
  200. package/src/presentation/queries-cli/impact.js +198 -0
  201. package/src/presentation/queries-cli/index.js +5 -0
  202. package/src/presentation/queries-cli/inspect.js +334 -0
  203. package/src/presentation/queries-cli/overview.js +197 -0
  204. package/src/presentation/queries-cli/path.js +58 -0
  205. package/src/presentation/queries-cli.js +27 -0
  206. package/src/{commands → presentation}/query.js +1 -1
  207. package/src/presentation/result-formatter.js +144 -0
  208. package/src/presentation/sequence-renderer.js +43 -0
  209. package/src/{commands → presentation}/sequence.js +2 -2
  210. package/src/{commands → presentation}/structure.js +2 -2
  211. package/src/presentation/table.js +47 -0
  212. package/src/{commands → presentation}/triage.js +1 -1
  213. package/src/{viewer.js → presentation/viewer.js} +68 -382
  214. package/src/{constants.js → shared/constants.js} +1 -1
  215. package/src/shared/errors.js +78 -0
  216. package/src/shared/file-utils.js +153 -0
  217. package/src/shared/generators.js +125 -0
  218. package/src/shared/hierarchy.js +27 -0
  219. package/src/shared/normalize.js +59 -0
  220. package/src/builder.js +0 -1486
  221. package/src/cycles.js +0 -137
  222. package/src/embedder.js +0 -1097
  223. package/src/export.js +0 -681
  224. package/src/queries-cli.js +0 -866
  225. package/src/queries.js +0 -2289
  226. /package/src/{config.js → infrastructure/config.js} +0 -0
  227. /package/src/{logger.js → infrastructure/logger.js} +0 -0
  228. /package/src/{registry.js → infrastructure/registry.js} +0 -0
  229. /package/src/{update-check.js → infrastructure/update-check.js} +0 -0
  230. /package/src/{commands → presentation}/cochange.js +0 -0
  231. /package/src/{kinds.js → shared/kinds.js} +0 -0
  232. /package/src/{paginate.js → shared/paginate.js} +0 -0
@@ -0,0 +1,151 @@
1
+ import { warn } from '../../../infrastructure/logger.js';
2
+ import { hybridSearchData } from './hybrid.js';
3
+ import { ftsSearchData } from './keyword.js';
4
+ import { multiSearchData, searchData } from './semantic.js';
5
+
6
/**
 * Run a search and print the results to stdout — CLI wrapper with multi-query detection.
 *
 * The query string is split on semicolons; empty pieces are dropped.
 * Modes: 'hybrid' (default), 'semantic', 'keyword'.
 *   - keyword:  one BM25 lookup. When the split does not yield exactly one query,
 *               the raw query string is passed through unchanged.
 *   - semantic: a single query prints a similarity list; several queries print an
 *               RRF-fused list with per-query scores.
 *   - hybrid:   BM25 + semantic fusion. When hybridSearchData returns a falsy value
 *               a warning is emitted and the search is re-run in semantic mode.
 * When opts.json is set, the data object is printed as pretty JSON instead of text.
 *
 * @param {string} query - One query, or several separated by ';'
 * @param {string} [customDbPath] - Forwarded unchanged to the data functions
 * @param {object} [opts] - Forwarded to the data functions; also read here:
 * @param {string} [opts.mode] - 'hybrid' | 'semantic' | 'keyword'
 * @param {boolean} [opts.json] - Print raw JSON instead of formatted text
 * @param {number} [opts.rrfK] - RRF constant shown in headers (60 when falsy)
 * @returns {Promise<void>}
 */
export async function search(query, customDbPath, opts = {}) {
  const mode = opts.mode || 'hybrid';

  // Split by semicolons, trim, filter empties
  const queries = query
    .split(';')
    .map((q) => q.trim())
    .filter((q) => q.length > 0);

  const kindIcon = (kind) => (kind === 'function' ? 'f' : kind === 'class' ? '*' : 'o');

  // Small print helpers shared by every mode.
  const symbolLine = (r) => `${kindIcon(r.kind)} ${r.name} -- ${r.file}:${r.line}`;
  const printJson = (data) => console.log(JSON.stringify(data, null, 2));
  const printCount = (data) => console.log(`\n ${data.results.length} results shown\n`);
  const printQueryList = () => {
    for (let i = 0; i < queries.length; i++) console.log(` [${i + 1}] "${queries[i]}"`);
    console.log();
  };

  // ─── Keyword-only mode ──────────────────────────────────────────────
  if (mode === 'keyword') {
    const singleQuery = queries.length === 1 ? queries[0] : query;
    const data = ftsSearchData(singleQuery, customDbPath, opts);
    if (!data) {
      console.log('No FTS5 index found. Run `codegraph embed` to build the keyword index.');
      return;
    }

    if (opts.json) {
      printJson(data);
      return;
    }

    console.log(`\nKeyword search: "${singleQuery}" (BM25)\n`);
    if (data.results.length === 0) {
      console.log(' No results found.');
    } else {
      for (const r of data.results) {
        console.log(` BM25 ${r.bm25Score.toFixed(2)} ${symbolLine(r)}`);
      }
    }
    printCount(data);
    return;
  }

  // ─── Semantic-only mode ─────────────────────────────────────────────
  if (mode === 'semantic') {
    if (queries.length <= 1) {
      const singleQuery = queries[0] || query;
      const data = await searchData(singleQuery, customDbPath, opts);
      if (!data) return;

      if (opts.json) {
        printJson(data);
        return;
      }

      console.log(`\nSemantic search: "${singleQuery}"\n`);
      if (data.results.length === 0) {
        console.log(' No results above threshold.');
      } else {
        for (const r of data.results) {
          const bar = '#'.repeat(Math.round(r.similarity * 20));
          console.log(` ${(r.similarity * 100).toFixed(1)}% ${bar}`);
          console.log(` ${symbolLine(r)}`);
        }
      }
      printCount(data);
      return;
    }

    const data = await multiSearchData(queries, customDbPath, opts);
    if (!data) return;

    if (opts.json) {
      printJson(data);
      return;
    }

    console.log(`\nMulti-query semantic search (RRF, k=${opts.rrfK || 60}):`);
    printQueryList();
    if (data.results.length === 0) {
      console.log(' No results above threshold.');
    } else {
      for (const r of data.results) {
        console.log(` RRF ${r.rrf.toFixed(4)} ${symbolLine(r)}`);
        // Per-query scores carry only the query text. Scanning forward from the
        // previous match keeps the [n] label correct when the same query text
        // appears more than once (a plain indexOf would label every duplicate
        // with the first occurrence).
        let searchFrom = 0;
        for (const qs of r.queryScores) {
          let queryIdx = queries.indexOf(qs.query, searchFrom);
          if (queryIdx === -1) queryIdx = queries.indexOf(qs.query);
          searchFrom = queryIdx + 1;
          const bar = '#'.repeat(Math.round(qs.similarity * 20));
          console.log(
            ` [${queryIdx + 1}] ${(qs.similarity * 100).toFixed(1)}% ${bar} (rank ${qs.rank})`,
          );
        }
      }
    }
    printCount(data);
    return;
  }

  // ─── Hybrid mode (default) ──────────────────────────────────────────
  const data = await hybridSearchData(query, customDbPath, opts);

  if (!data) {
    // No FTS5 index — fall back to semantic-only
    warn(
      'FTS5 index not found — using semantic search only. Re-run `codegraph embed` to enable hybrid mode.',
    );
    return search(query, customDbPath, { ...opts, mode: 'semantic' });
  }

  if (opts.json) {
    printJson(data);
    return;
  }

  const rrfK = opts.rrfK || 60;
  if (queries.length <= 1) {
    const singleQuery = queries[0] || query;
    console.log(`\nHybrid search: "${singleQuery}" (BM25 + semantic, RRF k=${rrfK})\n`);
  } else {
    console.log(`\nHybrid multi-query search (BM25 + semantic, RRF k=${rrfK}):`);
    printQueryList();
  }

  if (data.results.length === 0) {
    console.log(' No results found.');
  } else {
    for (const r of data.results) {
      console.log(` RRF ${r.rrf.toFixed(4)} ${symbolLine(r)}`);
      // Show whichever of the two rankings contributed to this result.
      const parts = [];
      if (r.bm25Rank != null) {
        parts.push(`BM25: rank ${r.bm25Rank} (score ${r.bm25Score.toFixed(2)})`);
      }
      if (r.semanticRank != null) {
        parts.push(`Semantic: rank ${r.semanticRank} (${(r.similarity * 100).toFixed(1)}%)`);
      }
      if (parts.length > 0) {
        console.log(` ${parts.join(' | ')}`);
      }
    }
  }

  printCount(data);
}
@@ -0,0 +1,46 @@
1
/**
 * Match a file path against a glob pattern. Zero dependencies.
 *
 * Supported wildcards:
 *   - `*`  any run of characters inside one path segment (never crosses a slash)
 *   - `?`  exactly one character that is not a slash
 *   - `**` any run of characters, slashes included
 *   - a `**` segment followed by a slash (at the start of the pattern or right
 *     after a slash) matches zero or more whole directories
 * Every other character, including `[` and `]`, is matched literally.
 * Backslashes in both arguments are treated as path separators.
 *
 * @param {string} filePath - Path to test
 * @param {string} pattern - Glob pattern
 * @returns {boolean} true when the whole path matches the pattern
 */
export function globMatch(filePath, pattern) {
  // Normalize separators to forward slashes
  const normalizedPath = filePath.replace(/\\/g, '/');
  const normalizedPattern = pattern.replace(/\\/g, '/');

  // Translate in a single pass so no placeholder character is needed and an
  // already-translated fragment can never be rewritten by a later step.
  // Alternation order matters: the directory form of ** is tried first, then
  // plain **, then * and ?, and finally regex metacharacters to escape.
  const source = normalizedPattern.replace(
    /(?<![^\/])\*\*\/|\*\*|\*|\?|[.+^${}()|[\]\\]/g,
    (token) => {
      switch (token) {
        case '**/':
          // e.g. "src/**/a.js" must also match "src/a.js"
          return '(?:.*/)?';
        case '**':
          return '.*';
        case '*':
          return '[^/]*';
        case '?':
          return '[^/]';
        default:
          return `\\${token}`;
      }
    },
  );

  try {
    return new RegExp(`^${source}$`).test(normalizedPath);
  } catch {
    // Malformed pattern — fall back to substring match
    return normalizedPath.includes(pattern);
  }
}
22
+
23
// Paths containing any of these fragments are treated as test/story files.
const TEST_PATTERN = /\.(test|spec)\.|__test__|__tests__|\.stories\./;

/**
 * Run the post-query filters (glob pattern, noTests) over a list of rows.
 * The input array is never modified. When no filter is active the same array
 * instance is handed back; otherwise a filtered copy is returned.
 * @param {Array} rows - Rows with at least a `file` property
 * @param {object} opts
 * @param {string} [opts.filePattern] - Glob pattern (only applied if it contains glob chars)
 * @param {boolean} [opts.noTests] - Exclude test/spec files
 * @param {boolean} [opts.isGlob] - Pre-computed: does filePattern contain glob chars?
 * @returns {Array}
 */
export function applyFilters(rows, opts = {}) {
  const { filePattern, noTests } = opts;

  // Trust the caller's pre-computed flag when it was supplied at all.
  const globActive =
    opts.isGlob === undefined ? filePattern && /[*?[\]]/.test(filePattern) : opts.isGlob;

  const afterGlob = globActive ? rows.filter((row) => globMatch(row.file, filePattern)) : rows;

  if (!noTests) {
    return afterGlob;
  }
  return afterGlob.filter((row) => !TEST_PATTERN.test(row.file));
}
@@ -0,0 +1,121 @@
1
+ import { openReadonlyOrFail } from '../../../db/index.js';
2
+ import { hasFtsIndex } from '../stores/fts5.js';
3
+ import { ftsSearchData } from './keyword.js';
4
+ import { searchData } from './semantic.js';
5
+
6
/**
 * Hybrid search: BM25 keyword hits and semantic hits merged with Reciprocal Rank Fusion.
 *
 * A string query is split on semicolons into sub-queries; each sub-query contributes
 * one BM25 ranked list and one semantic ranked list. Every appearance of a symbol at
 * rank r adds 1 / (k + r) to its fused score, and the best (lowest) rank seen per
 * source is kept together with its score.
 *
 * Returns { results: [{ name, kind, file, line, endLine, role, fileHash, rrf,
 * bm25Score, bm25Rank, similarity, semanticRank }] }, or null if no FTS5 index
 * (caller should fall back to semantic-only).
 */
export async function hybridSearchData(query, customDbPath, opts = {}) {
  const limit = opts.limit || 15;
  const rrfK = opts.rrfK || 60;
  // Over-fetch from each source so fusion has enough candidates to work with.
  const candidateCount = limit * 5;

  let queries;
  if (typeof query === 'string') {
    queries = query
      .split(';')
      .map((part) => part.trim())
      .filter((part) => part.length > 0);
  } else {
    queries = [query];
  }

  // Cheap synchronous probe: without an FTS5 index there is nothing to fuse.
  const probeDb = openReadonlyOrFail(customDbPath);
  const ftsAvailable = hasFtsIndex(probeDb);
  probeDb.close();
  if (!ftsAvailable) return null;

  // key -> accumulated entry; insertion order is first-seen order.
  const fused = new Map();

  // Fold one ranked list (already in rank order) into the fusion map.
  const absorb = (hits, source) => {
    hits.forEach((hit, idx) => {
      const item = {
        key: `${hit.name}:${hit.file}:${hit.line}`,
        rank: idx + 1,
        source,
        ...hit,
      };

      let entry = fused.get(item.key);
      if (!fused.has(item.key)) {
        entry = {
          name: item.name,
          kind: item.kind,
          file: item.file,
          line: item.line,
          endLine: item.endLine ?? null,
          role: item.role ?? null,
          fileHash: item.fileHash ?? null,
          rrfScore: 0,
          bm25Score: null,
          bm25Rank: null,
          similarity: null,
          semanticRank: null,
        };
        fused.set(item.key, entry);
      }

      entry.rrfScore += 1 / (rrfK + item.rank);

      if (item.source === 'bm25') {
        const improves = entry.bm25Rank === null || item.rank < entry.bm25Rank;
        if (improves) {
          entry.bm25Score = item.bm25Score;
          entry.bm25Rank = item.rank;
        }
        return;
      }

      const improves = entry.semanticRank === null || item.rank < entry.semanticRank;
      if (improves) {
        entry.similarity = item.similarity;
        entry.semanticRank = item.rank;
      }
    });
  };

  for (const subQuery of queries) {
    // Keyword side (synchronous)
    const keywordData = ftsSearchData(subQuery, customDbPath, { ...opts, limit: candidateCount });
    if (keywordData?.results) {
      absorb(keywordData.results, 'bm25');
    }

    // Semantic side (asynchronous)
    const semanticData = await searchData(subQuery, customDbPath, {
      ...opts,
      limit: candidateCount,
      minScore: opts.minScore || 0.2,
    });
    if (semanticData?.results) {
      absorb(semanticData.results, 'semantic');
    }
  }

  const ranked = Array.from(fused.values());
  ranked.sort((left, right) => right.rrfScore - left.rrfScore);

  const results = ranked.slice(0, limit).map((entry) => ({
    name: entry.name,
    kind: entry.kind,
    file: entry.file,
    line: entry.line,
    endLine: entry.endLine,
    role: entry.role,
    fileHash: entry.fileHash,
    rrf: entry.rrfScore,
    bm25Score: entry.bm25Score,
    bm25Rank: entry.bm25Rank,
    similarity: entry.similarity,
    semanticRank: entry.semanticRank,
  }));

  return { results };
}
@@ -0,0 +1,68 @@
1
+ import { openReadonlyOrFail } from '../../../db/index.js';
2
+ import { normalizeSymbol } from '../../queries.js';
3
+ import { hasFtsIndex, sanitizeFtsQuery } from '../stores/fts5.js';
4
+ import { applyFilters } from './filters.js';
5
+
6
/**
 * Keyword search over the FTS5 index, ordered by BM25 rank.
 *
 * Optional opts.kind and a non-glob opts.filePattern are applied in SQL; glob
 * patterns and opts.noTests are applied afterwards through applyFilters. The
 * database handle opened here is always closed before returning.
 *
 * Returns { results: [{ name, kind, file, line, bm25Score }] } or null if no FTS5 index.
 * A query that sanitizes to nothing, or that the database rejects, yields { results: [] }.
 */
export function ftsSearchData(query, customDbPath, opts = {}) {
  const limit = opts.limit || 15;
  const db = openReadonlyOrFail(customDbPath);

  try {
    if (!hasFtsIndex(db)) return null;

    const matchExpr = sanitizeFtsQuery(query);
    if (!matchExpr) return { results: [] };

    const isGlob = opts.filePattern && /[*?[\]]/.test(opts.filePattern);

    const sqlParts = [
      `
 SELECT f.rowid AS node_id, rank AS bm25_score,
 n.name, n.kind, n.file, n.line, n.end_line, n.role
 FROM fts_index f
 JOIN nodes n ON f.rowid = n.id
 WHERE fts_index MATCH ?
 `,
    ];
    const bindings = [matchExpr];

    if (opts.kind) {
      sqlParts.push(' AND n.kind = ?');
      bindings.push(opts.kind);
    }

    // Plain (non-glob) patterns are cheap to push down as a substring LIKE.
    if (opts.filePattern && !isGlob) {
      sqlParts.push(' AND n.file LIKE ?');
      bindings.push(`%${opts.filePattern}%`);
    }

    sqlParts.push(' ORDER BY rank LIMIT ?');
    bindings.push(limit * 5); // fetch generous set for post-filtering

    let candidates;
    try {
      candidates = db.prepare(sqlParts.join('')).all(...bindings);
    } catch {
      // Invalid FTS5 query syntax — return empty
      return { results: [] };
    }

    const kept = applyFilters(candidates, { ...opts, isGlob }).slice(0, limit);

    const hashCache = new Map();
    const results = [];
    for (const row of kept) {
      results.push({
        ...normalizeSymbol(row, db, hashCache),
        bm25Score: -row.bm25_score, // FTS5 rank is negative; negate for display
      });
    }

    return { results };
  } finally {
    db.close();
  }
}
@@ -0,0 +1,66 @@
1
+ import { openReadonlyOrFail } from '../../../db/index.js';
2
+ import { getEmbeddingCount, getEmbeddingMeta } from '../../../db/repository/embeddings.js';
3
+ import { MODELS } from '../models.js';
4
+ import { applyFilters } from './filters.js';
5
+
6
/**
 * Common setup for the semantic search functions: open the DB, check that
 * embeddings exist, work out which model to use, and load the candidate rows.
 *
 * Returns { db, rows, modelKey, storedDim }, or null (after printing a message)
 * when there are no embeddings. The DB is closed here on the null return and
 * when an exception is thrown; on the happy path the caller owns the returned
 * `db` and must close it.
 */
export function prepareSearch(customDbPath, opts = {}) {
  const db = openReadonlyOrFail(customDbPath);

  try {
    if (getEmbeddingCount(db) === 0) {
      console.log('No embeddings found. Run `codegraph embed` first.');
      db.close();
      return null;
    }

    const storedModel = getEmbeddingMeta(db, 'model') || null;
    const rawDim = getEmbeddingMeta(db, 'dim');
    const storedDim = rawDim ? parseInt(rawDim, 10) : null;

    // An explicit opts.model wins; otherwise map the stored model name back to its key.
    let modelKey = opts.model || null;
    if (!modelKey && storedModel) {
      const match = Object.entries(MODELS).find(([, config]) => config.name === storedModel);
      if (match) {
        modelKey = match[0];
      }
    }

    // Pre-filter: allow filtering by kind or file pattern to reduce search space
    const isGlob = opts.filePattern && /[*?[\]]/.test(opts.filePattern);
    const baseSql = `
 SELECT e.node_id, e.vector, e.text_preview, n.name, n.kind, n.file, n.line, n.end_line, n.role
 FROM embeddings e
 JOIN nodes n ON e.node_id = n.id
 `;

    const whereClauses = [];
    const bindings = [];
    if (opts.kind) {
      whereClauses.push('n.kind = ?');
      bindings.push(opts.kind);
    }
    if (opts.filePattern && !isGlob) {
      whereClauses.push('n.file LIKE ?');
      bindings.push(`%${opts.filePattern}%`);
    }

    const sql =
      whereClauses.length > 0 ? `${baseSql} WHERE ${whereClauses.join(' AND ')}` : baseSql;

    const loaded = db.prepare(sql).all(...bindings);
    const rows = applyFilters(loaded, { ...opts, isGlob });

    return { db, rows, modelKey, storedDim };
  } catch (err) {
    db.close();
    throw err;
  }
}
@@ -0,0 +1,145 @@
1
+ import { warn } from '../../../infrastructure/logger.js';
2
+ import { normalizeSymbol } from '../../queries.js';
3
+ import { embed } from '../models.js';
4
+ import { cosineSim } from '../stores/sqlite-blob.js';
5
+ import { prepareSearch } from './prepare.js';
6
+
7
/**
 * Semantic search for a single query; returns data rather than printing results.
 *
 * The query is embedded, compared by cosine similarity against every prepared
 * row, and rows scoring at least opts.minScore (0.2 when falsy) are returned
 * best-first, capped at opts.limit (15 when falsy).
 *
 * Returns { results: [{ name, kind, file, line, similarity }] } or null on failure
 * (no prepared data, or the query embedding dimension differs from the stored one).
 */
export async function searchData(query, customDbPath, opts = {}) {
  const limit = opts.limit || 15;
  const minScore = opts.minScore || 0.2;

  const prepared = prepareSearch(customDbPath, opts);
  if (!prepared) return null;
  const { db, rows, modelKey, storedDim } = prepared;

  try {
    const embedded = await embed([query], modelKey);
    const [queryVec] = embedded.vectors;
    const { dim } = embedded;

    const dimensionMismatch = storedDim && dim !== storedDim;
    if (dimensionMismatch) {
      console.log(
        `Warning: query model dimension (${dim}) doesn't match stored embeddings (${storedDim}).`,
      );
      console.log(` Re-run \`codegraph embed\` with the same model, or use --model to match.`);
      return null;
    }

    const hashCache = new Map();
    const matches = rows.reduce((acc, row) => {
      // Stored vectors are raw bytes; reinterpret a copy of them as float32.
      const rowVec = new Float32Array(new Uint8Array(row.vector).buffer);
      const similarity = cosineSim(queryVec, rowVec);
      if (similarity >= minScore) {
        acc.push({ ...normalizeSymbol(row, db, hashCache), similarity });
      }
      return acc;
    }, []);

    matches.sort((left, right) => right.similarity - left.similarity);
    return { results: matches.slice(0, limit) };
  } finally {
    db.close();
  }
}
53
+
54
/**
 * Semantic search over several queries at once, merged with Reciprocal Rank Fusion.
 *
 * Each query ranks the rows that score at least opts.minScore; a row's fused score
 * is the sum of 1 / (k + rank) over the queries it appears in. Pairs of queries
 * whose embeddings are at least 85% similar trigger a warning, since they can
 * skew the fusion toward their shared matches.
 *
 * Returns { results: [{ name, kind, file, line, rrf, queryScores }] } or null on failure.
 */
export async function multiSearchData(queries, customDbPath, opts = {}) {
  const limit = opts.limit || 15;
  const minScore = opts.minScore || 0.2;
  const rrfK = opts.rrfK || 60;

  const prepared = prepareSearch(customDbPath, opts);
  if (!prepared) return null;
  const { db, rows, modelKey, storedDim } = prepared;

  try {
    const embedded = await embed(queries, modelKey);
    const queryVecs = embedded.vectors;
    const dim = embedded.dim;

    // Warn about similar queries that may bias RRF results
    const SIMILARITY_WARN_THRESHOLD = 0.85;
    queryVecs.forEach((vecA, i) => {
      for (let j = i + 1; j < queryVecs.length; j++) {
        const pairSim = cosineSim(vecA, queryVecs[j]);
        if (pairSim < SIMILARITY_WARN_THRESHOLD) continue;
        warn(
          `Queries "${queries[i]}" and "${queries[j]}" are very similar ` +
            `(${(pairSim * 100).toFixed(0)}% cosine similarity). ` +
            `This may bias RRF results toward their shared matches. ` +
            `Consider using more distinct queries.`,
        );
      }
    });

    if (storedDim && dim !== storedDim) {
      console.log(
        `Warning: query model dimension (${dim}) doesn't match stored embeddings (${storedDim}).`,
      );
      console.log(` Re-run \`codegraph embed\` with the same model, or use --model to match.`);
      return null;
    }

    // Decode every stored vector exactly once, up front.
    const rowVecs = rows.map((row) => new Float32Array(new Uint8Array(row.vector).buffer));

    // rowIndex -> { rrfScore, queryScores[] }
    const fused = new Map();

    queries.forEach((queryText, qi) => {
      // Score all rows for this query and keep those above the threshold.
      const scored = [];
      rowVecs.forEach((rowVec, rowIndex) => {
        const similarity = cosineSim(queryVecs[qi], rowVec);
        if (similarity >= minScore) {
          scored.push({ rowIndex, similarity });
        }
      });
      scored.sort((left, right) => right.similarity - left.similarity);

      // Walk best-first; position + 1 is the 1-indexed rank within this query.
      scored.forEach((hit, position) => {
        const rank = position + 1;
        let entry = fused.get(hit.rowIndex);
        if (!entry) {
          entry = { rrfScore: 0, queryScores: [] };
          fused.set(hit.rowIndex, entry);
        }
        entry.rrfScore += 1 / (rrfK + rank);
        entry.queryScores.push({ query: queryText, similarity: hit.similarity, rank });
      });
    });

    // Materialize results, then order by fused score.
    const hashCache = new Map();
    const results = Array.from(fused, ([rowIndex, entry]) => ({
      ...normalizeSymbol(rows[rowIndex], db, hashCache),
      rrf: entry.rrfScore,
      queryScores: entry.queryScores,
    }));

    results.sort((left, right) => right.rrf - left.rrf);
    return { results: results.slice(0, limit) };
  } finally {
    db.close();
  }
}
@@ -0,0 +1,27 @@
1
/**
 * Turn free-form user input into a safe FTS5 MATCH expression.
 *
 * Characters that carry meaning in FTS5 query syntax are replaced by spaces,
 * the remainder is split on whitespace, and each token is double-quoted.
 * Multiple tokens are combined with OR. Returns null when nothing is left.
 */
export function sanitizeFtsQuery(query) {
  // Blank out FTS5 special chars that could cause syntax errors
  const stripped = query.replace(/[*"():^{}~<>]/g, ' ').trim();
  if (stripped === '') {
    return null;
  }

  const quotedTokens = stripped
    .split(/\s+/)
    .filter((token) => token.length > 0)
    .map((token) => `"${token}"`);

  // A single token joins to just itself, so no special case is needed.
  return quotedTokens.length === 0 ? null : quotedTokens.join(' OR ');
}
15
+
16
/**
 * Report whether a usable FTS5 index is present.
 * True only when the fts_index table can be queried and holds at least one row;
 * any error while querying (for example a missing table) counts as "no index".
 */
export function hasFtsIndex(db) {
  try {
    const { c: rowCount } = db.prepare('SELECT COUNT(*) as c FROM fts_index').get();
    return rowCount > 0;
  } catch {
    return false;
  }
}
@@ -0,0 +1,24 @@
1
+ /**
2
+ * @typedef {object} VectorStore
3
+ * @property {(queryVec: Float32Array, rows: Array<{vector: Buffer}>) => Array<{index: number, score: number}>} search
4
+ * Score every row against a query vector and return scored indices.
5
+ *
6
+ * Future implementations (e.g. HNSW via `hnsw.js`) implement this same shape
7
+ * for approximate nearest-neighbor search.
8
+ */
9
+
10
/**
 * Cosine similarity of two numeric vectors (Float32Arrays in practice).
 * Iterates over the length of `a`. Returns 0 when either vector has zero magnitude.
 */
export function cosineSim(a, b) {
  let dot = 0;
  let sumSqA = 0;
  let sumSqB = 0;

  for (let idx = 0; idx < a.length; idx++) {
    const x = a[idx];
    const y = b[idx];
    dot += x * y;
    sumSqA += x * x;
    sumSqB += y * y;
  }

  const magnitude = Math.sqrt(sumSqA) * Math.sqrt(sumSqB);
  if (magnitude === 0) {
    return 0;
  }
  return dot / magnitude;
}
@@ -0,0 +1,14 @@
1
+ import { splitIdentifier } from './text-utils.js';
2
+
3
/**
 * Produce the embedding text for a symbol from its raw source lines (original strategy).
 *
 * The first line is a header of the form "<kind> <name> (<split name>) in <file>";
 * it is followed by the symbol's source, taken from node.line through node.end_line,
 * or 15 lines when no end line is recorded, clamped to the available lines.
 */
export function buildSourceText(node, file, lines) {
  // node.line is 1-based; slice() wants a 0-based start.
  const firstIdx = Math.max(0, node.line - 1);

  let lastIdx;
  if (node.end_line) {
    lastIdx = Math.min(lines.length, node.end_line);
  } else {
    lastIdx = Math.min(lines.length, firstIdx + 15);
  }

  const body = lines.slice(firstIdx, lastIdx).join('\n');
  const header = `${node.kind} ${node.name} (${splitIdentifier(node.name)}) in ${file}`;
  return `${header}\n${body}`;
}