@optave/codegraph 3.1.3 → 3.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (232) hide show
  1. package/README.md +38 -84
  2. package/package.json +13 -8
  3. package/src/ast-analysis/engine.js +32 -12
  4. package/src/ast-analysis/shared.js +6 -5
  5. package/src/cli/commands/ast.js +22 -0
  6. package/src/cli/commands/audit.js +45 -0
  7. package/src/cli/commands/batch.js +68 -0
  8. package/src/cli/commands/branch-compare.js +21 -0
  9. package/src/cli/commands/build.js +26 -0
  10. package/src/cli/commands/cfg.js +26 -0
  11. package/src/cli/commands/check.js +74 -0
  12. package/src/cli/commands/children.js +28 -0
  13. package/src/cli/commands/co-change.js +67 -0
  14. package/src/cli/commands/communities.js +19 -0
  15. package/src/cli/commands/complexity.js +46 -0
  16. package/src/cli/commands/context.js +30 -0
  17. package/src/cli/commands/cycles.js +32 -0
  18. package/src/cli/commands/dataflow.js +28 -0
  19. package/src/cli/commands/deps.js +12 -0
  20. package/src/cli/commands/diff-impact.js +26 -0
  21. package/src/cli/commands/embed.js +30 -0
  22. package/src/cli/commands/export.js +78 -0
  23. package/src/cli/commands/exports.js +14 -0
  24. package/src/cli/commands/flow.js +32 -0
  25. package/src/cli/commands/fn-impact.js +26 -0
  26. package/src/cli/commands/impact.js +12 -0
  27. package/src/cli/commands/info.js +76 -0
  28. package/src/cli/commands/map.js +19 -0
  29. package/src/cli/commands/mcp.js +18 -0
  30. package/src/cli/commands/models.js +19 -0
  31. package/src/cli/commands/owners.js +25 -0
  32. package/src/cli/commands/path.js +36 -0
  33. package/src/cli/commands/plot.js +89 -0
  34. package/src/cli/commands/query.js +45 -0
  35. package/src/cli/commands/registry.js +100 -0
  36. package/src/cli/commands/roles.js +30 -0
  37. package/src/cli/commands/search.js +42 -0
  38. package/src/cli/commands/sequence.js +28 -0
  39. package/src/cli/commands/snapshot.js +66 -0
  40. package/src/cli/commands/stats.js +15 -0
  41. package/src/cli/commands/structure.js +33 -0
  42. package/src/cli/commands/triage.js +78 -0
  43. package/src/cli/commands/watch.js +12 -0
  44. package/src/cli/commands/where.js +20 -0
  45. package/src/cli/index.js +124 -0
  46. package/src/cli/shared/open-graph.js +13 -0
  47. package/src/cli/shared/options.js +59 -0
  48. package/src/cli/shared/output.js +1 -0
  49. package/src/cli.js +11 -1522
  50. package/src/db/connection.js +130 -7
  51. package/src/{db.js → db/index.js} +17 -5
  52. package/src/db/migrations.js +42 -1
  53. package/src/db/query-builder.js +20 -12
  54. package/src/db/repository/base.js +201 -0
  55. package/src/db/repository/graph-read.js +7 -4
  56. package/src/db/repository/in-memory-repository.js +575 -0
  57. package/src/db/repository/index.js +5 -1
  58. package/src/db/repository/nodes.js +60 -6
  59. package/src/db/repository/sqlite-repository.js +219 -0
  60. package/src/domain/analysis/context.js +408 -0
  61. package/src/domain/analysis/dependencies.js +341 -0
  62. package/src/domain/analysis/exports.js +134 -0
  63. package/src/domain/analysis/impact.js +466 -0
  64. package/src/domain/analysis/module-map.js +322 -0
  65. package/src/domain/analysis/roles.js +45 -0
  66. package/src/domain/analysis/symbol-lookup.js +238 -0
  67. package/src/domain/graph/builder/context.js +85 -0
  68. package/src/domain/graph/builder/helpers.js +218 -0
  69. package/src/domain/graph/builder/incremental.js +178 -0
  70. package/src/domain/graph/builder/pipeline.js +130 -0
  71. package/src/domain/graph/builder/stages/build-edges.js +297 -0
  72. package/src/domain/graph/builder/stages/build-structure.js +113 -0
  73. package/src/domain/graph/builder/stages/collect-files.js +44 -0
  74. package/src/domain/graph/builder/stages/detect-changes.js +413 -0
  75. package/src/domain/graph/builder/stages/finalize.js +139 -0
  76. package/src/domain/graph/builder/stages/insert-nodes.js +195 -0
  77. package/src/domain/graph/builder/stages/parse-files.js +28 -0
  78. package/src/domain/graph/builder/stages/resolve-imports.js +143 -0
  79. package/src/domain/graph/builder/stages/run-analyses.js +44 -0
  80. package/src/domain/graph/builder.js +11 -0
  81. package/src/{change-journal.js → domain/graph/change-journal.js} +1 -1
  82. package/src/domain/graph/cycles.js +82 -0
  83. package/src/{journal.js → domain/graph/journal.js} +1 -1
  84. package/src/{resolve.js → domain/graph/resolve.js} +3 -3
  85. package/src/{watcher.js → domain/graph/watcher.js} +10 -150
  86. package/src/{parser.js → domain/parser.js} +5 -5
  87. package/src/domain/queries.js +48 -0
  88. package/src/domain/search/generator.js +163 -0
  89. package/src/domain/search/index.js +13 -0
  90. package/src/domain/search/models.js +218 -0
  91. package/src/domain/search/search/cli-formatter.js +151 -0
  92. package/src/domain/search/search/filters.js +46 -0
  93. package/src/domain/search/search/hybrid.js +121 -0
  94. package/src/domain/search/search/keyword.js +68 -0
  95. package/src/domain/search/search/prepare.js +66 -0
  96. package/src/domain/search/search/semantic.js +145 -0
  97. package/src/domain/search/stores/fts5.js +27 -0
  98. package/src/domain/search/stores/sqlite-blob.js +24 -0
  99. package/src/domain/search/strategies/source.js +14 -0
  100. package/src/domain/search/strategies/structured.js +43 -0
  101. package/src/domain/search/strategies/text-utils.js +43 -0
  102. package/src/extractors/csharp.js +10 -2
  103. package/src/extractors/go.js +3 -1
  104. package/src/extractors/helpers.js +71 -0
  105. package/src/extractors/java.js +9 -2
  106. package/src/extractors/javascript.js +39 -2
  107. package/src/extractors/php.js +3 -1
  108. package/src/extractors/python.js +14 -3
  109. package/src/extractors/rust.js +3 -1
  110. package/src/{ast.js → features/ast.js} +8 -8
  111. package/src/{audit.js → features/audit.js} +16 -44
  112. package/src/{batch.js → features/batch.js} +6 -5
  113. package/src/{boundaries.js → features/boundaries.js} +2 -2
  114. package/src/{branch-compare.js → features/branch-compare.js} +3 -3
  115. package/src/{cfg.js → features/cfg.js} +11 -12
  116. package/src/{check.js → features/check.js} +13 -30
  117. package/src/{cochange.js → features/cochange.js} +5 -5
  118. package/src/{communities.js → features/communities.js} +18 -90
  119. package/src/{complexity.js → features/complexity.js} +13 -13
  120. package/src/{dataflow.js → features/dataflow.js} +12 -13
  121. package/src/features/export.js +378 -0
  122. package/src/{flow.js → features/flow.js} +4 -4
  123. package/src/features/graph-enrichment.js +327 -0
  124. package/src/{manifesto.js → features/manifesto.js} +6 -6
  125. package/src/{owners.js → features/owners.js} +2 -2
  126. package/src/{sequence.js → features/sequence.js} +16 -52
  127. package/src/{snapshot.js → features/snapshot.js} +8 -7
  128. package/src/{structure.js → features/structure.js} +20 -45
  129. package/src/{triage.js → features/triage.js} +27 -79
  130. package/src/graph/algorithms/bfs.js +49 -0
  131. package/src/graph/algorithms/centrality.js +16 -0
  132. package/src/graph/algorithms/index.js +5 -0
  133. package/src/graph/algorithms/louvain.js +26 -0
  134. package/src/graph/algorithms/shortest-path.js +41 -0
  135. package/src/graph/algorithms/tarjan.js +49 -0
  136. package/src/graph/builders/dependency.js +110 -0
  137. package/src/graph/builders/index.js +3 -0
  138. package/src/graph/builders/structure.js +40 -0
  139. package/src/graph/builders/temporal.js +33 -0
  140. package/src/graph/classifiers/index.js +2 -0
  141. package/src/graph/classifiers/risk.js +85 -0
  142. package/src/graph/classifiers/roles.js +64 -0
  143. package/src/graph/index.js +13 -0
  144. package/src/graph/model.js +230 -0
  145. package/src/index.cjs +16 -0
  146. package/src/index.js +42 -219
  147. package/src/{native.js → infrastructure/native.js} +3 -1
  148. package/src/infrastructure/result-formatter.js +2 -21
  149. package/src/mcp/index.js +2 -0
  150. package/src/mcp/middleware.js +26 -0
  151. package/src/mcp/server.js +128 -0
  152. package/src/{mcp.js → mcp/tool-registry.js} +6 -675
  153. package/src/mcp/tools/ast-query.js +14 -0
  154. package/src/mcp/tools/audit.js +21 -0
  155. package/src/mcp/tools/batch-query.js +11 -0
  156. package/src/mcp/tools/branch-compare.js +12 -0
  157. package/src/mcp/tools/cfg.js +21 -0
  158. package/src/mcp/tools/check.js +43 -0
  159. package/src/mcp/tools/co-changes.js +20 -0
  160. package/src/mcp/tools/code-owners.js +12 -0
  161. package/src/mcp/tools/communities.js +15 -0
  162. package/src/mcp/tools/complexity.js +18 -0
  163. package/src/mcp/tools/context.js +17 -0
  164. package/src/mcp/tools/dataflow.js +26 -0
  165. package/src/mcp/tools/diff-impact.js +24 -0
  166. package/src/mcp/tools/execution-flow.js +26 -0
  167. package/src/mcp/tools/export-graph.js +57 -0
  168. package/src/mcp/tools/file-deps.js +12 -0
  169. package/src/mcp/tools/file-exports.js +13 -0
  170. package/src/mcp/tools/find-cycles.js +15 -0
  171. package/src/mcp/tools/fn-impact.js +15 -0
  172. package/src/mcp/tools/impact-analysis.js +12 -0
  173. package/src/mcp/tools/index.js +71 -0
  174. package/src/mcp/tools/list-functions.js +14 -0
  175. package/src/mcp/tools/list-repos.js +11 -0
  176. package/src/mcp/tools/module-map.js +6 -0
  177. package/src/mcp/tools/node-roles.js +14 -0
  178. package/src/mcp/tools/path.js +12 -0
  179. package/src/mcp/tools/query.js +30 -0
  180. package/src/mcp/tools/semantic-search.js +65 -0
  181. package/src/mcp/tools/sequence.js +17 -0
  182. package/src/mcp/tools/structure.js +15 -0
  183. package/src/mcp/tools/symbol-children.js +14 -0
  184. package/src/mcp/tools/triage.js +35 -0
  185. package/src/mcp/tools/where.js +13 -0
  186. package/src/{commands → presentation}/audit.js +2 -2
  187. package/src/{commands → presentation}/batch.js +1 -1
  188. package/src/{commands → presentation}/branch-compare.js +2 -2
  189. package/src/{commands → presentation}/cfg.js +1 -1
  190. package/src/{commands → presentation}/check.js +6 -6
  191. package/src/presentation/colors.js +44 -0
  192. package/src/{commands → presentation}/communities.js +1 -1
  193. package/src/{commands → presentation}/complexity.js +1 -1
  194. package/src/{commands → presentation}/dataflow.js +1 -1
  195. package/src/presentation/export.js +444 -0
  196. package/src/{commands → presentation}/flow.js +2 -2
  197. package/src/{commands → presentation}/manifesto.js +4 -4
  198. package/src/{commands → presentation}/owners.js +1 -1
  199. package/src/presentation/queries-cli/exports.js +46 -0
  200. package/src/presentation/queries-cli/impact.js +198 -0
  201. package/src/presentation/queries-cli/index.js +5 -0
  202. package/src/presentation/queries-cli/inspect.js +334 -0
  203. package/src/presentation/queries-cli/overview.js +197 -0
  204. package/src/presentation/queries-cli/path.js +58 -0
  205. package/src/presentation/queries-cli.js +27 -0
  206. package/src/{commands → presentation}/query.js +1 -1
  207. package/src/presentation/result-formatter.js +144 -0
  208. package/src/presentation/sequence-renderer.js +43 -0
  209. package/src/{commands → presentation}/sequence.js +2 -2
  210. package/src/{commands → presentation}/structure.js +2 -2
  211. package/src/presentation/table.js +47 -0
  212. package/src/{commands → presentation}/triage.js +1 -1
  213. package/src/{viewer.js → presentation/viewer.js} +68 -382
  214. package/src/{constants.js → shared/constants.js} +1 -1
  215. package/src/shared/errors.js +78 -0
  216. package/src/shared/file-utils.js +153 -0
  217. package/src/shared/generators.js +125 -0
  218. package/src/shared/hierarchy.js +27 -0
  219. package/src/shared/normalize.js +59 -0
  220. package/src/builder.js +0 -1486
  221. package/src/cycles.js +0 -137
  222. package/src/embedder.js +0 -1097
  223. package/src/export.js +0 -681
  224. package/src/queries-cli.js +0 -866
  225. package/src/queries.js +0 -2289
  226. /package/src/{config.js → infrastructure/config.js} +0 -0
  227. /package/src/{logger.js → infrastructure/logger.js} +0 -0
  228. /package/src/{registry.js → infrastructure/registry.js} +0 -0
  229. /package/src/{update-check.js → infrastructure/update-check.js} +0 -0
  230. /package/src/{commands → presentation}/cochange.js +0 -0
  231. /package/src/{kinds.js → shared/kinds.js} +0 -0
  232. /package/src/{paginate.js → shared/paginate.js} +0 -0
@@ -1,13 +1,13 @@
1
1
  import fs from 'node:fs';
2
2
  import path from 'node:path';
3
- import { readFileSafe } from './builder.js';
3
+ import { closeDb, getNodeId as getNodeIdQuery, initSchema, openDb } from '../../db/index.js';
4
+ import { info } from '../../infrastructure/logger.js';
5
+ import { EXTENSIONS, IGNORE_DIRS, normalizePath } from '../../shared/constants.js';
6
+ import { DbError } from '../../shared/errors.js';
7
+ import { createParseTreeCache, getActiveEngine } from '../parser.js';
8
+ import { rebuildFile } from './builder/incremental.js';
4
9
  import { appendChangeEvents, buildChangeEvent, diffSymbols } from './change-journal.js';
5
- import { EXTENSIONS, IGNORE_DIRS, normalizePath } from './constants.js';
6
- import { closeDb, getNodeId as getNodeIdQuery, initSchema, openDb } from './db.js';
7
10
  import { appendJournalEntries } from './journal.js';
8
- import { info, warn } from './logger.js';
9
- import { createParseTreeCache, getActiveEngine, parseFileIncremental } from './parser.js';
10
- import { resolveImportPath } from './resolve.js';
11
11
 
12
12
  function shouldIgnore(filePath) {
13
13
  const parts = filePath.split(path.sep);
@@ -18,152 +18,10 @@ function isTrackedExt(filePath) {
18
18
  return EXTENSIONS.has(path.extname(filePath));
19
19
  }
20
20
 
21
- /**
22
- * Parse a single file and update the database incrementally.
23
- */
24
- async function updateFile(_db, rootDir, filePath, stmts, engineOpts, cache) {
25
- const relPath = normalizePath(path.relative(rootDir, filePath));
26
-
27
- const oldNodes = stmts.countNodes.get(relPath)?.c || 0;
28
- const _oldEdges = stmts.countEdgesForFile.get(relPath)?.c || 0;
29
- const oldSymbols = stmts.listSymbols.all(relPath);
30
-
31
- stmts.deleteEdgesForFile.run(relPath);
32
- stmts.deleteNodes.run(relPath);
33
-
34
- if (!fs.existsSync(filePath)) {
35
- if (cache) cache.remove(filePath);
36
- const symbolDiff = diffSymbols(oldSymbols, []);
37
- return {
38
- file: relPath,
39
- nodesAdded: 0,
40
- nodesRemoved: oldNodes,
41
- edgesAdded: 0,
42
- deleted: true,
43
- event: 'deleted',
44
- symbolDiff,
45
- nodesBefore: oldNodes,
46
- nodesAfter: 0,
47
- };
48
- }
49
-
50
- let code;
51
- try {
52
- code = readFileSafe(filePath);
53
- } catch (err) {
54
- warn(`Cannot read ${relPath}: ${err.message}`);
55
- return null;
56
- }
57
-
58
- const symbols = await parseFileIncremental(cache, filePath, code, engineOpts);
59
- if (!symbols) return null;
60
-
61
- stmts.insertNode.run(relPath, 'file', relPath, 0, null);
62
-
63
- for (const def of symbols.definitions) {
64
- stmts.insertNode.run(def.name, def.kind, relPath, def.line, def.endLine || null);
65
- }
66
- for (const exp of symbols.exports) {
67
- stmts.insertNode.run(exp.name, exp.kind, relPath, exp.line, null);
68
- }
69
-
70
- const newNodes = stmts.countNodes.get(relPath)?.c || 0;
71
- const newSymbols = stmts.listSymbols.all(relPath);
72
-
73
- let edgesAdded = 0;
74
- const fileNodeRow = stmts.getNodeId.get(relPath, 'file', relPath, 0);
75
- if (!fileNodeRow)
76
- return { file: relPath, nodesAdded: newNodes, nodesRemoved: oldNodes, edgesAdded: 0 };
77
- const fileNodeId = fileNodeRow.id;
78
-
79
- // Load aliases for full import resolution
80
- const aliases = { baseUrl: null, paths: {} };
81
-
82
- for (const imp of symbols.imports) {
83
- const resolvedPath = resolveImportPath(
84
- path.join(rootDir, relPath),
85
- imp.source,
86
- rootDir,
87
- aliases,
88
- );
89
- const targetRow = stmts.getNodeId.get(resolvedPath, 'file', resolvedPath, 0);
90
- if (targetRow) {
91
- const edgeKind = imp.reexport ? 'reexports' : imp.typeOnly ? 'imports-type' : 'imports';
92
- stmts.insertEdge.run(fileNodeId, targetRow.id, edgeKind, 1.0, 0);
93
- edgesAdded++;
94
- }
95
- }
96
-
97
- const importedNames = new Map();
98
- for (const imp of symbols.imports) {
99
- const resolvedPath = resolveImportPath(
100
- path.join(rootDir, relPath),
101
- imp.source,
102
- rootDir,
103
- aliases,
104
- );
105
- for (const name of imp.names) {
106
- importedNames.set(name.replace(/^\*\s+as\s+/, ''), resolvedPath);
107
- }
108
- }
109
-
110
- for (const call of symbols.calls) {
111
- let caller = null;
112
- for (const def of symbols.definitions) {
113
- if (def.line <= call.line) {
114
- const row = stmts.getNodeId.get(def.name, def.kind, relPath, def.line);
115
- if (row) caller = row;
116
- }
117
- }
118
- if (!caller) caller = fileNodeRow;
119
-
120
- const importedFrom = importedNames.get(call.name);
121
- let targets;
122
- if (importedFrom) {
123
- targets = stmts.findNodeInFile.all(call.name, importedFrom);
124
- }
125
- if (!targets || targets.length === 0) {
126
- targets = stmts.findNodeInFile.all(call.name, relPath);
127
- if (targets.length === 0) {
128
- targets = stmts.findNodeByName.all(call.name);
129
- }
130
- }
131
-
132
- for (const t of targets) {
133
- if (t.id !== caller.id) {
134
- stmts.insertEdge.run(
135
- caller.id,
136
- t.id,
137
- 'calls',
138
- importedFrom ? 1.0 : 0.5,
139
- call.dynamic ? 1 : 0,
140
- );
141
- edgesAdded++;
142
- }
143
- }
144
- }
145
-
146
- const symbolDiff = diffSymbols(oldSymbols, newSymbols);
147
- const event = oldNodes === 0 ? 'added' : 'modified';
148
-
149
- return {
150
- file: relPath,
151
- nodesAdded: newNodes,
152
- nodesRemoved: oldNodes,
153
- edgesAdded,
154
- deleted: false,
155
- event,
156
- symbolDiff,
157
- nodesBefore: oldNodes,
158
- nodesAfter: newNodes,
159
- };
160
- }
161
-
162
21
  export async function watchProject(rootDir, opts = {}) {
163
22
  const dbPath = path.join(rootDir, '.codegraph', 'graph.db');
164
23
  if (!fs.existsSync(dbPath)) {
165
- console.error('No graph.db found. Run `codegraph build` first.');
166
- process.exit(1);
24
+ throw new DbError('No graph.db found. Run `codegraph build` first.', { file: dbPath });
167
25
  }
168
26
 
169
27
  const db = openDb(dbPath);
@@ -227,7 +85,9 @@ export async function watchProject(rootDir, opts = {}) {
227
85
 
228
86
  const results = [];
229
87
  for (const filePath of files) {
230
- const result = await updateFile(db, rootDir, filePath, stmts, engineOpts, cache);
88
+ const result = await rebuildFile(db, rootDir, filePath, stmts, engineOpts, cache, {
89
+ diffSymbols,
90
+ });
231
91
  if (result) results.push(result);
232
92
  }
233
93
  const updates = results;
@@ -2,8 +2,8 @@ import fs from 'node:fs';
2
2
  import path from 'node:path';
3
3
  import { fileURLToPath } from 'node:url';
4
4
  import { Language, Parser, Query } from 'web-tree-sitter';
5
- import { warn } from './logger.js';
6
- import { getNative, getNativePackageVersion, loadNative } from './native.js';
5
+ import { warn } from '../infrastructure/logger.js';
6
+ import { getNative, getNativePackageVersion, loadNative } from '../infrastructure/native.js';
7
7
 
8
8
  // Re-export all extractors for backward compatibility
9
9
  export {
@@ -16,7 +16,7 @@ export {
16
16
  extractRubySymbols,
17
17
  extractRustSymbols,
18
18
  extractSymbols,
19
- } from './extractors/index.js';
19
+ } from '../extractors/index.js';
20
20
 
21
21
  import {
22
22
  extractCSharpSymbols,
@@ -28,12 +28,12 @@ import {
28
28
  extractRubySymbols,
29
29
  extractRustSymbols,
30
30
  extractSymbols,
31
- } from './extractors/index.js';
31
+ } from '../extractors/index.js';
32
32
 
33
33
  const __dirname = path.dirname(fileURLToPath(import.meta.url));
34
34
 
35
35
  function grammarPath(name) {
36
- return path.join(__dirname, '..', 'grammars', name);
36
+ return path.join(__dirname, '..', '..', 'grammars', name);
37
37
  }
38
38
 
39
39
  let _initialized = false;
@@ -0,0 +1,48 @@
1
+ /**
2
+ * queries.js — Barrel re-export file.
3
+ *
4
+ * All query logic lives in the sub-modules under src/domain/analysis/, src/shared/ and src/infrastructure/.
5
+ * This file exists purely for backward compatibility so that all existing
6
+ * importers continue to work without changes.
7
+ */
8
+
9
+ // ── Re-export from dedicated module for backward compat ───────────────────
10
+ export { isTestFile, TEST_PATTERN } from '../infrastructure/test-filter.js';
11
+ export { iterListFunctions, iterRoles, iterWhere } from '../shared/generators.js';
12
+ // ── Kind/edge constants (canonical source: kinds.js) ─────────────────────
13
+ export {
14
+ ALL_SYMBOL_KINDS,
15
+ CORE_EDGE_KINDS,
16
+ CORE_SYMBOL_KINDS,
17
+ EVERY_EDGE_KIND,
18
+ EVERY_SYMBOL_KIND,
19
+ EXTENDED_SYMBOL_KINDS,
20
+ STRUCTURAL_EDGE_KINDS,
21
+ VALID_ROLES,
22
+ } from '../shared/kinds.js';
23
+ // ── Shared utilities ─────────────────────────────────────────────────────
24
+ export { kindIcon, normalizeSymbol } from '../shared/normalize.js';
25
+ export { contextData, explainData } from './analysis/context.js';
26
+ export { fileDepsData, fnDepsData, pathData } from './analysis/dependencies.js';
27
+ export { exportsData } from './analysis/exports.js';
28
+ export {
29
+ diffImpactData,
30
+ diffImpactMermaid,
31
+ fnImpactData,
32
+ impactAnalysisData,
33
+ } from './analysis/impact.js';
34
+ export {
35
+ FALSE_POSITIVE_CALLER_THRESHOLD,
36
+ FALSE_POSITIVE_NAMES,
37
+ moduleMapData,
38
+ statsData,
39
+ } from './analysis/module-map.js';
40
+ export { rolesData } from './analysis/roles.js';
41
+ // ── Analysis modules ─────────────────────────────────────────────────────
42
+ export {
43
+ childrenData,
44
+ findMatchingNodes,
45
+ listFunctionsData,
46
+ queryNameData,
47
+ whereData,
48
+ } from './analysis/symbol-lookup.js';
@@ -0,0 +1,163 @@
1
+ import fs from 'node:fs';
2
+ import path from 'node:path';
3
+ import { closeDb, findDbPath, openDb } from '../../db/index.js';
4
+ import { warn } from '../../infrastructure/logger.js';
5
+ import { DbError } from '../../shared/errors.js';
6
+ import { embed, getModelConfig } from './models.js';
7
+ import { buildSourceText } from './strategies/source.js';
8
+ import { buildStructuredText } from './strategies/structured.js';
9
+
10
/**
 * Approximate how many model tokens a piece of text will consume.
 *
 * Assumes roughly four characters per token and rounds up, which keeps the
 * estimate dependency-free (no tokenizer is loaded).
 *
 * @param {string} text - Text to measure.
 * @returns {number} Estimated token count.
 */
export function estimateTokens(text) {
  const charsPerToken = 4;
  const charCount = text.length;
  return Math.ceil(charCount / charsPerToken);
}
17
+
18
/**
 * Create (or upgrade) the tables used by the embeddings subsystem.
 *
 * Runs three steps against the given database handle:
 *  1. creates `embeddings` and `embedding_meta` if they do not exist,
 *  2. adds the `full_text` column to `embeddings`,
 *  3. creates the `fts_index` FTS5 virtual table used for keyword search.
 *
 * @param {object} db - Database handle exposing `exec(sql)`.
 * @throws Any error raised by `db.exec`, except the "duplicate column name"
 *   error from step 2, which only means the column was added by an earlier run.
 */
function initEmbeddingsSchema(db) {
  db.exec(`
    CREATE TABLE IF NOT EXISTS embeddings (
      node_id INTEGER PRIMARY KEY,
      vector BLOB NOT NULL,
      text_preview TEXT,
      FOREIGN KEY(node_id) REFERENCES nodes(id)
    );
    CREATE TABLE IF NOT EXISTS embedding_meta (
      key TEXT PRIMARY KEY,
      value TEXT
    );
  `);

  // Add full_text column. SQLite has no "ADD COLUMN IF NOT EXISTS", so the
  // statement is expected to fail once the column is present. Only that
  // specific failure is ignored; anything else (locked or read-only database,
  // corruption, ...) is surfaced here instead of at a later insert.
  try {
    db.exec('ALTER TABLE embeddings ADD COLUMN full_text TEXT');
  } catch (err) {
    const message = String(err?.message ?? err);
    if (!/duplicate column name/i.test(message)) throw err;
  }

  // FTS5 virtual table for BM25 keyword search
  db.exec(`
    CREATE VIRTUAL TABLE IF NOT EXISTS fts_index USING fts5(
      name,
      content,
      tokenize='unicode61'
    );
  `);
}
48
+
49
/**
 * Build embeddings for all functions/methods/classes in the graph.
 *
 * The previous embeddings, metadata and FTS rows are cleared inside the same
 * transaction that writes the new ones, so a failure while validating the
 * model, reading sources or running the model leaves the existing index intact.
 * The database handle is always closed, including on failure.
 *
 * @param {string} rootDir - Project root directory
 * @param {string} modelKey - Model identifier from MODELS registry
 * @param {string} [customDbPath] - Override path to graph.db
 * @param {object} [options] - Embedding options
 * @param {string} [options.strategy='structured'] - 'structured' (graph-enriched) or 'source' (raw code)
 * @returns {Promise<void>}
 * @throws {DbError} If no database file exists at the resolved path.
 */
export async function buildEmbeddings(rootDir, modelKey, customDbPath, options = {}) {
  const strategy = options.strategy || 'structured';
  const dbPath = customDbPath || findDbPath(null);

  if (!fs.existsSync(dbPath)) {
    throw new DbError(
      `No codegraph database found at ${dbPath}.\nRun "codegraph build" first to analyze your codebase.`,
      { file: dbPath },
    );
  }

  // Resolve the model before opening the database so an unknown model key
  // fails without touching any stored data.
  const config = getModelConfig(modelKey);
  const contextWindow = config.contextWindow;

  const db = openDb(dbPath);
  try {
    initEmbeddingsSchema(db);

    const nodes = db
      .prepare(
        `SELECT * FROM nodes WHERE kind IN ('function', 'method', 'class') ORDER BY file, line`,
      )
      .all();

    console.log(`Building embeddings for ${nodes.length} symbols (strategy: ${strategy})...`);

    // Group symbols by file so each source file is read only once.
    const byFile = new Map();
    for (const node of nodes) {
      if (!byFile.has(node.file)) byFile.set(node.file, []);
      byFile.get(node.file).push(node);
    }

    // Parallel arrays: index i describes the same symbol in each of them.
    const texts = [];
    const nodeIds = [];
    const nodeNames = [];
    const previews = [];
    let overflowCount = 0;

    for (const [file, fileNodes] of byFile) {
      const fullPath = path.join(rootDir, file);
      let lines;
      try {
        lines = fs.readFileSync(fullPath, 'utf-8').split('\n');
      } catch (err) {
        // Unreadable files are skipped; their symbols simply get no embedding.
        warn(`Cannot read ${file} for embeddings: ${err.message}`);
        continue;
      }

      for (const node of fileNodes) {
        let text =
          strategy === 'structured'
            ? buildStructuredText(node, file, lines, db)
            : buildSourceText(node, file, lines);

        // Detect and handle context window overflow
        const tokens = estimateTokens(text);
        if (tokens > contextWindow) {
          overflowCount++;
          // Inverse of the 4-chars-per-token estimate used by estimateTokens.
          const maxChars = contextWindow * 4;
          text = text.slice(0, maxChars);
        }

        texts.push(text);
        nodeIds.push(node.id);
        nodeNames.push(node.name);
        previews.push(`${node.name} (${node.kind}) -- ${file}:${node.line}`);
      }
    }

    if (overflowCount > 0) {
      warn(
        `${overflowCount} symbol(s) exceeded model context window (${contextWindow} tokens) and were truncated`,
      );
    }

    console.log(`Embedding ${texts.length} symbols...`);
    const { vectors, dim } = await embed(texts, modelKey);

    const clearEmbeddings = db.prepare('DELETE FROM embeddings');
    const clearMeta = db.prepare('DELETE FROM embedding_meta');
    const clearFts = db.prepare('DELETE FROM fts_index');
    const insert = db.prepare(
      'INSERT OR REPLACE INTO embeddings (node_id, vector, text_preview, full_text) VALUES (?, ?, ?, ?)',
    );
    const insertFts = db.prepare('INSERT INTO fts_index(rowid, name, content) VALUES (?, ?, ?)');
    const insertMeta = db.prepare(
      'INSERT OR REPLACE INTO embedding_meta (key, value) VALUES (?, ?)',
    );

    // Replace the old index atomically: either everything below is committed
    // or the previous rows stay as they were.
    const replaceAll = db.transaction(() => {
      clearEmbeddings.run();
      clearMeta.run();
      clearFts.run();

      for (let i = 0; i < vectors.length; i++) {
        const vec = vectors[i];
        // Honour byteOffset/byteLength so a typed-array view over a larger
        // buffer is stored as exactly its own bytes.
        const blob = Buffer.from(vec.buffer, vec.byteOffset, vec.byteLength);
        insert.run(nodeIds[i], blob, previews[i], texts[i]);
        insertFts.run(nodeIds[i], nodeNames[i], texts[i]);
      }
      insertMeta.run('model', config.name);
      insertMeta.run('dim', String(dim));
      insertMeta.run('count', String(vectors.length));
      insertMeta.run('fts_count', String(vectors.length));
      insertMeta.run('strategy', strategy);
      insertMeta.run('built_at', new Date().toISOString());
      if (overflowCount > 0) {
        insertMeta.run('truncated_count', String(overflowCount));
      }
    });
    replaceAll();

    console.log(
      `\nStored ${vectors.length} embeddings (${dim}d, ${config.name}, strategy: ${strategy}) in graph.db`,
    );
  } finally {
    closeDb(db);
  }
}
@@ -0,0 +1,13 @@
1
+ /**
2
+ * Embeddings subsystem — public API barrel.
3
+ *
4
+ * Re-exports everything consumers previously imported from `../embedder.js`.
5
+ */
6
+
7
+ export { buildEmbeddings, estimateTokens } from './generator.js';
8
+ export { DEFAULT_MODEL, disposeModel, EMBEDDING_STRATEGIES, embed, MODELS } from './models.js';
9
+ export { search } from './search/cli-formatter.js';
10
+ export { hybridSearchData } from './search/hybrid.js';
11
+ export { ftsSearchData } from './search/keyword.js';
12
+ export { multiSearchData, searchData } from './search/semantic.js';
13
+ export { cosineSim } from './stores/sqlite-blob.js';
@@ -0,0 +1,218 @@
1
+ import { execFileSync } from 'node:child_process';
2
+ import { createInterface } from 'node:readline';
3
+ import { info } from '../../infrastructure/logger.js';
4
+ import { ConfigError, EngineError } from '../../shared/errors.js';
5
+
6
// Module-level state for the lazily loaded transformers runtime (heavy,
// optional module). loadModel() fills these in; disposeModel() resets the
// extractor and the active model name.
let pipeline = null;
let extractor = null;
let activeModel = null;

// Context window shared by the long-context models in the registry below.
const LONG_CONTEXT_TOKENS = 8192;

/**
 * Registry of supported embedding models, keyed by the short name accepted on
 * the command line. Each entry carries the model id (`name`), the embedding
 * dimension (`dim`), the context window in tokens (`contextWindow`), a short
 * human-readable description (`desc`) and whether `quantized: true` is passed
 * to the pipeline (`quantized`).
 */
export const MODELS = {
  minilm: {
    name: 'Xenova/all-MiniLM-L6-v2',
    dim: 384,
    contextWindow: 256,
    desc: 'Smallest, fastest (~23MB). General text.',
    quantized: true,
  },
  'jina-small': {
    name: 'Xenova/jina-embeddings-v2-small-en',
    dim: 512,
    contextWindow: LONG_CONTEXT_TOKENS,
    desc: 'Small, good quality (~33MB). General text.',
    quantized: false,
  },
  'jina-base': {
    name: 'Xenova/jina-embeddings-v2-base-en',
    dim: 768,
    contextWindow: LONG_CONTEXT_TOKENS,
    desc: 'Good quality (~137MB). General text, 8192 token context.',
    quantized: false,
  },
  'jina-code': {
    name: 'Xenova/jina-embeddings-v2-base-code',
    dim: 768,
    contextWindow: LONG_CONTEXT_TOKENS,
    desc: 'Code-aware (~137MB). Trained on code+text, best for code search.',
    quantized: false,
  },
  nomic: {
    name: 'Xenova/nomic-embed-text-v1',
    dim: 768,
    contextWindow: LONG_CONTEXT_TOKENS,
    desc: 'Good local quality (~137MB). 8192 context.',
    quantized: false,
  },
  'nomic-v1.5': {
    name: 'nomic-ai/nomic-embed-text-v1.5',
    dim: 768,
    contextWindow: LONG_CONTEXT_TOKENS,
    desc: 'Improved nomic (~137MB). Matryoshka dimensions, 8192 context.',
    quantized: false,
  },
  'bge-large': {
    name: 'Xenova/bge-large-en-v1.5',
    dim: 1024,
    contextWindow: 512,
    desc: 'Best general retrieval (~335MB). Top MTEB scores.',
    quantized: false,
  },
};

/** Names of the supported text-building strategies. */
export const EMBEDDING_STRATEGIES = ['structured', 'source'];

/** Registry key used when the caller does not name a model. */
export const DEFAULT_MODEL = 'nomic-v1.5';

// Texts per extractor call for each registry key; embed() falls back to
// DEFAULT_BATCH_SIZE for keys that are not listed here.
const BATCH_SIZE_MAP = {
  minilm: 32,
  'jina-small': 16,
  'jina-base': 8,
  'jina-code': 8,
  nomic: 8,
  'nomic-v1.5': 8,
  'bge-large': 4,
};
const DEFAULT_BATCH_SIZE = 32;
76
+
77
/**
 * Resolve a model key to its entry in the MODELS registry.
 *
 * @internal Used by generator.js — not part of the public barrel.
 * @param {string} [modelKey] - Registry key; falls back to DEFAULT_MODEL when falsy.
 * @returns {object} The matching MODELS entry.
 * @throws {ConfigError} If the key is not a model defined in MODELS.
 */
export function getModelConfig(modelKey) {
  const key = modelKey || DEFAULT_MODEL;
  // Own-property check: a plain `MODELS[key]` lookup also matches inherited
  // names such as "constructor" or "toString" and would hand back a function
  // instead of rejecting the key.
  if (!Object.hasOwn(MODELS, key)) {
    throw new ConfigError(`Unknown model: ${key}. Available: ${Object.keys(MODELS).join(', ')}`);
  }
  return MODELS[key];
}
86
+
87
/**
 * Prompt the user to install a missing package interactively.
 *
 * Skips the prompt entirely in non-TTY environments (CI, piped stdin). The
 * question is written to stderr; "y" or "yes" (case-insensitive) triggers a
 * synchronous `npm install <packageName>` with inherited stdio and a
 * five-minute timeout.
 *
 * @internal Not part of the public barrel.
 * @param {string} packageName - Package to install. On Windows it is passed
 *   through a shell, so callers must supply a trusted, fixed name.
 * @returns {Promise<boolean>} true if npm completed successfully; false if
 *   stdin is not a TTY, the user declined, or the install failed.
 */
export function promptInstall(packageName) {
  if (!process.stdin.isTTY) return Promise.resolve(false);

  return new Promise((resolve) => {
    const rl = createInterface({ input: process.stdin, output: process.stderr });
    rl.question(`Semantic search requires ${packageName}. Install it now? [y/N] `, (answer) => {
      rl.close();
      const reply = answer.trim().toLowerCase();
      if (reply !== 'y' && reply !== 'yes') return resolve(false);
      try {
        execFileSync('npm', ['install', packageName], {
          stdio: 'inherit',
          timeout: 300_000,
          // On Windows npm is a .cmd shim, which cannot be spawned directly;
          // it has to go through a shell.
          shell: process.platform === 'win32',
        });
        resolve(true);
      } catch {
        // npm already reported the failure on the inherited stdio; the caller
        // only needs to know that the package is still unavailable.
        resolve(false);
      }
    });
  });
}
113
+
114
/**
 * Lazy-load @huggingface/transformers.
 *
 * If the first import fails, the user is offered an interactive install via
 * promptInstall() and the import is retried once after a successful install.
 *
 * @internal Not part of the public barrel.
 * @returns {Promise<object>} The imported module namespace.
 * @throws {EngineError} If the package is unavailable and was not installed
 *   (declined, non-TTY, or install failure), or if it was installed but still
 *   fails to load. The underlying import error is attached as `cause`.
 */
export async function loadTransformers() {
  const pkg = '@huggingface/transformers';

  let importErr;
  try {
    return await import('@huggingface/transformers');
  } catch (err) {
    // Kept so the final error can explain why the import failed.
    importErr = err;
  }

  const installed = await promptInstall(pkg);
  if (installed) {
    try {
      return await import(pkg);
    } catch (loadErr) {
      throw new EngineError(
        `${pkg} was installed but failed to load. Please check your environment.`,
        { cause: loadErr },
      );
    }
  }

  throw new EngineError(`Semantic search requires ${pkg}.\nInstall it with: npm install ${pkg}`, {
    cause: importErr,
  });
}
139
+
140
/**
 * Dispose the current ONNX session and free memory.
 * Safe to call when no model is loaded (no-op).
 *
 * The cached extractor and active model name are cleared before dispose() is
 * awaited, so a rejected dispose() cannot leave a stale session behind for
 * loadModel() to reuse, and overlapping calls do not dispose the same session
 * twice.
 *
 * @returns {Promise<void>}
 * @throws Whatever the extractor's dispose() rejects with.
 */
export async function disposeModel() {
  const current = extractor;
  extractor = null;
  activeModel = null;
  if (current) {
    await current.dispose();
  }
}
151
+
152
/**
 * Return a ready feature-extraction pipeline for the given model key.
 *
 * Reuses the cached extractor when the requested model is already active;
 * otherwise disposes the current one, loads the transformers runtime and
 * builds a new pipeline.
 *
 * @param {string} [modelKey] - Registry key, resolved through getModelConfig().
 * @returns {Promise<{extractor: Function, config: object}>}
 * @throws {EngineError} If the pipeline cannot be created; the original error
 *   is attached as `cause`.
 */
async function loadModel(modelKey) {
  const config = getModelConfig(modelKey);

  const alreadyActive = Boolean(extractor) && activeModel === config.name;
  if (alreadyActive) {
    return { extractor, config };
  }

  // Dispose previous model before loading a different one
  await disposeModel();

  ({ pipeline } = await loadTransformers());

  info(`Loading embedding model: ${config.name} (${config.dim}d)...`);

  const pipelineOpts = {};
  if (config.quantized) {
    pipelineOpts.quantized = true;
  }

  try {
    extractor = await pipeline('feature-extraction', config.name, pipelineOpts);
  } catch (err) {
    const msg = err.message || String(err);
    // Any of these markers in the message is treated as "model needs a token".
    const authMarkers = ['Unauthorized', '401', 'gated'];
    const needsAuth = authMarkers.some((marker) => msg.includes(marker));

    const detail = needsAuth
      ? [
          `Model "${config.name}" requires authentication.`,
          `This model is gated on HuggingFace and needs an access token.`,
          ``,
          `Options:`,
          ` 1. Set HF_TOKEN env var: export HF_TOKEN=hf_...`,
          ` 2. Use a public model instead: codegraph embed --model minilm`,
        ].join('\n')
      : [
          `Failed to load model "${config.name}": ${msg}`,
          `Try a different model: codegraph embed --model minilm`,
        ].join('\n');

    throw new EngineError(detail, { cause: err });
  }

  activeModel = config.name;
  info('Model loaded.');
  return { extractor, config };
}
189
+
190
/**
 * Turn a list of texts into embedding vectors.
 *
 * Texts are sent to the extractor in batches (size looked up per model key)
 * with mean pooling and normalization enabled. Each row of the batch output is
 * copied into its own Float32Array of length `config.dim`.
 *
 * @param {string[]} texts - Inputs to embed.
 * @param {string} [modelKey] - Registry key; DEFAULT_MODEL is used for the
 *   batch-size lookup when omitted.
 * @returns {Promise<{vectors: Float32Array[], dim: number}>}
 */
export async function embed(texts, modelKey) {
  const { extractor: ext, config } = await loadModel(modelKey);
  const { dim } = config;
  const batchSize = BATCH_SIZE_MAP[modelKey || DEFAULT_MODEL] || DEFAULT_BATCH_SIZE;
  // Progress is only reported when the work spans more than one batch.
  const showProgress = texts.length > batchSize;
  const vectors = [];

  for (let offset = 0; offset < texts.length; offset += batchSize) {
    const batch = texts.slice(offset, offset + batchSize);
    const output = await ext(batch, { pooling: 'mean', normalize: true });

    batch.forEach((_text, row) => {
      // output.data is read as a flat, row-major buffer: row r starts at r * dim.
      const base = row * dim;
      vectors.push(Float32Array.from({ length: dim }, (_v, col) => output.data[base + col]));
    });

    if (showProgress) {
      const done = Math.min(offset + batchSize, texts.length);
      process.stdout.write(` Embedded ${done}/${texts.length}\r`);
    }
  }

  return { vectors, dim };
}