@optave/codegraph 3.1.2 → 3.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (194)
  1. package/README.md +19 -21
  2. package/package.json +10 -7
  3. package/src/analysis/context.js +408 -0
  4. package/src/analysis/dependencies.js +341 -0
  5. package/src/analysis/exports.js +130 -0
  6. package/src/analysis/impact.js +463 -0
  7. package/src/analysis/module-map.js +322 -0
  8. package/src/analysis/roles.js +45 -0
  9. package/src/analysis/symbol-lookup.js +232 -0
  10. package/src/ast-analysis/shared.js +5 -4
  11. package/src/batch.js +2 -1
  12. package/src/builder/context.js +85 -0
  13. package/src/builder/helpers.js +218 -0
  14. package/src/builder/incremental.js +178 -0
  15. package/src/builder/pipeline.js +130 -0
  16. package/src/builder/stages/build-edges.js +297 -0
  17. package/src/builder/stages/build-structure.js +113 -0
  18. package/src/builder/stages/collect-files.js +44 -0
  19. package/src/builder/stages/detect-changes.js +413 -0
  20. package/src/builder/stages/finalize.js +139 -0
  21. package/src/builder/stages/insert-nodes.js +195 -0
  22. package/src/builder/stages/parse-files.js +28 -0
  23. package/src/builder/stages/resolve-imports.js +143 -0
  24. package/src/builder/stages/run-analyses.js +44 -0
  25. package/src/builder.js +10 -1472
  26. package/src/cfg.js +1 -2
  27. package/src/cli/commands/ast.js +26 -0
  28. package/src/cli/commands/audit.js +46 -0
  29. package/src/cli/commands/batch.js +68 -0
  30. package/src/cli/commands/branch-compare.js +21 -0
  31. package/src/cli/commands/build.js +26 -0
  32. package/src/cli/commands/cfg.js +30 -0
  33. package/src/cli/commands/check.js +79 -0
  34. package/src/cli/commands/children.js +31 -0
  35. package/src/cli/commands/co-change.js +65 -0
  36. package/src/cli/commands/communities.js +23 -0
  37. package/src/cli/commands/complexity.js +45 -0
  38. package/src/cli/commands/context.js +34 -0
  39. package/src/cli/commands/cycles.js +28 -0
  40. package/src/cli/commands/dataflow.js +32 -0
  41. package/src/cli/commands/deps.js +16 -0
  42. package/src/cli/commands/diff-impact.js +30 -0
  43. package/src/cli/commands/embed.js +30 -0
  44. package/src/cli/commands/export.js +75 -0
  45. package/src/cli/commands/exports.js +18 -0
  46. package/src/cli/commands/flow.js +36 -0
  47. package/src/cli/commands/fn-impact.js +30 -0
  48. package/src/cli/commands/impact.js +16 -0
  49. package/src/cli/commands/info.js +76 -0
  50. package/src/cli/commands/map.js +19 -0
  51. package/src/cli/commands/mcp.js +18 -0
  52. package/src/cli/commands/models.js +19 -0
  53. package/src/cli/commands/owners.js +25 -0
  54. package/src/cli/commands/path.js +36 -0
  55. package/src/cli/commands/plot.js +80 -0
  56. package/src/cli/commands/query.js +49 -0
  57. package/src/cli/commands/registry.js +100 -0
  58. package/src/cli/commands/roles.js +34 -0
  59. package/src/cli/commands/search.js +42 -0
  60. package/src/cli/commands/sequence.js +32 -0
  61. package/src/cli/commands/snapshot.js +61 -0
  62. package/src/cli/commands/stats.js +15 -0
  63. package/src/cli/commands/structure.js +32 -0
  64. package/src/cli/commands/triage.js +78 -0
  65. package/src/cli/commands/watch.js +12 -0
  66. package/src/cli/commands/where.js +24 -0
  67. package/src/cli/index.js +118 -0
  68. package/src/cli/shared/options.js +39 -0
  69. package/src/cli/shared/output.js +1 -0
  70. package/src/cli.js +11 -1514
  71. package/src/commands/check.js +5 -5
  72. package/src/commands/manifesto.js +3 -3
  73. package/src/commands/structure.js +1 -1
  74. package/src/communities.js +15 -87
  75. package/src/complexity.js +1 -1
  76. package/src/cycles.js +30 -85
  77. package/src/dataflow.js +1 -2
  78. package/src/db/connection.js +4 -4
  79. package/src/db/migrations.js +41 -0
  80. package/src/db/query-builder.js +6 -5
  81. package/src/db/repository/base.js +201 -0
  82. package/src/db/repository/cached-stmt.js +19 -0
  83. package/src/db/repository/cfg.js +27 -38
  84. package/src/db/repository/cochange.js +16 -3
  85. package/src/db/repository/complexity.js +11 -6
  86. package/src/db/repository/dataflow.js +6 -1
  87. package/src/db/repository/edges.js +120 -98
  88. package/src/db/repository/embeddings.js +14 -3
  89. package/src/db/repository/graph-read.js +32 -9
  90. package/src/db/repository/in-memory-repository.js +584 -0
  91. package/src/db/repository/index.js +6 -1
  92. package/src/db/repository/nodes.js +110 -40
  93. package/src/db/repository/sqlite-repository.js +219 -0
  94. package/src/db.js +5 -0
  95. package/src/embeddings/generator.js +163 -0
  96. package/src/embeddings/index.js +13 -0
  97. package/src/embeddings/models.js +218 -0
  98. package/src/embeddings/search/cli-formatter.js +151 -0
  99. package/src/embeddings/search/filters.js +46 -0
  100. package/src/embeddings/search/hybrid.js +121 -0
  101. package/src/embeddings/search/keyword.js +68 -0
  102. package/src/embeddings/search/prepare.js +66 -0
  103. package/src/embeddings/search/semantic.js +145 -0
  104. package/src/embeddings/stores/fts5.js +27 -0
  105. package/src/embeddings/stores/sqlite-blob.js +24 -0
  106. package/src/embeddings/strategies/source.js +14 -0
  107. package/src/embeddings/strategies/structured.js +43 -0
  108. package/src/embeddings/strategies/text-utils.js +43 -0
  109. package/src/errors.js +78 -0
  110. package/src/export.js +217 -520
  111. package/src/extractors/csharp.js +10 -2
  112. package/src/extractors/go.js +3 -1
  113. package/src/extractors/helpers.js +71 -0
  114. package/src/extractors/java.js +9 -2
  115. package/src/extractors/javascript.js +38 -1
  116. package/src/extractors/php.js +3 -1
  117. package/src/extractors/python.js +14 -3
  118. package/src/extractors/rust.js +3 -1
  119. package/src/graph/algorithms/bfs.js +49 -0
  120. package/src/graph/algorithms/centrality.js +16 -0
  121. package/src/graph/algorithms/index.js +5 -0
  122. package/src/graph/algorithms/louvain.js +26 -0
  123. package/src/graph/algorithms/shortest-path.js +41 -0
  124. package/src/graph/algorithms/tarjan.js +49 -0
  125. package/src/graph/builders/dependency.js +91 -0
  126. package/src/graph/builders/index.js +3 -0
  127. package/src/graph/builders/structure.js +40 -0
  128. package/src/graph/builders/temporal.js +33 -0
  129. package/src/graph/classifiers/index.js +2 -0
  130. package/src/graph/classifiers/risk.js +85 -0
  131. package/src/graph/classifiers/roles.js +64 -0
  132. package/src/graph/index.js +13 -0
  133. package/src/graph/model.js +230 -0
  134. package/src/index.js +33 -204
  135. package/src/infrastructure/result-formatter.js +2 -21
  136. package/src/mcp/index.js +2 -0
  137. package/src/mcp/middleware.js +26 -0
  138. package/src/mcp/server.js +128 -0
  139. package/src/mcp/tool-registry.js +801 -0
  140. package/src/mcp/tools/ast-query.js +14 -0
  141. package/src/mcp/tools/audit.js +21 -0
  142. package/src/mcp/tools/batch-query.js +11 -0
  143. package/src/mcp/tools/branch-compare.js +10 -0
  144. package/src/mcp/tools/cfg.js +21 -0
  145. package/src/mcp/tools/check.js +43 -0
  146. package/src/mcp/tools/co-changes.js +20 -0
  147. package/src/mcp/tools/code-owners.js +12 -0
  148. package/src/mcp/tools/communities.js +15 -0
  149. package/src/mcp/tools/complexity.js +18 -0
  150. package/src/mcp/tools/context.js +17 -0
  151. package/src/mcp/tools/dataflow.js +26 -0
  152. package/src/mcp/tools/diff-impact.js +24 -0
  153. package/src/mcp/tools/execution-flow.js +26 -0
  154. package/src/mcp/tools/export-graph.js +57 -0
  155. package/src/mcp/tools/file-deps.js +12 -0
  156. package/src/mcp/tools/file-exports.js +13 -0
  157. package/src/mcp/tools/find-cycles.js +15 -0
  158. package/src/mcp/tools/fn-impact.js +15 -0
  159. package/src/mcp/tools/impact-analysis.js +12 -0
  160. package/src/mcp/tools/index.js +71 -0
  161. package/src/mcp/tools/list-functions.js +14 -0
  162. package/src/mcp/tools/list-repos.js +11 -0
  163. package/src/mcp/tools/module-map.js +6 -0
  164. package/src/mcp/tools/node-roles.js +14 -0
  165. package/src/mcp/tools/path.js +12 -0
  166. package/src/mcp/tools/query.js +30 -0
  167. package/src/mcp/tools/semantic-search.js +65 -0
  168. package/src/mcp/tools/sequence.js +17 -0
  169. package/src/mcp/tools/structure.js +15 -0
  170. package/src/mcp/tools/symbol-children.js +14 -0
  171. package/src/mcp/tools/triage.js +35 -0
  172. package/src/mcp/tools/where.js +13 -0
  173. package/src/mcp.js +2 -1470
  174. package/src/native.js +34 -10
  175. package/src/parser.js +53 -2
  176. package/src/presentation/colors.js +44 -0
  177. package/src/presentation/export.js +444 -0
  178. package/src/presentation/result-formatter.js +21 -0
  179. package/src/presentation/sequence-renderer.js +43 -0
  180. package/src/presentation/table.js +47 -0
  181. package/src/presentation/viewer.js +634 -0
  182. package/src/queries.js +35 -2276
  183. package/src/resolve.js +1 -1
  184. package/src/sequence.js +2 -38
  185. package/src/shared/file-utils.js +153 -0
  186. package/src/shared/generators.js +125 -0
  187. package/src/shared/hierarchy.js +27 -0
  188. package/src/shared/normalize.js +59 -0
  189. package/src/snapshot.js +6 -5
  190. package/src/structure.js +15 -40
  191. package/src/triage.js +20 -72
  192. package/src/viewer.js +35 -656
  193. package/src/watcher.js +8 -148
  194. package/src/embedder.js +0 -1097
@@ -1,5 +1,7 @@
1
+ import { ConfigError } from '../../errors.js';
1
2
  import { EVERY_SYMBOL_KIND, VALID_ROLES } from '../../kinds.js';
2
3
  import { NodeQuery } from '../query-builder.js';
4
+ import { cachedStmt } from './cached-stmt.js';
3
5
 
4
6
  // ─── Query-builder based lookups (moved from src/db/repository.js) ─────
5
7
 
@@ -36,10 +38,12 @@ export function findNodesWithFanIn(db, namePattern, opts = {}) {
36
38
  */
37
39
  export function findNodesForTriage(db, opts = {}) {
38
40
  if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) {
39
- throw new Error(`Invalid kind: ${opts.kind} (expected one of ${EVERY_SYMBOL_KIND.join(', ')})`);
41
+ throw new ConfigError(
42
+ `Invalid kind: ${opts.kind} (expected one of ${EVERY_SYMBOL_KIND.join(', ')})`,
43
+ );
40
44
  }
41
45
  if (opts.role && !VALID_ROLES.includes(opts.role)) {
42
- throw new Error(`Invalid role: ${opts.role} (expected one of ${VALID_ROLES.join(', ')})`);
46
+ throw new ConfigError(`Invalid role: ${opts.role} (expected one of ${VALID_ROLES.join(', ')})`);
43
47
  }
44
48
 
45
49
  const kindsToUse = opts.kind ? [opts.kind] : ['function', 'method', 'class'];
@@ -100,13 +104,27 @@ export function iterateFunctionNodes(db, opts = {}) {
100
104
  return _functionNodeQuery(opts).iterate(db);
101
105
  }
102
106
 
// ─── Statement caches (one prepared statement per db instance) ────────────
// Every cache is a WeakMap keyed by the db object, so a cached statement
// becomes collectable as soon as its db handle is no longer referenced.

// Aggregate counters.
const _countNodesStmt = new WeakMap();
const _countEdgesStmt = new WeakMap();
const _countFilesStmt = new WeakMap();

// Row lookups by id / file.
const _findNodeByIdStmt = new WeakMap();
const _findNodesByFileStmt = new WeakMap();
const _findFileNodesStmt = new WeakMap();

// Id resolution.
const _getNodeIdStmt = new WeakMap();
const _getFunctionNodeIdStmt = new WeakMap();
const _bulkNodeIdsByFileStmt = new WeakMap();

// Hierarchy / qualified-name lookups.
const _findNodeChildrenStmt = new WeakMap();
const _findNodeByQualifiedNameStmt = new WeakMap();
120
+
/**
 * Return the number of rows in the `nodes` table.
 * @param {object} db
 * @returns {number}
 */
export function countNodes(db) {
  const counter = cachedStmt(_countNodesStmt, db, 'SELECT COUNT(*) AS cnt FROM nodes');
  const { cnt } = counter.get();
  return cnt;
}
111
129
 
112
130
  /**
@@ -115,7 +133,7 @@ export function countNodes(db) {
115
133
  * @returns {number}
116
134
  */
export function countEdges(db) {
  // One cached COUNT statement per db handle.
  const counter = cachedStmt(_countEdgesStmt, db, 'SELECT COUNT(*) AS cnt FROM edges');
  const { cnt } = counter.get();
  return cnt;
}
120
138
 
121
139
  /**
@@ -124,7 +142,7 @@ export function countEdges(db) {
124
142
  * @returns {number}
125
143
  */
export function countFiles(db) {
  // Counts distinct `file` values across all node rows.
  const counter = cachedStmt(
    _countFilesStmt,
    db,
    'SELECT COUNT(DISTINCT file) AS cnt FROM nodes',
  );
  const { cnt } = counter.get();
  return cnt;
}
129
147
 
130
148
  // ─── Shared node lookups ───────────────────────────────────────────────
@@ -136,7 +154,7 @@ export function countFiles(db) {
136
154
  * @returns {object|undefined}
137
155
  */
export function findNodeById(db, id) {
  // `.get` yields the matching row, or undefined when no node has this id.
  const byId = cachedStmt(_findNodeByIdStmt, db, 'SELECT * FROM nodes WHERE id = ?');
  return byId.get(id);
}
141
159
 
142
160
  /**
@@ -146,9 +164,11 @@ export function findNodeById(db, id) {
146
164
  * @returns {object[]}
147
165
  */
export function findNodesByFile(db, file) {
  // Excludes the synthetic 'file' node itself; results are ordered by line.
  const byFile = cachedStmt(
    _findNodesByFileStmt,
    db,
    "SELECT * FROM nodes WHERE file = ? AND kind != 'file' ORDER BY line",
  );
  return byFile.all(file);
}
153
173
 
154
174
  /**
@@ -158,15 +178,13 @@ export function findNodesByFile(db, file) {
158
178
  * @returns {object[]}
159
179
  */
export function findFileNodes(db, fileLike) {
  // `fileLike` is bound as-is to LIKE, so the caller supplies any wildcards.
  const fileNodes = cachedStmt(
    _findFileNodesStmt,
    db,
    "SELECT * FROM nodes WHERE file LIKE ? AND kind = 'file'",
  );
  return fileNodes.all(fileLike);
}
163
187
 
164
- // ─── Statement caches (one prepared statement per db instance) ────────────
165
- // WeakMap keys on the db object so statements are GC'd when the db closes.
166
- const _getNodeIdStmt = new WeakMap();
167
- const _getFunctionNodeIdStmt = new WeakMap();
168
- const _bulkNodeIdsByFileStmt = new WeakMap();
169
-
170
188
  /**
171
189
  * Look up a node's ID by its unique (name, kind, file, line) tuple.
172
190
  * Shared by builder, watcher, structure, complexity, cfg, engine.
@@ -178,12 +196,11 @@ const _bulkNodeIdsByFileStmt = new WeakMap();
178
196
  * @returns {number|undefined}
179
197
  */
export function getNodeId(db, name, kind, file, line) {
  const lookup = cachedStmt(
    _getNodeIdStmt,
    db,
    'SELECT id FROM nodes WHERE name = ? AND kind = ? AND file = ? AND line = ?',
  );
  // No matching row -> undefined rather than a thrown error.
  const row = lookup.get(name, kind, file, line);
  return row?.id;
}
188
205
 
189
206
  /**
@@ -196,14 +213,11 @@ export function getNodeId(db, name, kind, file, line) {
196
213
  * @returns {number|undefined}
197
214
  */
export function getFunctionNodeId(db, name, file, line) {
  const lookup = cachedStmt(
    _getFunctionNodeIdStmt,
    db,
    "SELECT id FROM nodes WHERE name = ? AND kind IN ('function','method') AND file = ? AND line = ?",
  );
  // No matching row -> undefined rather than a thrown error.
  const row = lookup.get(name, file, line);
  return row?.id;
}
208
222
 
209
223
  /**
@@ -215,22 +229,78 @@ export function getFunctionNodeId(db, name, file, line) {
215
229
  * @returns {{ id: number, name: string, kind: string, line: number }[]}
216
230
  */
export function bulkNodeIdsByFile(db, file) {
  // Single query returning every node row of the file (id, name, kind, line).
  const idsForFile = cachedStmt(
    _bulkNodeIdsByFileStmt,
    db,
    'SELECT id, name, kind, line FROM nodes WHERE file = ?',
  );
  return idsForFile.all(file);
}
225
238
 
/**
 * List the direct children (parameters, properties, constants) of a node,
 * ordered by source line.
 * @param {object} db
 * @param {number} parentId
 * @returns {{ name: string, kind: string, line: number, end_line: number|null, qualified_name: string|null, scope: string|null, visibility: string|null }[]}
 */
export function findNodeChildren(db, parentId) {
  const childrenStmt = cachedStmt(
    _findNodeChildrenStmt,
    db,
    'SELECT name, kind, line, end_line, qualified_name, scope, visibility FROM nodes WHERE parent_id = ? ORDER BY line',
  );
  return childrenStmt.all(parentId);
}
252
+
/**
 * Backslash-escape the characters that are special inside a SQL LIKE pattern
 * (`%`, `_`, and the backslash itself) so the segment matches literally.
 */
function escapeLike(s) {
  return s.replace(/[%_\\]/g, (special) => `\\${special}`);
}
257
+
/**
 * Return every node whose `scope` column equals `scopeName`, ordered by file
 * and line. Lets callers ask for "all methods of class X" without walking
 * edges.
 * @param {object} db
 * @param {string} scopeName - The scope to search for (e.g., class name)
 * @param {object} [opts]
 * @param {string} [opts.kind] - Filter by node kind
 * @param {string} [opts.file] - Filter by file path (substring LIKE match, wildcards escaped)
 * @returns {object[]}
 */
export function findNodesByScope(db, scopeName, opts = {}) {
  // SQL fragments and their bound values are collected side by side.
  const fragments = ['SELECT * FROM nodes WHERE scope = ?'];
  const bindings = [scopeName];

  if (opts.kind) {
    fragments.push(' AND kind = ?');
    bindings.push(opts.kind);
  }
  if (opts.file) {
    fragments.push(" AND file LIKE ? ESCAPE '\\'");
    bindings.push(`%${escapeLike(opts.file)}%`);
  }
  fragments.push(' ORDER BY file, line');

  return db.prepare(fragments.join('')).all(...bindings);
}
282
+
// Cache for the file-filtered variant. It needs its own WeakMap because the
// caches in this file hold a single statement per db handle.
const _findNodeByQualifiedNameInFileStmt = new WeakMap();

/**
 * Find nodes by qualified name. Returns all matches, since the same
 * qualified_name can exist in different files (e.g., two `DateHelper`
 * classes in separate modules that each define `format`). Pass `opts.file`
 * to narrow the result.
 *
 * Both query shapes use a fixed SQL string, so both go through `cachedStmt`
 * instead of re-preparing the file-filtered statement on every call.
 *
 * @param {object} db
 * @param {string} qualifiedName - e.g., 'DateHelper.format'
 * @param {object} [opts]
 * @param {string} [opts.file] - Filter by file path (substring LIKE match, wildcards escaped)
 * @returns {object[]} Matching node rows ordered by file, then line.
 */
export function findNodeByQualifiedName(db, qualifiedName, opts = {}) {
  if (opts.file) {
    return cachedStmt(
      _findNodeByQualifiedNameInFileStmt,
      db,
      "SELECT * FROM nodes WHERE qualified_name = ? AND file LIKE ? ESCAPE '\\' ORDER BY file, line",
    ).all(qualifiedName, `%${escapeLike(opts.file)}%`);
  }
  return cachedStmt(
    _findNodeByQualifiedNameStmt,
    db,
    'SELECT * FROM nodes WHERE qualified_name = ? ORDER BY file, line',
  ).all(qualifiedName);
}
@@ -0,0 +1,219 @@
1
+ import { Repository } from './base.js';
2
+ import { hasCfgTables } from './cfg.js';
3
+ import { getComplexityForNode } from './complexity.js';
4
+ import { hasDataflowTable } from './dataflow.js';
5
+ import {
6
+ countCrossFileCallers,
7
+ findAllIncomingEdges,
8
+ findAllOutgoingEdges,
9
+ findCalleeNames,
10
+ findCallees,
11
+ findCallerNames,
12
+ findCallers,
13
+ findCrossFileCallTargets,
14
+ findDistinctCallers,
15
+ findImportDependents,
16
+ findImportSources,
17
+ findImportTargets,
18
+ findIntraFileCallEdges,
19
+ getClassHierarchy,
20
+ } from './edges.js';
21
+ import { hasEmbeddings } from './embeddings.js';
22
+ import { getCallableNodes, getCallEdges, getFileNodesAll, getImportEdges } from './graph-read.js';
23
+ import {
24
+ bulkNodeIdsByFile,
25
+ countEdges,
26
+ countFiles,
27
+ countNodes,
28
+ findFileNodes,
29
+ findNodeById,
30
+ findNodeByQualifiedName,
31
+ findNodeChildren,
32
+ findNodesByFile,
33
+ findNodesByScope,
34
+ findNodesForTriage,
35
+ findNodesWithFanIn,
36
+ getFunctionNodeId,
37
+ getNodeId,
38
+ iterateFunctionNodes,
39
+ listFunctionNodes,
40
+ } from './nodes.js';
41
+
/**
 * SqliteRepository — adapter that exposes the existing `fn(db, ...)`
 * repository functions as instance methods, so callers can write
 * `repo.method(...)` against the Repository interface.
 *
 * Every method forwards its arguments unchanged to the module-level function
 * of the same name, passing the wrapped db handle as the first argument.
 */
export class SqliteRepository extends Repository {
  #handle;

  /** @param {object} db - better-sqlite3 Database instance */
  constructor(db) {
    super();
    this.#handle = db;
  }

  /** Expose the underlying db for code that still needs raw access. */
  get db() {
    return this.#handle;
  }

  // ── Node lookups ──────────────────────────────────────────────────

  findNodeById(id) {
    return findNodeById(this.#handle, id);
  }

  findNodesByFile(file) {
    return findNodesByFile(this.#handle, file);
  }

  findFileNodes(fileLike) {
    return findFileNodes(this.#handle, fileLike);
  }

  findNodesWithFanIn(namePattern, opts) {
    return findNodesWithFanIn(this.#handle, namePattern, opts);
  }

  countNodes() {
    return countNodes(this.#handle);
  }

  countEdges() {
    return countEdges(this.#handle);
  }

  countFiles() {
    return countFiles(this.#handle);
  }

  getNodeId(name, kind, file, line) {
    return getNodeId(this.#handle, name, kind, file, line);
  }

  getFunctionNodeId(name, file, line) {
    return getFunctionNodeId(this.#handle, name, file, line);
  }

  bulkNodeIdsByFile(file) {
    return bulkNodeIdsByFile(this.#handle, file);
  }

  findNodeChildren(parentId) {
    return findNodeChildren(this.#handle, parentId);
  }

  findNodesByScope(scopeName, opts) {
    return findNodesByScope(this.#handle, scopeName, opts);
  }

  findNodeByQualifiedName(qualifiedName, opts) {
    return findNodeByQualifiedName(this.#handle, qualifiedName, opts);
  }

  listFunctionNodes(opts) {
    return listFunctionNodes(this.#handle, opts);
  }

  iterateFunctionNodes(opts) {
    return iterateFunctionNodes(this.#handle, opts);
  }

  findNodesForTriage(opts) {
    return findNodesForTriage(this.#handle, opts);
  }

  // ── Edge queries ──────────────────────────────────────────────────

  findCallees(nodeId) {
    return findCallees(this.#handle, nodeId);
  }

  findCallers(nodeId) {
    return findCallers(this.#handle, nodeId);
  }

  findDistinctCallers(nodeId) {
    return findDistinctCallers(this.#handle, nodeId);
  }

  findAllOutgoingEdges(nodeId) {
    return findAllOutgoingEdges(this.#handle, nodeId);
  }

  findAllIncomingEdges(nodeId) {
    return findAllIncomingEdges(this.#handle, nodeId);
  }

  findCalleeNames(nodeId) {
    return findCalleeNames(this.#handle, nodeId);
  }

  findCallerNames(nodeId) {
    return findCallerNames(this.#handle, nodeId);
  }

  findImportTargets(nodeId) {
    return findImportTargets(this.#handle, nodeId);
  }

  findImportSources(nodeId) {
    return findImportSources(this.#handle, nodeId);
  }

  findImportDependents(nodeId) {
    return findImportDependents(this.#handle, nodeId);
  }

  findCrossFileCallTargets(file) {
    return findCrossFileCallTargets(this.#handle, file);
  }

  countCrossFileCallers(nodeId, file) {
    return countCrossFileCallers(this.#handle, nodeId, file);
  }

  getClassHierarchy(classNodeId) {
    return getClassHierarchy(this.#handle, classNodeId);
  }

  findIntraFileCallEdges(file) {
    return findIntraFileCallEdges(this.#handle, file);
  }

  // ── Graph-read queries ────────────────────────────────────────────

  getCallableNodes() {
    return getCallableNodes(this.#handle);
  }

  getCallEdges() {
    return getCallEdges(this.#handle);
  }

  getFileNodesAll() {
    return getFileNodesAll(this.#handle);
  }

  getImportEdges() {
    return getImportEdges(this.#handle);
  }

  // ── Optional table checks ─────────────────────────────────────────

  hasCfgTables() {
    return hasCfgTables(this.#handle);
  }

  hasEmbeddings() {
    return hasEmbeddings(this.#handle);
  }

  hasDataflowTable() {
    return hasDataflowTable(this.#handle);
  }

  getComplexityForNode(nodeId) {
    return getComplexityForNode(this.#handle, nodeId);
  }
}
package/src/db.js CHANGED
@@ -29,8 +29,10 @@ export {
29
29
  findImportTargets,
30
30
  findIntraFileCallEdges,
31
31
  findNodeById,
32
+ findNodeByQualifiedName,
32
33
  findNodeChildren,
33
34
  findNodesByFile,
35
+ findNodesByScope,
34
36
  findNodesForTriage,
35
37
  findNodesWithFanIn,
36
38
  getCallableNodes,
@@ -50,9 +52,12 @@ export {
50
52
  hasCoChanges,
51
53
  hasDataflowTable,
52
54
  hasEmbeddings,
55
+ InMemoryRepository,
53
56
  iterateFunctionNodes,
54
57
  listFunctionNodes,
55
58
  purgeFileData,
56
59
  purgeFilesData,
60
+ Repository,
61
+ SqliteRepository,
57
62
  upsertCoChangeMeta,
58
63
  } from './db/repository/index.js';
@@ -0,0 +1,163 @@
1
+ import fs from 'node:fs';
2
+ import path from 'node:path';
3
+ import { closeDb, findDbPath, openDb } from '../db.js';
4
+ import { DbError } from '../errors.js';
5
+ import { warn } from '../logger.js';
6
+ import { embed, getModelConfig } from './models.js';
7
+ import { buildSourceText } from './strategies/source.js';
8
+ import { buildStructuredText } from './strategies/structured.js';
9
+
/**
 * Approximate the token count of `text` using a flat ratio of roughly four
 * characters per token (code/English), rounded up.
 * Deliberately crude so that no tokenizer dependency is needed.
 */
export function estimateTokens(text) {
  const CHARS_PER_TOKEN = 4;
  const { length } = text;
  return Math.ceil(length / CHARS_PER_TOKEN);
}
17
+
/**
 * Create the tables the embeddings subsystem writes to, if they are missing:
 * `embeddings`, `embedding_meta`, and the FTS5 virtual table `fts_index`.
 * Also adds the `full_text` column to `embeddings` when it is absent.
 *
 * @param {object} db - Database handle exposing `exec(sql)`.
 * @throws Rethrows any error raised by `db.exec`, except the "duplicate
 *   column name" error that signals `full_text` already exists.
 */
function initEmbeddingsSchema(db) {
  db.exec(`
    CREATE TABLE IF NOT EXISTS embeddings (
      node_id INTEGER PRIMARY KEY,
      vector BLOB NOT NULL,
      text_preview TEXT,
      FOREIGN KEY(node_id) REFERENCES nodes(id)
    );
    CREATE TABLE IF NOT EXISTS embedding_meta (
      key TEXT PRIMARY KEY,
      value TEXT
    );
  `);

  // Add full_text column (idempotent). Only the "column already exists"
  // failure is expected here; anything else (locked or read-only database,
  // corruption, ...) is a real error and must not be hidden.
  try {
    db.exec('ALTER TABLE embeddings ADD COLUMN full_text TEXT');
  } catch (err) {
    const alreadyExists = /duplicate column name/i.test(String(err?.message));
    if (!alreadyExists) throw err;
  }

  // FTS5 virtual table for BM25 keyword search
  db.exec(`
    CREATE VIRTUAL TABLE IF NOT EXISTS fts_index USING fts5(
      name,
      content,
      tokenize='unicode61'
    );
  `);
}
48
+
/**
 * Build embeddings for all functions/methods/classes in the graph.
 *
 * Reads every `function`/`method`/`class` node, builds one text per symbol
 * with the selected strategy, embeds the texts, and replaces the contents of
 * `embeddings`, `embedding_meta` and `fts_index` with the new data.
 *
 * The old index is cleared inside the same transaction that writes the new
 * rows, so a failure while reading files or embedding leaves the previous
 * index intact. The db handle is closed on every exit path.
 *
 * @param {string} rootDir - Project root directory
 * @param {string} modelKey - Model identifier from MODELS registry
 * @param {string} [customDbPath] - Override path to graph.db
 * @param {object} [options] - Embedding options
 * @param {string} [options.strategy='structured'] - 'structured' (graph-enriched) or 'source' (raw code)
 * @returns {Promise<void>}
 * @throws {DbError} When no database file exists at the resolved path.
 */
export async function buildEmbeddings(rootDir, modelKey, customDbPath, options = {}) {
  const strategy = options.strategy || 'structured';
  const dbPath = customDbPath || findDbPath(null);

  if (!fs.existsSync(dbPath)) {
    throw new DbError(
      `No codegraph database found at ${dbPath}.\nRun "codegraph build" first to analyze your codebase.`,
      { file: dbPath },
    );
  }

  const db = openDb(dbPath);
  try {
    initEmbeddingsSchema(db);

    const nodes = db
      .prepare(
        `SELECT * FROM nodes WHERE kind IN ('function', 'method', 'class') ORDER BY file, line`,
      )
      .all();

    console.log(`Building embeddings for ${nodes.length} symbols (strategy: ${strategy})...`);

    // Group by file so each source file is read from disk only once.
    const byFile = new Map();
    for (const node of nodes) {
      if (!byFile.has(node.file)) byFile.set(node.file, []);
      byFile.get(node.file).push(node);
    }

    // Parallel arrays: index i describes the same symbol in each of them.
    const texts = [];
    const nodeIds = [];
    const nodeNames = [];
    const previews = [];
    const config = getModelConfig(modelKey);
    const contextWindow = config.contextWindow;
    let overflowCount = 0;

    for (const [file, fileNodes] of byFile) {
      const fullPath = path.join(rootDir, file);
      let lines;
      try {
        lines = fs.readFileSync(fullPath, 'utf-8').split('\n');
      } catch (err) {
        // Best effort: an unreadable file skips its symbols, not the whole build.
        warn(`Cannot read ${file} for embeddings: ${err.message}`);
        continue;
      }

      for (const node of fileNodes) {
        let text =
          strategy === 'structured'
            ? buildStructuredText(node, file, lines, db)
            : buildSourceText(node, file, lines);

        // Detect and handle context window overflow. The cut uses the same
        // ~4 chars/token ratio as estimateTokens.
        const tokens = estimateTokens(text);
        if (tokens > contextWindow) {
          overflowCount++;
          const maxChars = contextWindow * 4;
          text = text.slice(0, maxChars);
        }

        texts.push(text);
        nodeIds.push(node.id);
        nodeNames.push(node.name);
        previews.push(`${node.name} (${node.kind}) -- ${file}:${node.line}`);
      }
    }

    if (overflowCount > 0) {
      warn(
        `${overflowCount} symbol(s) exceeded model context window (${contextWindow} tokens) and were truncated`,
      );
    }

    console.log(`Embedding ${texts.length} symbols...`);
    const { vectors, dim } = await embed(texts, modelKey);

    const insert = db.prepare(
      'INSERT OR REPLACE INTO embeddings (node_id, vector, text_preview, full_text) VALUES (?, ?, ?, ?)',
    );
    const insertFts = db.prepare('INSERT INTO fts_index(rowid, name, content) VALUES (?, ?, ?)');
    const insertMeta = db.prepare(
      'INSERT OR REPLACE INTO embedding_meta (key, value) VALUES (?, ?)',
    );
    const replaceAll = db.transaction(() => {
      // Clear the previous index only now that the replacement data exists.
      db.exec('DELETE FROM embeddings');
      db.exec('DELETE FROM embedding_meta');
      db.exec('DELETE FROM fts_index');

      for (let i = 0; i < vectors.length; i++) {
        const vec = vectors[i];
        // Copy only this vector's bytes. If `vec` is a view into a larger
        // shared buffer, `Buffer.from(vec.buffer)` alone would store all of it.
        const blob = Buffer.from(vec.buffer, vec.byteOffset, vec.byteLength);
        insert.run(nodeIds[i], blob, previews[i], texts[i]);
        insertFts.run(nodeIds[i], nodeNames[i], texts[i]);
      }
      insertMeta.run('model', config.name);
      insertMeta.run('dim', String(dim));
      insertMeta.run('count', String(vectors.length));
      insertMeta.run('fts_count', String(vectors.length));
      insertMeta.run('strategy', strategy);
      insertMeta.run('built_at', new Date().toISOString());
      if (overflowCount > 0) {
        insertMeta.run('truncated_count', String(overflowCount));
      }
    });
    replaceAll();

    console.log(
      `\nStored ${vectors.length} embeddings (${dim}d, ${config.name}, strategy: ${strategy}) in graph.db`,
    );
  } finally {
    // Release the handle even when model lookup, embedding or inserts fail.
    closeDb(db);
  }
}
@@ -0,0 +1,13 @@
1
+ /**
2
+ * Embeddings subsystem — public API barrel.
3
+ *
4
+ * Re-exports everything consumers previously imported from `../embedder.js`.
5
+ */
6
+
7
+ export { buildEmbeddings, estimateTokens } from './generator.js';
8
+ export { DEFAULT_MODEL, disposeModel, EMBEDDING_STRATEGIES, embed, MODELS } from './models.js';
9
+ export { search } from './search/cli-formatter.js';
10
+ export { hybridSearchData } from './search/hybrid.js';
11
+ export { ftsSearchData } from './search/keyword.js';
12
+ export { multiSearchData, searchData } from './search/semantic.js';
13
+ export { cosineSim } from './stores/sqlite-blob.js';