@zuvia-software-solutions/code-mapper 1.4.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137)
  1. package/dist/cli/ai-context.js +1 -1
  2. package/dist/cli/analyze.d.ts +1 -0
  3. package/dist/cli/analyze.js +73 -82
  4. package/dist/cli/augment.js +0 -2
  5. package/dist/cli/eval-server.d.ts +2 -2
  6. package/dist/cli/eval-server.js +6 -6
  7. package/dist/cli/index.js +6 -10
  8. package/dist/cli/mcp.d.ts +1 -3
  9. package/dist/cli/mcp.js +3 -3
  10. package/dist/cli/refresh.d.ts +2 -2
  11. package/dist/cli/refresh.js +24 -29
  12. package/dist/cli/status.js +4 -13
  13. package/dist/cli/tool.d.ts +5 -4
  14. package/dist/cli/tool.js +8 -10
  15. package/dist/config/ignore-service.js +14 -34
  16. package/dist/core/augmentation/engine.js +53 -83
  17. package/dist/core/db/adapter.d.ts +99 -0
  18. package/dist/core/db/adapter.js +402 -0
  19. package/dist/core/db/graph-loader.d.ts +27 -0
  20. package/dist/core/db/graph-loader.js +148 -0
  21. package/dist/core/db/queries.d.ts +160 -0
  22. package/dist/core/db/queries.js +441 -0
  23. package/dist/core/db/schema.d.ts +108 -0
  24. package/dist/core/db/schema.js +136 -0
  25. package/dist/core/embeddings/embedder.d.ts +21 -12
  26. package/dist/core/embeddings/embedder.js +104 -50
  27. package/dist/core/embeddings/embedding-pipeline.d.ts +48 -22
  28. package/dist/core/embeddings/embedding-pipeline.js +220 -262
  29. package/dist/core/embeddings/text-generator.js +4 -19
  30. package/dist/core/embeddings/types.d.ts +1 -1
  31. package/dist/core/graph/graph.d.ts +1 -1
  32. package/dist/core/graph/graph.js +1 -0
  33. package/dist/core/graph/types.d.ts +11 -9
  34. package/dist/core/graph/types.js +4 -1
  35. package/dist/core/incremental/refresh.d.ts +46 -0
  36. package/dist/core/incremental/refresh.js +503 -0
  37. package/dist/core/incremental/types.d.ts +2 -1
  38. package/dist/core/incremental/types.js +42 -44
  39. package/dist/core/ingestion/ast-cache.js +1 -0
  40. package/dist/core/ingestion/call-processor.d.ts +15 -3
  41. package/dist/core/ingestion/call-processor.js +448 -60
  42. package/dist/core/ingestion/cluster-enricher.d.ts +1 -1
  43. package/dist/core/ingestion/cluster-enricher.js +2 -0
  44. package/dist/core/ingestion/community-processor.d.ts +1 -1
  45. package/dist/core/ingestion/community-processor.js +8 -3
  46. package/dist/core/ingestion/export-detection.d.ts +1 -1
  47. package/dist/core/ingestion/export-detection.js +1 -1
  48. package/dist/core/ingestion/filesystem-walker.js +1 -1
  49. package/dist/core/ingestion/heritage-processor.d.ts +2 -2
  50. package/dist/core/ingestion/heritage-processor.js +22 -11
  51. package/dist/core/ingestion/import-processor.d.ts +2 -2
  52. package/dist/core/ingestion/import-processor.js +24 -9
  53. package/dist/core/ingestion/language-config.js +7 -4
  54. package/dist/core/ingestion/mro-processor.d.ts +1 -1
  55. package/dist/core/ingestion/mro-processor.js +23 -11
  56. package/dist/core/ingestion/named-binding-extraction.js +5 -5
  57. package/dist/core/ingestion/parsing-processor.d.ts +4 -4
  58. package/dist/core/ingestion/parsing-processor.js +26 -18
  59. package/dist/core/ingestion/pipeline.d.ts +4 -2
  60. package/dist/core/ingestion/pipeline.js +50 -20
  61. package/dist/core/ingestion/process-processor.d.ts +2 -2
  62. package/dist/core/ingestion/process-processor.js +28 -14
  63. package/dist/core/ingestion/resolution-context.d.ts +1 -1
  64. package/dist/core/ingestion/resolution-context.js +14 -4
  65. package/dist/core/ingestion/resolvers/csharp.js +4 -3
  66. package/dist/core/ingestion/resolvers/go.js +3 -1
  67. package/dist/core/ingestion/resolvers/jvm.js +13 -4
  68. package/dist/core/ingestion/resolvers/standard.js +2 -2
  69. package/dist/core/ingestion/resolvers/utils.js +6 -2
  70. package/dist/core/ingestion/route-stitcher.d.ts +15 -0
  71. package/dist/core/ingestion/route-stitcher.js +92 -0
  72. package/dist/core/ingestion/structure-processor.d.ts +1 -1
  73. package/dist/core/ingestion/structure-processor.js +3 -2
  74. package/dist/core/ingestion/symbol-table.d.ts +2 -0
  75. package/dist/core/ingestion/symbol-table.js +5 -1
  76. package/dist/core/ingestion/tree-sitter-queries.d.ts +2 -2
  77. package/dist/core/ingestion/tree-sitter-queries.js +177 -0
  78. package/dist/core/ingestion/type-env.js +20 -0
  79. package/dist/core/ingestion/type-extractors/csharp.js +4 -3
  80. package/dist/core/ingestion/type-extractors/go.js +23 -12
  81. package/dist/core/ingestion/type-extractors/php.js +18 -10
  82. package/dist/core/ingestion/type-extractors/ruby.js +15 -3
  83. package/dist/core/ingestion/type-extractors/rust.js +3 -2
  84. package/dist/core/ingestion/type-extractors/shared.js +3 -2
  85. package/dist/core/ingestion/type-extractors/typescript.js +11 -5
  86. package/dist/core/ingestion/utils.d.ts +27 -4
  87. package/dist/core/ingestion/utils.js +145 -100
  88. package/dist/core/ingestion/workers/parse-worker.d.ts +1 -0
  89. package/dist/core/ingestion/workers/parse-worker.js +97 -29
  90. package/dist/core/ingestion/workers/worker-pool.js +3 -0
  91. package/dist/core/search/bm25-index.d.ts +15 -8
  92. package/dist/core/search/bm25-index.js +48 -98
  93. package/dist/core/search/hybrid-search.d.ts +9 -3
  94. package/dist/core/search/hybrid-search.js +30 -25
  95. package/dist/core/search/reranker.js +9 -7
  96. package/dist/core/search/types.d.ts +0 -4
  97. package/dist/core/semantic/tsgo-service.d.ts +7 -1
  98. package/dist/core/semantic/tsgo-service.js +165 -66
  99. package/dist/lib/tsgo-test.d.ts +2 -0
  100. package/dist/lib/tsgo-test.js +6 -0
  101. package/dist/lib/type-utils.d.ts +25 -0
  102. package/dist/lib/type-utils.js +22 -0
  103. package/dist/lib/utils.d.ts +3 -2
  104. package/dist/lib/utils.js +3 -2
  105. package/dist/mcp/compatible-stdio-transport.js +1 -1
  106. package/dist/mcp/local/local-backend.d.ts +29 -56
  107. package/dist/mcp/local/local-backend.js +808 -1118
  108. package/dist/mcp/resources.js +35 -25
  109. package/dist/mcp/server.d.ts +1 -1
  110. package/dist/mcp/server.js +5 -5
  111. package/dist/mcp/tools.js +24 -25
  112. package/dist/storage/repo-manager.d.ts +2 -12
  113. package/dist/storage/repo-manager.js +1 -47
  114. package/dist/types/pipeline.d.ts +8 -5
  115. package/dist/types/pipeline.js +5 -0
  116. package/package.json +18 -11
  117. package/dist/cli/serve.d.ts +0 -5
  118. package/dist/cli/serve.js +0 -8
  119. package/dist/core/incremental/child-process.d.ts +0 -8
  120. package/dist/core/incremental/child-process.js +0 -649
  121. package/dist/core/incremental/refresh-coordinator.d.ts +0 -32
  122. package/dist/core/incremental/refresh-coordinator.js +0 -147
  123. package/dist/core/lbug/csv-generator.d.ts +0 -28
  124. package/dist/core/lbug/csv-generator.js +0 -355
  125. package/dist/core/lbug/lbug-adapter.d.ts +0 -96
  126. package/dist/core/lbug/lbug-adapter.js +0 -753
  127. package/dist/core/lbug/schema.d.ts +0 -46
  128. package/dist/core/lbug/schema.js +0 -402
  129. package/dist/mcp/core/embedder.d.ts +0 -24
  130. package/dist/mcp/core/embedder.js +0 -168
  131. package/dist/mcp/core/lbug-adapter.d.ts +0 -29
  132. package/dist/mcp/core/lbug-adapter.js +0 -330
  133. package/dist/server/api.d.ts +0 -5
  134. package/dist/server/api.js +0 -340
  135. package/dist/server/mcp-http.d.ts +0 -7
  136. package/dist/server/mcp-http.js +0 -95
  137. package/models/mlx-embedder.py +0 -185
@@ -0,0 +1,402 @@
1
+ // code-mapper/src/core/db/adapter.ts
2
+ /**
3
+ * @file Unified SQLite database adapter for code-mapper.
4
+ *
5
+ * Single module for both CLI and MCP database access.
6
+ * WAL mode: concurrent readers + single writer, zero lock conflicts.
7
+ * Synchronous better-sqlite3 API: no connection pools needed.
8
+ *
9
+ * All public functions are typed with branded IDs and schema types
10
+ * from schema.ts — the compiler prevents passing wrong ID types
11
+ * or invalid labels/edge types.
12
+ */
13
+ import Database from 'better-sqlite3';
14
+ import path from 'path';
15
+ import fs from 'fs';
16
+ import { SCHEMA_SQL, toNodeId, } from './schema.js';
17
// ---------------------------------------------------------------------------
// Singleton instances keyed by resolved dbPath
// ---------------------------------------------------------------------------
// Map<string, Database>: one open connection per resolved path. openDb()
// reuses entries; closeDb() closes and removes them.
const instances = new Map();
21
+ // ---------------------------------------------------------------------------
22
+ // Lifecycle
23
+ // ---------------------------------------------------------------------------
24
/**
 * Open (or reuse) a SQLite database. Creates the schema if new.
 * Connections are cached per resolved path, so repeated calls are cheap.
 */
export function openDb(dbPath) {
    const resolved = path.resolve(dbPath);
    const cached = instances.get(resolved);
    if (cached) {
        return cached;
    }
    // Make sure the parent directory exists before SQLite creates the file.
    const parentDir = path.dirname(resolved);
    if (!fs.existsSync(parentDir)) {
        fs.mkdirSync(parentDir, { recursive: true });
    }
    const db = new Database(resolved);
    // Performance pragmas: WAL gives concurrent readers + a single writer.
    db.pragma('journal_mode = WAL');
    db.pragma('synchronous = NORMAL');
    db.pragma('cache_size = -64000'); // 64MB (negative value = KB)
    db.pragma('mmap_size = 268435456'); // 256MB
    db.pragma('foreign_keys = OFF');
    // Schema statements all use IF NOT EXISTS, so this is idempotent.
    db.exec(SCHEMA_SQL);
    instances.set(resolved, db);
    return db;
}
45
/**
 * Close one database (when a path is given) or all open databases.
 * Closed connections are removed from the singleton cache.
 */
export function closeDb(dbPath) {
    if (dbPath) {
        const resolved = path.resolve(dbPath);
        const db = instances.get(resolved);
        if (db) {
            db.close();
            instances.delete(resolved);
        }
        return;
    }
    // No path: close every open connection.
    for (const [key, db] of instances) {
        db.close();
        instances.delete(key);
    }
}
61
/**
 * Delete and recreate a database (full re-analyze).
 * Removes the main file plus its WAL/SHM sidecars, best-effort.
 */
export function resetDb(dbPath) {
    closeDb(dbPath);
    const resolved = path.resolve(dbPath);
    for (const suffix of ['', '-wal', '-shm']) {
        try {
            fs.unlinkSync(resolved + suffix);
        }
        catch {
            // File may not exist — that's fine.
        }
    }
    return openDb(dbPath);
}
73
/** Check if a database connection is currently open for this path. */
export function isDbOpen(dbPath) {
    const resolved = path.resolve(dbPath);
    return instances.has(resolved);
}
77
+ // ---------------------------------------------------------------------------
78
+ // Node operations
79
+ // ---------------------------------------------------------------------------
80
/**
 * Expand an identifier into natural language words for FTS matching.
 * camelCase, PascalCase, snake_case, UPPER_CASE → space-separated words.
 *
 * Examples:
 *   buildTestGraph → "build test graph"
 *   processCallsFromExtracted → "process calls from extracted"
 *   MAX_CHAIN_DEPTH → "max chain depth"
 *   deleteNodesByFile → "delete nodes by file"
 */
export function expandIdentifier(name) {
    if (!name) {
        return '';
    }
    const spaced = name
        // lower→upper boundary: "buildTest" → "build Test"
        .replace(/([a-z])([A-Z])/g, '$1 $2')
        // acronym followed by a word: "HTTPRequest" → "HTTP Request"
        .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2')
        // snake_case / kebab-case separators become spaces
        .replace(/[_\-]+/g, ' ')
        // normalize runs of whitespace
        .replace(/\s+/g, ' ');
    return spaced.toLowerCase().trim();
}
105
// Upsert statement for the nodes table. Named @-parameters match the object
// keys produced by insertNode()/insertNodesBatch(); INSERT OR REPLACE uses
// the node id as the conflict key.
const INSERT_NODE_SQL = `
INSERT OR REPLACE INTO nodes (
id, label, name, filePath, startLine, endLine, isExported, content, description,
heuristicLabel, cohesion, symbolCount, keywords, enrichedBy,
processType, stepCount, communities, entryPointId, terminalId,
parameterCount, returnType, nameExpanded
) VALUES (
@id, @label, @name, @filePath, @startLine, @endLine, @isExported, @content, @description,
@heuristicLabel, @cohesion, @symbolCount, @keywords, @enrichedBy,
@processType, @stepCount, @communities, @entryPointId, @terminalId,
@parameterCount, @returnType, @nameExpanded
)
`;
118
/**
 * Insert or replace a node. Automatically expands the name for FTS
 * natural-language matching when no pre-computed expansion is supplied.
 */
export function insertNode(db, node) {
    const name = node.name ?? '';
    const params = {
        id: node.id,
        label: node.label,
        name,
        filePath: node.filePath ?? '',
        startLine: node.startLine ?? null,
        endLine: node.endLine ?? null,
        isExported: node.isExported ?? null,
        content: node.content ?? '',
        description: node.description ?? '',
        heuristicLabel: node.heuristicLabel ?? null,
        cohesion: node.cohesion ?? null,
        symbolCount: node.symbolCount ?? null,
        keywords: node.keywords ?? null,
        enrichedBy: node.enrichedBy ?? null,
        processType: node.processType ?? null,
        stepCount: node.stepCount ?? null,
        communities: node.communities ?? null,
        entryPointId: node.entryPointId ?? null,
        terminalId: node.terminalId ?? null,
        parameterCount: node.parameterCount ?? null,
        returnType: node.returnType ?? null,
        // Caller-provided expansion wins; otherwise derive it from the name.
        nameExpanded: node.nameExpanded ?? expandIdentifier(name),
    };
    db.prepare(INSERT_NODE_SQL).run(params);
}
145
/** Get a node by ID. Returns undefined when no node has that ID. */
export function getNode(db, id) {
    const stmt = db.prepare('SELECT * FROM nodes WHERE id = ?');
    return stmt.get(id);
}
149
/** Find nodes by name (optionally filtered by label). */
export function findNodesByName(db, name, label, limit = 10) {
    if (label) {
        const stmt = db.prepare('SELECT * FROM nodes WHERE name = ? AND label = ? LIMIT ?');
        return stmt.all(name, label, limit);
    }
    const stmt = db.prepare('SELECT * FROM nodes WHERE name = ? LIMIT ?');
    return stmt.all(name, limit);
}
156
/** Find all nodes belonging to a file path. */
export function findNodesByFile(db, filePath) {
    const stmt = db.prepare('SELECT * FROM nodes WHERE filePath = ?');
    return stmt.all(filePath);
}
160
/**
 * Find the node containing a given line in a file (best match).
 * "Best" = tightest enclosing span, i.e. smallest (endLine - startLine).
 */
export function findNodeAtLine(db, filePath, line, excludeLabel) {
    const base = 'SELECT * FROM nodes WHERE filePath = ? AND startLine <= ? AND endLine >= ?';
    const order = ' ORDER BY (endLine - startLine) LIMIT 1';
    if (excludeLabel) {
        return db.prepare(base + ' AND label != ?' + order).get(filePath, line, line, excludeLabel);
    }
    return db.prepare(base + order).get(filePath, line, line);
}
169
/** Count nodes, optionally restricted to one label. */
export function countNodes(db, label) {
    const row = label
        ? db.prepare('SELECT COUNT(*) AS cnt FROM nodes WHERE label = ?').get(label)
        : db.prepare('SELECT COUNT(*) AS cnt FROM nodes').get();
    return row.cnt;
}
176
/**
 * Delete all nodes (and related edges/embeddings) for a file path.
 *
 * Deletes edges FROM this file's nodes (outgoing). Incoming edges from other
 * files are preserved — node IDs are deterministic (label:filePath:name), so
 * re-inserted nodes get the same ID and those edges remain valid.
 *
 * @param db open better-sqlite3 database
 * @param filePath repo-relative file path whose nodes are removed
 * @returns number of node rows deleted
 */
export function deleteNodesByFile(db, filePath) {
    const rows = db.prepare('SELECT id FROM nodes WHERE filePath = ?').all(filePath);
    if (rows.length === 0)
        return 0;
    const ids = rows.map(n => n.id);
    // SQLite caps bound variables per statement (SQLITE_MAX_VARIABLE_NUMBER,
    // 999 in many builds). A file with many nodes could overflow a single
    // IN (...) list, so delete in bounded chunks instead.
    const CHUNK = 500;
    for (let i = 0; i < ids.length; i += CHUNK) {
        const chunk = ids.slice(i, i + CHUNK);
        const ph = chunk.map(() => '?').join(',');
        db.prepare(`DELETE FROM edges WHERE sourceId IN (${ph})`).run(...chunk);
        db.prepare(`DELETE FROM embeddings WHERE nodeId IN (${ph})`).run(...chunk);
    }
    return db.prepare('DELETE FROM nodes WHERE filePath = ?').run(filePath).changes;
}
190
+ // ---------------------------------------------------------------------------
191
+ // Edge operations
192
+ // ---------------------------------------------------------------------------
193
// Insert statement for the edges table. INSERT OR IGNORE makes edge
// insertion idempotent: rows with a duplicate edge id are silently skipped.
const INSERT_EDGE_SQL = `
INSERT OR IGNORE INTO edges (id, sourceId, targetId, type, confidence, reason, step, callLine)
VALUES (@id, @sourceId, @targetId, @type, @confidence, @reason, @step, @callLine)
`;
197
/** Insert an edge; duplicates are ignored (INSERT OR IGNORE). */
export function insertEdge(db, edge) {
    const params = {
        id: edge.id,
        sourceId: edge.sourceId,
        targetId: edge.targetId,
        type: edge.type,
        confidence: edge.confidence ?? 1.0,
        reason: edge.reason ?? '',
        step: edge.step ?? 0,
        callLine: edge.callLine ?? null,
    };
    db.prepare(INSERT_EDGE_SQL).run(params);
}
210
/** Find edges leaving a source node, optionally filtered by type. */
export function findEdgesFrom(db, sourceId, type, minConfidence = 0) {
    if (type) {
        const stmt = db.prepare('SELECT * FROM edges WHERE sourceId = ? AND type = ? AND confidence >= ?');
        return stmt.all(sourceId, type, minConfidence);
    }
    const stmt = db.prepare('SELECT * FROM edges WHERE sourceId = ? AND confidence >= ?');
    return stmt.all(sourceId, minConfidence);
}
217
/** Find edges arriving at a target node, optionally filtered by type. */
export function findEdgesTo(db, targetId, type, minConfidence = 0) {
    if (type) {
        const stmt = db.prepare('SELECT * FROM edges WHERE targetId = ? AND type = ? AND confidence >= ?');
        return stmt.all(targetId, type, minConfidence);
    }
    const stmt = db.prepare('SELECT * FROM edges WHERE targetId = ? AND confidence >= ?');
    return stmt.all(targetId, minConfidence);
}
224
/** Count all edges. */
export function countEdges(db) {
    const row = db.prepare('SELECT COUNT(*) AS cnt FROM edges').get();
    return row.cnt;
}
228
+ // ---------------------------------------------------------------------------
229
+ // Embedding operations
230
+ // ---------------------------------------------------------------------------
231
/** Insert or replace an embedding, stored as a binary Float32Array blob. */
export function insertEmbedding(db, nodeId, embedding) {
    const vec = new Float32Array(embedding);
    const blob = Buffer.from(vec.buffer);
    db.prepare('INSERT OR REPLACE INTO embeddings (nodeId, embedding) VALUES (?, ?)').run(nodeId, blob);
}
236
/**
 * Delete embeddings for nodes in a given file.
 *
 * @param db open better-sqlite3 database
 * @param filePath repo-relative file path whose embeddings are removed
 */
export function deleteEmbeddingsByFile(db, filePath) {
    const rows = db.prepare('SELECT id FROM nodes WHERE filePath = ?').all(filePath);
    if (rows.length === 0)
        return;
    const ids = rows.map(n => n.id);
    // Chunk the IN (...) list: SQLite caps bound variables per statement
    // (SQLITE_MAX_VARIABLE_NUMBER, 999 in many builds), and a file with many
    // nodes could otherwise exceed it.
    const CHUNK = 500;
    for (let i = 0; i < ids.length; i += CHUNK) {
        const chunk = ids.slice(i, i + CHUNK);
        const ph = chunk.map(() => '?').join(',');
        db.prepare(`DELETE FROM embeddings WHERE nodeId IN (${ph})`).run(...chunk);
    }
}
244
/** Count all stored embeddings. */
export function countEmbeddings(db) {
    const row = db.prepare('SELECT COUNT(*) AS cnt FROM embeddings').get();
    return row.cnt;
}
248
/**
 * Cosine similarity between two vectors.
 * Returns 0 when either vector has zero magnitude (avoids divide-by-zero).
 * Missing elements (length mismatch) are treated as 0.
 */
function cosineSimilarity(a, b) {
    let dot = 0;
    let magA = 0;
    let magB = 0;
    const len = a.length;
    for (let i = 0; i < len; i++) {
        const x = a[i] ?? 0;
        const y = b[i] ?? 0;
        dot += x * y;
        magA += x * x;
        magB += y * y;
    }
    const denom = Math.sqrt(magA) * Math.sqrt(magB);
    if (denom === 0) {
        return 0;
    }
    return dot / denom;
}
261
/**
 * Vector similarity search (brute-force cosine over all stored embeddings).
 * Fast enough for <200K vectors at 256 dims.
 * Returns { nodeId, distance } sorted nearest-first, capped at `limit`;
 * only hits with distance < maxDistance are kept.
 */
export function searchVector(db, queryVec, limit = 10, maxDistance = 0.5) {
    const rows = db.prepare('SELECT nodeId, embedding FROM embeddings').all();
    const hits = [];
    for (const { nodeId, embedding } of rows) {
        // Reinterpret the stored blob as float32 without copying.
        const vec = new Float32Array(embedding.buffer, embedding.byteOffset, embedding.byteLength / 4);
        const distance = 1 - cosineSimilarity(queryVec, vec);
        if (distance < maxDistance) {
            hits.push({ nodeId: toNodeId(nodeId), distance });
        }
    }
    hits.sort((x, y) => x.distance - y.distance);
    return hits.slice(0, limit);
}
275
+ // ---------------------------------------------------------------------------
276
+ // FTS search
277
+ // ---------------------------------------------------------------------------
278
/**
 * Full-text search across nodes. FTS5 auto-maintains the index.
 *
 * Multi-word queries are joined with OR (FTS5 defaults to AND) so natural
 * language queries are more forgiving. If the FTS5 query still fails to
 * parse, the search falls back to a plain LIKE scan over name/content.
 *
 * @param db open better-sqlite3 database
 * @param query raw user query (sanitized internally)
 * @param limit max rows returned
 * @returns rows of { id, name, label, filePath, score }
 */
export function searchFTS(db, query, limit = 20) {
    // Sanitize for FTS5 — escape double quotes, strip operator characters.
    const sanitized = query.replace(/"/g, '""').replace(/[*(){}[\]^~\\:]/g, ' ').trim();
    if (!sanitized)
        return [];
    // Multi-word queries: use OR so natural language works
    // ("find nodes by name in database"). FTS5 default is AND.
    const words = sanitized.split(/\s+/).filter(w => w.length > 1);
    const ftsQuery = words.length > 1 ? words.join(' OR ') : sanitized;
    try {
        return db.prepare(`
    SELECT n.id, n.name, n.label, n.filePath, -fts.rank AS score
    FROM nodes_fts fts
    JOIN nodes n ON n.rowid = fts.rowid
    WHERE nodes_fts MATCH ?
    ORDER BY fts.rank
    LIMIT ?
    `).all(ftsQuery, limit);
    }
    catch {
        // FTS query syntax error — fall back to LIKE. Use the sanitized text,
        // NOT the OR-joined form: previously the fallback searched for the
        // literal substring "foo OR bar", which could never match.
        const pattern = `%${sanitized}%`;
        return db.prepare(`
    SELECT id, name, label, filePath, 1.0 AS score
    FROM nodes
    WHERE name LIKE ? OR content LIKE ?
    LIMIT ?
    `).all(pattern, pattern, limit);
    }
}
310
+ // ---------------------------------------------------------------------------
311
+ // Stats
312
+ // ---------------------------------------------------------------------------
313
/** Get node count, edge count, and embedding count in one object. */
export function getStats(db) {
    const nodes = countNodes(db);
    const edges = countEdges(db);
    const embeddings = countEmbeddings(db);
    return { nodes, edges, embeddings };
}
321
+ // ---------------------------------------------------------------------------
322
+ // Batch operations (transactional)
323
+ // ---------------------------------------------------------------------------
324
/** Batch insert nodes in a single transaction (one prepared statement reused). */
export function insertNodesBatch(db, nodes) {
    const stmt = db.prepare(INSERT_NODE_SQL);
    // Normalize one node record into the named-parameter shape the SQL expects.
    const toParams = (node) => ({
        id: node.id,
        label: node.label,
        name: node.name ?? '',
        filePath: node.filePath ?? '',
        startLine: node.startLine ?? null,
        endLine: node.endLine ?? null,
        isExported: node.isExported ?? null,
        content: node.content ?? '',
        description: node.description ?? '',
        heuristicLabel: node.heuristicLabel ?? null,
        cohesion: node.cohesion ?? null,
        symbolCount: node.symbolCount ?? null,
        keywords: node.keywords ?? null,
        enrichedBy: node.enrichedBy ?? null,
        processType: node.processType ?? null,
        stepCount: node.stepCount ?? null,
        communities: node.communities ?? null,
        entryPointId: node.entryPointId ?? null,
        terminalId: node.terminalId ?? null,
        parameterCount: node.parameterCount ?? null,
        returnType: node.returnType ?? null,
        nameExpanded: node.nameExpanded ?? expandIdentifier(node.name ?? ''),
    });
    const runAll = db.transaction((items) => {
        for (const node of items) {
            stmt.run(toParams(node));
        }
    });
    runAll(nodes);
}
345
/** Batch insert edges in a single transaction (duplicates ignored). */
export function insertEdgesBatch(db, edges) {
    const stmt = db.prepare(INSERT_EDGE_SQL);
    const runAll = db.transaction((items) => {
        for (const edge of items) {
            stmt.run({
                id: edge.id,
                sourceId: edge.sourceId,
                targetId: edge.targetId,
                type: edge.type,
                confidence: edge.confidence ?? 1.0,
                reason: edge.reason ?? '',
                step: edge.step ?? 0,
                callLine: edge.callLine ?? null,
            });
        }
    });
    runAll(edges);
}
359
/** Batch insert embeddings in a single transaction (float32 blobs + optional textHash). */
export function insertEmbeddingsBatch(db, items) {
    const stmt = db.prepare('INSERT OR REPLACE INTO embeddings (nodeId, embedding, textHash) VALUES (?, ?, ?)');
    const runAll = db.transaction((batch) => {
        for (const { nodeId, embedding, textHash } of batch) {
            const blob = Buffer.from(new Float32Array(embedding).buffer);
            stmt.run(nodeId, blob, textHash ?? null);
        }
    });
    runAll(items);
}
370
/** Get all textHashes from the embeddings table for hash-based skip on re-index */
export function getEmbeddingHashes(db) {
    const rows = db.prepare('SELECT nodeId, textHash FROM embeddings WHERE textHash IS NOT NULL').all();
    // Map<nodeId, textHash> for O(1) lookup during re-index.
    return new Map(rows.map(row => [row.nodeId, row.textHash]));
}
379
+ // ---------------------------------------------------------------------------
380
+ // Raw SQL escape (for dynamic queries in local-backend.ts)
381
+ // ---------------------------------------------------------------------------
382
/** Escape a string for a SQL single-quoted literal (doubles each '). */
export function escapeSql(value) {
    return value.split("'").join("''");
}
386
+ // ---------------------------------------------------------------------------
387
+ // Raw query execution (for migration compatibility)
388
+ // ---------------------------------------------------------------------------
389
/** Execute a raw SQL query and return all rows. */
export function rawQuery(db, sql, params) {
    const stmt = db.prepare(sql);
    return params ? stmt.all(...params) : stmt.all();
}
396
/** Execute a raw SQL statement; returns better-sqlite3's run info. */
export function rawRun(db, sql, params) {
    const stmt = db.prepare(sql);
    return params ? stmt.run(...params) : stmt.run();
}
@@ -0,0 +1,27 @@
1
/**
 * @file Loads a KnowledgeGraph into SQLite.
 *
 * Replaces the old CSV-generator + graph DB COPY pipeline (~800 lines)
 * with direct batch INSERTs in transactions. SQLite handles this in <2s
 * for typical codebases (1-5K nodes) because:
 * - Prepared statements are reused across the batch
 * - WAL mode doesn't block readers during the write
 * - No CSV serialization/deserialization overhead
 */
import type Database from 'better-sqlite3';
import { type KnowledgeGraph } from '../graph/types.js';
/** Progress callback invoked with human-readable status messages. */
export type LoadProgressCallback = (message: string) => void;
/** Result of loading a graph */
export interface LoadResult {
    readonly nodesInserted: number;
    readonly edgesInserted: number;
    /** Non-fatal issues encountered during the load (e.g. skipped nodes). */
    readonly warnings: readonly string[];
}
/**
 * Load an in-memory KnowledgeGraph into a SQLite database.
 *
 * This is the full-pipeline path used by `code-mapper analyze`.
 * For incremental updates, use the adapter functions directly.
 */
export declare function loadGraphToDb(db: Database.Database, graph: KnowledgeGraph, repoPath: string, onProgress?: LoadProgressCallback): LoadResult;
@@ -0,0 +1,148 @@
1
+ // code-mapper/src/core/db/graph-loader.ts
2
+ /**
3
+ * @file Loads a KnowledgeGraph into SQLite.
4
+ *
5
+ * Replaces the old CSV-generator + graph DB COPY pipeline (~800 lines)
6
+ * with direct batch INSERTs in transactions. SQLite handles this in <2s
7
+ * for typical codebases (1-5K nodes) because:
8
+ * - Prepared statements are reused across the batch
9
+ * - WAL mode doesn't block readers during the write
10
+ * - No CSV serialization/deserialization overhead
11
+ */
12
+ import fs from 'fs';
13
+ import path from 'path';
14
+ import {} from '../graph/types.js';
15
+ import { NODE_LABELS, toNodeId, toEdgeId, } from './schema.js';
16
+ import {} from './schema.js';
17
+ import { insertNodesBatch, insertEdgesBatch } from './adapter.js';
18
/** Maximum characters of source content to store per File node */
const MAX_FILE_CONTENT = 100_000;
/** Maximum characters of source snippet to store per symbol node */
const MAX_SNIPPET = 50_000;
22
/**
 * Check if content is likely binary: any NUL byte, or more than 10%
 * non-printable characters in the first 1KB sample (tab/LF/CR allowed).
 */
function isBinaryContent(content) {
    if (content.length === 0) {
        return false;
    }
    const sampleSize = Math.min(content.length, 1024);
    let suspicious = 0;
    for (let i = 0; i < sampleSize; i++) {
        const code = content.charCodeAt(i);
        if (code === 0) {
            return true; // NUL byte: definitely binary
        }
        const isControl = code < 32 && code !== 9 && code !== 10 && code !== 13;
        if (isControl) {
            suspicious++;
        }
    }
    return suspicious / sampleSize > 0.1;
}
39
/**
 * Read and truncate source content for a node.
 * - Folder nodes and unreadable/missing files yield ''.
 * - File nodes get the whole file, capped at MAX_FILE_CONTENT.
 * - Symbol nodes get their line span plus 2 lines of surrounding context,
 *   capped at MAX_SNIPPET.
 * Binary-looking content is replaced with a placeholder string.
 */
function getNodeContent(repoPath, node) {
    const filePath = node.filePath;
    if (!filePath || node.label === 'Folder') {
        return '';
    }
    let source;
    try {
        source = fs.readFileSync(path.join(repoPath, filePath), 'utf-8');
    }
    catch {
        return ''; // file missing or unreadable — store nothing
    }
    if (isBinaryContent(source)) {
        return '[Binary file]';
    }
    if (node.label === 'File') {
        if (source.length > MAX_FILE_CONTENT) {
            return source.slice(0, MAX_FILE_CONTENT) + '\n... [truncated]';
        }
        return source;
    }
    const { startLine, endLine } = node;
    if (startLine === undefined || endLine === undefined) {
        return '';
    }
    const lines = source.split('\n');
    const from = Math.max(0, startLine - 2);
    const to = Math.min(lines.length - 1, endLine + 2);
    const snippet = lines.slice(from, to + 1).join('\n');
    if (snippet.length > MAX_SNIPPET) {
        return snippet.slice(0, MAX_SNIPPET) + '\n... [truncated]';
    }
    return snippet;
}
75
/**
 * Load an in-memory KnowledgeGraph into a SQLite database.
 *
 * This is the full-pipeline path used by `code-mapper analyze`.
 * For incremental updates, use the adapter functions directly.
 *
 * @param db open better-sqlite3 database
 * @param graph the KnowledgeGraph to persist
 * @param repoPath absolute repo root, used to read node source content
 * @param onProgress optional progress-message callback
 * @returns counts of inserted nodes/edges plus any warnings
 */
export function loadGraphToDb(db, graph, repoPath, onProgress) {
    const log = onProgress ?? (() => { });
    const warnings = [];
    const validLabels = new Set(NODE_LABELS);
    // ── Nodes ────────────────────────────────────────────────────────────
    log('Preparing nodes...');
    const nodeRows = [];
    for (const node of graph.iterNodes()) {
        if (!validLabels.has(node.label)) {
            warnings.push(`Skipped node with unknown label: ${node.label}`);
            continue;
        }
        const p = node.properties;
        // Only pass defined line fields to the content reader.
        const contentSpec = { label: node.label, filePath: p.filePath };
        if (p.startLine !== undefined) {
            contentSpec.startLine = p.startLine;
        }
        if (p.endLine !== undefined) {
            contentSpec.endLine = p.endLine;
        }
        nodeRows.push({
            id: toNodeId(node.id),
            label: node.label,
            name: p.name ?? '',
            filePath: p.filePath ?? '',
            startLine: p.startLine ?? null,
            endLine: p.endLine ?? null,
            isExported: p.isExported ? 1 : null,
            content: getNodeContent(repoPath, contentSpec),
            description: p.description ?? '',
            heuristicLabel: p.heuristicLabel ?? null,
            cohesion: p.cohesion ?? null,
            symbolCount: p.symbolCount ?? null,
            keywords: p.keywords ? JSON.stringify(p.keywords) : null,
            enrichedBy: p.enrichedBy ?? null,
            processType: p.processType ?? null,
            stepCount: p.stepCount ?? null,
            communities: p.communities ? JSON.stringify(p.communities) : null,
            entryPointId: p.entryPointId ?? null,
            terminalId: p.terminalId ?? null,
            parameterCount: p.parameterCount ?? null,
            returnType: p.returnType ?? null,
        });
    }
    log(`Inserting ${nodeRows.length} nodes...`);
    insertNodesBatch(db, nodeRows);
    // ── Edges ────────────────────────────────────────────────────────────
    log('Preparing edges...');
    const edgeRows = [];
    for (const rel of graph.iterRelationships()) {
        edgeRows.push({
            id: toEdgeId(rel.id),
            sourceId: toNodeId(rel.sourceId),
            targetId: toNodeId(rel.targetId),
            type: rel.type,
            confidence: rel.confidence,
            reason: rel.reason ?? '',
            step: rel.step ?? 0,
            callLine: rel.callLine ?? null,
        });
    }
    log(`Inserting ${edgeRows.length} edges...`);
    insertEdgesBatch(db, edgeRows);
    log(`Loaded ${nodeRows.length} nodes, ${edgeRows.length} edges`);
    return {
        nodesInserted: nodeRows.length,
        edgesInserted: edgeRows.length,
        warnings,
    };
}