@zuvia-software-solutions/code-mapper 1.4.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/ai-context.js +1 -1
- package/dist/cli/analyze.d.ts +1 -0
- package/dist/cli/analyze.js +73 -82
- package/dist/cli/augment.js +0 -2
- package/dist/cli/eval-server.d.ts +2 -2
- package/dist/cli/eval-server.js +6 -6
- package/dist/cli/index.js +6 -10
- package/dist/cli/mcp.d.ts +1 -3
- package/dist/cli/mcp.js +3 -3
- package/dist/cli/refresh.d.ts +2 -2
- package/dist/cli/refresh.js +24 -29
- package/dist/cli/status.js +4 -13
- package/dist/cli/tool.d.ts +5 -4
- package/dist/cli/tool.js +8 -10
- package/dist/config/ignore-service.js +14 -34
- package/dist/core/augmentation/engine.js +53 -83
- package/dist/core/db/adapter.d.ts +99 -0
- package/dist/core/db/adapter.js +402 -0
- package/dist/core/db/graph-loader.d.ts +27 -0
- package/dist/core/db/graph-loader.js +148 -0
- package/dist/core/db/queries.d.ts +160 -0
- package/dist/core/db/queries.js +441 -0
- package/dist/core/db/schema.d.ts +108 -0
- package/dist/core/db/schema.js +136 -0
- package/dist/core/embeddings/embedder.d.ts +21 -12
- package/dist/core/embeddings/embedder.js +104 -50
- package/dist/core/embeddings/embedding-pipeline.d.ts +48 -22
- package/dist/core/embeddings/embedding-pipeline.js +220 -262
- package/dist/core/embeddings/text-generator.js +4 -19
- package/dist/core/embeddings/types.d.ts +1 -1
- package/dist/core/graph/graph.d.ts +1 -1
- package/dist/core/graph/graph.js +1 -0
- package/dist/core/graph/types.d.ts +11 -9
- package/dist/core/graph/types.js +4 -1
- package/dist/core/incremental/refresh.d.ts +46 -0
- package/dist/core/incremental/refresh.js +503 -0
- package/dist/core/incremental/types.d.ts +2 -1
- package/dist/core/incremental/types.js +42 -44
- package/dist/core/ingestion/ast-cache.js +1 -0
- package/dist/core/ingestion/call-processor.d.ts +15 -3
- package/dist/core/ingestion/call-processor.js +448 -60
- package/dist/core/ingestion/cluster-enricher.d.ts +1 -1
- package/dist/core/ingestion/cluster-enricher.js +2 -0
- package/dist/core/ingestion/community-processor.d.ts +1 -1
- package/dist/core/ingestion/community-processor.js +8 -3
- package/dist/core/ingestion/export-detection.d.ts +1 -1
- package/dist/core/ingestion/export-detection.js +1 -1
- package/dist/core/ingestion/filesystem-walker.js +1 -1
- package/dist/core/ingestion/heritage-processor.d.ts +2 -2
- package/dist/core/ingestion/heritage-processor.js +22 -11
- package/dist/core/ingestion/import-processor.d.ts +2 -2
- package/dist/core/ingestion/import-processor.js +24 -9
- package/dist/core/ingestion/language-config.js +7 -4
- package/dist/core/ingestion/mro-processor.d.ts +1 -1
- package/dist/core/ingestion/mro-processor.js +23 -11
- package/dist/core/ingestion/named-binding-extraction.js +5 -5
- package/dist/core/ingestion/parsing-processor.d.ts +4 -4
- package/dist/core/ingestion/parsing-processor.js +26 -18
- package/dist/core/ingestion/pipeline.d.ts +4 -2
- package/dist/core/ingestion/pipeline.js +50 -20
- package/dist/core/ingestion/process-processor.d.ts +2 -2
- package/dist/core/ingestion/process-processor.js +28 -14
- package/dist/core/ingestion/resolution-context.d.ts +1 -1
- package/dist/core/ingestion/resolution-context.js +14 -4
- package/dist/core/ingestion/resolvers/csharp.js +4 -3
- package/dist/core/ingestion/resolvers/go.js +3 -1
- package/dist/core/ingestion/resolvers/jvm.js +13 -4
- package/dist/core/ingestion/resolvers/standard.js +2 -2
- package/dist/core/ingestion/resolvers/utils.js +6 -2
- package/dist/core/ingestion/route-stitcher.d.ts +15 -0
- package/dist/core/ingestion/route-stitcher.js +92 -0
- package/dist/core/ingestion/structure-processor.d.ts +1 -1
- package/dist/core/ingestion/structure-processor.js +3 -2
- package/dist/core/ingestion/symbol-table.d.ts +2 -0
- package/dist/core/ingestion/symbol-table.js +5 -1
- package/dist/core/ingestion/tree-sitter-queries.d.ts +2 -2
- package/dist/core/ingestion/tree-sitter-queries.js +177 -0
- package/dist/core/ingestion/type-env.js +20 -0
- package/dist/core/ingestion/type-extractors/csharp.js +4 -3
- package/dist/core/ingestion/type-extractors/go.js +23 -12
- package/dist/core/ingestion/type-extractors/php.js +18 -10
- package/dist/core/ingestion/type-extractors/ruby.js +15 -3
- package/dist/core/ingestion/type-extractors/rust.js +3 -2
- package/dist/core/ingestion/type-extractors/shared.js +3 -2
- package/dist/core/ingestion/type-extractors/typescript.js +11 -5
- package/dist/core/ingestion/utils.d.ts +27 -4
- package/dist/core/ingestion/utils.js +145 -100
- package/dist/core/ingestion/workers/parse-worker.d.ts +1 -0
- package/dist/core/ingestion/workers/parse-worker.js +97 -29
- package/dist/core/ingestion/workers/worker-pool.js +3 -0
- package/dist/core/search/bm25-index.d.ts +15 -8
- package/dist/core/search/bm25-index.js +48 -98
- package/dist/core/search/hybrid-search.d.ts +9 -3
- package/dist/core/search/hybrid-search.js +30 -25
- package/dist/core/search/reranker.js +9 -7
- package/dist/core/search/types.d.ts +0 -4
- package/dist/core/semantic/tsgo-service.d.ts +7 -1
- package/dist/core/semantic/tsgo-service.js +165 -66
- package/dist/lib/tsgo-test.d.ts +2 -0
- package/dist/lib/tsgo-test.js +6 -0
- package/dist/lib/type-utils.d.ts +25 -0
- package/dist/lib/type-utils.js +22 -0
- package/dist/lib/utils.d.ts +3 -2
- package/dist/lib/utils.js +3 -2
- package/dist/mcp/compatible-stdio-transport.js +1 -1
- package/dist/mcp/local/local-backend.d.ts +29 -56
- package/dist/mcp/local/local-backend.js +808 -1118
- package/dist/mcp/resources.js +35 -25
- package/dist/mcp/server.d.ts +1 -1
- package/dist/mcp/server.js +5 -5
- package/dist/mcp/tools.js +24 -25
- package/dist/storage/repo-manager.d.ts +2 -12
- package/dist/storage/repo-manager.js +1 -47
- package/dist/types/pipeline.d.ts +8 -5
- package/dist/types/pipeline.js +5 -0
- package/package.json +18 -11
- package/dist/cli/serve.d.ts +0 -5
- package/dist/cli/serve.js +0 -8
- package/dist/core/incremental/child-process.d.ts +0 -8
- package/dist/core/incremental/child-process.js +0 -649
- package/dist/core/incremental/refresh-coordinator.d.ts +0 -32
- package/dist/core/incremental/refresh-coordinator.js +0 -147
- package/dist/core/lbug/csv-generator.d.ts +0 -28
- package/dist/core/lbug/csv-generator.js +0 -355
- package/dist/core/lbug/lbug-adapter.d.ts +0 -96
- package/dist/core/lbug/lbug-adapter.js +0 -753
- package/dist/core/lbug/schema.d.ts +0 -46
- package/dist/core/lbug/schema.js +0 -402
- package/dist/mcp/core/embedder.d.ts +0 -24
- package/dist/mcp/core/embedder.js +0 -168
- package/dist/mcp/core/lbug-adapter.d.ts +0 -29
- package/dist/mcp/core/lbug-adapter.js +0 -330
- package/dist/server/api.d.ts +0 -5
- package/dist/server/api.js +0 -340
- package/dist/server/mcp-http.d.ts +0 -7
- package/dist/server/mcp-http.js +0 -95
- package/models/mlx-embedder.py +0 -185
|
@@ -0,0 +1,402 @@
|
|
|
1
|
+
// code-mapper/src/core/db/adapter.ts
|
|
2
|
+
/**
|
|
3
|
+
* @file Unified SQLite database adapter for code-mapper.
|
|
4
|
+
*
|
|
5
|
+
* Single module for both CLI and MCP database access.
|
|
6
|
+
* WAL mode: concurrent readers + single writer, zero lock conflicts.
|
|
7
|
+
* Synchronous better-sqlite3 API: no connection pools needed.
|
|
8
|
+
*
|
|
9
|
+
* All public functions are typed with branded IDs and schema types
|
|
10
|
+
* from schema.ts — the compiler prevents passing wrong ID types
|
|
11
|
+
* or invalid labels/edge types.
|
|
12
|
+
*/
|
|
13
|
+
import Database from 'better-sqlite3';
|
|
14
|
+
import path from 'path';
|
|
15
|
+
import fs from 'fs';
|
|
16
|
+
import { SCHEMA_SQL, toNodeId, } from './schema.js';
|
|
17
|
+
// ---------------------------------------------------------------------------
|
|
18
|
+
// Singleton instances keyed by resolved dbPath
|
|
19
|
+
// ---------------------------------------------------------------------------
|
|
20
|
+
const instances = new Map();
|
|
21
|
+
// ---------------------------------------------------------------------------
|
|
22
|
+
// Lifecycle
|
|
23
|
+
// ---------------------------------------------------------------------------
|
|
24
|
+
/**
 * Open a SQLite database at `dbPath`, reusing a cached handle when one is
 * already open for the same resolved path. Creates the parent directory and
 * the schema (idempotent `IF NOT EXISTS` DDL) on first open.
 *
 * Pragmas: WAL journal (concurrent readers + single writer), NORMAL sync,
 * 64MB page cache, 256MB mmap, foreign keys off (IDs are app-managed).
 */
export function openDb(dbPath) {
    const resolved = path.resolve(dbPath);
    const cached = instances.get(resolved);
    if (cached) {
        return cached;
    }
    const parentDir = path.dirname(resolved);
    if (!fs.existsSync(parentDir)) {
        fs.mkdirSync(parentDir, { recursive: true });
    }
    const db = new Database(resolved);
    const pragmas = [
        'journal_mode = WAL',
        'synchronous = NORMAL',
        'cache_size = -64000', // 64MB
        'mmap_size = 268435456', // 256MB
        'foreign_keys = OFF',
    ];
    for (const pragma of pragmas) {
        db.pragma(pragma);
    }
    // Schema DDL is all IF NOT EXISTS — safe to run on every open.
    db.exec(SCHEMA_SQL);
    instances.set(resolved, db);
    return db;
}
|
|
45
|
+
/**
 * Close a single database (when `dbPath` is given) or every open database.
 * Closed handles are evicted from the singleton cache.
 */
export function closeDb(dbPath) {
    if (!dbPath) {
        // No path → tear down every cached handle.
        for (const db of instances.values()) {
            db.close();
        }
        instances.clear();
        return;
    }
    const resolved = path.resolve(dbPath);
    const db = instances.get(resolved);
    if (!db) {
        return;
    }
    db.close();
    instances.delete(resolved);
}
|
|
61
|
+
/**
 * Delete a database's files (main, WAL, shared-memory) and reopen it fresh.
 * Used for a full re-analyze. Missing files are ignored.
 */
export function resetDb(dbPath) {
    closeDb(dbPath);
    const base = path.resolve(dbPath);
    for (const file of [base, `${base}-wal`, `${base}-shm`]) {
        try {
            fs.unlinkSync(file);
        }
        catch {
            // File absent — nothing to delete.
        }
    }
    return openDb(dbPath);
}
|
|
73
|
+
/** True when a handle for `dbPath` is currently cached (i.e. open). */
export function isDbOpen(dbPath) {
    const resolved = path.resolve(dbPath);
    return instances.has(resolved);
}
|
|
77
|
+
// ---------------------------------------------------------------------------
|
|
78
|
+
// Node operations
|
|
79
|
+
// ---------------------------------------------------------------------------
|
|
80
|
+
/**
 * Expand a code identifier into lowercase natural-language words so FTS can
 * match plain-English queries. Handles camelCase, PascalCase, snake_case,
 * UPPER_CASE, and kebab-case.
 *
 * Examples:
 *   buildTestGraph           → "build test graph"
 *   processCallsFromExtracted → "process calls from extracted"
 *   MAX_CHAIN_DEPTH          → "max chain depth"
 *   deleteNodesByFile        → "delete nodes by file"
 */
export function expandIdentifier(name) {
    if (!name) {
        return '';
    }
    // Applied in order: split lower→Upper boundaries, split UPPER-run→Word
    // boundaries ("HTTPRequest" → "HTTP Request"), turn _/- into spaces,
    // then collapse whitespace runs.
    const rules = [
        [/([a-z])([A-Z])/g, '$1 $2'],
        [/([A-Z]+)([A-Z][a-z])/g, '$1 $2'],
        [/[_\-]+/g, ' '],
        [/\s+/g, ' '],
    ];
    let expanded = name;
    for (const [pattern, replacement] of rules) {
        expanded = expanded.replace(pattern, replacement);
    }
    return expanded.toLowerCase().trim();
}
|
|
105
|
+
const INSERT_NODE_SQL = `
|
|
106
|
+
INSERT OR REPLACE INTO nodes (
|
|
107
|
+
id, label, name, filePath, startLine, endLine, isExported, content, description,
|
|
108
|
+
heuristicLabel, cohesion, symbolCount, keywords, enrichedBy,
|
|
109
|
+
processType, stepCount, communities, entryPointId, terminalId,
|
|
110
|
+
parameterCount, returnType, nameExpanded
|
|
111
|
+
) VALUES (
|
|
112
|
+
@id, @label, @name, @filePath, @startLine, @endLine, @isExported, @content, @description,
|
|
113
|
+
@heuristicLabel, @cohesion, @symbolCount, @keywords, @enrichedBy,
|
|
114
|
+
@processType, @stepCount, @communities, @entryPointId, @terminalId,
|
|
115
|
+
@parameterCount, @returnType, @nameExpanded
|
|
116
|
+
)
|
|
117
|
+
`;
|
|
118
|
+
/**
 * Insert or overwrite one node row.
 *
 * Missing fields default to '' for text identity columns and NULL for
 * optional metadata. `nameExpanded` falls back to expandIdentifier(name)
 * so the FTS index can match natural-language queries against identifiers.
 */
export function insertNode(db, node) {
    const name = node.name ?? '';
    const row = {
        id: node.id,
        label: node.label,
        name,
        filePath: node.filePath ?? '',
        startLine: node.startLine ?? null,
        endLine: node.endLine ?? null,
        isExported: node.isExported ?? null,
        content: node.content ?? '',
        description: node.description ?? '',
        heuristicLabel: node.heuristicLabel ?? null,
        cohesion: node.cohesion ?? null,
        symbolCount: node.symbolCount ?? null,
        keywords: node.keywords ?? null,
        enrichedBy: node.enrichedBy ?? null,
        processType: node.processType ?? null,
        stepCount: node.stepCount ?? null,
        communities: node.communities ?? null,
        entryPointId: node.entryPointId ?? null,
        terminalId: node.terminalId ?? null,
        parameterCount: node.parameterCount ?? null,
        returnType: node.returnType ?? null,
        nameExpanded: node.nameExpanded ?? expandIdentifier(name),
    };
    db.prepare(INSERT_NODE_SQL).run(row);
}
|
|
145
|
+
/** Fetch a single node row by primary key; undefined when no row exists. */
export function getNode(db, id) {
    const stmt = db.prepare('SELECT * FROM nodes WHERE id = ?');
    return stmt.get(id);
}
|
|
149
|
+
/** Look up nodes by exact name; `label` narrows the match when provided. */
export function findNodesByName(db, name, label, limit = 10) {
    if (!label) {
        return db.prepare('SELECT * FROM nodes WHERE name = ? LIMIT ?').all(name, limit);
    }
    return db.prepare('SELECT * FROM nodes WHERE name = ? AND label = ? LIMIT ?').all(name, label, limit);
}
|
|
156
|
+
/** All node rows recorded for a given file path. */
export function findNodesByFile(db, filePath) {
    const stmt = db.prepare('SELECT * FROM nodes WHERE filePath = ?');
    return stmt.all(filePath);
}
|
|
160
|
+
/**
 * Locate the node covering a given line of a file. Ties are broken by span
 * size (smallest wins), so the innermost enclosing symbol is returned.
 * `excludeLabel` filters out one label (e.g. 'File' to skip the whole-file node).
 */
export function findNodeAtLine(db, filePath, line, excludeLabel) {
    const base = 'SELECT * FROM nodes WHERE filePath = ? AND startLine <= ? AND endLine >= ?';
    const tail = ' ORDER BY (endLine - startLine) LIMIT 1';
    if (excludeLabel) {
        return db.prepare(base + ' AND label != ?' + tail).get(filePath, line, line, excludeLabel);
    }
    return db.prepare(base + tail).get(filePath, line, line);
}
|
|
169
|
+
/** Total node count, optionally restricted to a single label. */
export function countNodes(db, label) {
    const row = label
        ? db.prepare('SELECT COUNT(*) AS cnt FROM nodes WHERE label = ?').get(label)
        : db.prepare('SELECT COUNT(*) AS cnt FROM nodes').get();
    return row.cnt;
}
|
|
176
|
+
/**
 * Delete all nodes for a file path, plus their outgoing edges and embeddings.
 * Returns the number of node rows deleted.
 *
 * Incoming edges from other files are preserved on purpose: node IDs are
 * deterministic (label:filePath:name), so a re-inserted node keeps its ID
 * and those edges remain valid.
 */
export function deleteNodesByFile(db, filePath) {
    const ids = db
        .prepare('SELECT id FROM nodes WHERE filePath = ?')
        .all(filePath)
        .map(n => n.id);
    if (ids.length === 0)
        return 0;
    // Chunk the IN (...) lists: SQLite caps bound variables per statement
    // (999 in older builds), so a single statement could fail for files
    // with very many symbols.
    const CHUNK = 500;
    for (let i = 0; i < ids.length; i += CHUNK) {
        const slice = ids.slice(i, i + CHUNK);
        const ph = slice.map(() => '?').join(',');
        db.prepare(`DELETE FROM edges WHERE sourceId IN (${ph})`).run(...slice);
        db.prepare(`DELETE FROM embeddings WHERE nodeId IN (${ph})`).run(...slice);
    }
    return db.prepare('DELETE FROM nodes WHERE filePath = ?').run(filePath).changes;
}
|
|
190
|
+
// ---------------------------------------------------------------------------
|
|
191
|
+
// Edge operations
|
|
192
|
+
// ---------------------------------------------------------------------------
|
|
193
|
+
const INSERT_EDGE_SQL = `
|
|
194
|
+
INSERT OR IGNORE INTO edges (id, sourceId, targetId, type, confidence, reason, step, callLine)
|
|
195
|
+
VALUES (@id, @sourceId, @targetId, @type, @confidence, @reason, @step, @callLine)
|
|
196
|
+
`;
|
|
197
|
+
/**
 * Insert one edge row; duplicates (same id) are ignored.
 * Defaults: confidence 1.0, empty reason, step 0, NULL callLine.
 */
export function insertEdge(db, edge) {
    const { id, sourceId, targetId, type } = edge;
    const row = {
        id,
        sourceId,
        targetId,
        type,
        confidence: edge.confidence ?? 1.0,
        reason: edge.reason ?? '',
        step: edge.step ?? 0,
        callLine: edge.callLine ?? null,
    };
    db.prepare(INSERT_EDGE_SQL).run(row);
}
|
|
210
|
+
/** Outgoing edges of `sourceId` above a confidence floor, optionally by type. */
export function findEdgesFrom(db, sourceId, type, minConfidence = 0) {
    if (!type) {
        return db.prepare('SELECT * FROM edges WHERE sourceId = ? AND confidence >= ?').all(sourceId, minConfidence);
    }
    return db.prepare('SELECT * FROM edges WHERE sourceId = ? AND type = ? AND confidence >= ?').all(sourceId, type, minConfidence);
}
|
|
217
|
+
/** Incoming edges of `targetId` above a confidence floor, optionally by type. */
export function findEdgesTo(db, targetId, type, minConfidence = 0) {
    if (!type) {
        return db.prepare('SELECT * FROM edges WHERE targetId = ? AND confidence >= ?').all(targetId, minConfidence);
    }
    return db.prepare('SELECT * FROM edges WHERE targetId = ? AND type = ? AND confidence >= ?').all(targetId, type, minConfidence);
}
|
|
224
|
+
/** Total number of edge rows. */
export function countEdges(db) {
    const { cnt } = db.prepare('SELECT COUNT(*) AS cnt FROM edges').get();
    return cnt;
}
|
|
228
|
+
// ---------------------------------------------------------------------------
|
|
229
|
+
// Embedding operations
|
|
230
|
+
// ---------------------------------------------------------------------------
|
|
231
|
+
/**
 * Insert or replace one embedding, stored as a binary Float32Array blob.
 *
 * @param textHash - Optional hash of the embedded text, matching the column
 *   written by insertEmbeddingsBatch(). Previously this function always left
 *   textHash NULL, so single-row inserts never participated in the
 *   hash-based skip on re-index (see getEmbeddingHashes). Defaults to null,
 *   preserving the old behavior for existing callers.
 */
export function insertEmbedding(db, nodeId, embedding, textHash = null) {
    const buf = Buffer.from(new Float32Array(embedding).buffer);
    db.prepare('INSERT OR REPLACE INTO embeddings (nodeId, embedding, textHash) VALUES (?, ?, ?)').run(nodeId, buf, textHash);
}
|
|
236
|
+
/**
 * Delete the embeddings of every node belonging to `filePath`.
 * The IN (...) list is chunked to stay under SQLite's per-statement
 * bound-variable limit (999 in older builds) for files with many symbols.
 */
export function deleteEmbeddingsByFile(db, filePath) {
    const ids = db
        .prepare('SELECT id FROM nodes WHERE filePath = ?')
        .all(filePath)
        .map(n => n.id);
    const CHUNK = 500;
    for (let i = 0; i < ids.length; i += CHUNK) {
        const slice = ids.slice(i, i + CHUNK);
        const ph = slice.map(() => '?').join(',');
        db.prepare(`DELETE FROM embeddings WHERE nodeId IN (${ph})`).run(...slice);
    }
}
|
|
244
|
+
/** Total number of embedding rows. */
export function countEmbeddings(db) {
    const { cnt } = db.prepare('SELECT COUNT(*) AS cnt FROM embeddings').get();
    return cnt;
}
|
|
248
|
+
/**
 * Cosine similarity of two numeric vectors, iterating over `a`'s length.
 * Out-of-range or missing components are treated as 0; a zero-magnitude
 * vector yields similarity 0 (avoids division by zero).
 */
function cosineSimilarity(a, b) {
    let dot = 0;
    let magA = 0;
    let magB = 0;
    const len = a.length;
    for (let i = 0; i < len; i++) {
        const x = a[i] ?? 0;
        const y = b[i] ?? 0;
        dot += x * y;
        magA += x * x;
        magB += y * y;
    }
    const norm = Math.sqrt(magA) * Math.sqrt(magB);
    if (norm === 0) {
        return 0;
    }
    return dot / norm;
}
|
|
261
|
+
/**
 * Brute-force nearest-neighbour search over all stored embeddings using
 * cosine distance (1 - similarity). A linear scan is adequate for <200K
 * vectors at 256 dims. Rows at or beyond `maxDistance` are dropped; results
 * are sorted ascending by distance and capped at `limit`.
 */
export function searchVector(db, queryVec, limit = 10, maxDistance = 0.5) {
    const matches = [];
    const rows = db.prepare('SELECT nodeId, embedding FROM embeddings').all();
    for (const row of rows) {
        // Reinterpret the stored blob as float32 without copying.
        const stored = new Float32Array(row.embedding.buffer, row.embedding.byteOffset, row.embedding.byteLength / 4);
        const distance = 1 - cosineSimilarity(queryVec, stored);
        if (distance >= maxDistance) {
            continue;
        }
        matches.push({ nodeId: toNodeId(row.nodeId), distance });
    }
    matches.sort((x, y) => x.distance - y.distance);
    return matches.slice(0, limit);
}
|
|
275
|
+
// ---------------------------------------------------------------------------
|
|
276
|
+
// FTS search
|
|
277
|
+
// ---------------------------------------------------------------------------
|
|
278
|
+
/**
 * Full-text search across nodes via the FTS5 index (auto-maintained).
 *
 * Multi-word queries are OR-joined so natural-language input works (FTS5
 * defaults to AND, requiring every word to match). On FTS syntax errors the
 * function falls back to a LIKE scan over name/content.
 *
 * Bug fix: the LIKE fallback previously used the OR-joined string, producing
 * patterns like '%find OR nodes%' that almost never match. It now uses the
 * plain sanitized query text.
 */
export function searchFTS(db, query, limit = 20) {
    // Sanitize for FTS5 — escape double quotes, strip operator characters.
    const sanitized = query.replace(/"/g, '""').replace(/[*(){}[\]^~\\:]/g, ' ').trim();
    if (!sanitized)
        return [];
    // OR-join multi-word queries for forgiving natural-language matching.
    const words = sanitized.split(/\s+/).filter(w => w.length > 1);
    const ftsQuery = words.length > 1 ? words.join(' OR ') : sanitized;
    try {
        return db.prepare(`
      SELECT n.id, n.name, n.label, n.filePath, -fts.rank AS score
      FROM nodes_fts fts
      JOIN nodes n ON n.rowid = fts.rowid
      WHERE nodes_fts MATCH ?
      ORDER BY fts.rank
      LIMIT ?
    `).all(ftsQuery, limit);
    }
    catch {
        // FTS query syntax error — fall back to LIKE on the plain text
        // (not the OR-joined form, which would rarely match anything).
        return db.prepare(`
      SELECT id, name, label, filePath, 1.0 AS score
      FROM nodes
      WHERE name LIKE ? OR content LIKE ?
      LIMIT ?
    `).all(`%${sanitized}%`, `%${sanitized}%`, limit);
    }
}
|
|
310
|
+
// ---------------------------------------------------------------------------
|
|
311
|
+
// Stats
|
|
312
|
+
// ---------------------------------------------------------------------------
|
|
313
|
+
/** Summary counts: nodes, edges, and embeddings. */
export function getStats(db) {
    const nodes = countNodes(db);
    const edges = countEdges(db);
    const embeddings = countEmbeddings(db);
    return { nodes, edges, embeddings };
}
|
|
321
|
+
// ---------------------------------------------------------------------------
|
|
322
|
+
// Batch operations (transactional)
|
|
323
|
+
// ---------------------------------------------------------------------------
|
|
324
|
+
/**
 * Insert many nodes inside a single transaction, reusing one prepared
 * statement. Field defaults match insertNode(), including the
 * expandIdentifier() fallback for nameExpanded.
 */
export function insertNodesBatch(db, nodes) {
    const stmt = db.prepare(INSERT_NODE_SQL);
    // Map an input record to the full named-parameter row expected by the SQL.
    const toRow = (node) => {
        const name = node.name ?? '';
        return {
            id: node.id,
            label: node.label,
            name,
            filePath: node.filePath ?? '',
            startLine: node.startLine ?? null,
            endLine: node.endLine ?? null,
            isExported: node.isExported ?? null,
            content: node.content ?? '',
            description: node.description ?? '',
            heuristicLabel: node.heuristicLabel ?? null,
            cohesion: node.cohesion ?? null,
            symbolCount: node.symbolCount ?? null,
            keywords: node.keywords ?? null,
            enrichedBy: node.enrichedBy ?? null,
            processType: node.processType ?? null,
            stepCount: node.stepCount ?? null,
            communities: node.communities ?? null,
            entryPointId: node.entryPointId ?? null,
            terminalId: node.terminalId ?? null,
            parameterCount: node.parameterCount ?? null,
            returnType: node.returnType ?? null,
            nameExpanded: node.nameExpanded ?? expandIdentifier(name),
        };
    };
    const writeAll = db.transaction((items) => {
        for (const item of items) {
            stmt.run(toRow(item));
        }
    });
    writeAll(nodes);
}
|
|
345
|
+
/**
 * Insert many edges in one transaction (duplicates ignored, same as
 * insertEdge). Defaults: confidence 1.0, empty reason, step 0, NULL callLine.
 */
export function insertEdgesBatch(db, edges) {
    const stmt = db.prepare(INSERT_EDGE_SQL);
    const writeAll = db.transaction((items) => {
        for (const edge of items) {
            stmt.run({
                id: edge.id,
                sourceId: edge.sourceId,
                targetId: edge.targetId,
                type: edge.type,
                confidence: edge.confidence ?? 1.0,
                reason: edge.reason ?? '',
                step: edge.step ?? 0,
                callLine: edge.callLine ?? null,
            });
        }
    });
    writeAll(edges);
}
|
|
359
|
+
/**
 * Insert many embeddings in one transaction. Vectors are stored as binary
 * Float32Array blobs; textHash (when present) enables hash-based skip on
 * re-index via getEmbeddingHashes().
 */
export function insertEmbeddingsBatch(db, items) {
    const stmt = db.prepare('INSERT OR REPLACE INTO embeddings (nodeId, embedding, textHash) VALUES (?, ?, ?)');
    const writeAll = db.transaction((batch) => {
        for (const { nodeId, embedding, textHash } of batch) {
            const blob = Buffer.from(new Float32Array(embedding).buffer);
            stmt.run(nodeId, blob, textHash ?? null);
        }
    });
    writeAll(items);
}
|
|
370
|
+
/**
 * Map of nodeId → textHash for every embedding that recorded a hash.
 * Used to skip re-embedding unchanged text on re-index.
 */
export function getEmbeddingHashes(db) {
    const rows = db.prepare('SELECT nodeId, textHash FROM embeddings WHERE textHash IS NOT NULL').all();
    return new Map(rows.map(row => [row.nodeId, row.textHash]));
}
|
|
379
|
+
// ---------------------------------------------------------------------------
|
|
380
|
+
// Raw SQL escape (for dynamic queries in local-backend.ts)
|
|
381
|
+
// ---------------------------------------------------------------------------
|
|
382
|
+
/**
 * Escape a string for embedding in a SQL single-quoted literal
 * (doubles every apostrophe). Prefer bound parameters where possible.
 */
export function escapeSql(value) {
    return value.split("'").join("''");
}
|
|
386
|
+
// ---------------------------------------------------------------------------
|
|
387
|
+
// Raw query execution (for migration compatibility)
|
|
388
|
+
// ---------------------------------------------------------------------------
|
|
389
|
+
/** Prepare and execute an arbitrary SQL query, returning all rows. */
export function rawQuery(db, sql, params) {
    const stmt = db.prepare(sql);
    return params ? stmt.all(...params) : stmt.all();
}
|
|
396
|
+
/** Prepare and execute an arbitrary SQL statement, returning run info. */
export function rawRun(db, sql, params) {
    const stmt = db.prepare(sql);
    return params ? stmt.run(...params) : stmt.run();
}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
 * @file Loads a KnowledgeGraph into SQLite.
 *
 * Replaces the old CSV-generator + graph DB COPY pipeline (~800 lines)
 * with direct batch INSERTs in transactions. SQLite handles this in <2s
 * for typical codebases (1-5K nodes) because:
 * - Prepared statements are reused across the batch
 * - WAL mode doesn't block readers during the write
 * - No CSV serialization/deserialization overhead
 */
import type Database from 'better-sqlite3';
import { type KnowledgeGraph } from '../graph/types.js';
/** Progress callback — receives human-readable status messages during the load. */
export type LoadProgressCallback = (message: string) => void;
/** Result of loading a graph */
export interface LoadResult {
    readonly nodesInserted: number;
    readonly edgesInserted: number;
    /** Non-fatal issues, e.g. nodes skipped for unknown labels. */
    readonly warnings: readonly string[];
}
/**
 * Load an in-memory KnowledgeGraph into a SQLite database.
 *
 * This is the full-pipeline path used by `code-mapper analyze`.
 * For incremental updates, use the adapter functions directly.
 *
 * @param db - Open better-sqlite3 handle (schema already created).
 * @param graph - The in-memory graph to persist.
 * @param repoPath - Repository root, used to read source content per node.
 * @param onProgress - Optional status callback.
 */
export declare function loadGraphToDb(db: Database.Database, graph: KnowledgeGraph, repoPath: string, onProgress?: LoadProgressCallback): LoadResult;
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
// code-mapper/src/core/db/graph-loader.ts
|
|
2
|
+
/**
|
|
3
|
+
* @file Loads a KnowledgeGraph into SQLite.
|
|
4
|
+
*
|
|
5
|
+
* Replaces the old CSV-generator + graph DB COPY pipeline (~800 lines)
|
|
6
|
+
* with direct batch INSERTs in transactions. SQLite handles this in <2s
|
|
7
|
+
* for typical codebases (1-5K nodes) because:
|
|
8
|
+
* - Prepared statements are reused across the batch
|
|
9
|
+
* - WAL mode doesn't block readers during the write
|
|
10
|
+
* - No CSV serialization/deserialization overhead
|
|
11
|
+
*/
|
|
12
|
+
import fs from 'fs';
|
|
13
|
+
import path from 'path';
|
|
14
|
+
import {} from '../graph/types.js';
|
|
15
|
+
import { NODE_LABELS, toNodeId, toEdgeId, } from './schema.js';
|
|
16
|
+
import {} from './schema.js';
|
|
17
|
+
import { insertNodesBatch, insertEdgesBatch } from './adapter.js';
|
|
18
|
+
/** Cap on stored source for a File node (characters). */
const MAX_FILE_CONTENT = 100_000;
/** Cap on stored source snippet for a symbol node (characters). */
const MAX_SNIPPET = 50_000;
/**
 * Heuristic binary detection over the first 1KB: a null byte anywhere in the
 * sample, or more than 10% non-printable characters (excluding tab/LF/CR),
 * marks the content as binary. Empty content is not binary.
 */
function isBinaryContent(content) {
    if (content.length === 0) {
        return false;
    }
    const sampleSize = Math.min(content.length, 1024);
    let suspicious = 0;
    for (let i = 0; i < sampleSize; i++) {
        const code = content.charCodeAt(i);
        if (code === 0) {
            return true;
        }
        const isControl = code < 32 && code !== 9 && code !== 10 && code !== 13;
        if (isControl) {
            suspicious++;
        }
    }
    return suspicious / sampleSize > 0.1;
}
/**
 * Resolve the source text to store for a node:
 * - Folder nodes and nodes without a filePath store nothing.
 * - Unreadable files store '' (best-effort); binary files store a marker.
 * - File nodes store the whole file, truncated at MAX_FILE_CONTENT.
 * - Symbol nodes store their line span plus 2 lines of context either side,
 *   truncated at MAX_SNIPPET; '' when the span is unknown.
 */
function getNodeContent(repoPath, node) {
    const { filePath, label, startLine, endLine } = node;
    if (!filePath || label === 'Folder') {
        return '';
    }
    let content;
    try {
        content = fs.readFileSync(path.join(repoPath, filePath), 'utf-8');
    }
    catch {
        return '';
    }
    if (isBinaryContent(content)) {
        return '[Binary file]';
    }
    if (label === 'File') {
        if (content.length <= MAX_FILE_CONTENT) {
            return content;
        }
        return content.slice(0, MAX_FILE_CONTENT) + '\n... [truncated]';
    }
    if (startLine === undefined || endLine === undefined) {
        return '';
    }
    const lines = content.split('\n');
    const first = Math.max(0, startLine - 2);
    const last = Math.min(lines.length - 1, endLine + 2);
    const snippet = lines.slice(first, last + 1).join('\n');
    if (snippet.length <= MAX_SNIPPET) {
        return snippet;
    }
    return snippet.slice(0, MAX_SNIPPET) + '\n... [truncated]';
}
|
|
75
|
+
/**
 * Load an in-memory KnowledgeGraph into a SQLite database.
 *
 * This is the full-pipeline path used by `code-mapper analyze`.
 * For incremental updates, use the adapter functions directly.
 *
 * Nodes whose label is not in NODE_LABELS are skipped and recorded as
 * warnings. Source content/snippets are read from disk via getNodeContent.
 */
export function loadGraphToDb(db, graph, repoPath, onProgress) {
    const log = onProgress ?? (() => { });
    const warnings = [];
    const knownLabels = new Set(NODE_LABELS);
    // ── Nodes ────────────────────────────────────────────────────────────
    log('Preparing nodes...');
    const nodeRows = [];
    for (const node of graph.iterNodes()) {
        const { label, properties } = node;
        if (!knownLabels.has(label)) {
            warnings.push(`Skipped node with unknown label: ${label}`);
            continue;
        }
        // Only pass line bounds that are actually known.
        const contentTarget = { label, filePath: properties.filePath };
        if (properties.startLine !== undefined) {
            contentTarget.startLine = properties.startLine;
        }
        if (properties.endLine !== undefined) {
            contentTarget.endLine = properties.endLine;
        }
        nodeRows.push({
            id: toNodeId(node.id),
            label,
            name: properties.name ?? '',
            filePath: properties.filePath ?? '',
            startLine: properties.startLine ?? null,
            endLine: properties.endLine ?? null,
            isExported: properties.isExported ? 1 : null,
            content: getNodeContent(repoPath, contentTarget),
            description: properties.description ?? '',
            heuristicLabel: properties.heuristicLabel ?? null,
            cohesion: properties.cohesion ?? null,
            symbolCount: properties.symbolCount ?? null,
            keywords: properties.keywords ? JSON.stringify(properties.keywords) : null,
            enrichedBy: properties.enrichedBy ?? null,
            processType: properties.processType ?? null,
            stepCount: properties.stepCount ?? null,
            communities: properties.communities ? JSON.stringify(properties.communities) : null,
            entryPointId: properties.entryPointId ?? null,
            terminalId: properties.terminalId ?? null,
            parameterCount: properties.parameterCount ?? null,
            returnType: properties.returnType ?? null,
        });
    }
    log(`Inserting ${nodeRows.length} nodes...`);
    insertNodesBatch(db, nodeRows);
    // ── Edges ────────────────────────────────────────────────────────────
    log('Preparing edges...');
    const edgeRows = [];
    for (const rel of graph.iterRelationships()) {
        edgeRows.push({
            id: toEdgeId(rel.id),
            sourceId: toNodeId(rel.sourceId),
            targetId: toNodeId(rel.targetId),
            type: rel.type,
            confidence: rel.confidence,
            reason: rel.reason ?? '',
            step: rel.step ?? 0,
            callLine: rel.callLine ?? null,
        });
    }
    log(`Inserting ${edgeRows.length} edges...`);
    insertEdgesBatch(db, edgeRows);
    log(`Loaded ${nodeRows.length} nodes, ${edgeRows.length} edges`);
    return {
        nodesInserted: nodeRows.length,
        edgesInserted: edgeRows.length,
        warnings,
    };
}