cntx-ui 3.0.7 → 3.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bin/cntx-ui.js +70 -0
- package/dist/lib/agent-runtime.js +269 -0
- package/dist/lib/agent-tools.js +162 -0
- package/dist/lib/api-router.js +387 -0
- package/dist/lib/bundle-manager.js +236 -0
- package/dist/lib/configuration-manager.js +230 -0
- package/dist/lib/database-manager.js +277 -0
- package/dist/lib/file-system-manager.js +305 -0
- package/dist/lib/function-level-chunker.js +144 -0
- package/dist/lib/heuristics-manager.js +491 -0
- package/dist/lib/mcp-server.js +159 -0
- package/dist/lib/mcp-transport.js +10 -0
- package/dist/lib/semantic-splitter.js +335 -0
- package/dist/lib/simple-vector-store.js +98 -0
- package/dist/lib/treesitter-semantic-chunker.js +277 -0
- package/dist/lib/websocket-manager.js +268 -0
- package/dist/server.js +225 -0
- package/package.json +18 -8
- package/bin/cntx-ui-mcp.sh +0 -3
- package/bin/cntx-ui.js +0 -123
- package/lib/agent-runtime.js +0 -371
- package/lib/agent-tools.js +0 -370
- package/lib/api-router.js +0 -1026
- package/lib/bundle-manager.js +0 -326
- package/lib/configuration-manager.js +0 -760
- package/lib/database-manager.js +0 -397
- package/lib/file-system-manager.js +0 -489
- package/lib/function-level-chunker.js +0 -406
- package/lib/heuristics-manager.js +0 -529
- package/lib/mcp-server.js +0 -1380
- package/lib/mcp-transport.js +0 -97
- package/lib/semantic-splitter.js +0 -304
- package/lib/simple-vector-store.js +0 -108
- package/lib/treesitter-semantic-chunker.js +0 -1485
- package/lib/websocket-manager.js +0 -470
- package/server.js +0 -687
package/lib/mcp-transport.js
DELETED
|
@@ -1,97 +0,0 @@
|
|
|
1
|
-
import { MCPServer } from './mcp-server.js';
|
|
2
|
-
|
|
3
|
-
/**
 * Newline-delimited JSON-RPC transport over stdio.
 *
 * Reads text from stdin, splits it into lines, parses each non-blank line as
 * JSON and hands the parsed message to `MCPServer.handleMessage`. Responses
 * and error objects are written to stdout as one JSON document per line.
 * Diagnostics go to stderr so they never mix with protocol output.
 */
export class MCPTransport {
  /**
   * @param {object} cntxServer - Passed straight through to the MCPServer constructor.
   */
  constructor(cntxServer) {
    this.mcpServer = new MCPServer(cntxServer);
    // Holds the trailing, not-yet-terminated part of the last stdin chunk.
    this.buffer = '';
  }

  /**
   * Attach the stdin listener and the SIGINT/SIGTERM shutdown handlers.
   */
  start() {
    console.error('🚀 MCP server starting on stdio transport');

    // Decode stdin as UTF-8 text so 'data' events deliver strings.
    process.stdin.setEncoding('utf8');

    // Handle incoming messages from stdin
    process.stdin.on('data', (data) => {
      // handleIncomingData is async; report failures instead of leaving a
      // floating promise that would surface as an unhandled rejection.
      this.handleIncomingData(data.toString()).catch((error) => {
        console.error('❌ Unhandled transport error:', error.message);
      });
    });

    // Handle process cleanup
    const shutdown = () => {
      console.error('📡 MCP server shutting down');
      process.exit(0);
    };
    process.on('SIGINT', shutdown);
    process.on('SIGTERM', shutdown);

    console.error('✅ MCP server ready for JSON-RPC messages');
  }

  /**
   * Append a chunk of input to the buffer and process every complete line.
   *
   * A line that is not valid JSON produces a -32700 "Parse error" response
   * with a null id. Valid messages are processed one after another.
   *
   * @param {string} data - Raw text received from stdin.
   * @returns {Promise<void>}
   */
  async handleIncomingData(data) {
    this.buffer += data;

    // Everything before the last newline is complete; the remainder waits
    // in the buffer for the next chunk.
    const lines = this.buffer.split('\n');
    this.buffer = lines.pop() || '';

    for (const line of lines) {
      const text = line.trim();
      if (!text) continue;

      let message;
      try {
        message = JSON.parse(text);
      } catch (error) {
        console.error('❌ Failed to parse JSON-RPC message:', error.message);
        this.sendError(null, -32700, 'Parse error');
        continue;
      }

      // Kept outside the try above so that only genuine JSON syntax
      // problems are reported as parse errors.
      await this.processMessage(message);
    }
  }

  /**
   * Dispatch one parsed message to the MCP server and write its response.
   *
   * Nothing is written when the server returns null or undefined. If the
   * server throws, a -32603 "Internal error" response is sent that echoes the
   * request id when one is present.
   *
   * @param {*} message - Parsed JSON value.
   * @returns {Promise<void>}
   */
  async processMessage(message) {
    try {
      const response = await this.mcpServer.handleMessage(message);

      // Loose comparison on purpose: undefined must not be serialised either,
      // because JSON.stringify(undefined) is not JSON.
      if (response != null) {
        this.sendMessage(response);
      }
    } catch (error) {
      console.error('❌ Error processing message:', error.message);
      // `??` keeps an id of 0; `?.` tolerates a message that parsed to null.
      this.sendError(message?.id ?? null, -32603, 'Internal error');
    }
  }

  /**
   * Write a message to stdout as a single JSON line.
   *
   * @param {object} message - Value to serialise.
   */
  sendMessage(message) {
    process.stdout.write(`${JSON.stringify(message)}\n`);
  }

  /**
   * Build and send a JSON-RPC 2.0 error response.
   *
   * @param {string|number|null} id - Request id, or null when unknown.
   * @param {number} code - Error code.
   * @param {string} message - Short error description.
   * @param {*} [data=null] - Optional extra detail; omitted only when null or undefined.
   */
  sendError(id, code, message, data = null) {
    const error = { code, message };
    if (data !== null && data !== undefined) error.data = data;

    this.sendMessage({
      jsonrpc: '2.0',
      id,
      error
    });
  }
}
|
|
91
|
-
|
|
92
|
-
// Factory function to start MCP transport
|
|
93
|
-
/**
 * Construct an MCPTransport for the given server, start it, and hand it back.
 *
 * @param {object} cntxServer - Forwarded to the MCPTransport constructor.
 * @returns {MCPTransport} The transport, after `start()` has been called on it.
 */
export function startMCPTransport(cntxServer) {
  const startedTransport = new MCPTransport(cntxServer);
  startedTransport.start();
  return startedTransport;
}
|
package/lib/semantic-splitter.js
DELETED
|
@@ -1,304 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Semantic Splitter - High-Performance AST-based Chunker
|
|
3
|
-
* Uses tree-sitter for surgical, function-level code extraction
|
|
4
|
-
* Integrated with HeuristicsManager for intelligent categorization
|
|
5
|
-
*/
|
|
6
|
-
|
|
7
|
-
import { readFileSync, existsSync } from 'fs'
|
|
8
|
-
import { join, extname } from 'path'
|
|
9
|
-
import Parser from 'tree-sitter'
|
|
10
|
-
import JavaScript from 'tree-sitter-javascript'
|
|
11
|
-
import TypeScript from 'tree-sitter-typescript'
|
|
12
|
-
import HeuristicsManager from './heuristics-manager.js'
|
|
13
|
-
|
|
14
|
-
export default class SemanticSplitter {
  /** Matches every branching construct counted by calculateComplexity. */
  static COMPLEXITY_PATTERN = /\b(?:if|else|for|while|switch|case|catch)\b|\?|&&|\|\|/g;

  /** Accepts a plain JavaScript identifier. */
  static IDENTIFIER_PATTERN = /^[A-Za-z_$][\w$]*$/;

  /** Compiled bundle patterns, keyed by the raw pattern string. */
  static globCache = new Map();

  /**
   * @param {object} [options]
   * @param {number} [options.maxChunkSize=3000] - Max chars per chunk.
   * @param {boolean} [options.includeContext=true] - Prepend relevant import statements to each chunk.
   * @param {number} [options.minFunctionSize=40] - Functions whose source is not longer than this are skipped.
   */
  constructor(options = {}) {
    this.options = {
      maxChunkSize: 3000,
      includeContext: true,
      minFunctionSize: 40,
      ...options
    };

    // Initialize tree-sitter parsers, one per grammar
    this.parsers = {
      javascript: new Parser(),
      typescript: new Parser(),
      tsx: new Parser()
    };
    this.parsers.javascript.setLanguage(JavaScript);
    this.parsers.typescript.setLanguage(TypeScript.typescript);
    this.parsers.tsx.setLanguage(TypeScript.tsx);

    this.heuristicsManager = new HeuristicsManager();
  }

  /**
   * Pick the parser for a file based on its extension.
   *
   * @param {string} filePath
   * @returns {object} The TypeScript parser for .ts/.mts/.cts, the TSX parser
   *   for .tsx, and the JavaScript parser for everything else.
   */
  getParser(filePath) {
    switch (extname(filePath)) {
      case '.ts':
      case '.mts':
      case '.cts':
        return this.parsers.typescript;
      case '.tsx':
        return this.parsers.tsx;
      default:
        return this.parsers.javascript;
    }
  }

  /**
   * Main entry point - extract semantic chunks from a list of project files.
   *
   * A file that fails to process is logged with console.warn and skipped.
   *
   * @param {string} projectPath - Directory the file paths are relative to.
   * @param {string[]} [files=[]] - Relative file paths to process.
   * @param {object|null} [bundleConfig=null] - Stored on the instance and read by getFileBundles.
   * @returns {Promise<{summary: {totalFiles: number, totalChunks: number, averageSize: number}, chunks: object[]}>}
   */
  async extractSemanticChunks(projectPath, files = [], bundleConfig = null) {
    console.log('🔪 Starting surgical semantic splitting via tree-sitter...');
    console.log(`📂 Project path: ${projectPath}`);

    this.bundleConfig = bundleConfig;
    console.log(`📁 Processing ${files.length} filtered files`);

    const allChunks = [];

    for (const filePath of files) {
      try {
        allChunks.push(...this.processFile(filePath, projectPath));
      } catch (error) {
        console.warn(`Failed to process ${filePath}: ${error.message}`);
      }
    }

    console.log(`🧩 Created ${allChunks.length} semantic chunks across project`);

    const totalChars = allChunks.reduce((sum, chunk) => sum + chunk.code.length, 0);
    return {
      summary: {
        totalFiles: files.length,
        totalChunks: allChunks.length,
        averageSize: allChunks.length > 0 ? totalChars / allChunks.length : 0
      },
      chunks: allChunks
    };
  }

  /**
   * Parse one file and turn its functions into chunks.
   *
   * @param {string} relativePath - File path relative to projectPath.
   * @param {string} projectPath
   * @returns {object[]} Chunks for the file; empty when the file does not exist.
   */
  processFile(relativePath, projectPath) {
    const fullPath = join(projectPath, relativePath);
    if (!existsSync(fullPath)) return [];

    const content = readFileSync(fullPath, 'utf8');
    const root = this.getParser(relativePath).parse(content).rootNode;

    const elements = {
      functions: [],
      types: [],
      imports: this.extractImports(root, content, relativePath)
    };

    // Traverse AST for functions and types
    this.traverse(root, content, relativePath, elements);

    // Create chunks from elements
    return this.createChunks(elements, content, relativePath);
  }

  /**
   * Walk the syntax tree, collecting functions and type definitions.
   *
   * Function declarations and method definitions are captured whole and not
   * descended into; arrow functions are captured and still descended into.
   *
   * @param {object} node - Current syntax node.
   * @param {string} content - Full source text of the file.
   * @param {string} filePath
   * @param {{functions: object[], types: object[]}} elements - Mutated in place.
   */
  traverse(node, content, filePath, elements) {
    const isCapturedWhole = node.type === 'function_declaration' || node.type === 'method_definition';

    // Detect function-like nodes
    if (isCapturedWhole || node.type === 'arrow_function') {
      const func = this.mapFunctionNode(node, content, filePath);
      if (func && func.code.length > this.options.minFunctionSize) {
        elements.functions.push(func);
      }
    }

    // Detect Type Definitions (TS)
    if (node.type === 'interface_declaration' || node.type === 'type_alias_declaration') {
      const typeDef = this.mapTypeNode(node, content, filePath);
      if (typeDef) elements.types.push(typeDef);
    }

    // Recurse unless we've already captured the block (like a function body)
    if (isCapturedWhole) return;
    for (let i = 0; i < node.namedChildCount; i++) {
      this.traverse(node.namedChild(i), content, filePath, elements);
    }
  }

  /**
   * Describe a function-like node.
   *
   * Declarations and methods take their name from the `name` field. Arrow
   * functions take it from the enclosing variable declarator, object pair key
   * or assignment target, and are otherwise called 'anonymous'.
   *
   * @param {object} node - function_declaration, method_definition or arrow_function node.
   * @param {string} content - Full source text of the file.
   * @param {string} filePath
   * @returns {{name: string, type: string, filePath: string, startLine: number, code: string, isExported: boolean, isAsync: boolean}}
   */
  mapFunctionNode(node, content, filePath) {
    const textOf = (target) => content.slice(target.startIndex, target.endIndex);
    let name = 'anonymous';

    if (node.type === 'function_declaration' || node.type === 'method_definition') {
      const nameNode = node.childForFieldName('name');
      if (nameNode) name = textOf(nameNode);
    } else if (node.type === 'arrow_function') {
      // Which field of the parent carries the name:
      //   const foo = () => {}   -> variable_declarator.name
      //   { foo: () => {} }      -> pair.key
      //   this.foo = () => {}    -> assignment_expression.left
      const nameFieldByParentType = {
        variable_declarator: 'name',
        pair: 'key',
        assignment_expression: 'left'
      };
      const parent = node.parent;
      const field = parent ? nameFieldByParentType[parent.type] : undefined;
      if (field) {
        const nameNode = parent.childForFieldName(field);
        if (nameNode) name = textOf(nameNode);
      }
    }

    return {
      name,
      type: node.type,
      filePath,
      startLine: node.startPosition.row + 1,
      code: textOf(node),
      isExported: this.isExported(node),
      isAsync: this.hasAsyncModifier(node)
    };
  }

  /**
   * Report whether a function-like node carries the `async` keyword.
   *
   * Looks for a direct child token of type 'async' rather than searching the
   * source text, so a function that merely mentions "async" in its body or
   * name is not flagged.
   * NOTE(review): relies on the grammars exposing `async` as a direct child
   * token of the function node - confirm against the installed grammars.
   *
   * @param {object} node
   * @returns {boolean}
   */
  hasAsyncModifier(node) {
    for (let i = 0; i < node.childCount; i++) {
      if (node.child(i).type === 'async') return true;
    }
    return false;
  }

  /**
   * Describe an interface or type alias node.
   *
   * @param {object} node
   * @param {string} content - Full source text of the file.
   * @param {string} filePath
   * @returns {object|null} null when the node has no `name` field.
   */
  mapTypeNode(node, content, filePath) {
    const nameNode = node.childForFieldName('name');
    if (!nameNode) return null;

    return {
      name: content.slice(nameNode.startIndex, nameNode.endIndex),
      type: node.type,
      filePath,
      startLine: node.startPosition.row + 1,
      code: content.slice(node.startIndex, node.endIndex),
      isExported: this.isExported(node)
    };
  }

  /**
   * Collect the import statements that are direct children of the root node.
   *
   * @param {object} root - Root syntax node.
   * @param {string} content - Full source text of the file.
   * @param {string} filePath
   * @returns {{statement: string, filePath: string}[]}
   */
  extractImports(root, content, filePath) {
    const imports = [];
    for (let i = 0; i < root.namedChildCount; i++) {
      const node = root.namedChild(i);
      if (node.type === 'import_statement') {
        imports.push({
          statement: content.slice(node.startIndex, node.endIndex),
          filePath
        });
      }
    }
    return imports;
  }

  /**
   * Report whether the node or any of its ancestors is an export statement.
   *
   * @param {object} node
   * @returns {boolean}
   */
  isExported(node) {
    for (let current = node; current; current = current.parent) {
      if (current.type === 'export_statement') return true;
    }
    return false;
  }

  /**
   * Build one chunk per collected function.
   *
   * Purpose, business domain and technical patterns come from the
   * HeuristicsManager. When `includeContext` is set, import statements judged
   * relevant by isImportRelevant are prepended to the function source.
   *
   * @param {{functions: object[], types: object[], imports: object[]}} elements
   * @param {string} content - Full source text of the file (currently unused).
   * @param {string} filePath
   * @returns {object[]}
   */
  createChunks(elements, content, filePath) {
    const pathParts = filePath.toLowerCase().split(/[\\\/]/);

    // The same for every chunk of the file, so computed once.
    const importStatements = elements.imports.map((imp) => imp.statement);
    const typeNames = elements.types.map((type) => type.name);
    const bundles = this.getFileBundles(filePath);

    return elements.functions.map((func) => {
      // Pass full context to heuristics
      const heuristicContext = {
        ...func,
        includes: elements,
        pathParts
      };

      const purpose = this.heuristicsManager.determinePurpose(heuristicContext);
      const businessDomain = this.heuristicsManager.inferBusinessDomains(heuristicContext);
      const technicalPatterns = this.heuristicsManager.inferTechnicalPatterns(heuristicContext);
      const tags = this.generateTags(func);

      let chunkCode = '';
      if (this.options.includeContext) {
        const relevantImports = importStatements
          .filter((statement) => this.isImportRelevant(statement, func.code))
          .join('\n');

        if (relevantImports) chunkCode += `${relevantImports}\n\n`;
      }
      chunkCode += func.code;

      return {
        id: `${filePath}:${func.name}:${func.startLine}`,
        name: func.name,
        filePath,
        type: 'function',
        subtype: func.type,
        code: chunkCode,
        startLine: func.startLine,
        complexity: this.calculateComplexity(func.code),
        purpose,
        tags,
        businessDomain,
        technicalPatterns,
        includes: {
          imports: [...importStatements],
          types: [...typeNames]
        },
        bundles: [...bundles]
      };
    });
  }

  /**
   * List the local names an import statement brings into scope.
   *
   * Handles default, namespace (`* as ns`), named and aliased (`a as b`)
   * imports, including a leading `type` modifier. Side-effect imports such as
   * `import './x.css'` yield no names.
   *
   * @param {string} importStatement
   * @returns {string[]}
   */
  extractImportedNames(importStatement) {
    const clauseMatch = importStatement.match(/^\s*import\s+(?:type\s+)?([\s\S]+?)\s*from\s*['"]/);
    if (!clauseMatch) return [];

    const clause = clauseMatch[1];
    const names = [];
    const keepIfIdentifier = (candidate) => {
      if (SemanticSplitter.IDENTIFIER_PATTERN.test(candidate)) names.push(candidate);
    };

    const namedBlock = clause.match(/\{([^}]*)\}/);
    if (namedBlock) {
      for (const specifier of namedBlock[1].split(',')) {
        // For `a as b` the local binding is `b`.
        const local = specifier.trim().split(/\s+as\s+/).pop().trim();
        keepIfIdentifier(local.replace(/^type\s+/, ''));
      }
    }

    // What is left outside the braces: default and/or namespace import.
    const outsideBraces = namedBlock ? clause.replace(namedBlock[0], '') : clause;
    for (const part of outsideBraces.split(',')) {
      keepIfIdentifier(part.trim().replace(/^\*\s*as\s+/, ''));
    }

    return names;
  }

  /**
   * Heuristic: does the function use any name from the import?
   *
   * Names are matched as whole identifiers, so importing `foo` is not
   * considered relevant to code that only mentions `foobar`.
   *
   * @param {string} importStatement
   * @param {string} functionCode
   * @returns {boolean}
   */
  isImportRelevant(importStatement, functionCode) {
    return this.extractImportedNames(importStatement).some((name) => {
      const escaped = name.replace(/\$/g, '\\$&');
      return new RegExp(`(?<![\\w$])${escaped}(?![\\w$])`).test(functionCode);
    });
  }

  /**
   * Score code by counting branching keywords and operators.
   *
   * The score starts at 1 and gains one point per occurrence of
   * if / else / for / while / switch / case / catch (whole words only)
   * and of `?`, `&&`, `||`.
   *
   * @param {string} code
   * @returns {{score: number, level: 'low'|'medium'|'high'}} Level is 'low' below 5, 'medium' below 15, otherwise 'high'.
   */
  calculateComplexity(code) {
    const occurrences = code.match(SemanticSplitter.COMPLEXITY_PATTERN) || [];
    const score = 1 + occurrences.length;

    let level = 'high';
    if (score < 5) level = 'low';
    else if (score < 15) level = 'medium';

    return { score, level };
  }

  /**
   * Build the tag list for a function: its node type plus 'exported',
   * 'async' and 'large' (source longer than 2000 chars) where applicable.
   *
   * @param {{type: string, isExported: boolean, isAsync: boolean, code: string}} func
   * @returns {string[]}
   */
  generateTags(func) {
    const tags = [func.type];
    if (func.isExported) tags.push('exported');
    if (func.isAsync) tags.push('async');
    if (func.code.length > 2000) tags.push('large');
    return tags;
  }

  /**
   * Names of the configured bundles whose patterns match the file.
   *
   * The bundle called 'master' is always skipped. Entries whose value is not
   * an array of patterns are ignored.
   *
   * @param {string} filePath
   * @returns {string[]}
   */
  getFileBundles(filePath) {
    if (!this.bundleConfig?.bundles) return [];

    const bundles = [];
    for (const [name, patterns] of Object.entries(this.bundleConfig.bundles)) {
      if (name === 'master' || !Array.isArray(patterns)) continue;
      if (patterns.some((pattern) => this.matchesPattern(filePath, pattern))) {
        bundles.push(name);
      }
    }
    return bundles;
  }

  /**
   * Test a file path against a glob-style bundle pattern.
   *
   * `*` matches within one path segment, `**` matches across segments, and a
   * `**` that forms a whole segment (as in `src/**` + `/*.js`) also matches
   * zero directories. Every other character, including `.`, `+` and `?`, is
   * matched literally.
   *
   * @param {string} filePath
   * @param {string} pattern
   * @returns {boolean}
   */
  matchesPattern(filePath, pattern) {
    let compiled = SemanticSplitter.globCache.get(pattern);
    if (!compiled) {
      compiled = SemanticSplitter.globToRegExp(pattern);
      SemanticSplitter.globCache.set(pattern, compiled);
    }
    return compiled.test(filePath);
  }

  /**
   * Translate a glob pattern into an anchored regular expression.
   *
   * Works character by character so that literal characters are escaped and
   * wildcards are expanded independently of each other.
   *
   * @param {string} pattern
   * @returns {RegExp}
   */
  static globToRegExp(pattern) {
    let source = '';
    for (let i = 0; i < pattern.length; i++) {
      const ch = pattern[i];

      if (ch !== '*') {
        source += ch.replace(/[.+?^${}()|[\]\\]/g, '\\$&');
        continue;
      }

      if (pattern[i + 1] !== '*') {
        source += '[^/]*';
        continue;
      }

      // Double star.
      const startsSegment = i === 0 || pattern[i - 1] === '/';
      i += 1;
      if (startsSegment && pattern[i + 1] === '/') {
        // A whole `**/` segment: any number of directories, including none.
        i += 1;
        source += '(?:.*/)?';
      } else {
        source += '.*';
      }
    }
    return new RegExp(`^${source}$`);
  }
}
|
|
package/lib/simple-vector-store.js
DELETED
|
@@ -1,108 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Simple Vector Store with SQLite Persistence
|
|
3
|
-
* Powered by Transformers.js for local embeddings
|
|
4
|
-
* Persists vectors to SQLite for instant startup
|
|
5
|
-
*/
|
|
6
|
-
|
|
7
|
-
import { pipeline } from '@xenova/transformers';
|
|
8
|
-
|
|
9
|
-
export default class SimpleVectorStore {
  /**
   * @param {object} databaseManager - Persistence layer. This class calls its
   *   `getEmbedding`, `saveEmbedding` and `mapChunkRow` methods and prepares
   *   statements on its `db` property.
   * @param {object} [options]
   * @param {string} [options.modelName='Xenova/all-MiniLM-L6-v2'] - Model passed to the feature-extraction pipeline.
   */
  constructor(databaseManager, options = {}) {
    this.db = databaseManager;
    this.modelName = options.modelName || 'Xenova/all-MiniLM-L6-v2';
    this.pipe = null;
    this.initialized = false;
    // In-flight init() call, shared so concurrent callers load the model once.
    this.initPromise = null;
  }

  /**
   * Load the embedding pipeline. Does nothing once initialised; callers that
   * arrive while loading is in progress wait for the same load.
   *
   * @returns {Promise<void>}
   */
  async init() {
    if (this.initialized) return;

    if (!this.initPromise) {
      this.initPromise = (async () => {
        console.log(`🤖 Initializing local RAG engine (${this.modelName})...`);
        this.pipe = await pipeline('feature-extraction', this.modelName);
        this.initialized = true;
        console.log('✅ Local RAG engine ready');
      })().catch((error) => {
        // Forget the failed attempt so a later call can try again.
        this.initPromise = null;
        throw error;
      });
    }

    await this.initPromise;
  }

  /**
   * Embed a piece of text with mean pooling and normalisation.
   *
   * @param {string} text
   * @returns {Promise<Float32Array>}
   */
  async generateEmbedding(text) {
    await this.init();
    const output = await this.pipe(text, { pooling: 'mean', normalize: true });
    return new Float32Array(output.data);
  }

  /**
   * Upsert a chunk's embedding to persistence.
   *
   * Returns the stored embedding when the database already has one for the
   * chunk id; otherwise embeds the chunk's name, purpose and code, saves the
   * result and returns it.
   *
   * @param {{id: string, name?: string, purpose?: string, code?: string}} chunk
   * @returns {Promise<*>} The stored value from `getEmbedding`, or the new Float32Array.
   */
  async upsertChunk(chunk) {
    const chunkId = chunk.id;

    // Check if we already have it in DB
    const existing = this.db.getEmbedding(chunkId);
    if (existing) return existing;

    // Missing fields are left out rather than embedded as the text "undefined".
    const textToEmbed = [chunk.name, chunk.purpose, chunk.code].filter(Boolean).join(' ');
    const embedding = await this.generateEmbedding(textToEmbed);

    // Save to SQLite
    this.db.saveEmbedding(chunkId, embedding, this.modelName);
    return embedding;
  }

  /**
   * Semantic search across persistent embeddings.
   *
   * Compares the query embedding with every stored embedding for the current
   * model and returns the best matches, most similar first. Embeddings whose
   * chunk row no longer exists are skipped.
   *
   * @param {string} query
   * @param {object} [options]
   * @param {number} [options.limit=10] - Maximum number of results.
   * @param {number} [options.threshold=0.5] - Minimum cosine similarity.
   * @returns {Promise<object[]>} Mapped chunk rows, each with a `similarity` property.
   */
  async search(query, options = {}) {
    const { limit = 10, threshold = 0.5 } = options;
    const queryEmbedding = await this.generateEmbedding(query);

    // Load all embeddings from DB
    const rows = this.db.db
      .prepare('SELECT chunk_id, embedding FROM vector_embeddings WHERE model_name = ?')
      .all(this.modelName);

    const candidates = [];
    const batchSize = 100;

    // Score in batches, yielding between them so other work can run.
    for (let start = 0; start < rows.length; start += batchSize) {
      const end = Math.min(start + batchSize, rows.length);

      for (let i = start; i < end; i++) {
        const row = rows[i];
        const similarity = this.cosineSimilarity(queryEmbedding, this.toFloat32Array(row.embedding));
        if (similarity >= threshold) {
          candidates.push({ chunkId: row.chunk_id, similarity });
        }
      }

      if (end < rows.length) {
        await new Promise((resolve) => setImmediate(resolve));
      }
    }

    candidates.sort((a, b) => b.similarity - a.similarity);

    // Prepared once and reused for every lookup.
    const chunkLookup = this.db.db.prepare('SELECT * FROM semantic_chunks WHERE id = ?');
    const hits = [];
    for (const candidate of candidates) {
      if (hits.length >= limit) break;

      const chunkRow = chunkLookup.get(candidate.chunkId);
      if (!chunkRow) continue; // embedding without a chunk row

      hits.push({
        ...this.db.mapChunkRow(chunkRow),
        similarity: candidate.similarity
      });
    }
    return hits;
  }

  /**
   * View a stored embedding BLOB as 32-bit floats.
   *
   * A Float32Array view needs a byte offset that is a multiple of 4; when the
   * BLOB's offset is not, its bytes are copied into a fresh buffer first.
   * Trailing bytes that do not form a whole float are ignored.
   *
   * @param {Uint8Array} blob - Bytes read from the database (a Node Buffer qualifies).
   * @returns {Float32Array}
   */
  toFloat32Array(blob) {
    const usableBytes = blob.byteLength - (blob.byteLength % 4);

    if (blob.byteOffset % 4 === 0) {
      return new Float32Array(blob.buffer, blob.byteOffset, usableBytes / 4);
    }

    const aligned = new Uint8Array(usableBytes);
    aligned.set(new Uint8Array(blob.buffer, blob.byteOffset, usableBytes));
    return new Float32Array(aligned.buffer);
  }

  /**
   * Cosine similarity of two vectors.
   *
   * @param {ArrayLike<number>} vecA
   * @param {ArrayLike<number>} vecB
   * @returns {number} 0 when the lengths differ or either vector has zero
   *   magnitude; otherwise dot(A, B) / (|A| * |B|).
   */
  cosineSimilarity(vecA, vecB) {
    if (vecA.length !== vecB.length) return 0;

    let dotProduct = 0;
    let normA = 0;
    let normB = 0;
    for (let i = 0; i < vecA.length; i++) {
      dotProduct += vecA[i] * vecB[i];
      normA += vecA[i] * vecA[i];
      normB += vecB[i] * vecB[i];
    }

    const magnitude = Math.sqrt(normA) * Math.sqrt(normB);
    // Dividing by zero would produce NaN, which compares false with everything.
    return magnitude === 0 ? 0 : dotProduct / magnitude;
  }
}
|